// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#include <openssl/asm_base.h>

#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__ELF__)
#include <openssl/arm_arch.h>

#if __ARM_MAX_ARCH__>=7
.text
.arch   armv8-a+crypto

// Round-constant table used by the key-schedule expansion:
//   row 0: rcon = 0x01 splatted across four lanes
//   row 1: a tbl permutation mask that rotates each word and splats byte lanes
//   row 2: rcon = 0x1b (used after rcon overflows 0x80 via the shl-by-1 chain)
.section        .rodata
.align  5
.Lrcon:
.long   0x01,0x01,0x01,0x01
.long   0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d     // rotate-n-splat
.long   0x1b,0x1b,0x1b,0x1b

.text

//----------------------------------------------------------------------
// int aes_hw_set_encrypt_key(const uint8_t *user_key,  // x0
//                            unsigned bits,            // w1 (128/192/256)
//                            AES_KEY *key)             // x2
// Expands the user key into the round-key schedule at x2 and stores the
// round count (10/12/14) at the end.  Returns 0 in x0 on success, -2 for
// an unsupported bit length.  Falls through from .Lenc_key when tail-called
// by aes_hw_set_decrypt_key below.
//----------------------------------------------------------------------
.globl  aes_hw_set_encrypt_key
.hidden aes_hw_set_encrypt_key
.type   aes_hw_set_encrypt_key,%function
.align  5
aes_hw_set_encrypt_key:
.Lenc_key:
        // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
        AARCH64_VALID_CALL_TARGET
        stp     x29,x30,[sp,#-16]!
        add     x29,sp,#0
        mov     x3,#-2                  // default return value: error
        cmp     w1,#128
        b.lt    .Lenc_key_abort
        cmp     w1,#256
        b.gt    .Lenc_key_abort
        tst     w1,#0x3f                // must be a multiple of 64 bits
        b.ne    .Lenc_key_abort

        adrp    x3,.Lrcon               // x3 = &.Lrcon (page + low 12 bits)
        add     x3,x3,:lo12:.Lrcon
        cmp     w1,#192

        eor     v0.16b,v0.16b,v0.16b    // v0 = 0 (used as a null round key / shifter)
        ld1     {v3.16b},[x0],#16       // first 128 bits of the user key
        mov     w1,#8                   // reuse w1 as the round-iteration counter
        ld1     {v1.4s,v2.4s},[x3],#32  // v1 = rcon, v2 = rotate-n-splat mask

        b.lt    .Loop128
        b.eq    .L192
        b       .L256

.align  4
// AES-128: ten rounds of the classic expand step, one 16-byte round key each.
.Loop128:
        tbl     v6.16b,{v3.16b},v2.16b  // RotWord+splat of the last word
        ext     v5.16b,v0.16b,v3.16b,#12
        st1     {v3.4s},[x2],#16        // emit current round key
        aese    v6.16b,v0.16b           // SubBytes (AESE with zero key = SubBytes∘ShiftRows)
        subs    w1,w1,#1

        eor     v3.16b,v3.16b,v5.16b    // fold the previous words in, one
        ext     v5.16b,v0.16b,v5.16b,#12        // 32-bit shift at a time
        eor     v3.16b,v3.16b,v5.16b
        ext     v5.16b,v0.16b,v5.16b,#12
        eor     v6.16b,v6.16b,v1.16b    // add round constant
        eor     v3.16b,v3.16b,v5.16b
        shl     v1.16b,v1.16b,#1        // next rcon = rcon << 1
        eor     v3.16b,v3.16b,v6.16b
        b.ne    .Loop128

        ld1     {v1.4s},[x3]            // switch to the 0x1b rcon row

        tbl     v6.16b,{v3.16b},v2.16b
        ext     v5.16b,v0.16b,v3.16b,#12
        st1     {v3.4s},[x2],#16
        aese    v6.16b,v0.16b

        eor     v3.16b,v3.16b,v5.16b
        ext     v5.16b,v0.16b,v5.16b,#12
        eor     v3.16b,v3.16b,v5.16b
        ext     v5.16b,v0.16b,v5.16b,#12
        eor     v6.16b,v6.16b,v1.16b
        eor     v3.16b,v3.16b,v5.16b
        shl     v1.16b,v1.16b,#1
        eor     v3.16b,v3.16b,v6.16b

        tbl     v6.16b,{v3.16b},v2.16b
        ext     v5.16b,v0.16b,v3.16b,#12
        st1     {v3.4s},[x2],#16
        aese    v6.16b,v0.16b

        eor     v3.16b,v3.16b,v5.16b
        ext     v5.16b,v0.16b,v5.16b,#12
        eor     v3.16b,v3.16b,v5.16b
        ext     v5.16b,v0.16b,v5.16b,#12
        eor     v6.16b,v6.16b,v1.16b
        eor     v3.16b,v3.16b,v5.16b
        eor     v3.16b,v3.16b,v6.16b
        st1     {v3.4s},[x2]            // final (11th) round key
        add     x2,x2,#0x50             // advance to the rounds-count slot

        mov     w12,#10                 // AES-128 => 10 rounds
        b       .Ldone

.align  4
// AES-192: key material arrives as 128+64 bits; each iteration produces
// one-and-a-half round keys.
.L192:
        ld1     {v4.8b},[x0],#8         // remaining 64 bits of the user key
        movi    v6.16b,#8               // borrow v6.16b
        st1     {v3.4s},[x2],#16
        sub     v2.16b,v2.16b,v6.16b    // adjust the mask for the 64-bit tail

.Loop192:
        tbl     v6.16b,{v4.16b},v2.16b
        ext     v5.16b,v0.16b,v3.16b,#12
        st1     {v4.8b},[x2],#8
        aese    v6.16b,v0.16b
        subs    w1,w1,#1

        eor     v3.16b,v3.16b,v5.16b
        ext     v5.16b,v0.16b,v5.16b,#12
        eor     v3.16b,v3.16b,v5.16b
        ext     v5.16b,v0.16b,v5.16b,#12
        eor     v3.16b,v3.16b,v5.16b

        dup     v5.4s,v3.s[3]
        eor     v5.16b,v5.16b,v4.16b
        eor     v6.16b,v6.16b,v1.16b    // add round constant
        ext     v4.16b,v0.16b,v4.16b,#12
        shl     v1.16b,v1.16b,#1
        eor     v4.16b,v4.16b,v5.16b
        eor     v3.16b,v3.16b,v6.16b
        eor     v4.16b,v4.16b,v6.16b
        st1     {v3.4s},[x2],#16
        b.ne    .Loop192

        mov     w12,#12                 // AES-192 => 12 rounds
        add     x2,x2,#0x20
        b       .Ldone

.align  4
// AES-256: two 128-bit halves expanded alternately; the odd step uses a
// plain splat (no rotate) of the last word.
.L256:
        ld1     {v4.16b},[x0]           // second 128 bits of the user key
        mov     w1,#7
        mov     w12,#14                 // AES-256 => 14 rounds
        st1     {v3.4s},[x2],#16

.Loop256:
        tbl     v6.16b,{v4.16b},v2.16b
        ext     v5.16b,v0.16b,v3.16b,#12
        st1     {v4.4s},[x2],#16
        aese    v6.16b,v0.16b
        subs    w1,w1,#1

        eor     v3.16b,v3.16b,v5.16b
        ext     v5.16b,v0.16b,v5.16b,#12
        eor     v3.16b,v3.16b,v5.16b
        ext     v5.16b,v0.16b,v5.16b,#12
        eor     v6.16b,v6.16b,v1.16b
        eor     v3.16b,v3.16b,v5.16b
        shl     v1.16b,v1.16b,#1
        eor     v3.16b,v3.16b,v6.16b
        st1     {v3.4s},[x2],#16
        b.eq    .Ldone

        dup     v6.4s,v3.s[3]           // just splat
        ext     v5.16b,v0.16b,v4.16b,#12
        aese    v6.16b,v0.16b

        eor     v4.16b,v4.16b,v5.16b
        ext     v5.16b,v0.16b,v5.16b,#12
        eor     v4.16b,v4.16b,v5.16b
        ext     v5.16b,v0.16b,v5.16b,#12
        eor     v4.16b,v4.16b,v5.16b

        eor     v4.16b,v4.16b,v6.16b
        b       .Loop256

.Ldone:
        str     w12,[x2]                // store the round count
        mov     x3,#0                   // success

.Lenc_key_abort:
        mov     x0,x3                   // return value
        ldr     x29,[sp],#16
        ret
.size   aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key

//----------------------------------------------------------------------
// int aes_hw_set_decrypt_key(const uint8_t *user_key,  // x0
//                            unsigned bits,            // w1
//                            AES_KEY *key)             // x2
// Builds the encryption schedule via .Lenc_key, then converts it in place:
// the round keys are reversed end-for-end and the inner ones are passed
// through AESIMC (InvMixColumns) for use with the equivalent-inverse-cipher
// AESD path.  Returns 0 on success, propagates .Lenc_key's error otherwise.
//----------------------------------------------------------------------
.globl  aes_hw_set_decrypt_key
.hidden aes_hw_set_decrypt_key
.type   aes_hw_set_decrypt_key,%function
.align  5
aes_hw_set_decrypt_key:
        AARCH64_SIGN_LINK_REGISTER
        stp     x29,x30,[sp,#-16]!
        add     x29,sp,#0
        bl      .Lenc_key

        cmp     x0,#0
        b.ne    .Ldec_key_abort

        sub     x2,x2,#240              // restore original x2
        mov     x4,#-16
        add     x0,x2,x12,lsl#4         // end of key schedule

        // Swap first/last round keys (no AESIMC on the outermost pair).
        ld1     {v0.4s},[x2]
        ld1     {v1.4s},[x0]
        st1     {v0.4s},[x0],x4
        st1     {v1.4s},[x2],#16

// Walk the two pointers toward each other, swapping and applying
// InvMixColumns to every interior round key.
.Loop_imc:
        ld1     {v0.4s},[x2]
        ld1     {v1.4s},[x0]
        aesimc  v0.16b,v0.16b
        aesimc  v1.16b,v1.16b
        st1     {v0.4s},[x0],x4
        st1     {v1.4s},[x2],#16
        cmp     x0,x2
        b.hi    .Loop_imc

        // Middle round key (odd count): transform in place.
        ld1     {v0.4s},[x2]
        aesimc  v0.16b,v0.16b
        st1     {v0.4s},[x0]

        eor     x0,x0,x0                // return value
.Ldec_key_abort:
        ldp     x29,x30,[sp],#16
        AARCH64_VALIDATE_LINK_REGISTER
        ret
.size   aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key

//----------------------------------------------------------------------
// void aes_hw_encrypt(const uint8_t *in,   // x0
//                     uint8_t *out,        // x1
//                     const AES_KEY *key)  // x2
// Encrypts a single 16-byte block.  Rounds are consumed two at a time;
// w3 holds rounds-2 so the loop exits with the last two keys pre-loaded.
//----------------------------------------------------------------------
.globl  aes_hw_encrypt
.hidden aes_hw_encrypt
.type   aes_hw_encrypt,%function
.align  5
aes_hw_encrypt:
        AARCH64_VALID_CALL_TARGET
        ldr     w3,[x2,#240]            // round count lives at key+240
        ld1     {v0.4s},[x2],#16        // rndkey[0]
        ld1     {v2.16b},[x0]           // plaintext block
        sub     w3,w3,#2
        ld1     {v1.4s},[x2],#16        // rndkey[1]

.Loop_enc:
        aese    v2.16b,v0.16b
        aesmc   v2.16b,v2.16b
        ld1     {v0.4s},[x2],#16
        subs    w3,w3,#2
        aese    v2.16b,v1.16b
        aesmc   v2.16b,v2.16b
        ld1     {v1.4s},[x2],#16
        b.gt    .Loop_enc

        aese    v2.16b,v0.16b
        aesmc   v2.16b,v2.16b
        ld1     {v0.4s},[x2]            // final round key
        aese    v2.16b,v1.16b           // last round: no MixColumns
        eor     v2.16b,v2.16b,v0.16b    // final AddRoundKey

        st1     {v2.16b},[x1]
        ret
.size   aes_hw_encrypt,.-aes_hw_encrypt

//----------------------------------------------------------------------
// void aes_hw_decrypt(const uint8_t *in,   // x0
//                     uint8_t *out,        // x1
//                     const AES_KEY *key)  // x2
// Decrypts a single 16-byte block; mirror image of aes_hw_encrypt using
// AESD/AESIMC against a schedule produced by aes_hw_set_decrypt_key.
//----------------------------------------------------------------------
.globl  aes_hw_decrypt
.hidden aes_hw_decrypt
.type   aes_hw_decrypt,%function
.align  5
aes_hw_decrypt:
        AARCH64_VALID_CALL_TARGET
        ldr     w3,[x2,#240]
        ld1     {v0.4s},[x2],#16
        ld1     {v2.16b},[x0]           // ciphertext block
        sub     w3,w3,#2
        ld1     {v1.4s},[x2],#16

.Loop_dec:
        aesd    v2.16b,v0.16b
        aesimc  v2.16b,v2.16b
        ld1     {v0.4s},[x2],#16
        subs    w3,w3,#2
        aesd    v2.16b,v1.16b
        aesimc  v2.16b,v2.16b
        ld1     {v1.4s},[x2],#16
        b.gt    .Loop_dec

        aesd    v2.16b,v0.16b
        aesimc  v2.16b,v2.16b
        ld1     {v0.4s},[x2]
        aesd    v2.16b,v1.16b           // last round: no InvMixColumns
        eor     v2.16b,v2.16b,v0.16b

        st1     {v2.16b},[x1]
        ret
.size   aes_hw_decrypt,.-aes_hw_decrypt

//----------------------------------------------------------------------
// void aes_hw_cbc_encrypt(const uint8_t *in,   // x0
//                         uint8_t *out,        // x1
//                         size_t len,          // x2 (bytes; truncated to 16n)
//                         const AES_KEY *key,  // x3
//                         uint8_t ivec[16],    // x4 (updated on exit)
//                         int enc)             // w5 (nonzero = encrypt)
// CBC mode.  Encryption is inherently serial (one block in flight, with
// dedicated fast paths for 128-bit and 192-bit keys); decryption is
// interleaved three blocks at a time with a one/two-block tail.
//----------------------------------------------------------------------
.globl  aes_hw_cbc_encrypt
.hidden aes_hw_cbc_encrypt
.type   aes_hw_cbc_encrypt,%function
.align  5
aes_hw_cbc_encrypt:
        // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
        AARCH64_VALID_CALL_TARGET
        stp     x29,x30,[sp,#-16]!
        add     x29,sp,#0
        subs    x2,x2,#16
        mov     x8,#16                  // input stride; zeroed on the last block
        b.lo    .Lcbc_abort
        csel    x8,xzr,x8,eq

        cmp     w5,#0                   // en- or decrypting?
        ldr     w5,[x3,#240]
        and     x2,x2,#-16
        ld1     {v6.16b},[x4]           // IV
        ld1     {v0.16b},[x0],x8        // first input block

        ld1     {v16.4s,v17.4s},[x3]    // load key schedule...
        sub     w5,w5,#6
        add     x7,x3,x5,lsl#4          // pointer to last 7 round keys
        sub     w5,w5,#2
        ld1     {v18.4s,v19.4s},[x7],#32
        ld1     {v20.4s,v21.4s},[x7],#32
        ld1     {v22.4s,v23.4s},[x7],#32
        ld1     {v7.4s},[x7]            // very last round key

        add     x7,x3,#32
        mov     w6,w5
        b.eq    .Lcbc_dec

        cmp     w5,#2
        eor     v0.16b,v0.16b,v6.16b    // block ^= IV
        eor     v5.16b,v16.16b,v7.16b   // fold rndkey[0] into the last key:
                                        // lets us XOR input blocks early
        b.eq    .Lcbc_enc128

        // Generic (AES-256, and AES-192 via the .Lcbc_enc192 shortcut) path:
        // stash pointers to the middle round keys so they can be reloaded
        // per iteration without re-walking the schedule.
        ld1     {v2.4s,v3.4s},[x7]
        add     x7,x3,#16
        add     x6,x3,#16*4
        add     x12,x3,#16*5
        aese    v0.16b,v16.16b
        aesmc   v0.16b,v0.16b
        add     x14,x3,#16*6
        add     x3,x3,#16*7
        b       .Lenter_cbc_enc

.align  4
.Loop_cbc_enc:
        aese    v0.16b,v16.16b
        aesmc   v0.16b,v0.16b
        st1     {v6.16b},[x1],#16       // write previous ciphertext block
.Lenter_cbc_enc:
        aese    v0.16b,v17.16b
        aesmc   v0.16b,v0.16b
        aese    v0.16b,v2.16b
        aesmc   v0.16b,v0.16b
        ld1     {v16.4s},[x6]
        cmp     w5,#4
        aese    v0.16b,v3.16b
        aesmc   v0.16b,v0.16b
        ld1     {v17.4s},[x12]
        b.eq    .Lcbc_enc192

        aese    v0.16b,v16.16b
        aesmc   v0.16b,v0.16b
        ld1     {v16.4s},[x14]
        aese    v0.16b,v17.16b
        aesmc   v0.16b,v0.16b
        ld1     {v17.4s},[x3]
        nop

.Lcbc_enc192:
        aese    v0.16b,v16.16b
        aesmc   v0.16b,v0.16b
        subs    x2,x2,#16
        aese    v0.16b,v17.16b
        aesmc   v0.16b,v0.16b
        csel    x8,xzr,x8,eq            // stop advancing input on the last block
        aese    v0.16b,v18.16b
        aesmc   v0.16b,v0.16b
        aese    v0.16b,v19.16b
        aesmc   v0.16b,v0.16b
        ld1     {v16.16b},[x0],x8       // next plaintext block
        aese    v0.16b,v20.16b
        aesmc   v0.16b,v0.16b
        eor     v16.16b,v16.16b,v5.16b  // pre-XOR with rndkey[0]^last
        aese    v0.16b,v21.16b
        aesmc   v0.16b,v0.16b
        ld1     {v17.4s},[x7]           // re-pre-load rndkey[1]
        aese    v0.16b,v22.16b
        aesmc   v0.16b,v0.16b
        aese    v0.16b,v23.16b
        eor     v6.16b,v0.16b,v7.16b    // v6 = ciphertext = next-IV
        b.hs    .Loop_cbc_enc

        st1     {v6.16b},[x1],#16
        b       .Lcbc_done

.align  5
// AES-128 encryption fast path: all 11 round keys stay resident in
// registers, so the loop body has no key reloads.
.Lcbc_enc128:
        ld1     {v2.4s,v3.4s},[x7]
        aese    v0.16b,v16.16b
        aesmc   v0.16b,v0.16b
        b       .Lenter_cbc_enc128
.Loop_cbc_enc128:
        aese    v0.16b,v16.16b
        aesmc   v0.16b,v0.16b
        st1     {v6.16b},[x1],#16
.Lenter_cbc_enc128:
        aese    v0.16b,v17.16b
        aesmc   v0.16b,v0.16b
        subs    x2,x2,#16
        aese    v0.16b,v2.16b
        aesmc   v0.16b,v0.16b
        csel    x8,xzr,x8,eq
        aese    v0.16b,v3.16b
        aesmc   v0.16b,v0.16b
        aese    v0.16b,v18.16b
        aesmc   v0.16b,v0.16b
        aese    v0.16b,v19.16b
        aesmc   v0.16b,v0.16b
        ld1     {v16.16b},[x0],x8
        aese    v0.16b,v20.16b
        aesmc   v0.16b,v0.16b
        aese    v0.16b,v21.16b
        aesmc   v0.16b,v0.16b
        aese    v0.16b,v22.16b
        aesmc   v0.16b,v0.16b
        eor     v16.16b,v16.16b,v5.16b
        aese    v0.16b,v23.16b
        eor     v6.16b,v0.16b,v7.16b
        b.hs    .Loop_cbc_enc128

        st1     {v6.16b},[x1],#16
        b       .Lcbc_done
.align  5
// CBC decryption: three blocks interleaved per iteration.
.Lcbc_dec:
        ld1     {v18.16b},[x0],#16
        subs    x2,x2,#32               // bias
        add     w6,w5,#2
        orr     v3.16b,v0.16b,v0.16b    // keep ciphertext copies for chaining
        orr     v1.16b,v0.16b,v0.16b
        orr     v19.16b,v18.16b,v18.16b
        b.lo    .Lcbc_dec_tail

        orr     v1.16b,v18.16b,v18.16b
        ld1     {v18.16b},[x0],#16
        orr     v2.16b,v0.16b,v0.16b
        orr     v3.16b,v1.16b,v1.16b
        orr     v19.16b,v18.16b,v18.16b

.Loop3x_cbc_dec:
        aesd    v0.16b,v16.16b
        aesimc  v0.16b,v0.16b
        aesd    v1.16b,v16.16b
        aesimc  v1.16b,v1.16b
        aesd    v18.16b,v16.16b
        aesimc  v18.16b,v18.16b
        ld1     {v16.4s},[x7],#16
        subs    w6,w6,#2
        aesd    v0.16b,v17.16b
        aesimc  v0.16b,v0.16b
        aesd    v1.16b,v17.16b
        aesimc  v1.16b,v1.16b
        aesd    v18.16b,v17.16b
        aesimc  v18.16b,v18.16b
        ld1     {v17.4s},[x7],#16
        b.gt    .Loop3x_cbc_dec

        aesd    v0.16b,v16.16b
        aesimc  v0.16b,v0.16b
        aesd    v1.16b,v16.16b
        aesimc  v1.16b,v1.16b
        aesd    v18.16b,v16.16b
        aesimc  v18.16b,v18.16b
        eor     v4.16b,v6.16b,v7.16b    // (prev ciphertext ^ last key) for chaining
        subs    x2,x2,#0x30
        eor     v5.16b,v2.16b,v7.16b
        csel    x6,x2,x6,lo             // x6, w6, is zero at this point
        aesd    v0.16b,v17.16b
        aesimc  v0.16b,v0.16b
        aesd    v1.16b,v17.16b
        aesimc  v1.16b,v1.16b
        aesd    v18.16b,v17.16b
        aesimc  v18.16b,v18.16b
        eor     v17.16b,v3.16b,v7.16b
        add     x0,x0,x6                // x0 is adjusted in such way that
                                        // at exit from the loop v1.16b-v18.16b
                                        // are loaded with last "words"
        orr     v6.16b,v19.16b,v19.16b  // carry last ciphertext as next IV
        mov     x7,x3
        aesd    v0.16b,v20.16b
        aesimc  v0.16b,v0.16b
        aesd    v1.16b,v20.16b
        aesimc  v1.16b,v1.16b
        aesd    v18.16b,v20.16b
        aesimc  v18.16b,v18.16b
        ld1     {v2.16b},[x0],#16
        aesd    v0.16b,v21.16b
        aesimc  v0.16b,v0.16b
        aesd    v1.16b,v21.16b
        aesimc  v1.16b,v1.16b
        aesd    v18.16b,v21.16b
        aesimc  v18.16b,v18.16b
        ld1     {v3.16b},[x0],#16
        aesd    v0.16b,v22.16b
        aesimc  v0.16b,v0.16b
        aesd    v1.16b,v22.16b
        aesimc  v1.16b,v1.16b
        aesd    v18.16b,v22.16b
        aesimc  v18.16b,v18.16b
        ld1     {v19.16b},[x0],#16
        aesd    v0.16b,v23.16b
        aesd    v1.16b,v23.16b
        aesd    v18.16b,v23.16b
        ld1     {v16.4s},[x7],#16       // re-pre-load rndkey[0]
        add     w6,w5,#2
        eor     v4.16b,v4.16b,v0.16b    // un-chain: plaintext = AESD-out ^ prev-CT
        eor     v5.16b,v5.16b,v1.16b
        eor     v18.16b,v18.16b,v17.16b
        ld1     {v17.4s},[x7],#16       // re-pre-load rndkey[1]
        st1     {v4.16b},[x1],#16
        orr     v0.16b,v2.16b,v2.16b    // rotate the pipeline for the next triple
        st1     {v5.16b},[x1],#16
        orr     v1.16b,v3.16b,v3.16b
        st1     {v18.16b},[x1],#16
        orr     v18.16b,v19.16b,v19.16b
        b.hs    .Loop3x_cbc_dec

        cmn     x2,#0x30
        b.eq    .Lcbc_done
        nop

// One or two blocks left; v1/v18 already hold them.
.Lcbc_dec_tail:
        aesd    v1.16b,v16.16b
        aesimc  v1.16b,v1.16b
        aesd    v18.16b,v16.16b
        aesimc  v18.16b,v18.16b
        ld1     {v16.4s},[x7],#16
        subs    w6,w6,#2
        aesd    v1.16b,v17.16b
        aesimc  v1.16b,v1.16b
        aesd    v18.16b,v17.16b
        aesimc  v18.16b,v18.16b
        ld1     {v17.4s},[x7],#16
        b.gt    .Lcbc_dec_tail

        aesd    v1.16b,v16.16b
        aesimc  v1.16b,v1.16b
        aesd    v18.16b,v16.16b
        aesimc  v18.16b,v18.16b
        aesd    v1.16b,v17.16b
        aesimc  v1.16b,v1.16b
        aesd    v18.16b,v17.16b
        aesimc  v18.16b,v18.16b
        aesd    v1.16b,v20.16b
        aesimc  v1.16b,v1.16b
        aesd    v18.16b,v20.16b
        aesimc  v18.16b,v18.16b
        cmn     x2,#0x20                // exactly one block left?
        aesd    v1.16b,v21.16b
        aesimc  v1.16b,v1.16b
        aesd    v18.16b,v21.16b
        aesimc  v18.16b,v18.16b
        eor     v5.16b,v6.16b,v7.16b
        aesd    v1.16b,v22.16b
        aesimc  v1.16b,v1.16b
        aesd    v18.16b,v22.16b
        aesimc  v18.16b,v18.16b
        eor     v17.16b,v3.16b,v7.16b
        aesd    v1.16b,v23.16b
        aesd    v18.16b,v23.16b
        b.eq    .Lcbc_dec_one
        eor     v5.16b,v5.16b,v1.16b
        eor     v17.16b,v17.16b,v18.16b
        orr     v6.16b,v19.16b,v19.16b
        st1     {v5.16b},[x1],#16
        st1     {v17.16b},[x1],#16
        b       .Lcbc_done

.Lcbc_dec_one:
        eor     v5.16b,v5.16b,v18.16b
        orr     v6.16b,v19.16b,v19.16b
        st1     {v5.16b},[x1],#16

.Lcbc_done:
        st1     {v6.16b},[x4]           // write back the updated IV
.Lcbc_abort:
        ldr     x29,[sp],#16
        ret
.size   aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt

//----------------------------------------------------------------------
// void aes_hw_ctr32_encrypt_blocks(const uint8_t *in,   // x0
//                                  uint8_t *out,        // x1
//                                  size_t blocks,       // x2 (16-byte blocks)
//                                  const AES_KEY *key,  // x3
//                                  const uint8_t ivec[16])  // x4
// CTR mode with a 32-bit big-endian counter in the last word of ivec.
// Three counter blocks are encrypted in parallel, with a one/two-block
// tail path.
//----------------------------------------------------------------------
.globl  aes_hw_ctr32_encrypt_blocks
.hidden aes_hw_ctr32_encrypt_blocks
.type   aes_hw_ctr32_encrypt_blocks,%function
.align  5
aes_hw_ctr32_encrypt_blocks:
        // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
        AARCH64_VALID_CALL_TARGET
        stp     x29,x30,[sp,#-16]!
        add     x29,sp,#0
        ldr     w5,[x3,#240]

        ldr     w8, [x4, #12]           // counter word (big-endian on the wire)
        ld1     {v0.4s},[x4]

        ld1     {v16.4s,v17.4s},[x3]    // load key schedule...
        sub     w5,w5,#4
        mov     x12,#16
        cmp     x2,#2
        add     x7,x3,x5,lsl#4          // pointer to last 5 round keys
        sub     w5,w5,#2
        ld1     {v20.4s,v21.4s},[x7],#32
        ld1     {v22.4s,v23.4s},[x7],#32
        ld1     {v7.4s},[x7]
        add     x7,x3,#32
        mov     w6,w5
        csel    x12,xzr,x12,lo

        // ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
        // affected by silicon errata #1742098 [0] and #1655431 [1],
        // respectively, where the second instruction of an aese/aesmc
        // instruction pair may execute twice if an interrupt is taken right
        // after the first instruction consumes an input register of which a
        // single 32-bit lane has been updated the last time it was modified.
        //
        // This function uses a counter in one 32-bit lane.  The lane writes
        // could target v1.16b and v18.16b directly, but that trips this bug.
        // We write to v6.16b and copy to the final register as a workaround.
        //
        // [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
        // [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
#ifndef __AARCH64EB__
        rev     w8, w8                  // counter to host order for arithmetic
#endif
        add     w10, w8, #1
        orr     v6.16b,v0.16b,v0.16b
        rev     w10, w10
        mov     v6.s[3],w10
        add     w8, w8, #2
        orr     v1.16b,v6.16b,v6.16b    // v1 = counter+1 block
        b.ls    .Lctr32_tail            // <= 2 blocks: skip the 3x loop
        rev     w12, w8
        mov     v6.s[3],w12
        sub     x2,x2,#3                // bias
        orr     v18.16b,v6.16b,v6.16b   // v18 = counter+2 block
        b       .Loop3x_ctr32

.align  4
.Loop3x_ctr32:
        aese    v0.16b,v16.16b
        aesmc   v0.16b,v0.16b
        aese    v1.16b,v16.16b
        aesmc   v1.16b,v1.16b
        aese    v18.16b,v16.16b
        aesmc   v18.16b,v18.16b
        ld1     {v16.4s},[x7],#16
        subs    w6,w6,#2
        aese    v0.16b,v17.16b
        aesmc   v0.16b,v0.16b
        aese    v1.16b,v17.16b
        aesmc   v1.16b,v1.16b
        aese    v18.16b,v17.16b
        aesmc   v18.16b,v18.16b
        ld1     {v17.4s},[x7],#16
        b.gt    .Loop3x_ctr32

        // Tail of the round sequence: move the state into v4/v5/v17 so
        // v0/v1/v18 are free to receive the next three counter blocks.
        aese    v0.16b,v16.16b
        aesmc   v4.16b,v0.16b
        aese    v1.16b,v16.16b
        aesmc   v5.16b,v1.16b
        ld1     {v2.16b},[x0],#16
        add     w9,w8,#1
        aese    v18.16b,v16.16b
        aesmc   v18.16b,v18.16b
        ld1     {v3.16b},[x0],#16
        rev     w9,w9
        aese    v4.16b,v17.16b
        aesmc   v4.16b,v4.16b
        aese    v5.16b,v17.16b
        aesmc   v5.16b,v5.16b
        ld1     {v19.16b},[x0],#16
        mov     x7,x3
        aese    v18.16b,v17.16b
        aesmc   v17.16b,v18.16b
        aese    v4.16b,v20.16b
        aesmc   v4.16b,v4.16b
        aese    v5.16b,v20.16b
        aesmc   v5.16b,v5.16b
        eor     v2.16b,v2.16b,v7.16b    // pre-XOR input with the last round key
        add     w10,w8,#2
        aese    v17.16b,v20.16b
        aesmc   v17.16b,v17.16b
        eor     v3.16b,v3.16b,v7.16b
        add     w8,w8,#3
        aese    v4.16b,v21.16b
        aesmc   v4.16b,v4.16b
        aese    v5.16b,v21.16b
        aesmc   v5.16b,v5.16b
        // Note the logic to update v0.16b, v1.16b, and v18.16b is written to
        // work around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
        // 32-bit mode.  See the comment above.
        eor     v19.16b,v19.16b,v7.16b
        mov     v6.s[3], w9
        aese    v17.16b,v21.16b
        aesmc   v17.16b,v17.16b
        orr     v0.16b,v6.16b,v6.16b
        rev     w10,w10
        aese    v4.16b,v22.16b
        aesmc   v4.16b,v4.16b
        mov     v6.s[3], w10
        rev     w12,w8
        aese    v5.16b,v22.16b
        aesmc   v5.16b,v5.16b
        orr     v1.16b,v6.16b,v6.16b
        mov     v6.s[3], w12
        aese    v17.16b,v22.16b
        aesmc   v17.16b,v17.16b
        orr     v18.16b,v6.16b,v6.16b
        subs    x2,x2,#3
        aese    v4.16b,v23.16b
        aese    v5.16b,v23.16b
        aese    v17.16b,v23.16b

        eor     v2.16b,v2.16b,v4.16b    // keystream ^ input = output
        ld1     {v16.4s},[x7],#16       // re-pre-load rndkey[0]
        st1     {v2.16b},[x1],#16
        eor     v3.16b,v3.16b,v5.16b
        mov     w6,w5
        st1     {v3.16b},[x1],#16
        eor     v19.16b,v19.16b,v17.16b
        ld1     {v17.4s},[x7],#16       // re-pre-load rndkey[1]
        st1     {v19.16b},[x1],#16
        b.hs    .Loop3x_ctr32

        adds    x2,x2,#3                // undo the bias
        b.eq    .Lctr32_done
        cmp     x2,#1
        mov     x12,#16
        csel    x12,xzr,x12,eq          // one block left: don't advance input

.Lctr32_tail:
        aese    v0.16b,v16.16b
        aesmc   v0.16b,v0.16b
        aese    v1.16b,v16.16b
        aesmc   v1.16b,v1.16b
        ld1     {v16.4s},[x7],#16
        subs    w6,w6,#2
        aese    v0.16b,v17.16b
        aesmc   v0.16b,v0.16b
        aese    v1.16b,v17.16b
        aesmc   v1.16b,v1.16b
        ld1     {v17.4s},[x7],#16
        b.gt    .Lctr32_tail

        aese    v0.16b,v16.16b
        aesmc   v0.16b,v0.16b
        aese    v1.16b,v16.16b
        aesmc   v1.16b,v1.16b
        aese    v0.16b,v17.16b
        aesmc   v0.16b,v0.16b
        aese    v1.16b,v17.16b
        aesmc   v1.16b,v1.16b
        ld1     {v2.16b},[x0],x12
        aese    v0.16b,v20.16b
        aesmc   v0.16b,v0.16b
        aese    v1.16b,v20.16b
        aesmc   v1.16b,v1.16b
        ld1     {v3.16b},[x0]
        aese    v0.16b,v21.16b
        aesmc   v0.16b,v0.16b
        aese    v1.16b,v21.16b
        aesmc   v1.16b,v1.16b
        eor     v2.16b,v2.16b,v7.16b
        aese    v0.16b,v22.16b
        aesmc   v0.16b,v0.16b
        aese    v1.16b,v22.16b
        aesmc   v1.16b,v1.16b
        eor     v3.16b,v3.16b,v7.16b
        aese    v0.16b,v23.16b
        aese    v1.16b,v23.16b

        cmp     x2,#1
        eor     v2.16b,v2.16b,v0.16b
        eor     v3.16b,v3.16b,v1.16b
        st1     {v2.16b},[x1],#16
        b.eq    .Lctr32_done            // only one block was requested
        st1     {v3.16b},[x1]

.Lctr32_done:
        ldr     x29,[sp],#16
        ret
.size   aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
#endif
#endif  // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__ELF__)