// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
//
// Contents: 32-bit ARM (ARM mode, NEON) AES routines. The AES instructions
// (aese, aesd, aesmc, aesimc) are emitted as raw .byte sequences; the
// trailing comment on each such line names the instruction that is encoded.
//
//   aes_hw_set_encrypt_key / aes_hw_set_decrypt_key   key schedule setup
//   aes_hw_encrypt / aes_hw_decrypt                   one 16-byte block
//   aes_hw_cbc_encrypt                                CBC, both directions
//   aes_hw_ctr32_encrypt_blocks                       CTR with 32-bit counter
//
// The key schedule holds the round keys from offset 0 upward and the round
// count (10, 12 or 14) in the 32-bit word at byte offset 240.

#include <openssl/asm_base.h>

#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) && defined(__ELF__)
#include <openssl/arm_arch.h>

#if __ARM_MAX_ARCH__>=7
.text
.arch	armv7-a	@ don't confuse not-so-latest binutils with armv8 :-)
.fpu	neon
.code	32
#undef	__thumb2__
.align	5
@ Constants for the key expansion, read through r3 by the key setup code:
@ round constant 0x01 in every lane, a byte-index mask for vtbl, and round
@ constant 0x1b in every lane (loaded for the last two AES-128 rounds).
.Lrcon:
.long	0x01,0x01,0x01,0x01
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	@ rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b

.text

@ aes_hw_set_encrypt_key
@   In:  r0 = pointer to the user key bytes
@        r1 = key length in bits; accepted values are 128, 192 and 256
@        r2 = pointer to the key schedule to fill in
@   Out: r0 = 0 on success, -2 when r1 is not an accepted key length
@   Scratch: r1, r3, r12, q0-q3, q8-q10.
@   Neither pointer is checked for NULL here.
@   On the success path r2 ends up pointing at the round-count word
@   (original r2 + 240) and r12 holds the round count.
@   aes_hw_set_decrypt_key enters at .Lenc_key and relies on both.
.globl	aes_hw_set_encrypt_key
.hidden	aes_hw_set_encrypt_key
.type	aes_hw_set_encrypt_key,%function
.align	5
aes_hw_set_encrypt_key:
.Lenc_key:
	mov	r3,#-2			@ error code returned for a bad bit length
	cmp	r1,#128
	blt	.Lenc_key_abort
	cmp	r1,#256
	bgt	.Lenc_key_abort
	tst	r1,#0x3f		@ must be a multiple of 64
	bne	.Lenc_key_abort

	adr	r3,.Lrcon
	cmp	r1,#192			@ flags consumed by blt/beq below

	veor	q0,q0,q0		@ q0 = 0, used as zero key for aese and as vext filler
	vld1.8	{q3},[r0]!		@ first 16 key bytes
	mov	r1,#8		@ reuse r1
	vld1.32	{q1,q2},[r3]!		@ q1 = rcon 0x01, q2 = rotate-n-splat mask

	blt	.Loop128
	beq	.L192
	b	.L256

.align	4
@ AES-128: eight rounds here with rcon doubling each pass (vshl.u8), then
@ two more unrolled rounds below that start from rcon 0x1b.
.Loop128:
	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10
	bne	.Loop128		@ flags from subs r1 above (NEON ops leave them alone)

	vld1.32	{q1},[r3]		@ q1 = rcon 0x1b

	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10

	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	veor	q3,q3,q10
	vst1.32	{q3},[r2]		@ last round key, no post-increment
	add	r2,r2,#0x50		@ r2 = schedule base + 240

	mov	r12,#10			@ round count
	b	.Ldone

.align	4
@ AES-192: the key is 24 bytes, so 8 more bytes are loaded into d16 and the
@ vtbl mask is shifted down by 8 to index into q8.
.L192:
	vld1.8	{d16},[r0]!
	vmov.i8	q10,#8			@ borrow q10
	vst1.32	{q3},[r2]!
	vsub.i8	q2,q2,q10	@ adjust the mask

.Loop192:
	vtbl.8	d20,{q8},d4
	vtbl.8	d21,{q8},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{d16},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9

	vdup.32	q9,d7[1]
	veor	q9,q9,q8
	veor	q10,q10,q1
	vext.8	q8,q0,q8,#12
	vshl.u8	q1,q1,#1
	veor	q8,q8,q9
	veor	q3,q3,q10
	veor	q8,q8,q10
	vst1.32	{q3},[r2]!
	bne	.Loop192		@ flags from subs r1 above

	mov	r12,#12			@ round count
	add	r2,r2,#0x20		@ r2 = schedule base + 240
	b	.Ldone

.align	4
@ AES-256: second 16 key bytes go to q8; seven passes, leaving via the beq
@ in the middle of the loop after the final q3 store.
.L256:
	vld1.8	{q8},[r0]
	mov	r1,#7
	mov	r12,#14			@ round count
	vst1.32	{q3},[r2]!

.Loop256:
	vtbl.8	d20,{q8},d4
	vtbl.8	d21,{q8},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q8},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10
	vst1.32	{q3},[r2]!
	beq	.Ldone			@ flags from subs r1 above; r2 = base + 240 here

	vdup.32	q10,d7[1]
	vext.8	q9,q0,q8,#12
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q8,q8,q9
	vext.8	q9,q0,q9,#12
	veor	q8,q8,q9
	vext.8	q9,q0,q9,#12
	veor	q8,q8,q9

	veor	q8,q8,q10
	b	.Loop256

.Ldone:
	str	r12,[r2]		@ round count at schedule offset 240
	mov	r3,#0			@ success

.Lenc_key_abort:
	mov	r0,r3			@ return value

	bx	lr
.size	aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key

@ aes_hw_set_decrypt_key
@   Takes the same r0/r1/r2 arguments as aes_hw_set_encrypt_key.
@   Builds the encryption schedule via .Lenc_key, then reverses the order of
@   the round keys in place, applying aesimc to every round key except the
@   two outermost ones.
@   Out: r0 = 0 on success, otherwise the value returned by the .Lenc_key
@        path, in which case the reversal is skipped.
@   Saves and restores r4; uses q0 and q1 as scratch.
.globl	aes_hw_set_decrypt_key
.hidden	aes_hw_set_decrypt_key
.type	aes_hw_set_decrypt_key,%function
.align	5
aes_hw_set_decrypt_key:
	stmdb	sp!,{r4,lr}
	bl	.Lenc_key

	cmp	r0,#0
	bne	.Ldec_key_abort

	sub	r2,r2,#240		@ restore original r2
	mov	r4,#-16			@ backward stride for the upper pointer
	add	r0,r2,r12,lsl#4	@ end of key schedule

	@ Swap the first and last round keys without transforming them.
	vld1.32	{q0},[r2]
	vld1.32	{q1},[r0]
	vst1.32	{q0},[r0],r4
	vst1.32	{q1},[r2]!

	@ Walk inward from both ends, swapping and applying aesimc.
.Loop_imc:
	vld1.32	{q0},[r2]
	vld1.32	{q1},[r0]
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	vst1.32	{q0},[r0],r4
	vst1.32	{q1},[r2]!
	cmp	r0,r2
	bhi	.Loop_imc

	@ Middle round key: transformed in place.
	vld1.32	{q0},[r2]
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	vst1.32	{q0},[r0]

	eor	r0,r0,r0		@ return value
.Ldec_key_abort:
	ldmia	sp!,{r4,pc}
.size	aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key

@ aes_hw_encrypt
@   In:  r0 = pointer to the 16-byte input block
@        r1 = pointer to the 16-byte output block
@        r2 = pointer to the key schedule (round count read from offset 240)
@   Scratch: r2, r3, q0-q2. Two rounds are handled per pass of .Loop_enc.
.globl	aes_hw_encrypt
.hidden	aes_hw_encrypt
.type	aes_hw_encrypt,%function
.align	5
aes_hw_encrypt:
	AARCH64_VALID_CALL_TARGET
	ldr	r3,[r2,#240]		@ r3 = round count
	vld1.32	{q0},[r2]!
	vld1.8	{q2},[r0]		@ q2 = state
	sub	r3,r3,#2
	vld1.32	{q1},[r2]!

.Loop_enc:
.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q0},[r2]!
	subs	r3,r3,#2
.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q1},[r2]!
	bgt	.Loop_enc		@ flags from subs r3 above

	@ Last two rounds: the final one has no aesmc and ends with a plain
	@ veor against the last round key.
.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q0},[r2]
.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
	veor	q2,q2,q0

	vst1.8	{q2},[r1]
	bx	lr
.size	aes_hw_encrypt,.-aes_hw_encrypt

@ aes_hw_decrypt
@   In:  r0 = pointer to the 16-byte input block
@        r1 = pointer to the 16-byte output block
@        r2 = pointer to the key schedule (round count read from offset 240)
@   Same structure as aes_hw_encrypt with aesd/aesimc in place of aese/aesmc.
@   Scratch: r2, r3, q0-q2.
.globl	aes_hw_decrypt
.hidden	aes_hw_decrypt
.type	aes_hw_decrypt,%function
.align	5
aes_hw_decrypt:
	AARCH64_VALID_CALL_TARGET
	ldr	r3,[r2,#240]		@ r3 = round count
	vld1.32	{q0},[r2]!
	vld1.8	{q2},[r0]		@ q2 = state
	sub	r3,r3,#2
	vld1.32	{q1},[r2]!

.Loop_dec:
.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q0},[r2]!
	subs	r3,r3,#2
.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q1},[r2]!
	bgt	.Loop_dec		@ flags from subs r3 above

.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q0},[r2]
.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
	veor	q2,q2,q0

	vst1.8	{q2},[r1]
	bx	lr
.size	aes_hw_decrypt,.-aes_hw_decrypt

@ aes_hw_cbc_encrypt
@   In:  r0 = input pointer
@        r1 = output pointer
@        r2 = length in bytes; a length below 16 returns without doing any
@             work, otherwise the length is rounded down to a multiple of 16
@        r3 = key schedule pointer (round count read from offset 240)
@        [sp]   = pointer to the 16-byte IV (kept in r4)
@        [sp+4] = direction flag: zero selects decryption, non-zero encryption
@   The chaining value is written back through r4 at .Lcbc_done.
@   Saves and restores r4-r8, lr and d8-d15.
@   Register roles after setup: q6 = IV or previous ciphertext block,
@   q7 = last round key, q8/q9 = round keys 0 and 1,
@   q10-q15 = the six round keys before the last one.
.globl	aes_hw_cbc_encrypt
.hidden	aes_hw_cbc_encrypt
.type	aes_hw_cbc_encrypt,%function
.align	5
aes_hw_cbc_encrypt:
	mov	ip,sp
	stmdb	sp!,{r4,r5,r6,r7,r8,lr}
	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
	ldmia	ip,{r4,r5}		@ load remaining args
	subs	r2,r2,#16
	mov	r8,#16			@ r8 = input stride
	blo	.Lcbc_abort
	moveq	r8,#0			@ exactly one block: do not step past it

	cmp	r5,#0			@ en- or decrypting?
	ldr	r5,[r3,#240]		@ r5 = round count from here on
	and	r2,r2,#-16
	vld1.8	{q6},[r4]
	vld1.8	{q0},[r0],r8

	vld1.32	{q8,q9},[r3]		@ load key schedule...
	sub	r5,r5,#6
	add	r7,r3,r5,lsl#4	@ pointer to last 7 round keys
	sub	r5,r5,#2
	vld1.32	{q10,q11},[r7]!
	vld1.32	{q12,q13},[r7]!
	vld1.32	{q14,q15},[r7]!
	vld1.32	{q7},[r7]

	add	r7,r3,#32
	mov	r6,r5
	beq	.Lcbc_dec		@ flags still from cmp r5,#0 above

	@ Encrypt path. r5 is the round count minus 8 here: 2, 4 or 6.
	cmp	r5,#2
	veor	q0,q0,q6		@ first block ^ IV
	veor	q5,q8,q7		@ q5 = rndkey[0] ^ last round key
	beq	.Lcbc_enc128

	vld1.32	{q2,q3},[r7]
	add	r7,r3,#16
	add	r6,r3,#16*4
	add	r12,r3,#16*5
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	add	r14,r3,#16*6
	add	r3,r3,#16*7
	b	.Lenter_cbc_enc

.align	4
.Loop_cbc_enc:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vst1.8	{q6},[r1]!		@ store the previous ciphertext block
.Lenter_cbc_enc:
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q8},[r6]
	cmp	r5,#4
.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q9},[r12]
	beq	.Lcbc_enc192

	@ Two extra rounds for the 14-round schedule.
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q8},[r14]
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q9},[r3]
	nop

.Lcbc_enc192:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	subs	r2,r2,#16
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	moveq	r8,#0			@ last block coming up: stop advancing r0
.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.8	{q8},[r0],r8		@ next input block
.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	veor	q8,q8,q5		@ next input ^ rndkey[0] ^ last round key
.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q9},[r7]		@ re-pre-load rndkey[1]
.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
	veor	q6,q0,q7		@ q6 = ciphertext block
	bhs	.Loop_cbc_enc		@ flags from subs r2 above

	vst1.8	{q6},[r1]!
	b	.Lcbc_done

.align	5
@ 10-round variant: all round keys stay in registers, so nothing is
@ reloaded inside the loop.
.Lcbc_enc128:
	vld1.32	{q2,q3},[r7]
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	b	.Lenter_cbc_enc128
.Loop_cbc_enc128:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vst1.8	{q6},[r1]!		@ store the previous ciphertext block
.Lenter_cbc_enc128:
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	subs	r2,r2,#16
.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	moveq	r8,#0			@ last block coming up: stop advancing r0
.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.8	{q8},[r0],r8		@ next input block
.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	veor	q8,q8,q5		@ next input ^ rndkey[0] ^ last round key
.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
	veor	q6,q0,q7		@ q6 = ciphertext block
	bhs	.Loop_cbc_enc128	@ flags from subs r2 above

	vst1.8	{q6},[r1]!
	b	.Lcbc_done
.align	5
@ Decrypt path: three blocks per pass of .Loop3x_cbc_dec, followed by a
@ tail that finishes the remaining one or two blocks.
.Lcbc_dec:
	vld1.8	{q10},[r0]!
	subs	r2,r2,#32		@ bias
	add	r6,r5,#2
	vorr	q3,q0,q0
	vorr	q1,q0,q0
	vorr	q11,q10,q10
	blo	.Lcbc_dec_tail		@ flags from subs r2 above: fewer than three blocks

	vorr	q1,q10,q10
	vld1.8	{q10},[r0]!
	vorr	q2,q0,q0		@ q2, q3, q11 keep the ciphertext for chaining
	vorr	q3,q1,q1
	vorr	q11,q10,q10

.Loop3x_cbc_dec:
.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Loop3x_cbc_dec		@ flags from subs r6 above

.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q4,q6,q7		@ chaining value ^ last round key, block 0
	subs	r2,r2,#0x30
	veor	q5,q2,q7		@ same for block 1
	movlo	r6,r2			@ r6 is zero at this point
.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q9,q3,q7		@ same for block 2
	add	r0,r0,r6		@ r0 is adjusted in such a way that
					@ at exit from the loop q1-q10
					@ are loaded with the last "words"
	vorr	q6,q11,q11		@ last ciphertext block becomes the chaining value
	mov	r7,r3
.byte	0x68,0x03,0xb0,0xf3	@ aesd q0,q12
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.8	{q2},[r0]!
.byte	0x6a,0x03,0xb0,0xf3	@ aesd q0,q13
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.8	{q3},[r0]!
.byte	0x6c,0x03,0xb0,0xf3	@ aesd q0,q14
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.8	{q11},[r0]!
.byte	0x6e,0x03,0xb0,0xf3	@ aesd q0,q15
.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
	add	r6,r5,#2
	veor	q4,q4,q0
	veor	q5,q5,q1
	veor	q10,q10,q9
	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
	vst1.8	{q4},[r1]!
	vorr	q0,q2,q2
	vst1.8	{q5},[r1]!
	vorr	q1,q3,q3
	vst1.8	{q10},[r1]!
	vorr	q10,q11,q11
	bhs	.Loop3x_cbc_dec		@ flags from subs r2,#0x30 above

	cmn	r2,#0x30		@ r2 == -0x30 means nothing is left
	beq	.Lcbc_done
	nop

.Lcbc_dec_tail:
.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Lcbc_dec_tail		@ flags from subs r6 above

.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	cmn	r2,#0x20		@ equal means exactly one block remains
.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q5,q6,q7
.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q9,q3,q7
.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
	beq	.Lcbc_dec_one		@ flags from cmn r2,#0x20 above
	@ Two blocks left.
	veor	q5,q5,q1
	veor	q9,q9,q10
	vorr	q6,q11,q11
	vst1.8	{q5},[r1]!
	vst1.8	{q9},[r1]!
	b	.Lcbc_done

.Lcbc_dec_one:
	veor	q5,q5,q10
	vorr	q6,q11,q11
	vst1.8	{q5},[r1]!

.Lcbc_done:
	vst1.8	{q6},[r4]		@ write the chaining value back to the IV buffer
.Lcbc_abort:
	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
	ldmia	sp!,{r4,r5,r6,r7,r8,pc}
.size	aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt

@ aes_hw_ctr32_encrypt_blocks
@   In:  r0 = input pointer
@        r1 = output pointer
@        r2 = number of 16-byte blocks
@        r3 = key schedule pointer (round count read from offset 240)
@        [sp] = pointer to the 16-byte counter block (kept in r4)
@   Only the last 32-bit word of the counter block is incremented. It is
@   byte-reversed with rev before the arithmetic; that initial rev is skipped
@   when __ARMEB__ is defined. The counter block in memory is not written
@   back by this routine.
@   Saves and restores r4-r10, lr and d8-d15.
@   NOTE(review): r2 = 0 is not special-cased; the tail path stores output
@   unconditionally, so callers presumably guarantee at least one block.
.globl	aes_hw_ctr32_encrypt_blocks
.hidden	aes_hw_ctr32_encrypt_blocks
.type	aes_hw_ctr32_encrypt_blocks,%function
.align	5
aes_hw_ctr32_encrypt_blocks:
	mov	ip,sp
	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
	ldr	r4, [ip]		@ load remaining arg
	ldr	r5,[r3,#240]		@ r5 = round count

	ldr	r8, [r4, #12]		@ r8 = counter word
	vld1.32	{q0},[r4]

	vld1.32	{q8,q9},[r3]		@ load key schedule...
	sub	r5,r5,#4
	mov	r12,#16			@ r12 = input stride used by the tail
	cmp	r2,#2			@ flags consumed by movlo and bls below
	add	r7,r3,r5,lsl#4	@ pointer to last 5 round keys
	sub	r5,r5,#2
	vld1.32	{q12,q13},[r7]!
	vld1.32	{q14,q15},[r7]!
	vld1.32	{q7},[r7]		@ q7 = last round key
	add	r7,r3,#32
	mov	r6,r5
	movlo	r12,#0			@ single block: tail must not read a second one

	@ ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
	@ affected by silicon errata #1742098 [0] and #1655431 [1],
	@ respectively, where the second instruction of an aese/aesmc
	@ instruction pair may execute twice if an interrupt is taken right
	@ after the first instruction consumes an input register of which a
	@ single 32-bit lane has been updated the last time it was modified.
	@
	@ This function uses a counter in one 32-bit lane. The vmov.32 lines
	@ could write to q1 and q10 directly, but that trips these bugs.
	@ We write to q6 and copy to the final register as a workaround.
	@
	@ [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
	@ [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
#ifndef __ARMEB__
	rev	r8, r8
#endif
	add	r10, r8, #1
	vorr	q6,q0,q0		@ q6 = staging copy of the counter block
	rev	r10, r10
	vmov.32	d13[1],r10		@ d13[1] is the last 32-bit lane of q6
	add	r8, r8, #2
	vorr	q1,q6,q6		@ q1 = counter + 1
	bls	.Lctr32_tail		@ flags still from cmp r2,#2: one or two blocks
	rev	r12, r8
	vmov.32	d13[1],r12
	sub	r2,r2,#3		@ bias
	vorr	q10,q6,q6		@ q10 = counter + 2
	b	.Loop3x_ctr32

.align	4
@ Three counter blocks (q0, q1, q10) are encrypted per pass.
.Loop3x_ctr32:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Loop3x_ctr32		@ flags from subs r6 above

	@ From here the three states move to q4, q5 and q9 so that q0, q1
	@ and q10 can be refilled with the next counter values meanwhile.
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x83,0xb0,0xf3	@ aesmc q4,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0xa3,0xb0,0xf3	@ aesmc q5,q1
	vld1.8	{q2},[r0]!
	add	r9,r8,#1
.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.8	{q3},[r0]!
	rev	r9,r9
.byte	0x22,0x83,0xb0,0xf3	@ aese q4,q9
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x22,0xa3,0xb0,0xf3	@ aese q5,q9
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	vld1.8	{q11},[r0]!
	mov	r7,r3
.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
.byte	0xa4,0x23,0xf0,0xf3	@ aesmc q9,q10
.byte	0x28,0x83,0xb0,0xf3	@ aese q4,q12
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x28,0xa3,0xb0,0xf3	@ aese q5,q12
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	veor	q2,q2,q7		@ input ^ last round key
	add	r10,r8,#2
.byte	0x28,0x23,0xf0,0xf3	@ aese q9,q12
.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	veor	q3,q3,q7
	add	r8,r8,#3
.byte	0x2a,0x83,0xb0,0xf3	@ aese q4,q13
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x2a,0xa3,0xb0,0xf3	@ aese q5,q13
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	@ Note the logic to update q0, q1, and q10 is written to work
	@ around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
	@ 32-bit mode. See the errata comment near the top of this function.
	veor	q11,q11,q7
	vmov.32	d13[1], r9
.byte	0x2a,0x23,0xf0,0xf3	@ aese q9,q13
.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	vorr	q0,q6,q6
	rev	r10,r10
.byte	0x2c,0x83,0xb0,0xf3	@ aese q4,q14
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
	vmov.32	d13[1], r10
	rev	r12,r8
.byte	0x2c,0xa3,0xb0,0xf3	@ aese q5,q14
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	vorr	q1,q6,q6
	vmov.32	d13[1], r12
.byte	0x2c,0x23,0xf0,0xf3	@ aese q9,q14
.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	vorr	q10,q6,q6
	subs	r2,r2,#3
.byte	0x2e,0x83,0xb0,0xf3	@ aese q4,q15
.byte	0x2e,0xa3,0xb0,0xf3	@ aese q5,q15
.byte	0x2e,0x23,0xf0,0xf3	@ aese q9,q15

	veor	q2,q2,q4
	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
	vst1.8	{q2},[r1]!
	veor	q3,q3,q5
	mov	r6,r5
	vst1.8	{q3},[r1]!
	veor	q11,q11,q9
	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
	vst1.8	{q11},[r1]!
	bhs	.Loop3x_ctr32		@ flags from subs r2,#3 above

	adds	r2,r2,#3		@ undo the bias: r2 = blocks left (0, 1 or 2)
	beq	.Lctr32_done
	cmp	r2,#1
	mov	r12,#16
	moveq	r12,#0			@ single block: tail must not read a second one

@ Tail: always runs two states (q0, q1); the second result is stored only
@ when two blocks remain.
.Lctr32_tail:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.32	{q9},[r7]!
	bgt	.Lctr32_tail		@ flags from subs r6 above

.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.8	{q2},[r0],r12		@ r12 is 0 when only one block remains
.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x28,0x23,0xb0,0xf3	@ aese q1,q12
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.8	{q3},[r0]
.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2a,0x23,0xb0,0xf3	@ aese q1,q13
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	veor	q2,q2,q7
.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2c,0x23,0xb0,0xf3	@ aese q1,q14
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	veor	q3,q3,q7
.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
.byte	0x2e,0x23,0xb0,0xf3	@ aese q1,q15

	cmp	r2,#1
	veor	q2,q2,q0
	veor	q3,q3,q1
	vst1.8	{q2},[r1]!
	beq	.Lctr32_done		@ flags from cmp r2,#1 above
	vst1.8	{q3},[r1]

.Lctr32_done:
	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
.size	aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
#endif
#endif  // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__)