// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#include <ring-core/asm_base.h>

#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) && defined(__ELF__)
#include <ring-core/arm_arch.h>

#if __ARM_MAX_ARCH__>=7
.text
.arch armv7-a @ don't confuse not-so-latest binutils with armv8 :-)
.fpu neon
.code 32
#undef __thumb2__
.align 5
.Lrcon:
.long 0x01,0x01,0x01,0x01
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d @ rotate-n-splat
.long 0x1b,0x1b,0x1b,0x1b

.text

.globl aes_hw_set_encrypt_key
.hidden aes_hw_set_encrypt_key
.type aes_hw_set_encrypt_key,%function
.align 5
aes_hw_set_encrypt_key:
.Lenc_key:
    mov r3,#-1
    cmp r0,#0
    beq .Lenc_key_abort
    cmp r2,#0
    beq .Lenc_key_abort
    mov r3,#-2
    cmp r1,#128
    blt .Lenc_key_abort
    cmp r1,#256
    bgt .Lenc_key_abort
    tst r1,#0x3f
    bne .Lenc_key_abort

    adr r3,.Lrcon
    cmp r1,#192

    veor q0,q0,q0
    vld1.8 {q3},[r0]!
    mov r1,#8 @ reuse r1
    vld1.32 {q1,q2},[r3]!

    blt .Loop128
    @ 192-bit key support was removed.
    b .L256

.align 4
.Loop128:
    vtbl.8 d20,{q3},d4
    vtbl.8 d21,{q3},d5
    vext.8 q9,q0,q3,#12
    vst1.32 {q3},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
    subs r1,r1,#1

    veor q3,q3,q9
    vext.8 q9,q0,q9,#12
    veor q3,q3,q9
    vext.8 q9,q0,q9,#12
    veor q10,q10,q1
    veor q3,q3,q9
    vshl.u8 q1,q1,#1
    veor q3,q3,q10
    bne .Loop128

    vld1.32 {q1},[r3]

    vtbl.8 d20,{q3},d4
    vtbl.8 d21,{q3},d5
    vext.8 q9,q0,q3,#12
    vst1.32 {q3},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0

    veor q3,q3,q9
    vext.8 q9,q0,q9,#12
    veor q3,q3,q9
    vext.8 q9,q0,q9,#12
    veor q10,q10,q1
    veor q3,q3,q9
    vshl.u8 q1,q1,#1
    veor q3,q3,q10

    vtbl.8 d20,{q3},d4
    vtbl.8 d21,{q3},d5
    vext.8 q9,q0,q3,#12
    vst1.32 {q3},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0

    veor q3,q3,q9
    vext.8 q9,q0,q9,#12
    veor q3,q3,q9
    vext.8 q9,q0,q9,#12
    veor q10,q10,q1
    veor q3,q3,q9
    veor q3,q3,q10
    vst1.32 {q3},[r2]
    add r2,r2,#0x50

    mov r12,#10
    b .Ldone

@ 192-bit key support was removed.

.align 4
.L256:
    vld1.8 {q8},[r0]
    mov r1,#7
    mov r12,#14
    vst1.32 {q3},[r2]!

.Loop256:
    vtbl.8 d20,{q8},d4
    vtbl.8 d21,{q8},d5
    vext.8 q9,q0,q3,#12
    vst1.32 {q8},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
    subs r1,r1,#1

    veor q3,q3,q9
    vext.8 q9,q0,q9,#12
    veor q3,q3,q9
    vext.8 q9,q0,q9,#12
    veor q10,q10,q1
    veor q3,q3,q9
    vshl.u8 q1,q1,#1
    veor q3,q3,q10
    vst1.32 {q3},[r2]!
    beq .Ldone

    vdup.32 q10,d7[1]
    vext.8 q9,q0,q8,#12
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0

    veor q8,q8,q9
    vext.8 q9,q0,q9,#12
    veor q8,q8,q9
    vext.8 q9,q0,q9,#12
    veor q8,q8,q9

    veor q8,q8,q10
    b .Loop256

.Ldone:
    str r12,[r2]
    mov r3,#0

.Lenc_key_abort:
    mov r0,r3 @ return value

    bx lr
.size aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
.globl aes_hw_encrypt
.hidden aes_hw_encrypt
.type aes_hw_encrypt,%function
.align 5
aes_hw_encrypt:
    AARCH64_VALID_CALL_TARGET
    ldr r3,[r2,#240]
    vld1.32 {q0},[r2]!
    vld1.8 {q2},[r0]
    sub r3,r3,#2
    vld1.32 {q1},[r2]!

.Loop_enc:
.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
    vld1.32 {q0},[r2]!
    subs r3,r3,#2
.byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
    vld1.32 {q1},[r2]!
    bgt .Loop_enc

.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
    vld1.32 {q0},[r2]
.byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1
    veor q2,q2,q0

    vst1.8 {q2},[r1]
    bx lr
.size aes_hw_encrypt,.-aes_hw_encrypt
.globl aes_hw_ctr32_encrypt_blocks
.hidden aes_hw_ctr32_encrypt_blocks
.type aes_hw_ctr32_encrypt_blocks,%function
.align 5
aes_hw_ctr32_encrypt_blocks:
    mov ip,sp
    stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
    vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
    ldr r4, [ip] @ load remaining arg
    ldr r5,[r3,#240]

    ldr r8, [r4, #12]
    vld1.32 {q0},[r4]

    vld1.32 {q8,q9},[r3] @ load key schedule...
    sub r5,r5,#4
    mov r12,#16
    cmp r2,#2
    add r7,r3,r5,lsl#4 @ pointer to last 5 round keys
    sub r5,r5,#2
    vld1.32 {q12,q13},[r7]!
    vld1.32 {q14,q15},[r7]!
    vld1.32 {q7},[r7]
    add r7,r3,#32
    mov r6,r5
    movlo r12,#0

    @ ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
    @ affected by silicon errata #1742098 [0] and #1655431 [1],
    @ respectively, where the second instruction of an aese/aesmc
    @ instruction pair may execute twice if an interrupt is taken right
    @ after the first instruction consumes an input register of which a
    @ single 32-bit lane has been updated the last time it was modified.
    @
    @ This function uses a counter in one 32-bit lane. The vmov.32 lines
    @ could write to q1 and q10 directly, but that trips these bugs.
    @ We write to q6 and copy to the final register as a workaround.
    @ (An illustrative sketch of this pattern appears in the comment block
    @ at the end of this file.)
    @
    @ [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
    @ [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
#ifndef __ARMEB__
    rev r8, r8
#endif
    add r10, r8, #1
    vorr q6,q0,q0
    rev r10, r10
    vmov.32 d13[1],r10
    add r8, r8, #2
    vorr q1,q6,q6
    bls .Lctr32_tail
    rev r12, r8
    vmov.32 d13[1],r12
    sub r2,r2,#3 @ bias
    vorr q10,q6,q6
    b .Loop3x_ctr32

.align 4
.Loop3x_ctr32:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
.byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
    vld1.32 {q8},[r7]!
    subs r6,r6,#2
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
    vld1.32 {q9},[r7]!
    bgt .Loop3x_ctr32

.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x83,0xb0,0xf3 @ aesmc q4,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0xa3,0xb0,0xf3 @ aesmc q5,q1
    vld1.8 {q2},[r0]!
    add r9,r8,#1
.byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
    vld1.8 {q3},[r0]!
    rev r9,r9
.byte 0x22,0x83,0xb0,0xf3 @ aese q4,q9
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x22,0xa3,0xb0,0xf3 @ aese q5,q9
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
    vld1.8 {q11},[r0]!
    mov r7,r3
.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9
.byte 0xa4,0x23,0xf0,0xf3 @ aesmc q9,q10
.byte 0x28,0x83,0xb0,0xf3 @ aese q4,q12
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x28,0xa3,0xb0,0xf3 @ aese q5,q12
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
    veor q2,q2,q7
    add r10,r8,#2
.byte 0x28,0x23,0xf0,0xf3 @ aese q9,q12
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
    veor q3,q3,q7
    add r8,r8,#3
.byte 0x2a,0x83,0xb0,0xf3 @ aese q4,q13
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x2a,0xa3,0xb0,0xf3 @ aese q5,q13
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
    @ Note the logic to update q0, q1, and q10 is written to work
    @ around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
    @ 32-bit mode. See the comment above.
    veor q11,q11,q7
    vmov.32 d13[1], r9
.byte 0x2a,0x23,0xf0,0xf3 @ aese q9,q13
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
    vorr q0,q6,q6
    rev r10,r10
.byte 0x2c,0x83,0xb0,0xf3 @ aese q4,q14
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
    vmov.32 d13[1], r10
    rev r12,r8
.byte 0x2c,0xa3,0xb0,0xf3 @ aese q5,q14
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
    vorr q1,q6,q6
    vmov.32 d13[1], r12
.byte 0x2c,0x23,0xf0,0xf3 @ aese q9,q14
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
    vorr q10,q6,q6
    subs r2,r2,#3
.byte 0x2e,0x83,0xb0,0xf3 @ aese q4,q15
.byte 0x2e,0xa3,0xb0,0xf3 @ aese q5,q15
.byte 0x2e,0x23,0xf0,0xf3 @ aese q9,q15

    veor q2,q2,q4
    vld1.32 {q8},[r7]! @ re-pre-load rndkey[0]
    vst1.8 {q2},[r1]!
    veor q3,q3,q5
    mov r6,r5
    vst1.8 {q3},[r1]!
    veor q11,q11,q9
    vld1.32 {q9},[r7]! @ re-pre-load rndkey[1]
    vst1.8 {q11},[r1]!
    bhs .Loop3x_ctr32

    adds r2,r2,#3
    beq .Lctr32_done
    cmp r2,#1
    mov r12,#16
    moveq r12,#0

.Lctr32_tail:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
    vld1.32 {q8},[r7]!
    subs r6,r6,#2
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
    vld1.32 {q9},[r7]!
    bgt .Lctr32_tail

.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
    vld1.8 {q2},[r0],r12
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x28,0x23,0xb0,0xf3 @ aese q1,q12
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
    vld1.8 {q3},[r0]
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2a,0x23,0xb0,0xf3 @ aese q1,q13
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
    veor q2,q2,q7
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2c,0x23,0xb0,0xf3 @ aese q1,q14
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
    veor q3,q3,q7
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
.byte 0x2e,0x23,0xb0,0xf3 @ aese q1,q15

    cmp r2,#1
    veor q2,q2,q0
    veor q3,q3,q1
    vst1.8 {q2},[r1]!
    beq .Lctr32_done
    vst1.8 {q3},[r1]

.Lctr32_done:
    vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
    ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
.size aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
#endif
#endif // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__)
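
// The note below is an explanatory sketch added for readability; it is
// comments only and nothing in it is assembled. It illustrates the
// Cortex-A57/A72 erratum workaround described in the comment inside
// aes_hw_ctr32_encrypt_blocks. The "problematic" form is hypothetical and
// does not appear in this file; the "workaround" form mirrors what the code
// above actually does (the aese/aesmc pairs appear above as .byte encodings
// for the benefit of older binutils).
//
//   @ Problematic pattern per errata 1742098/1655431: a single 32-bit lane
//   @ of q1 is updated, then q1 feeds an aese/aesmc pair.
//   vmov.32 d3[1],r10        @ d3 is the high half of q1 (partial update)
//   aese.8  q1,q8
//   aesmc.8 q1,q1            @ second instruction of the pair may run twice
//
//   @ Workaround used in this file: stage the lane update in the scratch
//   @ register q6, then copy the whole register, so the aese/aesmc inputs
//   @ never come from a partially updated register.
//   vmov.32 d13[1],r10       @ d13 is the high half of q6
//   vorr    q1,q6,q6         @ full-register copy into q1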