// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#include <ring-core/asm_base.h>

#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__ELF__)
#include <ring-core/arm_arch.h>

#if __ARM_MAX_ARCH__>=7
.text
.arch	armv8-a+crypto

// Constants for the key-expansion code below:
//   .Lrcon+0  : round constant 0x01, splatted across all four lanes
//   .Lrcon+16 : tbl byte-shuffle mask that builds RotWord(last word) in
//               every 32-bit lane ("rotate-n-splat")
//   .Lrcon+32 : 0x1b, the value the round constant wraps to after the
//               `shl` doublings overflow (reloaded via `ld1 {v1.4s},[x3]`)
.section	.rodata
.align	5
.Lrcon:
.long	0x01,0x01,0x01,0x01
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b

.text

// int aes_hw_set_encrypt_key(const uint8_t *user_key,   // x0
//                            unsigned bits,             // w1
//                            AES_KEY *key)              // x2
//
// Expands user_key into an encryption key schedule at x2: round keys
// first, then the round count (10 or 14) stored at byte offset 240 —
// the offset the other functions in this file read via `ldr wN,[key,#240]`.
//
// Returns (in x0): 0 on success, -1 if user_key or key is NULL, -2 if
// bits is below 128, above 256, or not a multiple of 64.
//
// NOTE(review): bits == 192 passes the range checks but falls through to
// the 256-bit path (192-bit support was removed), which would read 32 key
// bytes — callers must not pass 192.
.globl	aes_hw_set_encrypt_key
.hidden	aes_hw_set_encrypt_key
.type	aes_hw_set_encrypt_key,%function
.align	5
aes_hw_set_encrypt_key:
.Lenc_key:
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
	AARCH64_VALID_CALL_TARGET
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	mov	x3,#-1			// x3 = return code: -1 for NULL argument
	cmp	x0,#0
	b.eq	.Lenc_key_abort
	cmp	x2,#0
	b.eq	.Lenc_key_abort
	mov	x3,#-2			// return code for a bad bit length
	cmp	w1,#128
	b.lt	.Lenc_key_abort
	cmp	w1,#256
	b.gt	.Lenc_key_abort
	tst	w1,#0x3f		// reject lengths that are not a multiple of 64
	b.ne	.Lenc_key_abort

	adrp	x3,.Lrcon		// x3 now points at the constant table
	add	x3,x3,:lo12:.Lrcon
	cmp	w1,#192

	eor	v0.16b,v0.16b,v0.16b	// v0 = 0: all-zero round key so aese == SubBytes+ShiftRows
	ld1	{v3.16b},[x0],#16	// first 16 key bytes
	mov	w1,#8			// reuse w1 as the expansion loop counter
	ld1	{v1.4s,v2.4s},[x3],#32	// v1 = rcon (0x01), v2 = rotate-n-splat mask

	b.lt	.Loop128
	// 192-bit key support was removed.
	b	.L256

.align	4
.Loop128:
	// One AES-128 expansion round.  The tbl mask splats RotWord(word 3)
	// into every lane; since all four columns are then identical, the
	// ShiftRows inside aese is a no-op and aese(v6, 0) computes
	// SubWord(RotWord(w3)).  v3 accumulates the next round key.
	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12	// v5 = v3 shifted left one word (zero fill)
	st1	{v3.4s},[x2],#16		// emit current round key
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b		// w[i] ^= w[i-1] chain, three shifted xors
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b		// fold in the round constant
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1		// rcon <<= 1 for the next round
	eor	v3.16b,v3.16b,v6.16b
	b.ne	.Loop128

	ld1	{v1.4s},[x3]			// rcon overflowed to 0x80; reload 0x1b

	// Rounds 9 and 10, unrolled (rcon = 0x1b then 0x36).
	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b

	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2]			// 11th round key at offset 160, no post-inc
	add	x2,x2,#0x50			// x2 -> offset 240

	mov	w12,#10				// AES-128: 10 rounds
	b	.Ldone

// 192-bit key support was removed.

.align	4
.L256:
	ld1	{v4.16b},[x0]			// second 16 key bytes (32-byte key)
	mov	w1,#7				// 7 expansion iterations
	mov	w12,#14				// AES-256: 14 rounds
	st1	{v3.4s},[x2],#16		// round key 0 = first key half

.Loop256:
	// AES-256 expansion: same rotate-splat-SubWord step as .Loop128 to
	// derive a new even round key into v3 from v4 ...
	tbl	v6.16b,{v4.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v4.4s},[x2],#16
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2],#16
	b.eq	.Ldone

	// ... then the odd round key into v4 uses SubWord with no rotation
	// and no round constant (dup splats word 3 directly).
	dup	v6.4s,v3.s[3]		// just splat
	ext	v5.16b,v0.16b,v4.16b,#12
	aese	v6.16b,v0.16b

	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b

	eor	v4.16b,v4.16b,v6.16b
	b	.Loop256

.Ldone:
	str	w12,[x2]		// store round count at byte offset 240
	mov	x3,#0			// success

.Lenc_key_abort:
	mov	x0,x3			// return value
	ldr	x29,[sp],#16
	ret
.size	aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key

// void aes_hw_encrypt(const uint8_t *in,   // x0
//                     uint8_t *out,        // x1
//                     const AES_KEY *key)  // x2
//
// Encrypts a single 16-byte block.  The round count is read from
// key + 240; the loop consumes two round keys per iteration, with the
// final round (aese without aesmc, then xor of the last round key)
// peeled off after the loop.  Leaf function: no frame is set up.
.globl	aes_hw_encrypt
.hidden	aes_hw_encrypt
.type	aes_hw_encrypt,%function
.align	5
aes_hw_encrypt:
	AARCH64_VALID_CALL_TARGET
	ldr	w3,[x2,#240]		// w3 = number of rounds
	ld1	{v0.4s},[x2],#16	// round key 0
	ld1	{v2.16b},[x0]		// plaintext block
	sub	w3,w3,#2		// loop counts rounds in pairs
	ld1	{v1.4s},[x2],#16	// round key 1

.Loop_enc:
	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	ld1	{v0.4s},[x2],#16
	subs	w3,w3,#2
	aese	v2.16b,v1.16b
	aesmc	v2.16b,v2.16b
	ld1	{v1.4s},[x2],#16
	b.gt	.Loop_enc

	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	ld1	{v0.4s},[x2]		// final round key
	aese	v2.16b,v1.16b		// last round: no MixColumns
	eor	v2.16b,v2.16b,v0.16b

	st1	{v2.16b},[x1]
	ret
.size	aes_hw_encrypt,.-aes_hw_encrypt

// void aes_hw_ctr32_encrypt_blocks(const uint8_t *in,      // x0
//                                  uint8_t *out,           // x1
//                                  size_t blocks,          // x2 (16-byte blocks)
//                                  const AES_KEY *key,     // x3
//                                  const uint8_t ivec[16]) // x4
//
// CTR mode with a 32-bit big-endian counter in the last word of ivec;
// only that lane is incremented (mov v6.s[3]), the rest of ivec is
// carried unchanged.  The main loop encrypts three counter blocks at a
// time; .Lctr32_tail handles the final one or two blocks.
.globl	aes_hw_ctr32_encrypt_blocks
.hidden	aes_hw_ctr32_encrypt_blocks
.type	aes_hw_ctr32_encrypt_blocks,%function
.align	5
aes_hw_ctr32_encrypt_blocks:
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
	AARCH64_VALID_CALL_TARGET
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	ldr	w5,[x3,#240]		// w5 = round count

	ldr	w8, [x4, #12]		// w8 = counter word (big-endian in memory)
	ld1	{v0.4s},[x4]		// v0 = initial counter block

	ld1	{v16.4s,v17.4s},[x3]	// load key schedule...
	sub	w5,w5,#4
	mov	x12,#16			// tail input stride; zeroed below if < 2 blocks
	cmp	x2,#2
	add	x7,x3,x5,lsl#4		// pointer to last 5 round keys
	sub	w5,w5,#2		// w5 = rounds handled by the 2-per-pass loops
	ld1	{v20.4s,v21.4s},[x7],#32
	ld1	{v22.4s,v23.4s},[x7],#32
	ld1	{v7.4s},[x7]		// v7 = final round key
	add	x7,x3,#32		// x7 walks the middle round keys
	mov	w6,w5			// w6 = inner-loop round counter
	csel	x12,xzr,x12,lo		// x12 = 0 when only one block remains

	// ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
	// affected by silicon errata #1742098 [0] and #1655431 [1],
	// respectively, where the second instruction of an aese/aesmc
	// instruction pair may execute twice if an interrupt is taken right
	// after the first instruction consumes an input register of which a
	// single 32-bit lane has been updated the last time it was modified.
	//
	// This function uses a counter in one 32-bit lane. The mov/orr lines
	// could write to v1.16b and v18.16b directly, but that trips these bugs.
	// We write to v6.16b and copy to the final register as a workaround.
	//
	// [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
	// [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
#ifndef __AARCH64EB__
	rev	w8, w8			// host-order counter on little-endian
#endif
	add	w10, w8, #1		// counter + 1 for the second block
	orr	v6.16b,v0.16b,v0.16b	// stage through v6 (see erratum note above)
	rev	w10, w10
	mov	v6.s[3],w10
	add	w8, w8, #2
	orr	v1.16b,v6.16b,v6.16b	// v1 = counter block #2
	b.ls	.Lctr32_tail		// <= 2 blocks total: skip the 3x loop
	rev	w12, w8
	mov	v6.s[3],w12
	sub	x2,x2,#3		// bias
	orr	v18.16b,v6.16b,v6.16b	// v18 = counter block #3
	b	.Loop3x_ctr32

.align	4
.Loop3x_ctr32:
	// Middle rounds for all three counter blocks, two rounds per pass.
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v17.16b
	aesmc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Loop3x_ctr32

	// Last five rounds, interleaved with input loads, counter updates
	// for the next trip, and the final xors.  Keystream state forks
	// into v4/v5/v17 so v0/v1/v18 can be refilled with new counters.
	aese	v0.16b,v16.16b
	aesmc	v4.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v5.16b,v1.16b
	ld1	{v2.16b},[x0],#16	// input block 1
	add	w9,w8,#1
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v3.16b},[x0],#16	// input block 2
	rev	w9,w9
	aese	v4.16b,v17.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v17.16b
	aesmc	v5.16b,v5.16b
	ld1	{v19.16b},[x0],#16	// input block 3
	mov	x7,x3			// rewind key-schedule pointer
	aese	v18.16b,v17.16b
	aesmc	v17.16b,v18.16b
	aese	v4.16b,v20.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v20.16b
	aesmc	v5.16b,v5.16b
	eor	v2.16b,v2.16b,v7.16b	// fold final round key into the inputs
	add	w10,w8,#2
	aese	v17.16b,v20.16b
	aesmc	v17.16b,v17.16b
	eor	v3.16b,v3.16b,v7.16b
	add	w8,w8,#3		// advance counter by the 3 blocks consumed
	aese	v4.16b,v21.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v21.16b
	aesmc	v5.16b,v5.16b
	// Note the logic to update v0.16b, v1.16b, and v18.16b is written to work
	// around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
	// 32-bit mode. See the comment above.
	eor	v19.16b,v19.16b,v7.16b
	mov	v6.s[3], w9
	aese	v17.16b,v21.16b
	aesmc	v17.16b,v17.16b
	orr	v0.16b,v6.16b,v6.16b	// next counter block #1
	rev	w10,w10
	aese	v4.16b,v22.16b
	aesmc	v4.16b,v4.16b
	mov	v6.s[3], w10
	rev	w12,w8
	aese	v5.16b,v22.16b
	aesmc	v5.16b,v5.16b
	orr	v1.16b,v6.16b,v6.16b	// next counter block #2
	mov	v6.s[3], w12
	aese	v17.16b,v22.16b
	aesmc	v17.16b,v17.16b
	orr	v18.16b,v6.16b,v6.16b	// next counter block #3
	subs	x2,x2,#3
	aese	v4.16b,v23.16b		// last round: no MixColumns
	aese	v5.16b,v23.16b
	aese	v17.16b,v23.16b

	eor	v2.16b,v2.16b,v4.16b
	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
	st1	{v2.16b},[x1],#16
	eor	v3.16b,v3.16b,v5.16b
	mov	w6,w5
	st1	{v3.16b},[x1],#16
	eor	v19.16b,v19.16b,v17.16b
	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
	st1	{v19.16b},[x1],#16
	b.hs	.Loop3x_ctr32

	adds	x2,x2,#3		// undo bias; x2 = 0, 1, or 2 blocks left
	b.eq	.Lctr32_done
	cmp	x2,#1
	mov	x12,#16
	csel	x12,xzr,x12,eq		// single block: don't advance past it

.Lctr32_tail:
	// Encrypt the remaining one or two counter blocks (v0, v1).
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Lctr32_tail

	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v2.16b},[x0],x12	// x12 = 0 when only one block: no advance
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v20.16b
	aesmc	v1.16b,v1.16b
	ld1	{v3.16b},[x0]
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v21.16b
	aesmc	v1.16b,v1.16b
	eor	v2.16b,v2.16b,v7.16b
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v22.16b
	aesmc	v1.16b,v1.16b
	eor	v3.16b,v3.16b,v7.16b
	aese	v0.16b,v23.16b		// last round: no MixColumns
	aese	v1.16b,v23.16b

	cmp	x2,#1
	eor	v2.16b,v2.16b,v0.16b
	eor	v3.16b,v3.16b,v1.16b
	st1	{v2.16b},[x1],#16
	b.eq	.Lctr32_done		// exactly one block: second store skipped
	st1	{v3.16b},[x1]

.Lctr32_done:
	ldr	x29,[sp],#16
	ret
.size	aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
#endif
#endif  // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__ELF__)