// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#include <ring-core/asm_base.h>

#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(_WIN32)
#include <ring-core/arm_arch.h>

#if __ARM_MAX_ARCH__>=7
.text
.arch	armv8-a+crypto
.section	.rodata
.align	5
// Constants consumed by the key-expansion code below:
//   row 0: initial round constant, replicated in every 32-bit lane
//   row 1: tbl index vector used as a rotate-and-splat mask
//   row 2: round constant 0x1b, loaded once row 0 has been shifted out
Lrcon:
.long	0x01,0x01,0x01,0x01
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b

.text

//-----------------------------------------------------------------------
// aes_hw_set_encrypt_key
//
// Expands a 128-bit or 256-bit AES key into an encryption key schedule.
//
// In:    x0 = pointer to the user key (16 or 32 bytes are read)
//        w1 = key length in bits
//        x2 = pointer to the key schedule to fill in
// Out:   x0 =  0 on success
//             -1 if x0 or x2 is NULL
//             -2 if w1 is not 128 or 256
// Writes: 11 (128-bit) or 15 (256-bit) 16-byte round keys starting at
//         the original x2, followed by the round count (10 or 14) as a
//         32-bit word at byte offset 240.
// Clobb: w1, x2, x3, w12, v0-v6, flags (x0 is advanced while loading).
//-----------------------------------------------------------------------
.globl	aes_hw_set_encrypt_key

.def aes_hw_set_encrypt_key
   .type 32
.endef
.align	5
aes_hw_set_encrypt_key:
Lenc_key:
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
	AARCH64_VALID_CALL_TARGET
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0

	// Argument validation. x3 carries the pending return value.
	mov	x3,#-1			// -1: NULL pointer argument
	cmp	x0,#0
	b.eq	Lenc_key_abort
	cmp	x2,#0
	b.eq	Lenc_key_abort
	mov	x3,#-2			// -2: unsupported key length
	cmp	w1,#128
	b.lt	Lenc_key_abort
	cmp	w1,#256
	b.gt	Lenc_key_abort
	tst	w1,#0x3f
	b.ne	Lenc_key_abort
	// 192-bit key support was removed, so 192 must be rejected here.
	// Otherwise it would fall through to L256, which loads 32 bytes from
	// x0 and reports success for a schedule built from the wrong length.
	cmp	w1,#192
	b.eq	Lenc_key_abort		// x3 is still -2

	// From here w1 is 128 or 256. The flags of the compare against 192
	// stay live until the b.lt below: nothing in between writes NZCV.
	adrp	x3,Lrcon
	add	x3,x3,:lo12:Lrcon

	eor	v0.16b,v0.16b,v0.16b	// v0 = 0, used as the zero vector
	ld1	{v3.16b},[x0],#16	// v3 = first 16 key bytes
	mov	w1,#8			// reuse w1
	ld1	{v1.4s,v2.4s},[x3],#32	// v1 = rcon, v2 = rotate-n-splat mask

	b.lt	Loop128			// 128-bit key
	// 192-bit key support was removed.
	b	L256			// 256-bit key

.align	4
	// Eight iterations: each stores the current round key and derives
	// the next one, shifting the round constant left by one bit.
Loop128:
	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b
	b.ne	Loop128

	ld1	{v1.4s},[x3]		// switch to the 0x1b round constant

	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b

	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2]		// last round key, no post-increment
	add	x2,x2,#0x50		// advance x2 to byte offset 240

	mov	w12,#10			// round count for a 128-bit key
	b	Ldone

// 192-bit key support was removed.

.align	4
L256:
	ld1	{v4.16b},[x0]		// v4 = second 16 key bytes
	mov	w1,#7
	mov	w12,#14			// round count for a 256-bit key
	st1	{v3.4s},[x2],#16

	// Seven iterations, each storing two round keys; the loop exits
	// from the middle once the final round key has been stored.
Loop256:
	tbl	v6.16b,{v4.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v4.4s},[x2],#16
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2],#16
	b.eq	Ldone

	dup	v6.4s,v3.s[3]		// just splat
	ext	v5.16b,v0.16b,v4.16b,#12
	aese	v6.16b,v0.16b

	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b

	eor	v4.16b,v4.16b,v6.16b
	b	Loop256

Ldone:
	str	w12,[x2]		// round count at byte offset 240
	mov	x3,#0			// success

Lenc_key_abort:
	mov	x0,x3			// return value
	ldr	x29,[sp],#16		// restore x29 and release the frame
	ret

//-----------------------------------------------------------------------
// aes_hw_encrypt
//
// Encrypts a single 16-byte block.
//
// In:    x0 = pointer to the 16-byte input block
//        x1 = pointer to the 16-byte output block
//        x2 = pointer to the key schedule; the round count is read from
//             byte offset 240
// Out:   16 bytes stored at x1
// Clobb: x2, w3, v0-v2, flags. Leaf function; the stack is not used.
//-----------------------------------------------------------------------
.globl	aes_hw_encrypt

.def aes_hw_encrypt
   .type 32
.endef
.align	5
aes_hw_encrypt:
	AARCH64_VALID_CALL_TARGET
	ldr	w3,[x2,#240]		// w3 = round count
	ld1	{v0.4s},[x2],#16
	ld1	{v2.16b},[x0]		// v2 = block being encrypted
	sub	w3,w3,#2
	ld1	{v1.4s},[x2],#16

	// Two rounds per iteration; v0/v1 alternate as the current round key.
Loop_enc:
	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	ld1	{v0.4s},[x2],#16
	subs	w3,w3,#2
	aese	v2.16b,v1.16b
	aesmc	v2.16b,v2.16b
	ld1	{v1.4s},[x2],#16
	b.gt	Loop_enc

	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	ld1	{v0.4s},[x2]		// last round key
	aese	v2.16b,v1.16b		// final round has no aesmc
	eor	v2.16b,v2.16b,v0.16b

	st1	{v2.16b},[x1]
	ret

//-----------------------------------------------------------------------
// aes_hw_ctr32_encrypt_blocks
//
// Encrypts whole 16-byte blocks in counter mode: three blocks per
// iteration of the main loop, then a tail for the last one or two.
//
// In:    x0 = pointer to the input data
//        x1 = pointer to the output data
//        x2 = number of 16-byte blocks
//        x3 = pointer to the key schedule; the round count is read from
//             byte offset 240
//        x4 = pointer to the 16-byte counter block; the 32-bit word at
//             byte offset 12 is the counter (byte-reversed with rev
//             unless __AARCH64EB__ is defined)
// Out:   encrypted blocks stored at x1
// Clobb: x0-x2, x5-x10, x12, v0-v7, v16-v23, flags.
//-----------------------------------------------------------------------
.globl	aes_hw_ctr32_encrypt_blocks

.def aes_hw_ctr32_encrypt_blocks
   .type 32
.endef
.align	5
aes_hw_ctr32_encrypt_blocks:
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
	AARCH64_VALID_CALL_TARGET
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	ldr	w5,[x3,#240]		// w5 = round count

	ldr	w8, [x4, #12]		// w8 = counter word
	ld1	{v0.4s},[x4]		// v0 = full counter block

	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
	sub	w5,w5,#4
	mov	x12,#16
	cmp	x2,#2			// flags consumed by csel and b.ls below
	add	x7,x3,x5,lsl#4	// pointer to last 5 round keys
	sub	w5,w5,#2
	ld1	{v20.4s,v21.4s},[x7],#32
	ld1	{v22.4s,v23.4s},[x7],#32
	ld1	{v7.4s},[x7]		// v7 = last round key
	add	x7,x3,#32
	mov	w6,w5
	csel	x12,xzr,x12,lo		// input stride 0 when fewer than 2 blocks

	// ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
	// affected by silicon errata #1742098 [0] and #1655431 [1],
	// respectively, where the second instruction of an aese/aesmc
	// instruction pair may execute twice if an interrupt is taken right
	// after the first instruction consumes an input register of which a
	// single 32-bit lane has been updated the last time it was modified.
	//
	// This function uses a counter in one 32-bit lane. The lane-insert
	// mov lines could write to v1.16b and v18.16b directly, but that
	// trips this bug. We write to v6.16b and copy to the final register
	// as a workaround.
	//
	// [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
	// [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
#ifndef __AARCH64EB__
	rev	w8, w8
#endif
	add	w10, w8, #1
	orr	v6.16b,v0.16b,v0.16b	// v6 = staging copy of the counter block
	rev	w10, w10
	mov	v6.s[3],w10
	add	w8, w8, #2
	orr	v1.16b,v6.16b,v6.16b	// v1 = counter block + 1
	b.ls	Lctr32_tail		// x2 <= 2: tail only
	rev	w12, w8
	mov	v6.s[3],w12
	sub	x2,x2,#3		// bias
	orr	v18.16b,v6.16b,v6.16b	// v18 = counter block + 2
	b	Loop3x_ctr32

.align	4
	// Three counter blocks (v0, v1, v18) are encrypted in parallel.
Loop3x_ctr32:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v17.16b
	aesmc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	Loop3x_ctr32

	aese	v0.16b,v16.16b
	aesmc	v4.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v5.16b,v1.16b
	ld1	{v2.16b},[x0],#16
	add	w9,w8,#1
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v3.16b},[x0],#16
	rev	w9,w9
	aese	v4.16b,v17.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v17.16b
	aesmc	v5.16b,v5.16b
	ld1	{v19.16b},[x0],#16
	mov	x7,x3
	aese	v18.16b,v17.16b
	aesmc	v17.16b,v18.16b
	aese	v4.16b,v20.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v20.16b
	aesmc	v5.16b,v5.16b
	eor	v2.16b,v2.16b,v7.16b
	add	w10,w8,#2
	aese	v17.16b,v20.16b
	aesmc	v17.16b,v17.16b
	eor	v3.16b,v3.16b,v7.16b
	add	w8,w8,#3
	aese	v4.16b,v21.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v21.16b
	aesmc	v5.16b,v5.16b
	// Note the logic to update v0.16b, v1.16b, and v18.16b is written to
	// work around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
	// 32-bit mode: each new counter value is inserted into v6.16b and then
	// copied as a whole register, never written to a single lane of the
	// register that aese consumes.
	eor	v19.16b,v19.16b,v7.16b
	mov	v6.s[3], w9
	aese	v17.16b,v21.16b
	aesmc	v17.16b,v17.16b
	orr	v0.16b,v6.16b,v6.16b
	rev	w10,w10
	aese	v4.16b,v22.16b
	aesmc	v4.16b,v4.16b
	mov	v6.s[3], w10
	rev	w12,w8
	aese	v5.16b,v22.16b
	aesmc	v5.16b,v5.16b
	orr	v1.16b,v6.16b,v6.16b
	mov	v6.s[3], w12
	aese	v17.16b,v22.16b
	aesmc	v17.16b,v17.16b
	orr	v18.16b,v6.16b,v6.16b
	subs	x2,x2,#3
	aese	v4.16b,v23.16b
	aese	v5.16b,v23.16b
	aese	v17.16b,v23.16b

	eor	v2.16b,v2.16b,v4.16b
	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
	st1	{v2.16b},[x1],#16
	eor	v3.16b,v3.16b,v5.16b
	mov	w6,w5
	st1	{v3.16b},[x1],#16
	eor	v19.16b,v19.16b,v17.16b
	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
	st1	{v19.16b},[x1],#16
	b.hs	Loop3x_ctr32

	adds	x2,x2,#3		// undo the bias: x2 = blocks remaining
	b.eq	Lctr32_done
	cmp	x2,#1
	mov	x12,#16
	csel	x12,xzr,x12,eq		// input stride 0 when one block remains

	// Tail: always computes two keystream blocks (v0, v1); the second
	// output block is stored only when two blocks remain.
Lctr32_tail:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v17.4s},[x7],#16
	b.gt	Lctr32_tail

	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v2.16b},[x0],x12	// x12 is 0 or 16, see the csel above
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v20.16b
	aesmc	v1.16b,v1.16b
	ld1	{v3.16b},[x0]
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v21.16b
	aesmc	v1.16b,v1.16b
	eor	v2.16b,v2.16b,v7.16b
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v22.16b
	aesmc	v1.16b,v1.16b
	eor	v3.16b,v3.16b,v7.16b
	aese	v0.16b,v23.16b
	aese	v1.16b,v23.16b

	cmp	x2,#1
	eor	v2.16b,v2.16b,v0.16b
	eor	v3.16b,v3.16b,v1.16b
	st1	{v2.16b},[x1],#16
	b.eq	Lctr32_done		// exactly one block remained
	st1	{v3.16b},[x1]

Lctr32_done:
	ldr	x29,[sp],#16		// restore x29 and release the frame
	ret

#endif
#endif  // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(_WIN32)