1// This file is generated from a similarly-named Perl script in the BoringSSL 2// source tree. Do not edit by hand. 3 4#include <openssl/asm_base.h> 5 6#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(_WIN32) 7// Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved. 8// 9// Licensed under the OpenSSL license (the "License"). You may not use 10// this file except in compliance with the License. You can obtain a copy 11// in the file LICENSE in the source distribution or at 12// https://www.openssl.org/source/license.html 13 14// ==================================================================== 15// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL 16// project. The module is, however, dual licensed under OpenSSL and 17// CRYPTOGAMS licenses depending on where you obtain it. For further 18// details see http://www.openssl.org/~appro/cryptogams/. 19// 20// Permission to use under GPLv2 terms is granted. 21// ==================================================================== 22// 23// SHA256/512 for ARMv8. 24// 25// Performance in cycles per processed byte and improvement coefficient 26// over code generated with "default" compiler: 27// 28// SHA256-hw SHA256(*) SHA512 29// Apple A7 1.97 10.5 (+33%) 6.73 (-1%(**)) 30// Cortex-A53 2.38 15.5 (+115%) 10.0 (+150%(***)) 31// Cortex-A57 2.31 11.6 (+86%) 7.51 (+260%(***)) 32// Denver 2.01 10.5 (+26%) 6.70 (+8%) 33// X-Gene 20.0 (+100%) 12.8 (+300%(***)) 34// Mongoose 2.36 13.0 (+50%) 8.36 (+33%) 35// Kryo 1.92 17.4 (+30%) 11.2 (+8%) 36// 37// (*) Software SHA256 results are of lesser relevance, presented 38// mostly for informational purposes. 39// (**) The result is a trade-off: it's possible to improve it by 40// 10% (or by 1 cycle per round), but at the cost of 20% loss 41// on Cortex-A53 (or by 4 cycles per round). 
42// (***) Super-impressive coefficients over gcc-generated code are 43// indication of some compiler "pathology", most notably code 44// generated with -mgeneral-regs-only is significantly faster 45// and the gap is only 40-90%. 46 47#ifndef __KERNEL__ 48# include <openssl/arm_arch.h> 49#endif 50 51.text 52 53.globl sha512_block_data_order_nohw 54 55.def sha512_block_data_order_nohw 56 .type 32 57.endef 58.align 6 59sha512_block_data_order_nohw: // scalar general-register SHA-512 block function; x0 = hash state (ctx), x1 = input, x2 = block count (scaled by lsl#7 below, i.e. presumably 128-byte blocks) 60 AARCH64_SIGN_LINK_REGISTER 61 stp x29,x30,[sp,#-128]! // frame layout: callee-saved pairs at +16..+80, ctx and end-of-input at +96, input pointer at +112 62 add x29,sp,#0 63 64 stp x19,x20,[sp,#16] 65 stp x21,x22,[sp,#32] 66 stp x23,x24,[sp,#48] 67 stp x25,x26,[sp,#64] 68 stp x27,x28,[sp,#80] 69 sub sp,sp,#4*8 // four spill slots for X[] words that do not fit in registers 70 71 ldp x20,x21,[x0] // load context: working variables a..h live in x20..x27 72 ldp x22,x23,[x0,#2*8] 73 ldp x24,x25,[x0,#4*8] 74 add x2,x1,x2,lsl#7 // end of input 75 ldp x26,x27,[x0,#6*8] 76 adrp x30,LK512 77 add x30,x30,:lo12:LK512 78 stp x0,x2,[x29,#96] // stash ctx and end pointers; x0 and x2 are reused for X[] words later 79 80Loop: 81 ldp x3,x4,[x1],#2*8 82 ldr x19,[x30],#8 // *K++ 83 eor x28,x21,x22 // magic seed: b^c for the first Maj() 84 str x1,[x29,#112] // stash input pointer (already advanced by the ldp above) 85#ifndef __AARCH64EB__ 86 rev x3,x3 // 0 87#endif 88 ror x16,x24,#14 89 add x27,x27,x19 // h+=K[i] 90 eor x6,x24,x24,ror#23 91 and x17,x25,x24 92 bic x19,x26,x24 93 add x27,x27,x3 // h+=X[i] 94 orr x17,x17,x19 // Ch(e,f,g) 95 eor x19,x20,x21 // a^b, b^c in next round 96 eor x16,x16,x6,ror#18 // Sigma1(e) 97 ror x6,x20,#28 98 add x27,x27,x17 // h+=Ch(e,f,g) 99 eor x17,x20,x20,ror#5 100 add x27,x27,x16 // h+=Sigma1(e) 101 and x28,x28,x19 // (b^c)&=(a^b) 102 add x23,x23,x27 // d+=h 103 eor x28,x28,x21 // Maj(a,b,c) 104 eor x17,x6,x17,ror#34 // Sigma0(a) 105 add x27,x27,x28 // h+=Maj(a,b,c) 106 ldr x28,[x30],#8 // *K++, x19 in next round 107 //add x27,x27,x17 // h+=Sigma0(a) 108#ifndef __AARCH64EB__ 109 rev x4,x4 // 1 110#endif 111 ldp x5,x6,[x1],#2*8 112 add x27,x27,x17 // h+=Sigma0(a) 113 ror x16,x23,#14 114 add x26,x26,x28 // h+=K[i] 115 eor x7,x23,x23,ror#23 116 and x17,x24,x23 117 bic x28,x25,x23 118 add x26,x26,x4 // h+=X[i] 119 orr x17,x17,x28 // Ch(e,f,g) 120 eor x28,x27,x20 //
a^b, b^c in next round 121 eor x16,x16,x7,ror#18 // Sigma1(e) 122 ror x7,x27,#28 123 add x26,x26,x17 // h+=Ch(e,f,g) 124 eor x17,x27,x27,ror#5 125 add x26,x26,x16 // h+=Sigma1(e) 126 and x19,x19,x28 // (b^c)&=(a^b) 127 add x22,x22,x26 // d+=h 128 eor x19,x19,x20 // Maj(a,b,c) 129 eor x17,x7,x17,ror#34 // Sigma0(a) 130 add x26,x26,x19 // h+=Maj(a,b,c) 131 ldr x19,[x30],#8 // *K++, x28 in next round 132 //add x26,x26,x17 // h+=Sigma0(a) 133#ifndef __AARCH64EB__ 134 rev x5,x5 // 2 135#endif 136 add x26,x26,x17 // h+=Sigma0(a) 137 ror x16,x22,#14 138 add x25,x25,x19 // h+=K[i] 139 eor x8,x22,x22,ror#23 140 and x17,x23,x22 141 bic x19,x24,x22 142 add x25,x25,x5 // h+=X[i] 143 orr x17,x17,x19 // Ch(e,f,g) 144 eor x19,x26,x27 // a^b, b^c in next round 145 eor x16,x16,x8,ror#18 // Sigma1(e) 146 ror x8,x26,#28 147 add x25,x25,x17 // h+=Ch(e,f,g) 148 eor x17,x26,x26,ror#5 149 add x25,x25,x16 // h+=Sigma1(e) 150 and x28,x28,x19 // (b^c)&=(a^b) 151 add x21,x21,x25 // d+=h 152 eor x28,x28,x27 // Maj(a,b,c) 153 eor x17,x8,x17,ror#34 // Sigma0(a) 154 add x25,x25,x28 // h+=Maj(a,b,c) 155 ldr x28,[x30],#8 // *K++, x19 in next round 156 //add x25,x25,x17 // h+=Sigma0(a) 157#ifndef __AARCH64EB__ 158 rev x6,x6 // 3 159#endif 160 ldp x7,x8,[x1],#2*8 161 add x25,x25,x17 // h+=Sigma0(a) 162 ror x16,x21,#14 163 add x24,x24,x28 // h+=K[i] 164 eor x9,x21,x21,ror#23 165 and x17,x22,x21 166 bic x28,x23,x21 167 add x24,x24,x6 // h+=X[i] 168 orr x17,x17,x28 // Ch(e,f,g) 169 eor x28,x25,x26 // a^b, b^c in next round 170 eor x16,x16,x9,ror#18 // Sigma1(e) 171 ror x9,x25,#28 172 add x24,x24,x17 // h+=Ch(e,f,g) 173 eor x17,x25,x25,ror#5 174 add x24,x24,x16 // h+=Sigma1(e) 175 and x19,x19,x28 // (b^c)&=(a^b) 176 add x20,x20,x24 // d+=h 177 eor x19,x19,x26 // Maj(a,b,c) 178 eor x17,x9,x17,ror#34 // Sigma0(a) 179 add x24,x24,x19 // h+=Maj(a,b,c) 180 ldr x19,[x30],#8 // *K++, x28 in next round 181 //add x24,x24,x17 // h+=Sigma0(a) 182#ifndef __AARCH64EB__ 183 rev x7,x7 // 4 184#endif 185 add
x24,x24,x17 // h+=Sigma0(a) 186 ror x16,x20,#14 187 add x23,x23,x19 // h+=K[i] 188 eor x10,x20,x20,ror#23 189 and x17,x21,x20 190 bic x19,x22,x20 191 add x23,x23,x7 // h+=X[i] 192 orr x17,x17,x19 // Ch(e,f,g) 193 eor x19,x24,x25 // a^b, b^c in next round 194 eor x16,x16,x10,ror#18 // Sigma1(e) 195 ror x10,x24,#28 196 add x23,x23,x17 // h+=Ch(e,f,g) 197 eor x17,x24,x24,ror#5 198 add x23,x23,x16 // h+=Sigma1(e) 199 and x28,x28,x19 // (b^c)&=(a^b) 200 add x27,x27,x23 // d+=h 201 eor x28,x28,x25 // Maj(a,b,c) 202 eor x17,x10,x17,ror#34 // Sigma0(a) 203 add x23,x23,x28 // h+=Maj(a,b,c) 204 ldr x28,[x30],#8 // *K++, x19 in next round 205 //add x23,x23,x17 // h+=Sigma0(a) 206#ifndef __AARCH64EB__ 207 rev x8,x8 // 5 208#endif 209 ldp x9,x10,[x1],#2*8 210 add x23,x23,x17 // h+=Sigma0(a) 211 ror x16,x27,#14 212 add x22,x22,x28 // h+=K[i] 213 eor x11,x27,x27,ror#23 214 and x17,x20,x27 215 bic x28,x21,x27 216 add x22,x22,x8 // h+=X[i] 217 orr x17,x17,x28 // Ch(e,f,g) 218 eor x28,x23,x24 // a^b, b^c in next round 219 eor x16,x16,x11,ror#18 // Sigma1(e) 220 ror x11,x23,#28 221 add x22,x22,x17 // h+=Ch(e,f,g) 222 eor x17,x23,x23,ror#5 223 add x22,x22,x16 // h+=Sigma1(e) 224 and x19,x19,x28 // (b^c)&=(a^b) 225 add x26,x26,x22 // d+=h 226 eor x19,x19,x24 // Maj(a,b,c) 227 eor x17,x11,x17,ror#34 // Sigma0(a) 228 add x22,x22,x19 // h+=Maj(a,b,c) 229 ldr x19,[x30],#8 // *K++, x28 in next round 230 //add x22,x22,x17 // h+=Sigma0(a) 231#ifndef __AARCH64EB__ 232 rev x9,x9 // 6 233#endif 234 add x22,x22,x17 // h+=Sigma0(a) 235 ror x16,x26,#14 236 add x21,x21,x19 // h+=K[i] 237 eor x12,x26,x26,ror#23 238 and x17,x27,x26 239 bic x19,x20,x26 240 add x21,x21,x9 // h+=X[i] 241 orr x17,x17,x19 // Ch(e,f,g) 242 eor x19,x22,x23 // a^b, b^c in next round 243 eor x16,x16,x12,ror#18 // Sigma1(e) 244 ror x12,x22,#28 245 add x21,x21,x17 // h+=Ch(e,f,g) 246 eor x17,x22,x22,ror#5 247 add x21,x21,x16 // h+=Sigma1(e) 248 and x28,x28,x19 // (b^c)&=(a^b) 249 add x25,x25,x21 // d+=h 250 eor x28,x28,x23 //
Maj(a,b,c) 251 eor x17,x12,x17,ror#34 // Sigma0(a) 252 add x21,x21,x28 // h+=Maj(a,b,c) 253 ldr x28,[x30],#8 // *K++, x19 in next round 254 //add x21,x21,x17 // h+=Sigma0(a) 255#ifndef __AARCH64EB__ 256 rev x10,x10 // 7 257#endif 258 ldp x11,x12,[x1],#2*8 259 add x21,x21,x17 // h+=Sigma0(a) 260 ror x16,x25,#14 261 add x20,x20,x28 // h+=K[i] 262 eor x13,x25,x25,ror#23 263 and x17,x26,x25 264 bic x28,x27,x25 265 add x20,x20,x10 // h+=X[i] 266 orr x17,x17,x28 // Ch(e,f,g) 267 eor x28,x21,x22 // a^b, b^c in next round 268 eor x16,x16,x13,ror#18 // Sigma1(e) 269 ror x13,x21,#28 270 add x20,x20,x17 // h+=Ch(e,f,g) 271 eor x17,x21,x21,ror#5 272 add x20,x20,x16 // h+=Sigma1(e) 273 and x19,x19,x28 // (b^c)&=(a^b) 274 add x24,x24,x20 // d+=h 275 eor x19,x19,x22 // Maj(a,b,c) 276 eor x17,x13,x17,ror#34 // Sigma0(a) 277 add x20,x20,x19 // h+=Maj(a,b,c) 278 ldr x19,[x30],#8 // *K++, x28 in next round 279 //add x20,x20,x17 // h+=Sigma0(a) 280#ifndef __AARCH64EB__ 281 rev x11,x11 // 8 282#endif 283 add x20,x20,x17 // h+=Sigma0(a) 284 ror x16,x24,#14 285 add x27,x27,x19 // h+=K[i] 286 eor x14,x24,x24,ror#23 287 and x17,x25,x24 288 bic x19,x26,x24 289 add x27,x27,x11 // h+=X[i] 290 orr x17,x17,x19 // Ch(e,f,g) 291 eor x19,x20,x21 // a^b, b^c in next round 292 eor x16,x16,x14,ror#18 // Sigma1(e) 293 ror x14,x20,#28 294 add x27,x27,x17 // h+=Ch(e,f,g) 295 eor x17,x20,x20,ror#5 296 add x27,x27,x16 // h+=Sigma1(e) 297 and x28,x28,x19 // (b^c)&=(a^b) 298 add x23,x23,x27 // d+=h 299 eor x28,x28,x21 // Maj(a,b,c) 300 eor x17,x14,x17,ror#34 // Sigma0(a) 301 add x27,x27,x28 // h+=Maj(a,b,c) 302 ldr x28,[x30],#8 // *K++, x19 in next round 303 //add x27,x27,x17 // h+=Sigma0(a) 304#ifndef __AARCH64EB__ 305 rev x12,x12 // 9 306#endif 307 ldp x13,x14,[x1],#2*8 308 add x27,x27,x17 // h+=Sigma0(a) 309 ror x16,x23,#14 310 add x26,x26,x28 // h+=K[i] 311 eor x15,x23,x23,ror#23 312 and x17,x24,x23 313 bic x28,x25,x23 314 add x26,x26,x12 // h+=X[i] 315 orr x17,x17,x28 // Ch(e,f,g) 316 eor x28,x27,x20
// a^b, b^c in next round 317 eor x16,x16,x15,ror#18 // Sigma1(e) 318 ror x15,x27,#28 319 add x26,x26,x17 // h+=Ch(e,f,g) 320 eor x17,x27,x27,ror#5 321 add x26,x26,x16 // h+=Sigma1(e) 322 and x19,x19,x28 // (b^c)&=(a^b) 323 add x22,x22,x26 // d+=h 324 eor x19,x19,x20 // Maj(a,b,c) 325 eor x17,x15,x17,ror#34 // Sigma0(a) 326 add x26,x26,x19 // h+=Maj(a,b,c) 327 ldr x19,[x30],#8 // *K++, x28 in next round 328 //add x26,x26,x17 // h+=Sigma0(a) 329#ifndef __AARCH64EB__ 330 rev x13,x13 // 10 331#endif 332 add x26,x26,x17 // h+=Sigma0(a) 333 ror x16,x22,#14 334 add x25,x25,x19 // h+=K[i] 335 eor x0,x22,x22,ror#23 336 and x17,x23,x22 337 bic x19,x24,x22 338 add x25,x25,x13 // h+=X[i] 339 orr x17,x17,x19 // Ch(e,f,g) 340 eor x19,x26,x27 // a^b, b^c in next round 341 eor x16,x16,x0,ror#18 // Sigma1(e) 342 ror x0,x26,#28 343 add x25,x25,x17 // h+=Ch(e,f,g) 344 eor x17,x26,x26,ror#5 345 add x25,x25,x16 // h+=Sigma1(e) 346 and x28,x28,x19 // (b^c)&=(a^b) 347 add x21,x21,x25 // d+=h 348 eor x28,x28,x27 // Maj(a,b,c) 349 eor x17,x0,x17,ror#34 // Sigma0(a) 350 add x25,x25,x28 // h+=Maj(a,b,c) 351 ldr x28,[x30],#8 // *K++, x19 in next round 352 //add x25,x25,x17 // h+=Sigma0(a) 353#ifndef __AARCH64EB__ 354 rev x14,x14 // 11 355#endif 356 ldp x15,x0,[x1],#2*8 357 add x25,x25,x17 // h+=Sigma0(a) 358 str x6,[sp,#24] 359 ror x16,x21,#14 360 add x24,x24,x28 // h+=K[i] 361 eor x6,x21,x21,ror#23 362 and x17,x22,x21 363 bic x28,x23,x21 364 add x24,x24,x14 // h+=X[i] 365 orr x17,x17,x28 // Ch(e,f,g) 366 eor x28,x25,x26 // a^b, b^c in next round 367 eor x16,x16,x6,ror#18 // Sigma1(e) 368 ror x6,x25,#28 369 add x24,x24,x17 // h+=Ch(e,f,g) 370 eor x17,x25,x25,ror#5 371 add x24,x24,x16 // h+=Sigma1(e) 372 and x19,x19,x28 // (b^c)&=(a^b) 373 add x20,x20,x24 // d+=h 374 eor x19,x19,x26 // Maj(a,b,c) 375 eor x17,x6,x17,ror#34 // Sigma0(a) 376 add x24,x24,x19 // h+=Maj(a,b,c) 377 ldr x19,[x30],#8 // *K++, x28 in next round 378 //add x24,x24,x17 // h+=Sigma0(a) 379#ifndef __AARCH64EB__ 380 rev
x15,x15 // 12 381#endif 382 add x24,x24,x17 // h+=Sigma0(a) 383 str x7,[sp,#0] 384 ror x16,x20,#14 385 add x23,x23,x19 // h+=K[i] 386 eor x7,x20,x20,ror#23 387 and x17,x21,x20 388 bic x19,x22,x20 389 add x23,x23,x15 // h+=X[i] 390 orr x17,x17,x19 // Ch(e,f,g) 391 eor x19,x24,x25 // a^b, b^c in next round 392 eor x16,x16,x7,ror#18 // Sigma1(e) 393 ror x7,x24,#28 394 add x23,x23,x17 // h+=Ch(e,f,g) 395 eor x17,x24,x24,ror#5 396 add x23,x23,x16 // h+=Sigma1(e) 397 and x28,x28,x19 // (b^c)&=(a^b) 398 add x27,x27,x23 // d+=h 399 eor x28,x28,x25 // Maj(a,b,c) 400 eor x17,x7,x17,ror#34 // Sigma0(a) 401 add x23,x23,x28 // h+=Maj(a,b,c) 402 ldr x28,[x30],#8 // *K++, x19 in next round 403 //add x23,x23,x17 // h+=Sigma0(a) 404#ifndef __AARCH64EB__ 405 rev x0,x0 // 13 406#endif 407 ldp x1,x2,[x1] 408 add x23,x23,x17 // h+=Sigma0(a) 409 str x8,[sp,#8] 410 ror x16,x27,#14 411 add x22,x22,x28 // h+=K[i] 412 eor x8,x27,x27,ror#23 413 and x17,x20,x27 414 bic x28,x21,x27 415 add x22,x22,x0 // h+=X[i] 416 orr x17,x17,x28 // Ch(e,f,g) 417 eor x28,x23,x24 // a^b, b^c in next round 418 eor x16,x16,x8,ror#18 // Sigma1(e) 419 ror x8,x23,#28 420 add x22,x22,x17 // h+=Ch(e,f,g) 421 eor x17,x23,x23,ror#5 422 add x22,x22,x16 // h+=Sigma1(e) 423 and x19,x19,x28 // (b^c)&=(a^b) 424 add x26,x26,x22 // d+=h 425 eor x19,x19,x24 // Maj(a,b,c) 426 eor x17,x8,x17,ror#34 // Sigma0(a) 427 add x22,x22,x19 // h+=Maj(a,b,c) 428 ldr x19,[x30],#8 // *K++, x28 in next round 429 //add x22,x22,x17 // h+=Sigma0(a) 430#ifndef __AARCH64EB__ 431 rev x1,x1 // 14 432#endif 433 ldr x6,[sp,#24] 434 add x22,x22,x17 // h+=Sigma0(a) 435 str x9,[sp,#16] 436 ror x16,x26,#14 437 add x21,x21,x19 // h+=K[i] 438 eor x9,x26,x26,ror#23 439 and x17,x27,x26 440 bic x19,x20,x26 441 add x21,x21,x1 // h+=X[i] 442 orr x17,x17,x19 // Ch(e,f,g) 443 eor x19,x22,x23 // a^b, b^c in next round 444 eor x16,x16,x9,ror#18 // Sigma1(e) 445 ror x9,x22,#28 446 add x21,x21,x17 // h+=Ch(e,f,g) 447 eor x17,x22,x22,ror#5 448 add x21,x21,x16 //
h+=Sigma1(e) 449 and x28,x28,x19 // (b^c)&=(a^b) 450 add x25,x25,x21 // d+=h 451 eor x28,x28,x23 // Maj(a,b,c) 452 eor x17,x9,x17,ror#34 // Sigma0(a) 453 add x21,x21,x28 // h+=Maj(a,b,c) 454 ldr x28,[x30],#8 // *K++, x19 in next round 455 //add x21,x21,x17 // h+=Sigma0(a) 456#ifndef __AARCH64EB__ 457 rev x2,x2 // 15 458#endif 459 ldr x7,[sp,#0] 460 add x21,x21,x17 // h+=Sigma0(a) 461 str x10,[sp,#24] 462 ror x16,x25,#14 463 add x20,x20,x28 // h+=K[i] 464 ror x9,x4,#1 465 and x17,x26,x25 466 ror x8,x1,#19 467 bic x28,x27,x25 468 ror x10,x21,#28 469 add x20,x20,x2 // h+=X[i] 470 eor x16,x16,x25,ror#18 471 eor x9,x9,x4,ror#8 472 orr x17,x17,x28 // Ch(e,f,g) 473 eor x28,x21,x22 // a^b, b^c in next round 474 eor x16,x16,x25,ror#41 // Sigma1(e) 475 eor x10,x10,x21,ror#34 476 add x20,x20,x17 // h+=Ch(e,f,g) 477 and x19,x19,x28 // (b^c)&=(a^b) 478 eor x8,x8,x1,ror#61 479 eor x9,x9,x4,lsr#7 // sigma0(X[i+1]) 480 add x20,x20,x16 // h+=Sigma1(e) 481 eor x19,x19,x22 // Maj(a,b,c) 482 eor x17,x10,x21,ror#39 // Sigma0(a) 483 eor x8,x8,x1,lsr#6 // sigma1(X[i+14]) 484 add x3,x3,x12 485 add x24,x24,x20 // d+=h 486 add x20,x20,x19 // h+=Maj(a,b,c) 487 ldr x19,[x30],#8 // *K++, x28 in next round 488 add x3,x3,x9 489 add x20,x20,x17 // h+=Sigma0(a) 490 add x3,x3,x8 491Loop_16_xx: 492 ldr x8,[sp,#8] 493 str x11,[sp,#0] 494 ror x16,x24,#14 495 add x27,x27,x19 // h+=K[i] 496 ror x10,x5,#1 497 and x17,x25,x24 498 ror x9,x2,#19 499 bic x19,x26,x24 500 ror x11,x20,#28 501 add x27,x27,x3 // h+=X[i] 502 eor x16,x16,x24,ror#18 503 eor x10,x10,x5,ror#8 504 orr x17,x17,x19 // Ch(e,f,g) 505 eor x19,x20,x21 // a^b, b^c in next round 506 eor x16,x16,x24,ror#41 // Sigma1(e) 507 eor x11,x11,x20,ror#34 508 add x27,x27,x17 // h+=Ch(e,f,g) 509 and x28,x28,x19 // (b^c)&=(a^b) 510 eor x9,x9,x2,ror#61 511 eor x10,x10,x5,lsr#7 // sigma0(X[i+1]) 512 add x27,x27,x16 // h+=Sigma1(e) 513 eor x28,x28,x21 // Maj(a,b,c) 514 eor x17,x11,x20,ror#39 // Sigma0(a) 515 eor x9,x9,x2,lsr#6 // sigma1(X[i+14]) 516 add
x4,x4,x13 517 add x23,x23,x27 // d+=h 518 add x27,x27,x28 // h+=Maj(a,b,c) 519 ldr x28,[x30],#8 // *K++, x19 in next round 520 add x4,x4,x10 521 add x27,x27,x17 // h+=Sigma0(a) 522 add x4,x4,x9 523 ldr x9,[sp,#16] 524 str x12,[sp,#8] 525 ror x16,x23,#14 526 add x26,x26,x28 // h+=K[i] 527 ror x11,x6,#1 528 and x17,x24,x23 529 ror x10,x3,#19 530 bic x28,x25,x23 531 ror x12,x27,#28 532 add x26,x26,x4 // h+=X[i] 533 eor x16,x16,x23,ror#18 534 eor x11,x11,x6,ror#8 535 orr x17,x17,x28 // Ch(e,f,g) 536 eor x28,x27,x20 // a^b, b^c in next round 537 eor x16,x16,x23,ror#41 // Sigma1(e) 538 eor x12,x12,x27,ror#34 539 add x26,x26,x17 // h+=Ch(e,f,g) 540 and x19,x19,x28 // (b^c)&=(a^b) 541 eor x10,x10,x3,ror#61 542 eor x11,x11,x6,lsr#7 // sigma0(X[i+1]) 543 add x26,x26,x16 // h+=Sigma1(e) 544 eor x19,x19,x20 // Maj(a,b,c) 545 eor x17,x12,x27,ror#39 // Sigma0(a) 546 eor x10,x10,x3,lsr#6 // sigma1(X[i+14]) 547 add x5,x5,x14 548 add x22,x22,x26 // d+=h 549 add x26,x26,x19 // h+=Maj(a,b,c) 550 ldr x19,[x30],#8 // *K++, x28 in next round 551 add x5,x5,x11 552 add x26,x26,x17 // h+=Sigma0(a) 553 add x5,x5,x10 554 ldr x10,[sp,#24] 555 str x13,[sp,#16] 556 ror x16,x22,#14 557 add x25,x25,x19 // h+=K[i] 558 ror x12,x7,#1 559 and x17,x23,x22 560 ror x11,x4,#19 561 bic x19,x24,x22 562 ror x13,x26,#28 563 add x25,x25,x5 // h+=X[i] 564 eor x16,x16,x22,ror#18 565 eor x12,x12,x7,ror#8 566 orr x17,x17,x19 // Ch(e,f,g) 567 eor x19,x26,x27 // a^b, b^c in next round 568 eor x16,x16,x22,ror#41 // Sigma1(e) 569 eor x13,x13,x26,ror#34 570 add x25,x25,x17 // h+=Ch(e,f,g) 571 and x28,x28,x19 // (b^c)&=(a^b) 572 eor x11,x11,x4,ror#61 573 eor x12,x12,x7,lsr#7 // sigma0(X[i+1]) 574 add x25,x25,x16 // h+=Sigma1(e) 575 eor x28,x28,x27 // Maj(a,b,c) 576 eor x17,x13,x26,ror#39 // Sigma0(a) 577 eor x11,x11,x4,lsr#6 // sigma1(X[i+14]) 578 add x6,x6,x15 579 add x21,x21,x25 // d+=h 580 add x25,x25,x28 // h+=Maj(a,b,c) 581 ldr x28,[x30],#8 // *K++, x19 in next round 582 add x6,x6,x12 583 add x25,x25,x17 //
h+=Sigma0(a) 584 add x6,x6,x11 585 ldr x11,[sp,#0] 586 str x14,[sp,#24] 587 ror x16,x21,#14 588 add x24,x24,x28 // h+=K[i] 589 ror x13,x8,#1 590 and x17,x22,x21 591 ror x12,x5,#19 592 bic x28,x23,x21 593 ror x14,x25,#28 594 add x24,x24,x6 // h+=X[i] 595 eor x16,x16,x21,ror#18 596 eor x13,x13,x8,ror#8 597 orr x17,x17,x28 // Ch(e,f,g) 598 eor x28,x25,x26 // a^b, b^c in next round 599 eor x16,x16,x21,ror#41 // Sigma1(e) 600 eor x14,x14,x25,ror#34 601 add x24,x24,x17 // h+=Ch(e,f,g) 602 and x19,x19,x28 // (b^c)&=(a^b) 603 eor x12,x12,x5,ror#61 604 eor x13,x13,x8,lsr#7 // sigma0(X[i+1]) 605 add x24,x24,x16 // h+=Sigma1(e) 606 eor x19,x19,x26 // Maj(a,b,c) 607 eor x17,x14,x25,ror#39 // Sigma0(a) 608 eor x12,x12,x5,lsr#6 // sigma1(X[i+14]) 609 add x7,x7,x0 610 add x20,x20,x24 // d+=h 611 add x24,x24,x19 // h+=Maj(a,b,c) 612 ldr x19,[x30],#8 // *K++, x28 in next round 613 add x7,x7,x13 614 add x24,x24,x17 // h+=Sigma0(a) 615 add x7,x7,x12 616 ldr x12,[sp,#8] 617 str x15,[sp,#0] 618 ror x16,x20,#14 619 add x23,x23,x19 // h+=K[i] 620 ror x14,x9,#1 621 and x17,x21,x20 622 ror x13,x6,#19 623 bic x19,x22,x20 624 ror x15,x24,#28 625 add x23,x23,x7 // h+=X[i] 626 eor x16,x16,x20,ror#18 627 eor x14,x14,x9,ror#8 628 orr x17,x17,x19 // Ch(e,f,g) 629 eor x19,x24,x25 // a^b, b^c in next round 630 eor x16,x16,x20,ror#41 // Sigma1(e) 631 eor x15,x15,x24,ror#34 632 add x23,x23,x17 // h+=Ch(e,f,g) 633 and x28,x28,x19 // (b^c)&=(a^b) 634 eor x13,x13,x6,ror#61 635 eor x14,x14,x9,lsr#7 // sigma0(X[i+1]) 636 add x23,x23,x16 // h+=Sigma1(e) 637 eor x28,x28,x25 // Maj(a,b,c) 638 eor x17,x15,x24,ror#39 // Sigma0(a) 639 eor x13,x13,x6,lsr#6 // sigma1(X[i+14]) 640 add x8,x8,x1 641 add x27,x27,x23 // d+=h 642 add x23,x23,x28 // h+=Maj(a,b,c) 643 ldr x28,[x30],#8 // *K++, x19 in next round 644 add x8,x8,x14 645 add x23,x23,x17 // h+=Sigma0(a) 646 add x8,x8,x13 647 ldr x13,[sp,#16] 648 str x0,[sp,#8] 649 ror x16,x27,#14 650 add x22,x22,x28 // h+=K[i] 651 ror x15,x10,#1 652 and x17,x20,x27 653 ror
x14,x7,#19 654 bic x28,x21,x27 655 ror x0,x23,#28 656 add x22,x22,x8 // h+=X[i] 657 eor x16,x16,x27,ror#18 658 eor x15,x15,x10,ror#8 659 orr x17,x17,x28 // Ch(e,f,g) 660 eor x28,x23,x24 // a^b, b^c in next round 661 eor x16,x16,x27,ror#41 // Sigma1(e) 662 eor x0,x0,x23,ror#34 663 add x22,x22,x17 // h+=Ch(e,f,g) 664 and x19,x19,x28 // (b^c)&=(a^b) 665 eor x14,x14,x7,ror#61 666 eor x15,x15,x10,lsr#7 // sigma0(X[i+1]) 667 add x22,x22,x16 // h+=Sigma1(e) 668 eor x19,x19,x24 // Maj(a,b,c) 669 eor x17,x0,x23,ror#39 // Sigma0(a) 670 eor x14,x14,x7,lsr#6 // sigma1(X[i+14]) 671 add x9,x9,x2 672 add x26,x26,x22 // d+=h 673 add x22,x22,x19 // h+=Maj(a,b,c) 674 ldr x19,[x30],#8 // *K++, x28 in next round 675 add x9,x9,x15 676 add x22,x22,x17 // h+=Sigma0(a) 677 add x9,x9,x14 678 ldr x14,[sp,#24] 679 str x1,[sp,#16] 680 ror x16,x26,#14 681 add x21,x21,x19 // h+=K[i] 682 ror x0,x11,#1 683 and x17,x27,x26 684 ror x15,x8,#19 685 bic x19,x20,x26 686 ror x1,x22,#28 687 add x21,x21,x9 // h+=X[i] 688 eor x16,x16,x26,ror#18 689 eor x0,x0,x11,ror#8 690 orr x17,x17,x19 // Ch(e,f,g) 691 eor x19,x22,x23 // a^b, b^c in next round 692 eor x16,x16,x26,ror#41 // Sigma1(e) 693 eor x1,x1,x22,ror#34 694 add x21,x21,x17 // h+=Ch(e,f,g) 695 and x28,x28,x19 // (b^c)&=(a^b) 696 eor x15,x15,x8,ror#61 697 eor x0,x0,x11,lsr#7 // sigma0(X[i+1]) 698 add x21,x21,x16 // h+=Sigma1(e) 699 eor x28,x28,x23 // Maj(a,b,c) 700 eor x17,x1,x22,ror#39 // Sigma0(a) 701 eor x15,x15,x8,lsr#6 // sigma1(X[i+14]) 702 add x10,x10,x3 703 add x25,x25,x21 // d+=h 704 add x21,x21,x28 // h+=Maj(a,b,c) 705 ldr x28,[x30],#8 // *K++, x19 in next round 706 add x10,x10,x0 707 add x21,x21,x17 // h+=Sigma0(a) 708 add x10,x10,x15 709 ldr x15,[sp,#0] 710 str x2,[sp,#24] 711 ror x16,x25,#14 712 add x20,x20,x28 // h+=K[i] 713 ror x1,x12,#1 714 and x17,x26,x25 715 ror x0,x9,#19 716 bic x28,x27,x25 717 ror x2,x21,#28 718 add x20,x20,x10 // h+=X[i] 719 eor x16,x16,x25,ror#18 720 eor x1,x1,x12,ror#8 721 orr x17,x17,x28 // Ch(e,f,g) 722 eor
x28,x21,x22 // a^b, b^c in next round 723 eor x16,x16,x25,ror#41 // Sigma1(e) 724 eor x2,x2,x21,ror#34 725 add x20,x20,x17 // h+=Ch(e,f,g) 726 and x19,x19,x28 // (b^c)&=(a^b) 727 eor x0,x0,x9,ror#61 728 eor x1,x1,x12,lsr#7 // sigma0(X[i+1]) 729 add x20,x20,x16 // h+=Sigma1(e) 730 eor x19,x19,x22 // Maj(a,b,c) 731 eor x17,x2,x21,ror#39 // Sigma0(a) 732 eor x0,x0,x9,lsr#6 // sigma1(X[i+14]) 733 add x11,x11,x4 734 add x24,x24,x20 // d+=h 735 add x20,x20,x19 // h+=Maj(a,b,c) 736 ldr x19,[x30],#8 // *K++, x28 in next round 737 add x11,x11,x1 738 add x20,x20,x17 // h+=Sigma0(a) 739 add x11,x11,x0 740 ldr x0,[sp,#8] 741 str x3,[sp,#0] 742 ror x16,x24,#14 743 add x27,x27,x19 // h+=K[i] 744 ror x2,x13,#1 745 and x17,x25,x24 746 ror x1,x10,#19 747 bic x19,x26,x24 748 ror x3,x20,#28 749 add x27,x27,x11 // h+=X[i] 750 eor x16,x16,x24,ror#18 751 eor x2,x2,x13,ror#8 752 orr x17,x17,x19 // Ch(e,f,g) 753 eor x19,x20,x21 // a^b, b^c in next round 754 eor x16,x16,x24,ror#41 // Sigma1(e) 755 eor x3,x3,x20,ror#34 756 add x27,x27,x17 // h+=Ch(e,f,g) 757 and x28,x28,x19 // (b^c)&=(a^b) 758 eor x1,x1,x10,ror#61 759 eor x2,x2,x13,lsr#7 // sigma0(X[i+1]) 760 add x27,x27,x16 // h+=Sigma1(e) 761 eor x28,x28,x21 // Maj(a,b,c) 762 eor x17,x3,x20,ror#39 // Sigma0(a) 763 eor x1,x1,x10,lsr#6 // sigma1(X[i+14]) 764 add x12,x12,x5 765 add x23,x23,x27 // d+=h 766 add x27,x27,x28 // h+=Maj(a,b,c) 767 ldr x28,[x30],#8 // *K++, x19 in next round 768 add x12,x12,x2 769 add x27,x27,x17 // h+=Sigma0(a) 770 add x12,x12,x1 771 ldr x1,[sp,#16] 772 str x4,[sp,#8] 773 ror x16,x23,#14 774 add x26,x26,x28 // h+=K[i] 775 ror x3,x14,#1 776 and x17,x24,x23 777 ror x2,x11,#19 778 bic x28,x25,x23 779 ror x4,x27,#28 780 add x26,x26,x12 // h+=X[i] 781 eor x16,x16,x23,ror#18 782 eor x3,x3,x14,ror#8 783 orr x17,x17,x28 // Ch(e,f,g) 784 eor x28,x27,x20 // a^b, b^c in next round 785 eor x16,x16,x23,ror#41 // Sigma1(e) 786 eor x4,x4,x27,ror#34 787 add x26,x26,x17 // h+=Ch(e,f,g) 788 and x19,x19,x28 // (b^c)&=(a^b) 789 eor
x2,x2,x11,ror#61 790 eor x3,x3,x14,lsr#7 // sigma0(X[i+1]) 791 add x26,x26,x16 // h+=Sigma1(e) 792 eor x19,x19,x20 // Maj(a,b,c) 793 eor x17,x4,x27,ror#39 // Sigma0(a) 794 eor x2,x2,x11,lsr#6 // sigma1(X[i+14]) 795 add x13,x13,x6 796 add x22,x22,x26 // d+=h 797 add x26,x26,x19 // h+=Maj(a,b,c) 798 ldr x19,[x30],#8 // *K++, x28 in next round 799 add x13,x13,x3 800 add x26,x26,x17 // h+=Sigma0(a) 801 add x13,x13,x2 802 ldr x2,[sp,#24] 803 str x5,[sp,#16] 804 ror x16,x22,#14 805 add x25,x25,x19 // h+=K[i] 806 ror x4,x15,#1 807 and x17,x23,x22 808 ror x3,x12,#19 809 bic x19,x24,x22 810 ror x5,x26,#28 811 add x25,x25,x13 // h+=X[i] 812 eor x16,x16,x22,ror#18 813 eor x4,x4,x15,ror#8 814 orr x17,x17,x19 // Ch(e,f,g) 815 eor x19,x26,x27 // a^b, b^c in next round 816 eor x16,x16,x22,ror#41 // Sigma1(e) 817 eor x5,x5,x26,ror#34 818 add x25,x25,x17 // h+=Ch(e,f,g) 819 and x28,x28,x19 // (b^c)&=(a^b) 820 eor x3,x3,x12,ror#61 821 eor x4,x4,x15,lsr#7 // sigma0(X[i+1]) 822 add x25,x25,x16 // h+=Sigma1(e) 823 eor x28,x28,x27 // Maj(a,b,c) 824 eor x17,x5,x26,ror#39 // Sigma0(a) 825 eor x3,x3,x12,lsr#6 // sigma1(X[i+14]) 826 add x14,x14,x7 827 add x21,x21,x25 // d+=h 828 add x25,x25,x28 // h+=Maj(a,b,c) 829 ldr x28,[x30],#8 // *K++, x19 in next round 830 add x14,x14,x4 831 add x25,x25,x17 // h+=Sigma0(a) 832 add x14,x14,x3 833 ldr x3,[sp,#0] 834 str x6,[sp,#24] 835 ror x16,x21,#14 836 add x24,x24,x28 // h+=K[i] 837 ror x5,x0,#1 838 and x17,x22,x21 839 ror x4,x13,#19 840 bic x28,x23,x21 841 ror x6,x25,#28 842 add x24,x24,x14 // h+=X[i] 843 eor x16,x16,x21,ror#18 844 eor x5,x5,x0,ror#8 845 orr x17,x17,x28 // Ch(e,f,g) 846 eor x28,x25,x26 // a^b, b^c in next round 847 eor x16,x16,x21,ror#41 // Sigma1(e) 848 eor x6,x6,x25,ror#34 849 add x24,x24,x17 // h+=Ch(e,f,g) 850 and x19,x19,x28 // (b^c)&=(a^b) 851 eor x4,x4,x13,ror#61 852 eor x5,x5,x0,lsr#7 // sigma0(X[i+1]) 853 add x24,x24,x16 // h+=Sigma1(e) 854 eor x19,x19,x26 // Maj(a,b,c) 855 eor x17,x6,x25,ror#39 // Sigma0(a) 856 eor
x4,x4,x13,lsr#6 // sigma1(X[i+14]) 857 add x15,x15,x8 858 add x20,x20,x24 // d+=h 859 add x24,x24,x19 // h+=Maj(a,b,c) 860 ldr x19,[x30],#8 // *K++, x28 in next round 861 add x15,x15,x5 862 add x24,x24,x17 // h+=Sigma0(a) 863 add x15,x15,x4 864 ldr x4,[sp,#8] 865 str x7,[sp,#0] 866 ror x16,x20,#14 867 add x23,x23,x19 // h+=K[i] 868 ror x6,x1,#1 869 and x17,x21,x20 870 ror x5,x14,#19 871 bic x19,x22,x20 872 ror x7,x24,#28 873 add x23,x23,x15 // h+=X[i] 874 eor x16,x16,x20,ror#18 875 eor x6,x6,x1,ror#8 876 orr x17,x17,x19 // Ch(e,f,g) 877 eor x19,x24,x25 // a^b, b^c in next round 878 eor x16,x16,x20,ror#41 // Sigma1(e) 879 eor x7,x7,x24,ror#34 880 add x23,x23,x17 // h+=Ch(e,f,g) 881 and x28,x28,x19 // (b^c)&=(a^b) 882 eor x5,x5,x14,ror#61 883 eor x6,x6,x1,lsr#7 // sigma0(X[i+1]) 884 add x23,x23,x16 // h+=Sigma1(e) 885 eor x28,x28,x25 // Maj(a,b,c) 886 eor x17,x7,x24,ror#39 // Sigma0(a) 887 eor x5,x5,x14,lsr#6 // sigma1(X[i+14]) 888 add x0,x0,x9 889 add x27,x27,x23 // d+=h 890 add x23,x23,x28 // h+=Maj(a,b,c) 891 ldr x28,[x30],#8 // *K++, x19 in next round 892 add x0,x0,x6 893 add x23,x23,x17 // h+=Sigma0(a) 894 add x0,x0,x5 895 ldr x5,[sp,#16] 896 str x8,[sp,#8] 897 ror x16,x27,#14 898 add x22,x22,x28 // h+=K[i] 899 ror x7,x2,#1 900 and x17,x20,x27 901 ror x6,x15,#19 902 bic x28,x21,x27 903 ror x8,x23,#28 904 add x22,x22,x0 // h+=X[i] 905 eor x16,x16,x27,ror#18 906 eor x7,x7,x2,ror#8 907 orr x17,x17,x28 // Ch(e,f,g) 908 eor x28,x23,x24 // a^b, b^c in next round 909 eor x16,x16,x27,ror#41 // Sigma1(e) 910 eor x8,x8,x23,ror#34 911 add x22,x22,x17 // h+=Ch(e,f,g) 912 and x19,x19,x28 // (b^c)&=(a^b) 913 eor x6,x6,x15,ror#61 914 eor x7,x7,x2,lsr#7 // sigma0(X[i+1]) 915 add x22,x22,x16 // h+=Sigma1(e) 916 eor x19,x19,x24 // Maj(a,b,c) 917 eor x17,x8,x23,ror#39 // Sigma0(a) 918 eor x6,x6,x15,lsr#6 // sigma1(X[i+14]) 919 add x1,x1,x10 920 add x26,x26,x22 // d+=h 921 add x22,x22,x19 // h+=Maj(a,b,c) 922 ldr x19,[x30],#8 // *K++, x28 in next round 923 add x1,x1,x7 924 add
x22,x22,x17 // h+=Sigma0(a) 925 add x1,x1,x6 926 ldr x6,[sp,#24] 927 str x9,[sp,#16] 928 ror x16,x26,#14 929 add x21,x21,x19 // h+=K[i] 930 ror x8,x3,#1 931 and x17,x27,x26 932 ror x7,x0,#19 933 bic x19,x20,x26 934 ror x9,x22,#28 935 add x21,x21,x1 // h+=X[i] 936 eor x16,x16,x26,ror#18 937 eor x8,x8,x3,ror#8 938 orr x17,x17,x19 // Ch(e,f,g) 939 eor x19,x22,x23 // a^b, b^c in next round 940 eor x16,x16,x26,ror#41 // Sigma1(e) 941 eor x9,x9,x22,ror#34 942 add x21,x21,x17 // h+=Ch(e,f,g) 943 and x28,x28,x19 // (b^c)&=(a^b) 944 eor x7,x7,x0,ror#61 945 eor x8,x8,x3,lsr#7 // sigma0(X[i+1]) 946 add x21,x21,x16 // h+=Sigma1(e) 947 eor x28,x28,x23 // Maj(a,b,c) 948 eor x17,x9,x22,ror#39 // Sigma0(a) 949 eor x7,x7,x0,lsr#6 // sigma1(X[i+14]) 950 add x2,x2,x11 951 add x25,x25,x21 // d+=h 952 add x21,x21,x28 // h+=Maj(a,b,c) 953 ldr x28,[x30],#8 // *K++, x19 in next round 954 add x2,x2,x8 955 add x21,x21,x17 // h+=Sigma0(a) 956 add x2,x2,x7 957 ldr x7,[sp,#0] 958 str x10,[sp,#24] 959 ror x16,x25,#14 960 add x20,x20,x28 // h+=K[i] 961 ror x9,x4,#1 962 and x17,x26,x25 963 ror x8,x1,#19 964 bic x28,x27,x25 965 ror x10,x21,#28 966 add x20,x20,x2 // h+=X[i] 967 eor x16,x16,x25,ror#18 968 eor x9,x9,x4,ror#8 969 orr x17,x17,x28 // Ch(e,f,g) 970 eor x28,x21,x22 // a^b, b^c in next round 971 eor x16,x16,x25,ror#41 // Sigma1(e) 972 eor x10,x10,x21,ror#34 973 add x20,x20,x17 // h+=Ch(e,f,g) 974 and x19,x19,x28 // (b^c)&=(a^b) 975 eor x8,x8,x1,ror#61 976 eor x9,x9,x4,lsr#7 // sigma0(X[i+1]) 977 add x20,x20,x16 // h+=Sigma1(e) 978 eor x19,x19,x22 // Maj(a,b,c) 979 eor x17,x10,x21,ror#39 // Sigma0(a) 980 eor x8,x8,x1,lsr#6 // sigma1(X[i+14]) 981 add x3,x3,x12 982 add x24,x24,x20 // d+=h 983 add x20,x20,x19 // h+=Maj(a,b,c) 984 ldr x19,[x30],#8 // *K++, x28 in next round 985 add x3,x3,x9 986 add x20,x20,x17 // h+=Sigma0(a) 987 add x3,x3,x8 988 cbnz x19,Loop_16_xx // x19 holds the K word just fetched; keep going until the zero terminator of LK512 is loaded 989 990 ldp x0,x2,[x29,#96] // reload ctx and end-of-input pointers 991 ldr x1,[x29,#112] 992 sub x30,x30,#648 // rewind K pointer back to the start of LK512 993 994 ldp x3,x4,[x0] // previous hash state, folded into a..h below 995 ldp x5,x6,[x0,#2*8]
996 add x1,x1,#14*8 // advance input pointer: the saved value already includes the first two words 997 ldp x7,x8,[x0,#4*8] 998 add x20,x20,x3 999 ldp x9,x10,[x0,#6*8] 1000 add x21,x21,x4 1001 add x22,x22,x5 1002 add x23,x23,x6 1003 stp x20,x21,[x0] 1004 add x24,x24,x7 1005 add x25,x25,x8 1006 stp x22,x23,[x0,#2*8] 1007 add x26,x26,x9 1008 add x27,x27,x10 1009 cmp x1,x2 1010 stp x24,x25,[x0,#4*8] 1011 stp x26,x27,[x0,#6*8] 1012 b.ne Loop // another block remains until x1 reaches the end pointer 1013 1014 ldp x19,x20,[x29,#16] 1015 add sp,sp,#4*8 // release the X[] spill slots 1016 ldp x21,x22,[x29,#32] 1017 ldp x23,x24,[x29,#48] 1018 ldp x25,x26,[x29,#64] 1019 ldp x27,x28,[x29,#80] 1020 ldp x29,x30,[sp],#128 1021 AARCH64_VALIDATE_LINK_REGISTER 1022 ret 1023 1024 1025.section .rodata 1026.align 6 1027 1028LK512: 1029.quad 0x428a2f98d728ae22,0x7137449123ef65cd 1030.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc 1031.quad 0x3956c25bf348b538,0x59f111f1b605d019 1032.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 1033.quad 0xd807aa98a3030242,0x12835b0145706fbe 1034.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 1035.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 1036.quad 0x9bdc06a725c71235,0xc19bf174cf692694 1037.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 1038.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 1039.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 1040.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 1041.quad 0x983e5152ee66dfab,0xa831c66d2db43210 1042.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 1043.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 1044.quad 0x06ca6351e003826f,0x142929670a0e6e70 1045.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 1046.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df 1047.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 1048.quad 0x81c2c92e47edaee6,0x92722c851482353b 1049.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 1050.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 1051.quad 0xd192e819d6ef5218,0xd69906245565a910 1052.quad 0xf40e35855771202a,0x106aa07032bbd1b8 1053.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 1054.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 1055.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb 1056.quad
0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 1057.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 1058.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec 1059.quad 0x90befffa23631e28,0xa4506cebde82bde9 1060.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b 1061.quad 0xca273eceea26619c,0xd186b8c721c0c207 1062.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 1063.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 1064.quad 0x113f9804bef90dae,0x1b710b35131c471b 1065.quad 0x28db77f523047d84,0x32caab7b40c72493 1066.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c 1067.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a 1068.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 1069.quad 0 // terminator 1070 1071.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 1072.align 2 1073.align 2 1074.text 1075#ifndef __KERNEL__ 1076.globl sha512_block_data_order_hw 1077 1078.def sha512_block_data_order_hw 1079 .type 32 1080.endef 1081.align 6 1082sha512_block_data_order_hw: 1083 // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. 1084 AARCH64_VALID_CALL_TARGET 1085 stp x29,x30,[sp,#-16]! 
1086 add x29,sp,#0 1087 1088 ld1 {v16.16b,v17.16b,v18.16b,v19.16b},[x1],#64 // load input 1089 ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64 1090 1091 ld1 {v0.2d,v1.2d,v2.2d,v3.2d},[x0] // load context 1092 adrp x3,LK512 1093 add x3,x3,:lo12:LK512 1094 1095 rev64 v16.16b,v16.16b 1096 rev64 v17.16b,v17.16b 1097 rev64 v18.16b,v18.16b 1098 rev64 v19.16b,v19.16b 1099 rev64 v20.16b,v20.16b 1100 rev64 v21.16b,v21.16b 1101 rev64 v22.16b,v22.16b 1102 rev64 v23.16b,v23.16b 1103 b Loop_hw 1104 1105.align 4 1106Loop_hw: 1107 ld1 {v24.2d},[x3],#16 1108 subs x2,x2,#1 1109 sub x4,x1,#128 1110 orr v26.16b,v0.16b,v0.16b // offload 1111 orr v27.16b,v1.16b,v1.16b 1112 orr v28.16b,v2.16b,v2.16b 1113 orr v29.16b,v3.16b,v3.16b 1114 csel x1,x1,x4,ne // conditional rewind 1115 add v24.2d,v24.2d,v16.2d 1116 ld1 {v25.2d},[x3],#16 1117 ext v24.16b,v24.16b,v24.16b,#8 1118 ext v5.16b,v2.16b,v3.16b,#8 1119 ext v6.16b,v1.16b,v2.16b,#8 1120 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" 1121.long 0xcec08230 //sha512su0 v16.16b,v17.16b 1122 ext v7.16b,v20.16b,v21.16b,#8 1123.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1124.long 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b 1125 add v4.2d,v1.2d,v3.2d // "D + T1" 1126.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1127 add v25.2d,v25.2d,v17.2d 1128 ld1 {v24.2d},[x3],#16 1129 ext v25.16b,v25.16b,v25.16b,#8 1130 ext v5.16b,v4.16b,v2.16b,#8 1131 ext v6.16b,v0.16b,v4.16b,#8 1132 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" 1133.long 0xcec08251 //sha512su0 v17.16b,v18.16b 1134 ext v7.16b,v21.16b,v22.16b,#8 1135.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1136.long 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b 1137 add v1.2d,v0.2d,v2.2d // "D + T1" 1138.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1139 add v24.2d,v24.2d,v18.2d 1140 ld1 {v25.2d},[x3],#16 1141 ext v24.16b,v24.16b,v24.16b,#8 1142 ext v5.16b,v1.16b,v4.16b,#8 1143 ext v6.16b,v3.16b,v1.16b,#8 1144 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" 1145.long 0xcec08272 //sha512su0 
v18.16b,v19.16b 1146 ext v7.16b,v22.16b,v23.16b,#8 1147.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1148.long 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b 1149 add v0.2d,v3.2d,v4.2d // "D + T1" 1150.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1151 add v25.2d,v25.2d,v19.2d 1152 ld1 {v24.2d},[x3],#16 1153 ext v25.16b,v25.16b,v25.16b,#8 1154 ext v5.16b,v0.16b,v1.16b,#8 1155 ext v6.16b,v2.16b,v0.16b,#8 1156 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" 1157.long 0xcec08293 //sha512su0 v19.16b,v20.16b 1158 ext v7.16b,v23.16b,v16.16b,#8 1159.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1160.long 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b 1161 add v3.2d,v2.2d,v1.2d // "D + T1" 1162.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1163 add v24.2d,v24.2d,v20.2d 1164 ld1 {v25.2d},[x3],#16 1165 ext v24.16b,v24.16b,v24.16b,#8 1166 ext v5.16b,v3.16b,v0.16b,#8 1167 ext v6.16b,v4.16b,v3.16b,#8 1168 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" 1169.long 0xcec082b4 //sha512su0 v20.16b,v21.16b 1170 ext v7.16b,v16.16b,v17.16b,#8 1171.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1172.long 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b 1173 add v2.2d,v4.2d,v0.2d // "D + T1" 1174.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1175 add v25.2d,v25.2d,v21.2d 1176 ld1 {v24.2d},[x3],#16 1177 ext v25.16b,v25.16b,v25.16b,#8 1178 ext v5.16b,v2.16b,v3.16b,#8 1179 ext v6.16b,v1.16b,v2.16b,#8 1180 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1181.long 0xcec082d5 //sha512su0 v21.16b,v22.16b 1182 ext v7.16b,v17.16b,v18.16b,#8 1183.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1184.long 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b 1185 add v4.2d,v1.2d,v3.2d // "D + T1" 1186.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1187 add v24.2d,v24.2d,v22.2d 1188 ld1 {v25.2d},[x3],#16 1189 ext v24.16b,v24.16b,v24.16b,#8 1190 ext v5.16b,v4.16b,v2.16b,#8 1191 ext v6.16b,v0.16b,v4.16b,#8 1192 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1193.long 0xcec082f6 //sha512su0 v22.16b,v23.16b 1194 ext 
v7.16b,v18.16b,v19.16b,#8 1195.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1196.long 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b 1197 add v1.2d,v0.2d,v2.2d // "D + T1" 1198.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1199 add v25.2d,v25.2d,v23.2d 1200 ld1 {v24.2d},[x3],#16 1201 ext v25.16b,v25.16b,v25.16b,#8 1202 ext v5.16b,v1.16b,v4.16b,#8 1203 ext v6.16b,v3.16b,v1.16b,#8 1204 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1205.long 0xcec08217 //sha512su0 v23.16b,v16.16b 1206 ext v7.16b,v19.16b,v20.16b,#8 1207.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1208.long 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1209 add v0.2d,v3.2d,v4.2d // "D + T1" 1210.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1211 add v24.2d,v24.2d,v16.2d 1212 ld1 {v25.2d},[x3],#16 1213 ext v24.16b,v24.16b,v24.16b,#8 1214 ext v5.16b,v0.16b,v1.16b,#8 1215 ext v6.16b,v2.16b,v0.16b,#8 1216 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1217.long 0xcec08230 //sha512su0 v16.16b,v17.16b 1218 ext v7.16b,v20.16b,v21.16b,#8 1219.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1220.long 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b 1221 add v3.2d,v2.2d,v1.2d // "D + T1" 1222.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1223 add v25.2d,v25.2d,v17.2d 1224 ld1 {v24.2d},[x3],#16 1225 ext v25.16b,v25.16b,v25.16b,#8 1226 ext v5.16b,v3.16b,v0.16b,#8 1227 ext v6.16b,v4.16b,v3.16b,#8 1228 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1229.long 0xcec08251 //sha512su0 v17.16b,v18.16b 1230 ext v7.16b,v21.16b,v22.16b,#8 1231.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1232.long 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b 1233 add v2.2d,v4.2d,v0.2d // "D + T1" 1234.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1235 add v24.2d,v24.2d,v18.2d 1236 ld1 {v25.2d},[x3],#16 1237 ext v24.16b,v24.16b,v24.16b,#8 1238 ext v5.16b,v2.16b,v3.16b,#8 1239 ext v6.16b,v1.16b,v2.16b,#8 1240 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" 1241.long 0xcec08272 //sha512su0 v18.16b,v19.16b 1242 ext v7.16b,v22.16b,v23.16b,#8 
1243.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1244.long 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b 1245 add v4.2d,v1.2d,v3.2d // "D + T1" 1246.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1247 add v25.2d,v25.2d,v19.2d 1248 ld1 {v24.2d},[x3],#16 1249 ext v25.16b,v25.16b,v25.16b,#8 1250 ext v5.16b,v4.16b,v2.16b,#8 1251 ext v6.16b,v0.16b,v4.16b,#8 1252 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" 1253.long 0xcec08293 //sha512su0 v19.16b,v20.16b 1254 ext v7.16b,v23.16b,v16.16b,#8 1255.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1256.long 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b 1257 add v1.2d,v0.2d,v2.2d // "D + T1" 1258.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1259 add v24.2d,v24.2d,v20.2d 1260 ld1 {v25.2d},[x3],#16 1261 ext v24.16b,v24.16b,v24.16b,#8 1262 ext v5.16b,v1.16b,v4.16b,#8 1263 ext v6.16b,v3.16b,v1.16b,#8 1264 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" 1265.long 0xcec082b4 //sha512su0 v20.16b,v21.16b 1266 ext v7.16b,v16.16b,v17.16b,#8 1267.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1268.long 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b 1269 add v0.2d,v3.2d,v4.2d // "D + T1" 1270.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1271 add v25.2d,v25.2d,v21.2d 1272 ld1 {v24.2d},[x3],#16 1273 ext v25.16b,v25.16b,v25.16b,#8 1274 ext v5.16b,v0.16b,v1.16b,#8 1275 ext v6.16b,v2.16b,v0.16b,#8 1276 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" 1277.long 0xcec082d5 //sha512su0 v21.16b,v22.16b 1278 ext v7.16b,v17.16b,v18.16b,#8 1279.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1280.long 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b 1281 add v3.2d,v2.2d,v1.2d // "D + T1" 1282.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1283 add v24.2d,v24.2d,v22.2d 1284 ld1 {v25.2d},[x3],#16 1285 ext v24.16b,v24.16b,v24.16b,#8 1286 ext v5.16b,v3.16b,v0.16b,#8 1287 ext v6.16b,v4.16b,v3.16b,#8 1288 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" 1289.long 0xcec082f6 //sha512su0 v22.16b,v23.16b 1290 ext v7.16b,v18.16b,v19.16b,#8 1291.long 0xce6680a0 
//sha512h v0.16b,v5.16b,v6.16b 1292.long 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b 1293 add v2.2d,v4.2d,v0.2d // "D + T1" 1294.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1295 add v25.2d,v25.2d,v23.2d 1296 ld1 {v24.2d},[x3],#16 1297 ext v25.16b,v25.16b,v25.16b,#8 1298 ext v5.16b,v2.16b,v3.16b,#8 1299 ext v6.16b,v1.16b,v2.16b,#8 1300 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1301.long 0xcec08217 //sha512su0 v23.16b,v16.16b 1302 ext v7.16b,v19.16b,v20.16b,#8 1303.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1304.long 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1305 add v4.2d,v1.2d,v3.2d // "D + T1" 1306.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1307 add v24.2d,v24.2d,v16.2d 1308 ld1 {v25.2d},[x3],#16 1309 ext v24.16b,v24.16b,v24.16b,#8 1310 ext v5.16b,v4.16b,v2.16b,#8 1311 ext v6.16b,v0.16b,v4.16b,#8 1312 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1313.long 0xcec08230 //sha512su0 v16.16b,v17.16b 1314 ext v7.16b,v20.16b,v21.16b,#8 1315.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1316.long 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b 1317 add v1.2d,v0.2d,v2.2d // "D + T1" 1318.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1319 add v25.2d,v25.2d,v17.2d 1320 ld1 {v24.2d},[x3],#16 1321 ext v25.16b,v25.16b,v25.16b,#8 1322 ext v5.16b,v1.16b,v4.16b,#8 1323 ext v6.16b,v3.16b,v1.16b,#8 1324 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1325.long 0xcec08251 //sha512su0 v17.16b,v18.16b 1326 ext v7.16b,v21.16b,v22.16b,#8 1327.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1328.long 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b 1329 add v0.2d,v3.2d,v4.2d // "D + T1" 1330.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1331 add v24.2d,v24.2d,v18.2d 1332 ld1 {v25.2d},[x3],#16 1333 ext v24.16b,v24.16b,v24.16b,#8 1334 ext v5.16b,v0.16b,v1.16b,#8 1335 ext v6.16b,v2.16b,v0.16b,#8 1336 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1337.long 0xcec08272 //sha512su0 v18.16b,v19.16b 1338 ext v7.16b,v22.16b,v23.16b,#8 1339.long 0xce6680a1 //sha512h 
v1.16b,v5.16b,v6.16b 1340.long 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b 1341 add v3.2d,v2.2d,v1.2d // "D + T1" 1342.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1343 add v25.2d,v25.2d,v19.2d 1344 ld1 {v24.2d},[x3],#16 1345 ext v25.16b,v25.16b,v25.16b,#8 1346 ext v5.16b,v3.16b,v0.16b,#8 1347 ext v6.16b,v4.16b,v3.16b,#8 1348 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1349.long 0xcec08293 //sha512su0 v19.16b,v20.16b 1350 ext v7.16b,v23.16b,v16.16b,#8 1351.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1352.long 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b 1353 add v2.2d,v4.2d,v0.2d // "D + T1" 1354.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1355 add v24.2d,v24.2d,v20.2d 1356 ld1 {v25.2d},[x3],#16 1357 ext v24.16b,v24.16b,v24.16b,#8 1358 ext v5.16b,v2.16b,v3.16b,#8 1359 ext v6.16b,v1.16b,v2.16b,#8 1360 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" 1361.long 0xcec082b4 //sha512su0 v20.16b,v21.16b 1362 ext v7.16b,v16.16b,v17.16b,#8 1363.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1364.long 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b 1365 add v4.2d,v1.2d,v3.2d // "D + T1" 1366.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1367 add v25.2d,v25.2d,v21.2d 1368 ld1 {v24.2d},[x3],#16 1369 ext v25.16b,v25.16b,v25.16b,#8 1370 ext v5.16b,v4.16b,v2.16b,#8 1371 ext v6.16b,v0.16b,v4.16b,#8 1372 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" 1373.long 0xcec082d5 //sha512su0 v21.16b,v22.16b 1374 ext v7.16b,v17.16b,v18.16b,#8 1375.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1376.long 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b 1377 add v1.2d,v0.2d,v2.2d // "D + T1" 1378.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1379 add v24.2d,v24.2d,v22.2d 1380 ld1 {v25.2d},[x3],#16 1381 ext v24.16b,v24.16b,v24.16b,#8 1382 ext v5.16b,v1.16b,v4.16b,#8 1383 ext v6.16b,v3.16b,v1.16b,#8 1384 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" 1385.long 0xcec082f6 //sha512su0 v22.16b,v23.16b 1386 ext v7.16b,v18.16b,v19.16b,#8 1387.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 
1388.long 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b 1389 add v0.2d,v3.2d,v4.2d // "D + T1" 1390.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1391 add v25.2d,v25.2d,v23.2d 1392 ld1 {v24.2d},[x3],#16 1393 ext v25.16b,v25.16b,v25.16b,#8 1394 ext v5.16b,v0.16b,v1.16b,#8 1395 ext v6.16b,v2.16b,v0.16b,#8 1396 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" 1397.long 0xcec08217 //sha512su0 v23.16b,v16.16b 1398 ext v7.16b,v19.16b,v20.16b,#8 1399.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1400.long 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1401 add v3.2d,v2.2d,v1.2d // "D + T1" 1402.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1403 add v24.2d,v24.2d,v16.2d 1404 ld1 {v25.2d},[x3],#16 1405 ext v24.16b,v24.16b,v24.16b,#8 1406 ext v5.16b,v3.16b,v0.16b,#8 1407 ext v6.16b,v4.16b,v3.16b,#8 1408 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" 1409.long 0xcec08230 //sha512su0 v16.16b,v17.16b 1410 ext v7.16b,v20.16b,v21.16b,#8 1411.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1412.long 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b 1413 add v2.2d,v4.2d,v0.2d // "D + T1" 1414.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1415 add v25.2d,v25.2d,v17.2d 1416 ld1 {v24.2d},[x3],#16 1417 ext v25.16b,v25.16b,v25.16b,#8 1418 ext v5.16b,v2.16b,v3.16b,#8 1419 ext v6.16b,v1.16b,v2.16b,#8 1420 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1421.long 0xcec08251 //sha512su0 v17.16b,v18.16b 1422 ext v7.16b,v21.16b,v22.16b,#8 1423.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1424.long 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b 1425 add v4.2d,v1.2d,v3.2d // "D + T1" 1426.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1427 add v24.2d,v24.2d,v18.2d 1428 ld1 {v25.2d},[x3],#16 1429 ext v24.16b,v24.16b,v24.16b,#8 1430 ext v5.16b,v4.16b,v2.16b,#8 1431 ext v6.16b,v0.16b,v4.16b,#8 1432 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1433.long 0xcec08272 //sha512su0 v18.16b,v19.16b 1434 ext v7.16b,v22.16b,v23.16b,#8 1435.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1436.long 0xce678a32 
//sha512su1 v18.16b,v17.16b,v7.16b 1437 add v1.2d,v0.2d,v2.2d // "D + T1" 1438.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1439 add v25.2d,v25.2d,v19.2d 1440 ld1 {v24.2d},[x3],#16 1441 ext v25.16b,v25.16b,v25.16b,#8 1442 ext v5.16b,v1.16b,v4.16b,#8 1443 ext v6.16b,v3.16b,v1.16b,#8 1444 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1445.long 0xcec08293 //sha512su0 v19.16b,v20.16b 1446 ext v7.16b,v23.16b,v16.16b,#8 1447.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1448.long 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b 1449 add v0.2d,v3.2d,v4.2d // "D + T1" 1450.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1451 add v24.2d,v24.2d,v20.2d 1452 ld1 {v25.2d},[x3],#16 1453 ext v24.16b,v24.16b,v24.16b,#8 1454 ext v5.16b,v0.16b,v1.16b,#8 1455 ext v6.16b,v2.16b,v0.16b,#8 1456 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1457.long 0xcec082b4 //sha512su0 v20.16b,v21.16b 1458 ext v7.16b,v16.16b,v17.16b,#8 1459.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1460.long 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b 1461 add v3.2d,v2.2d,v1.2d // "D + T1" 1462.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1463 add v25.2d,v25.2d,v21.2d 1464 ld1 {v24.2d},[x3],#16 1465 ext v25.16b,v25.16b,v25.16b,#8 1466 ext v5.16b,v3.16b,v0.16b,#8 1467 ext v6.16b,v4.16b,v3.16b,#8 1468 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1469.long 0xcec082d5 //sha512su0 v21.16b,v22.16b 1470 ext v7.16b,v17.16b,v18.16b,#8 1471.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1472.long 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b 1473 add v2.2d,v4.2d,v0.2d // "D + T1" 1474.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1475 add v24.2d,v24.2d,v22.2d 1476 ld1 {v25.2d},[x3],#16 1477 ext v24.16b,v24.16b,v24.16b,#8 1478 ext v5.16b,v2.16b,v3.16b,#8 1479 ext v6.16b,v1.16b,v2.16b,#8 1480 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" 1481.long 0xcec082f6 //sha512su0 v22.16b,v23.16b 1482 ext v7.16b,v18.16b,v19.16b,#8 1483.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1484.long 0xce678ab6 //sha512su1 
v22.16b,v21.16b,v7.16b 1485 add v4.2d,v1.2d,v3.2d // "D + T1" 1486.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1487 add v25.2d,v25.2d,v23.2d 1488 ld1 {v24.2d},[x3],#16 1489 ext v25.16b,v25.16b,v25.16b,#8 1490 ext v5.16b,v4.16b,v2.16b,#8 1491 ext v6.16b,v0.16b,v4.16b,#8 1492 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" 1493.long 0xcec08217 //sha512su0 v23.16b,v16.16b 1494 ext v7.16b,v19.16b,v20.16b,#8 1495.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1496.long 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1497 add v1.2d,v0.2d,v2.2d // "D + T1" 1498.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1499 ld1 {v25.2d},[x3],#16 1500 add v24.2d,v24.2d,v16.2d 1501 ld1 {v16.16b},[x1],#16 // load next input 1502 ext v24.16b,v24.16b,v24.16b,#8 1503 ext v5.16b,v1.16b,v4.16b,#8 1504 ext v6.16b,v3.16b,v1.16b,#8 1505 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" 1506.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1507 rev64 v16.16b,v16.16b 1508 add v0.2d,v3.2d,v4.2d // "D + T1" 1509.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1510 ld1 {v24.2d},[x3],#16 1511 add v25.2d,v25.2d,v17.2d 1512 ld1 {v17.16b},[x1],#16 // load next input 1513 ext v25.16b,v25.16b,v25.16b,#8 1514 ext v5.16b,v0.16b,v1.16b,#8 1515 ext v6.16b,v2.16b,v0.16b,#8 1516 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" 1517.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1518 rev64 v17.16b,v17.16b 1519 add v3.2d,v2.2d,v1.2d // "D + T1" 1520.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1521 ld1 {v25.2d},[x3],#16 1522 add v24.2d,v24.2d,v18.2d 1523 ld1 {v18.16b},[x1],#16 // load next input 1524 ext v24.16b,v24.16b,v24.16b,#8 1525 ext v5.16b,v3.16b,v0.16b,#8 1526 ext v6.16b,v4.16b,v3.16b,#8 1527 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" 1528.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1529 rev64 v18.16b,v18.16b 1530 add v2.2d,v4.2d,v0.2d // "D + T1" 1531.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1532 ld1 {v24.2d},[x3],#16 1533 add v25.2d,v25.2d,v19.2d 1534 ld1 {v19.16b},[x1],#16 // load next input 1535 
ext v25.16b,v25.16b,v25.16b,#8 1536 ext v5.16b,v2.16b,v3.16b,#8 1537 ext v6.16b,v1.16b,v2.16b,#8 1538 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1539.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1540 rev64 v19.16b,v19.16b 1541 add v4.2d,v1.2d,v3.2d // "D + T1" 1542.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1543 ld1 {v25.2d},[x3],#16 1544 add v24.2d,v24.2d,v20.2d 1545 ld1 {v20.16b},[x1],#16 // load next input 1546 ext v24.16b,v24.16b,v24.16b,#8 1547 ext v5.16b,v4.16b,v2.16b,#8 1548 ext v6.16b,v0.16b,v4.16b,#8 1549 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1550.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1551 rev64 v20.16b,v20.16b 1552 add v1.2d,v0.2d,v2.2d // "D + T1" 1553.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1554 ld1 {v24.2d},[x3],#16 1555 add v25.2d,v25.2d,v21.2d 1556 ld1 {v21.16b},[x1],#16 // load next input 1557 ext v25.16b,v25.16b,v25.16b,#8 1558 ext v5.16b,v1.16b,v4.16b,#8 1559 ext v6.16b,v3.16b,v1.16b,#8 1560 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1561.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1562 rev64 v21.16b,v21.16b 1563 add v0.2d,v3.2d,v4.2d // "D + T1" 1564.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1565 ld1 {v25.2d},[x3],#16 1566 add v24.2d,v24.2d,v22.2d 1567 ld1 {v22.16b},[x1],#16 // load next input 1568 ext v24.16b,v24.16b,v24.16b,#8 1569 ext v5.16b,v0.16b,v1.16b,#8 1570 ext v6.16b,v2.16b,v0.16b,#8 1571 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1572.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1573 rev64 v22.16b,v22.16b 1574 add v3.2d,v2.2d,v1.2d // "D + T1" 1575.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1576 sub x3,x3,#80*8 // rewind 1577 add v25.2d,v25.2d,v23.2d 1578 ld1 {v23.16b},[x1],#16 // load next input 1579 ext v25.16b,v25.16b,v25.16b,#8 1580 ext v5.16b,v3.16b,v0.16b,#8 1581 ext v6.16b,v4.16b,v3.16b,#8 1582 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1583.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1584 rev64 v23.16b,v23.16b 1585 add v2.2d,v4.2d,v0.2d // "D + T1" 1586.long 0xce618480 
//sha512h2 v0.16b,v4.16b,v1.16b 1587 add v0.2d,v0.2d,v26.2d // accumulate 1588 add v1.2d,v1.2d,v27.2d 1589 add v2.2d,v2.2d,v28.2d 1590 add v3.2d,v3.2d,v29.2d 1591 1592 cbnz x2,Loop_hw 1593 1594 st1 {v0.2d,v1.2d,v2.2d,v3.2d},[x0] // store context 1595 1596 ldr x29,[sp],#16 1597 ret 1598 1599#endif 1600#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(_WIN32) 1601