// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#include <ring-core/asm_base.h>

#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(_WIN32)
// Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the OpenSSL license (the "License"). You may not use
// this file except in compliance with the License. You can obtain a copy
// in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html

// ====================================================================
// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
// project. The module is, however, dual licensed under OpenSSL and
// CRYPTOGAMS licenses depending on where you obtain it. For further
// details see http://www.openssl.org/~appro/cryptogams/.
//
// Permission to use under GPLv2 terms is granted.
// ====================================================================
//
// SHA256/512 for ARMv8.
//
// Performance in cycles per processed byte and improvement coefficient
// over code generated with "default" compiler:
//
//              SHA256-hw   SHA256(*)       SHA512
// Apple A7     1.97        10.5 (+33%)     6.73 (-1%(**))
// Cortex-A53   2.38        15.5 (+115%)    10.0 (+150%(***))
// Cortex-A57   2.31        11.6 (+86%)     7.51 (+260%(***))
// Denver       2.01        10.5 (+26%)     6.70 (+8%)
// X-Gene                   20.0 (+100%)    12.8 (+300%(***))
// Mongoose     2.36        13.0 (+50%)     8.36 (+33%)
// Kryo         1.92        17.4 (+30%)     11.2 (+8%)
//
// (*)   Software SHA256 results are of lesser relevance, presented
//       mostly for informational purposes.
// (**)  The result is a trade-off: it's possible to improve it by
//       10% (or by 1 cycle per round), but at the cost of 20% loss
//       on Cortex-A53 (or by 4 cycles per round).
// (***) Super-impressive coefficients over gcc-generated code are
//       indication of some compiler "pathology", most notably code
//       generated with -mgeneral-regs-only is significantly faster
//       and the gap is only 40-90%.

#ifndef __KERNEL__
# include <ring-core/arm_arch.h>
#endif

.text



.globl	sha512_block_data_order

.def sha512_block_data_order
   .type 32
.endef
.align	6
//----------------------------------------------------------------------
// sha512_block_data_order -- SHA-512 compression over whole input blocks.
//
// In:   x0 = pointer to the eight 64-bit state words (read at entry,
//            updated in place after every block)
//       x1 = pointer to the input data
//       x2 = number of 128-byte blocks (shifted left by 7 and added to
//            x1 to form the end-of-input pointer)
//       NOTE(review): presumably declared in C as
//       void sha512_block_data_order(uint64_t state[8], const uint8_t *in,
//       size_t num) -- confirm against the C prototype.
//       NOTE(review): the block loop is bottom-tested (b.ne Loop after
//       the first block), so a block count of zero does not appear to be
//       supported -- confirm callers never pass 0.
//
// Dispatch: unless __KERNEL__ is defined, OPENSSL_armcap_P is tested for
//       ARMV8_SHA512 and, when set, control transfers to Lv8_entry.
//
// Frame (128 bytes, x29 = frame base):
//       [x29,#0]        saved x29,x30
//       [x29,#16..#95]  saved x19..x28
//       [x29,#96]       x0 (state pointer)
//       [x29,#104]      end-of-input pointer
//       [x29,#112]      input pointer (saved after the first 16 bytes of
//                       the block have been consumed)
//       A further 32 bytes below the frame ([sp,#0..#31]) hold message
//       schedule words that are temporarily displaced from registers.
//
// Register roles inside the loop:
//       x20..x27     working variables a..h; the roles rotate by one
//                    register per round instead of moving values
//       x30          pointer into LK512, post-incremented by 8 per round
//       x19, x28     alternate between K[i]/Ch scratch and the a^b value
//                    carried into the next round's Maj computation
//       x16, x17     scratch for Sigma1/Ch/Sigma0
//       x3..x15, x0, x1, x2
//                    message words X[0]..X[15] (x0, x1, x2 are reused for
//                    X[13], X[14], X[15] once their original contents
//                    have been saved to the frame)
//----------------------------------------------------------------------
sha512_block_data_order:
	AARCH64_VALID_CALL_TARGET
#ifndef __KERNEL__
#if defined(OPENSSL_HWASAN) && __clang_major__ >= 10
	adrp	x16,:pg_hi21_nc:OPENSSL_armcap_P
#else
	adrp	x16,OPENSSL_armcap_P
#endif
	ldr	w16,[x16,:lo12:OPENSSL_armcap_P]
	tst	w16,#ARMV8_SHA512
	b.ne	Lv8_entry
#endif
	AARCH64_SIGN_LINK_REGISTER
	stp	x29,x30,[sp,#-128]!
	add	x29,sp,#0

	stp	x19,x20,[sp,#16]
	stp	x21,x22,[sp,#32]
	stp	x23,x24,[sp,#48]
	stp	x25,x26,[sp,#64]
	stp	x27,x28,[sp,#80]
	sub	sp,sp,#4*8

	ldp	x20,x21,[x0] // load context
	ldp	x22,x23,[x0,#2*8]
	ldp	x24,x25,[x0,#4*8]
	add	x2,x1,x2,lsl#7 // end of input
	ldp	x26,x27,[x0,#6*8]
	adrp	x30,LK512
	add	x30,x30,:lo12:LK512
	stp	x0,x2,[x29,#96]

// One iteration of Loop processes one 128-byte block.
// Rounds 0..15 below consume the input words directly; on builds without
// __AARCH64EB__ each word is byte-reversed with rev after loading.
Loop:
	ldp	x3,x4,[x1],#2*8
	ldr	x19,[x30],#8 // *K++
	eor	x28,x21,x22 // magic seed
	str	x1,[x29,#112]
#ifndef __AARCH64EB__
	rev	x3,x3 // 0
#endif
	ror	x16,x24,#14
	add	x27,x27,x19 // h+=K[i]
	eor	x6,x24,x24,ror#23
	and	x17,x25,x24
	bic	x19,x26,x24
	add	x27,x27,x3 // h+=X[i]
	orr	x17,x17,x19 // Ch(e,f,g)
	eor	x19,x20,x21 // a^b, b^c in next round
	eor	x16,x16,x6,ror#18 // Sigma1(e)
	ror	x6,x20,#28
	add	x27,x27,x17 // h+=Ch(e,f,g)
	eor	x17,x20,x20,ror#5
	add	x27,x27,x16 // h+=Sigma1(e)
	and	x28,x28,x19 // (b^c)&=(a^b)
	add	x23,x23,x27 // d+=h
	eor	x28,x28,x21 // Maj(a,b,c)
	eor	x17,x6,x17,ror#34 // Sigma0(a)
	add	x27,x27,x28 // h+=Maj(a,b,c)
	ldr	x28,[x30],#8 // *K++, x19 in next round
	//add x27,x27,x17 // h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x4,x4 // 1
#endif
	ldp	x5,x6,[x1],#2*8
	add	x27,x27,x17 // h+=Sigma0(a)
	ror	x16,x23,#14
	add	x26,x26,x28 // h+=K[i]
	eor	x7,x23,x23,ror#23
	and	x17,x24,x23
	bic	x28,x25,x23
	add	x26,x26,x4 // h+=X[i]
	orr	x17,x17,x28 // Ch(e,f,g)
	eor	x28,x27,x20 // a^b, b^c in next round
	eor	x16,x16,x7,ror#18 // Sigma1(e)
	ror	x7,x27,#28
	add	x26,x26,x17 // h+=Ch(e,f,g)
	eor	x17,x27,x27,ror#5
	add	x26,x26,x16 // h+=Sigma1(e)
	and	x19,x19,x28 // (b^c)&=(a^b)
	add	x22,x22,x26 // d+=h
	eor	x19,x19,x20 // Maj(a,b,c)
	eor	x17,x7,x17,ror#34 // Sigma0(a)
	add	x26,x26,x19 // h+=Maj(a,b,c)
	ldr	x19,[x30],#8 // *K++, x28 in next round
	//add x26,x26,x17 // h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x5,x5 // 2
#endif
	add	x26,x26,x17 // h+=Sigma0(a)
	ror	x16,x22,#14
	add	x25,x25,x19 // h+=K[i]
	eor	x8,x22,x22,ror#23
	and	x17,x23,x22
	bic	x19,x24,x22
	add	x25,x25,x5 // h+=X[i]
	orr	x17,x17,x19 // Ch(e,f,g)
	eor	x19,x26,x27 // a^b, b^c in next round
	eor	x16,x16,x8,ror#18 // Sigma1(e)
	ror	x8,x26,#28
	add	x25,x25,x17 // h+=Ch(e,f,g)
	eor	x17,x26,x26,ror#5
	add	x25,x25,x16 // h+=Sigma1(e)
	and	x28,x28,x19 // (b^c)&=(a^b)
	add	x21,x21,x25 // d+=h
	eor	x28,x28,x27 // Maj(a,b,c)
	eor	x17,x8,x17,ror#34 // Sigma0(a)
	add	x25,x25,x28 // h+=Maj(a,b,c)
	ldr	x28,[x30],#8 // *K++, x19 in next round
	//add x25,x25,x17 // h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x6,x6 // 3
#endif
	ldp	x7,x8,[x1],#2*8
	add	x25,x25,x17 // h+=Sigma0(a)
	ror	x16,x21,#14
	add	x24,x24,x28 // h+=K[i]
	eor	x9,x21,x21,ror#23
	and	x17,x22,x21
	bic	x28,x23,x21
	add	x24,x24,x6 // h+=X[i]
	orr	x17,x17,x28 // Ch(e,f,g)
	eor	x28,x25,x26 // a^b, b^c in next round
	eor	x16,x16,x9,ror#18 // Sigma1(e)
	ror	x9,x25,#28
	add	x24,x24,x17 // h+=Ch(e,f,g)
	eor	x17,x25,x25,ror#5
	add	x24,x24,x16 // h+=Sigma1(e)
	and	x19,x19,x28 // (b^c)&=(a^b)
	add	x20,x20,x24 // d+=h
	eor	x19,x19,x26 // Maj(a,b,c)
	eor	x17,x9,x17,ror#34 // Sigma0(a)
	add	x24,x24,x19 // h+=Maj(a,b,c)
	ldr	x19,[x30],#8 // *K++, x28 in next round
	//add x24,x24,x17 // h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x7,x7 // 4
#endif
	add	x24,x24,x17 // h+=Sigma0(a)
	ror	x16,x20,#14
	add	x23,x23,x19 // h+=K[i]
	eor	x10,x20,x20,ror#23
	and	x17,x21,x20
	bic	x19,x22,x20
	add	x23,x23,x7 // h+=X[i]
	orr	x17,x17,x19 // Ch(e,f,g)
	eor	x19,x24,x25 // a^b, b^c in next round
	eor	x16,x16,x10,ror#18 // Sigma1(e)
	ror	x10,x24,#28
	add	x23,x23,x17 // h+=Ch(e,f,g)
	eor	x17,x24,x24,ror#5
	add	x23,x23,x16 // h+=Sigma1(e)
	and	x28,x28,x19 // (b^c)&=(a^b)
	add	x27,x27,x23 // d+=h
	eor	x28,x28,x25 // Maj(a,b,c)
	eor	x17,x10,x17,ror#34 // Sigma0(a)
	add	x23,x23,x28 // h+=Maj(a,b,c)
	ldr	x28,[x30],#8 // *K++, x19 in next round
	//add x23,x23,x17 // h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x8,x8 // 5
#endif
	ldp	x9,x10,[x1],#2*8
	add	x23,x23,x17 // h+=Sigma0(a)
	ror	x16,x27,#14
	add	x22,x22,x28 // h+=K[i]
	eor	x11,x27,x27,ror#23
	and	x17,x20,x27
	bic	x28,x21,x27
	add	x22,x22,x8 // h+=X[i]
	orr	x17,x17,x28 // Ch(e,f,g)
	eor	x28,x23,x24 // a^b, b^c in next round
	eor	x16,x16,x11,ror#18 // Sigma1(e)
	ror	x11,x23,#28
	add	x22,x22,x17 // h+=Ch(e,f,g)
	eor	x17,x23,x23,ror#5
	add	x22,x22,x16 // h+=Sigma1(e)
	and	x19,x19,x28 // (b^c)&=(a^b)
	add	x26,x26,x22 // d+=h
	eor	x19,x19,x24 // Maj(a,b,c)
	eor	x17,x11,x17,ror#34 // Sigma0(a)
	add	x22,x22,x19 // h+=Maj(a,b,c)
	ldr	x19,[x30],#8 // *K++, x28 in next round
	//add x22,x22,x17 // h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x9,x9 // 6
#endif
	add	x22,x22,x17 // h+=Sigma0(a)
	ror	x16,x26,#14
	add	x21,x21,x19 // h+=K[i]
	eor	x12,x26,x26,ror#23
	and	x17,x27,x26
	bic	x19,x20,x26
	add	x21,x21,x9 // h+=X[i]
	orr	x17,x17,x19 // Ch(e,f,g)
	eor	x19,x22,x23 // a^b, b^c in next round
	eor	x16,x16,x12,ror#18 // Sigma1(e)
	ror	x12,x22,#28
	add	x21,x21,x17 // h+=Ch(e,f,g)
	eor	x17,x22,x22,ror#5
	add	x21,x21,x16 // h+=Sigma1(e)
	and	x28,x28,x19 // (b^c)&=(a^b)
	add	x25,x25,x21 // d+=h
	eor	x28,x28,x23 // Maj(a,b,c)
	eor	x17,x12,x17,ror#34 // Sigma0(a)
	add	x21,x21,x28 // h+=Maj(a,b,c)
	ldr	x28,[x30],#8 // *K++, x19 in next round
	//add x21,x21,x17 // h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x10,x10 // 7
#endif
	ldp	x11,x12,[x1],#2*8
	add	x21,x21,x17 // h+=Sigma0(a)
	ror	x16,x25,#14
	add	x20,x20,x28 // h+=K[i]
	eor	x13,x25,x25,ror#23
	and	x17,x26,x25
	bic	x28,x27,x25
	add	x20,x20,x10 // h+=X[i]
	orr	x17,x17,x28 // Ch(e,f,g)
	eor	x28,x21,x22 // a^b, b^c in next round
	eor	x16,x16,x13,ror#18 // Sigma1(e)
	ror	x13,x21,#28
	add	x20,x20,x17 // h+=Ch(e,f,g)
	eor	x17,x21,x21,ror#5
	add	x20,x20,x16 // h+=Sigma1(e)
	and	x19,x19,x28 // (b^c)&=(a^b)
	add	x24,x24,x20 // d+=h
	eor	x19,x19,x22 // Maj(a,b,c)
	eor	x17,x13,x17,ror#34 // Sigma0(a)
	add	x20,x20,x19 // h+=Maj(a,b,c)
	ldr	x19,[x30],#8 // *K++, x28 in next round
	//add x20,x20,x17 // h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x11,x11 // 8
#endif
	add	x20,x20,x17 // h+=Sigma0(a)
	ror	x16,x24,#14
	add	x27,x27,x19 // h+=K[i]
	eor	x14,x24,x24,ror#23
	and	x17,x25,x24
	bic	x19,x26,x24
	add	x27,x27,x11 // h+=X[i]
	orr	x17,x17,x19 // Ch(e,f,g)
	eor	x19,x20,x21 // a^b, b^c in next round
	eor	x16,x16,x14,ror#18 // Sigma1(e)
	ror	x14,x20,#28
	add	x27,x27,x17 // h+=Ch(e,f,g)
	eor	x17,x20,x20,ror#5
	add	x27,x27,x16 // h+=Sigma1(e)
	and	x28,x28,x19 // (b^c)&=(a^b)
	add	x23,x23,x27 // d+=h
	eor	x28,x28,x21 // Maj(a,b,c)
	eor	x17,x14,x17,ror#34 // Sigma0(a)
	add	x27,x27,x28 // h+=Maj(a,b,c)
	ldr	x28,[x30],#8 // *K++, x19 in next round
	//add x27,x27,x17 // h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x12,x12 // 9
#endif
	ldp	x13,x14,[x1],#2*8
	add	x27,x27,x17 // h+=Sigma0(a)
	ror	x16,x23,#14
	add	x26,x26,x28 // h+=K[i]
	eor	x15,x23,x23,ror#23
	and	x17,x24,x23
	bic	x28,x25,x23
	add	x26,x26,x12 // h+=X[i]
	orr	x17,x17,x28 // Ch(e,f,g)
	eor	x28,x27,x20 // a^b, b^c in next round
	eor	x16,x16,x15,ror#18 // Sigma1(e)
	ror	x15,x27,#28
	add	x26,x26,x17 // h+=Ch(e,f,g)
	eor	x17,x27,x27,ror#5
	add	x26,x26,x16 // h+=Sigma1(e)
	and	x19,x19,x28 // (b^c)&=(a^b)
	add	x22,x22,x26 // d+=h
	eor	x19,x19,x20 // Maj(a,b,c)
	eor	x17,x15,x17,ror#34 // Sigma0(a)
	add	x26,x26,x19 // h+=Maj(a,b,c)
	ldr	x19,[x30],#8 // *K++, x28 in next round
	//add x26,x26,x17 // h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x13,x13 // 10
#endif
	add	x26,x26,x17 // h+=Sigma0(a)
	ror	x16,x22,#14
	add	x25,x25,x19 // h+=K[i]
	eor	x0,x22,x22,ror#23
	and	x17,x23,x22
	bic	x19,x24,x22
	add	x25,x25,x13 // h+=X[i]
	orr	x17,x17,x19 // Ch(e,f,g)
	eor	x19,x26,x27 // a^b, b^c in next round
	eor	x16,x16,x0,ror#18 // Sigma1(e)
	ror	x0,x26,#28
	add	x25,x25,x17 // h+=Ch(e,f,g)
	eor	x17,x26,x26,ror#5
	add	x25,x25,x16 // h+=Sigma1(e)
	and	x28,x28,x19 // (b^c)&=(a^b)
	add	x21,x21,x25 // d+=h
	eor	x28,x28,x27 // Maj(a,b,c)
	eor	x17,x0,x17,ror#34 // Sigma0(a)
	add	x25,x25,x28 // h+=Maj(a,b,c)
	ldr	x28,[x30],#8 // *K++, x19 in next round
	//add x25,x25,x17 // h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x14,x14 // 11
#endif
	ldp	x15,x0,[x1],#2*8
	add	x25,x25,x17 // h+=Sigma0(a)
	// From here on the scratch register of each round already holds a
	// message word, so that word is parked in a stack slot first.
	str	x6,[sp,#24]
	ror	x16,x21,#14
	add	x24,x24,x28 // h+=K[i]
	eor	x6,x21,x21,ror#23
	and	x17,x22,x21
	bic	x28,x23,x21
	add	x24,x24,x14 // h+=X[i]
	orr	x17,x17,x28 // Ch(e,f,g)
	eor	x28,x25,x26 // a^b, b^c in next round
	eor	x16,x16,x6,ror#18 // Sigma1(e)
	ror	x6,x25,#28
	add	x24,x24,x17 // h+=Ch(e,f,g)
	eor	x17,x25,x25,ror#5
	add	x24,x24,x16 // h+=Sigma1(e)
	and	x19,x19,x28 // (b^c)&=(a^b)
	add	x20,x20,x24 // d+=h
	eor	x19,x19,x26 // Maj(a,b,c)
	eor	x17,x6,x17,ror#34 // Sigma0(a)
	add	x24,x24,x19 // h+=Maj(a,b,c)
	ldr	x19,[x30],#8 // *K++, x28 in next round
	//add x24,x24,x17 // h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x15,x15 // 12
#endif
	add	x24,x24,x17 // h+=Sigma0(a)
	str	x7,[sp,#0]
	ror	x16,x20,#14
	add	x23,x23,x19 // h+=K[i]
	eor	x7,x20,x20,ror#23
	and	x17,x21,x20
	bic	x19,x22,x20
	add	x23,x23,x15 // h+=X[i]
	orr	x17,x17,x19 // Ch(e,f,g)
	eor	x19,x24,x25 // a^b, b^c in next round
	eor	x16,x16,x7,ror#18 // Sigma1(e)
	ror	x7,x24,#28
	add	x23,x23,x17 // h+=Ch(e,f,g)
	eor	x17,x24,x24,ror#5
	add	x23,x23,x16 // h+=Sigma1(e)
	and	x28,x28,x19 // (b^c)&=(a^b)
	add	x27,x27,x23 // d+=h
	eor	x28,x28,x25 // Maj(a,b,c)
	eor	x17,x7,x17,ror#34 // Sigma0(a)
	add	x23,x23,x28 // h+=Maj(a,b,c)
	ldr	x28,[x30],#8 // *K++, x19 in next round
	//add x23,x23,x17 // h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x0,x0 // 13
#endif
	// Last two input words; x1 stops being the input pointer here (its
	// earlier value was saved at [x29,#112]).
	ldp	x1,x2,[x1]
	add	x23,x23,x17 // h+=Sigma0(a)
	str	x8,[sp,#8]
	ror	x16,x27,#14
	add	x22,x22,x28 // h+=K[i]
	eor	x8,x27,x27,ror#23
	and	x17,x20,x27
	bic	x28,x21,x27
	add	x22,x22,x0 // h+=X[i]
	orr	x17,x17,x28 // Ch(e,f,g)
	eor	x28,x23,x24 // a^b, b^c in next round
	eor	x16,x16,x8,ror#18 // Sigma1(e)
	ror	x8,x23,#28
	add	x22,x22,x17 // h+=Ch(e,f,g)
	eor	x17,x23,x23,ror#5
	add	x22,x22,x16 // h+=Sigma1(e)
	and	x19,x19,x28 // (b^c)&=(a^b)
	add	x26,x26,x22 // d+=h
	eor	x19,x19,x24 // Maj(a,b,c)
	eor	x17,x8,x17,ror#34 // Sigma0(a)
	add	x22,x22,x19 // h+=Maj(a,b,c)
	ldr	x19,[x30],#8 // *K++, x28 in next round
	//add x22,x22,x17 // h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x1,x1 // 14
#endif
	ldr	x6,[sp,#24]
	add	x22,x22,x17 // h+=Sigma0(a)
	str	x9,[sp,#16]
	ror	x16,x26,#14
	add	x21,x21,x19 // h+=K[i]
	eor	x9,x26,x26,ror#23
	and	x17,x27,x26
	bic	x19,x20,x26
	add	x21,x21,x1 // h+=X[i]
	orr	x17,x17,x19 // Ch(e,f,g)
	eor	x19,x22,x23 // a^b, b^c in next round
	eor	x16,x16,x9,ror#18 // Sigma1(e)
	ror	x9,x22,#28
	add	x21,x21,x17 // h+=Ch(e,f,g)
	eor	x17,x22,x22,ror#5
	add	x21,x21,x16 // h+=Sigma1(e)
	and	x28,x28,x19 // (b^c)&=(a^b)
	add	x25,x25,x21 // d+=h
	eor	x28,x28,x23 // Maj(a,b,c)
	eor	x17,x9,x17,ror#34 // Sigma0(a)
	add	x21,x21,x28 // h+=Maj(a,b,c)
	ldr	x28,[x30],#8 // *K++, x19 in next round
	//add x21,x21,x17 // h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x2,x2 // 15
#endif
	// Round 15 also starts the message schedule: x3 is advanced to the
	// word needed by round 16 (sigma0/sigma1 terms plus x12).
	ldr	x7,[sp,#0]
	add	x21,x21,x17 // h+=Sigma0(a)
	str	x10,[sp,#24]
	ror	x16,x25,#14
	add	x20,x20,x28 // h+=K[i]
	ror	x9,x4,#1
	and	x17,x26,x25
	ror	x8,x1,#19
	bic	x28,x27,x25
	ror	x10,x21,#28
	add	x20,x20,x2 // h+=X[i]
	eor	x16,x16,x25,ror#18
	eor	x9,x9,x4,ror#8
	orr	x17,x17,x28 // Ch(e,f,g)
	eor	x28,x21,x22 // a^b, b^c in next round
	eor	x16,x16,x25,ror#41 // Sigma1(e)
	eor	x10,x10,x21,ror#34
	add	x20,x20,x17 // h+=Ch(e,f,g)
	and	x19,x19,x28 // (b^c)&=(a^b)
	eor	x8,x8,x1,ror#61
	eor	x9,x9,x4,lsr#7 // sigma0(X[i+1])
	add	x20,x20,x16 // h+=Sigma1(e)
	eor	x19,x19,x22 // Maj(a,b,c)
	eor	x17,x10,x21,ror#39 // Sigma0(a)
	eor	x8,x8,x1,lsr#6 // sigma1(X[i+14])
	add	x3,x3,x12
	add	x24,x24,x20 // d+=h
	add	x20,x20,x19 // h+=Maj(a,b,c)
	ldr	x19,[x30],#8 // *K++, x28 in next round
	add	x3,x3,x9
	add	x20,x20,x17 // h+=Sigma0(a)
	add	x3,x3,x8
// Sixteen rounds per iteration; every round also derives the schedule
// word for the following round. The loop is left when the K value loaded
// for the next round is the zero terminator of LK512 (cbnz x19 below).
Loop_16_xx:
	ldr	x8,[sp,#8]
	str	x11,[sp,#0]
	ror	x16,x24,#14
	add	x27,x27,x19 // h+=K[i]
	ror	x10,x5,#1
	and	x17,x25,x24
	ror	x9,x2,#19
	bic	x19,x26,x24
	ror	x11,x20,#28
	add	x27,x27,x3 // h+=X[i]
	eor	x16,x16,x24,ror#18
	eor	x10,x10,x5,ror#8
	orr	x17,x17,x19 // Ch(e,f,g)
	eor	x19,x20,x21 // a^b, b^c in next round
	eor	x16,x16,x24,ror#41 // Sigma1(e)
	eor	x11,x11,x20,ror#34
	add	x27,x27,x17 // h+=Ch(e,f,g)
	and	x28,x28,x19 // (b^c)&=(a^b)
	eor	x9,x9,x2,ror#61
	eor	x10,x10,x5,lsr#7 // sigma0(X[i+1])
	add	x27,x27,x16 // h+=Sigma1(e)
	eor	x28,x28,x21 // Maj(a,b,c)
	eor	x17,x11,x20,ror#39 // Sigma0(a)
	eor	x9,x9,x2,lsr#6 // sigma1(X[i+14])
	add	x4,x4,x13
	add	x23,x23,x27 // d+=h
	add	x27,x27,x28 // h+=Maj(a,b,c)
	ldr	x28,[x30],#8 // *K++, x19 in next round
	add	x4,x4,x10
	add	x27,x27,x17 // h+=Sigma0(a)
	add	x4,x4,x9
	ldr	x9,[sp,#16]
	str	x12,[sp,#8]
	ror	x16,x23,#14
	add	x26,x26,x28 // h+=K[i]
	ror	x11,x6,#1
	and	x17,x24,x23
	ror	x10,x3,#19
	bic	x28,x25,x23
	ror	x12,x27,#28
	add	x26,x26,x4 // h+=X[i]
	eor	x16,x16,x23,ror#18
	eor	x11,x11,x6,ror#8
	orr	x17,x17,x28 // Ch(e,f,g)
	eor	x28,x27,x20 // a^b, b^c in next round
	eor	x16,x16,x23,ror#41 // Sigma1(e)
	eor	x12,x12,x27,ror#34
	add	x26,x26,x17 // h+=Ch(e,f,g)
	and	x19,x19,x28 // (b^c)&=(a^b)
	eor	x10,x10,x3,ror#61
	eor	x11,x11,x6,lsr#7 // sigma0(X[i+1])
	add	x26,x26,x16 // h+=Sigma1(e)
	eor	x19,x19,x20 // Maj(a,b,c)
	eor	x17,x12,x27,ror#39 // Sigma0(a)
	eor	x10,x10,x3,lsr#6 // sigma1(X[i+14])
	add	x5,x5,x14
	add	x22,x22,x26 // d+=h
	add	x26,x26,x19 // h+=Maj(a,b,c)
	ldr	x19,[x30],#8 // *K++, x28 in next round
	add	x5,x5,x11
	add	x26,x26,x17 // h+=Sigma0(a)
	add	x5,x5,x10
	ldr	x10,[sp,#24]
	str	x13,[sp,#16]
	ror	x16,x22,#14
	add	x25,x25,x19 // h+=K[i]
	ror	x12,x7,#1
	and	x17,x23,x22
	ror	x11,x4,#19
	bic	x19,x24,x22
	ror	x13,x26,#28
	add	x25,x25,x5 // h+=X[i]
	eor	x16,x16,x22,ror#18
	eor	x12,x12,x7,ror#8
	orr	x17,x17,x19 // Ch(e,f,g)
	eor	x19,x26,x27 // a^b, b^c in next round
	eor	x16,x16,x22,ror#41 // Sigma1(e)
	eor	x13,x13,x26,ror#34
	add	x25,x25,x17 // h+=Ch(e,f,g)
	and	x28,x28,x19 // (b^c)&=(a^b)
	eor	x11,x11,x4,ror#61
	eor	x12,x12,x7,lsr#7 // sigma0(X[i+1])
	add	x25,x25,x16 // h+=Sigma1(e)
	eor	x28,x28,x27 // Maj(a,b,c)
	eor	x17,x13,x26,ror#39 // Sigma0(a)
	eor	x11,x11,x4,lsr#6 // sigma1(X[i+14])
	add	x6,x6,x15
	add	x21,x21,x25 // d+=h
	add	x25,x25,x28 // h+=Maj(a,b,c)
	ldr	x28,[x30],#8 // *K++, x19 in next round
	add	x6,x6,x12
	add	x25,x25,x17 // h+=Sigma0(a)
	add	x6,x6,x11
	ldr	x11,[sp,#0]
	str	x14,[sp,#24]
	ror	x16,x21,#14
	add	x24,x24,x28 // h+=K[i]
	ror	x13,x8,#1
	and	x17,x22,x21
	ror	x12,x5,#19
	bic	x28,x23,x21
	ror	x14,x25,#28
	add	x24,x24,x6 // h+=X[i]
	eor	x16,x16,x21,ror#18
	eor	x13,x13,x8,ror#8
	orr	x17,x17,x28 // Ch(e,f,g)
	eor	x28,x25,x26 // a^b, b^c in next round
	eor	x16,x16,x21,ror#41 // Sigma1(e)
	eor	x14,x14,x25,ror#34
	add	x24,x24,x17 // h+=Ch(e,f,g)
	and	x19,x19,x28 // (b^c)&=(a^b)
	eor	x12,x12,x5,ror#61
	eor	x13,x13,x8,lsr#7 // sigma0(X[i+1])
	add	x24,x24,x16 // h+=Sigma1(e)
	eor	x19,x19,x26 // Maj(a,b,c)
	eor	x17,x14,x25,ror#39 // Sigma0(a)
	eor	x12,x12,x5,lsr#6 // sigma1(X[i+14])
	add	x7,x7,x0
	add	x20,x20,x24 // d+=h
	add	x24,x24,x19 // h+=Maj(a,b,c)
	ldr	x19,[x30],#8 // *K++, x28 in next round
	add	x7,x7,x13
	add	x24,x24,x17 // h+=Sigma0(a)
	add	x7,x7,x12
	ldr	x12,[sp,#8]
	str	x15,[sp,#0]
	ror	x16,x20,#14
	add	x23,x23,x19 // h+=K[i]
	ror	x14,x9,#1
	and	x17,x21,x20
	ror	x13,x6,#19
	bic	x19,x22,x20
	ror	x15,x24,#28
	add	x23,x23,x7 // h+=X[i]
	eor	x16,x16,x20,ror#18
	eor	x14,x14,x9,ror#8
	orr	x17,x17,x19 // Ch(e,f,g)
	eor	x19,x24,x25 // a^b, b^c in next round
	eor	x16,x16,x20,ror#41 // Sigma1(e)
	eor	x15,x15,x24,ror#34
	add	x23,x23,x17 // h+=Ch(e,f,g)
	and	x28,x28,x19 // (b^c)&=(a^b)
	eor	x13,x13,x6,ror#61
	eor	x14,x14,x9,lsr#7 // sigma0(X[i+1])
	add	x23,x23,x16 // h+=Sigma1(e)
	eor	x28,x28,x25 // Maj(a,b,c)
	eor	x17,x15,x24,ror#39 // Sigma0(a)
	eor	x13,x13,x6,lsr#6 // sigma1(X[i+14])
	add	x8,x8,x1
	add	x27,x27,x23 // d+=h
	add	x23,x23,x28 // h+=Maj(a,b,c)
	ldr	x28,[x30],#8 // *K++, x19 in next round
	add	x8,x8,x14
	add	x23,x23,x17 // h+=Sigma0(a)
	add	x8,x8,x13
	ldr	x13,[sp,#16]
	str	x0,[sp,#8]
	ror	x16,x27,#14
	add	x22,x22,x28 // h+=K[i]
	ror	x15,x10,#1
	and	x17,x20,x27
	ror	x14,x7,#19
	bic	x28,x21,x27
	ror	x0,x23,#28
	add	x22,x22,x8 // h+=X[i]
	eor	x16,x16,x27,ror#18
	eor	x15,x15,x10,ror#8
	orr	x17,x17,x28 // Ch(e,f,g)
	eor	x28,x23,x24 // a^b, b^c in next round
	eor	x16,x16,x27,ror#41 // Sigma1(e)
	eor	x0,x0,x23,ror#34
	add	x22,x22,x17 // h+=Ch(e,f,g)
	and	x19,x19,x28 // (b^c)&=(a^b)
	eor	x14,x14,x7,ror#61
	eor	x15,x15,x10,lsr#7 // sigma0(X[i+1])
	add	x22,x22,x16 // h+=Sigma1(e)
	eor	x19,x19,x24 // Maj(a,b,c)
	eor	x17,x0,x23,ror#39 // Sigma0(a)
	eor	x14,x14,x7,lsr#6 // sigma1(X[i+14])
	add	x9,x9,x2
	add	x26,x26,x22 // d+=h
	add	x22,x22,x19 // h+=Maj(a,b,c)
	ldr	x19,[x30],#8 // *K++, x28 in next round
	add	x9,x9,x15
	add	x22,x22,x17 // h+=Sigma0(a)
	add	x9,x9,x14
	ldr	x14,[sp,#24]
	str	x1,[sp,#16]
	ror	x16,x26,#14
	add	x21,x21,x19 // h+=K[i]
	ror	x0,x11,#1
	and	x17,x27,x26
	ror	x15,x8,#19
	bic	x19,x20,x26
	ror	x1,x22,#28
	add	x21,x21,x9 // h+=X[i]
	eor	x16,x16,x26,ror#18
	eor	x0,x0,x11,ror#8
	orr	x17,x17,x19 // Ch(e,f,g)
	eor	x19,x22,x23 // a^b, b^c in next round
	eor	x16,x16,x26,ror#41 // Sigma1(e)
	eor	x1,x1,x22,ror#34
	add	x21,x21,x17 // h+=Ch(e,f,g)
	and	x28,x28,x19 // (b^c)&=(a^b)
	eor	x15,x15,x8,ror#61
	eor	x0,x0,x11,lsr#7 // sigma0(X[i+1])
	add	x21,x21,x16 // h+=Sigma1(e)
	eor	x28,x28,x23 // Maj(a,b,c)
	eor	x17,x1,x22,ror#39 // Sigma0(a)
	eor	x15,x15,x8,lsr#6 // sigma1(X[i+14])
	add	x10,x10,x3
	add	x25,x25,x21 // d+=h
	add	x21,x21,x28 // h+=Maj(a,b,c)
	ldr	x28,[x30],#8 // *K++, x19 in next round
	add	x10,x10,x0
	add	x21,x21,x17 // h+=Sigma0(a)
	add	x10,x10,x15
	ldr	x15,[sp,#0]
	str	x2,[sp,#24]
	ror	x16,x25,#14
	add	x20,x20,x28 // h+=K[i]
	ror	x1,x12,#1
	and	x17,x26,x25
	ror	x0,x9,#19
	bic	x28,x27,x25
	ror	x2,x21,#28
	add	x20,x20,x10 // h+=X[i]
	eor	x16,x16,x25,ror#18
	eor	x1,x1,x12,ror#8
	orr	x17,x17,x28 // Ch(e,f,g)
	eor	x28,x21,x22 // a^b, b^c in next round
	eor	x16,x16,x25,ror#41 // Sigma1(e)
	eor	x2,x2,x21,ror#34
	add	x20,x20,x17 // h+=Ch(e,f,g)
	and	x19,x19,x28 // (b^c)&=(a^b)
	eor	x0,x0,x9,ror#61
	eor	x1,x1,x12,lsr#7 // sigma0(X[i+1])
	add	x20,x20,x16 // h+=Sigma1(e)
	eor	x19,x19,x22 // Maj(a,b,c)
	eor	x17,x2,x21,ror#39 // Sigma0(a)
	eor	x0,x0,x9,lsr#6 // sigma1(X[i+14])
	add	x11,x11,x4
	add	x24,x24,x20 // d+=h
	add	x20,x20,x19 // h+=Maj(a,b,c)
	ldr	x19,[x30],#8 // *K++, x28 in next round
	add	x11,x11,x1
	add	x20,x20,x17 // h+=Sigma0(a)
	add	x11,x11,x0
	ldr	x0,[sp,#8]
	str	x3,[sp,#0]
	ror	x16,x24,#14
	add	x27,x27,x19 // h+=K[i]
	ror	x2,x13,#1
	and	x17,x25,x24
	ror	x1,x10,#19
	bic	x19,x26,x24
	ror	x3,x20,#28
	add	x27,x27,x11 // h+=X[i]
	eor	x16,x16,x24,ror#18
	eor	x2,x2,x13,ror#8
	orr	x17,x17,x19 // Ch(e,f,g)
	eor	x19,x20,x21 // a^b, b^c in next round
	eor	x16,x16,x24,ror#41 // Sigma1(e)
	eor	x3,x3,x20,ror#34
	add	x27,x27,x17 // h+=Ch(e,f,g)
	and	x28,x28,x19 // (b^c)&=(a^b)
	eor	x1,x1,x10,ror#61
	eor	x2,x2,x13,lsr#7 // sigma0(X[i+1])
	add	x27,x27,x16 // h+=Sigma1(e)
	eor	x28,x28,x21 // Maj(a,b,c)
	eor	x17,x3,x20,ror#39 // Sigma0(a)
	eor	x1,x1,x10,lsr#6 // sigma1(X[i+14])
	add	x12,x12,x5
	add	x23,x23,x27 // d+=h
	add	x27,x27,x28 // h+=Maj(a,b,c)
	ldr	x28,[x30],#8 // *K++, x19 in next round
	add	x12,x12,x2
	add	x27,x27,x17 // h+=Sigma0(a)
	add	x12,x12,x1
	ldr	x1,[sp,#16]
	str	x4,[sp,#8]
	ror	x16,x23,#14
	add	x26,x26,x28 // h+=K[i]
	ror	x3,x14,#1
	and	x17,x24,x23
	ror	x2,x11,#19
	bic	x28,x25,x23
	ror	x4,x27,#28
	add	x26,x26,x12 // h+=X[i]
	eor	x16,x16,x23,ror#18
	eor	x3,x3,x14,ror#8
	orr	x17,x17,x28 // Ch(e,f,g)
	eor	x28,x27,x20 // a^b, b^c in next round
	eor	x16,x16,x23,ror#41 // Sigma1(e)
	eor	x4,x4,x27,ror#34
	add	x26,x26,x17 // h+=Ch(e,f,g)
	and	x19,x19,x28 // (b^c)&=(a^b)
	eor	x2,x2,x11,ror#61
	eor	x3,x3,x14,lsr#7 // sigma0(X[i+1])
	add	x26,x26,x16 // h+=Sigma1(e)
	eor	x19,x19,x20 // Maj(a,b,c)
	eor	x17,x4,x27,ror#39 // Sigma0(a)
	eor	x2,x2,x11,lsr#6 // sigma1(X[i+14])
	add	x13,x13,x6
	add	x22,x22,x26 // d+=h
	add	x26,x26,x19 // h+=Maj(a,b,c)
	ldr	x19,[x30],#8 // *K++, x28 in next round
	add	x13,x13,x3
	add	x26,x26,x17 // h+=Sigma0(a)
	add	x13,x13,x2
	ldr	x2,[sp,#24]
	str	x5,[sp,#16]
	ror	x16,x22,#14
	add	x25,x25,x19 // h+=K[i]
	ror	x4,x15,#1
	and	x17,x23,x22
	ror	x3,x12,#19
	bic	x19,x24,x22
	ror	x5,x26,#28
	add	x25,x25,x13 // h+=X[i]
	eor	x16,x16,x22,ror#18
	eor	x4,x4,x15,ror#8
	orr	x17,x17,x19 // Ch(e,f,g)
	eor	x19,x26,x27 // a^b, b^c in next round
	eor	x16,x16,x22,ror#41 // Sigma1(e)
	eor	x5,x5,x26,ror#34
	add	x25,x25,x17 // h+=Ch(e,f,g)
	and	x28,x28,x19 // (b^c)&=(a^b)
	eor	x3,x3,x12,ror#61
	eor	x4,x4,x15,lsr#7 // sigma0(X[i+1])
	add	x25,x25,x16 // h+=Sigma1(e)
	eor	x28,x28,x27 // Maj(a,b,c)
	eor	x17,x5,x26,ror#39 // Sigma0(a)
	eor	x3,x3,x12,lsr#6 // sigma1(X[i+14])
	add	x14,x14,x7
	add	x21,x21,x25 // d+=h
	add	x25,x25,x28 // h+=Maj(a,b,c)
	ldr	x28,[x30],#8 // *K++, x19 in next round
	add	x14,x14,x4
	add	x25,x25,x17 // h+=Sigma0(a)
	add	x14,x14,x3
	ldr	x3,[sp,#0]
	str	x6,[sp,#24]
	ror	x16,x21,#14
	add	x24,x24,x28 // h+=K[i]
	ror	x5,x0,#1
	and	x17,x22,x21
	ror	x4,x13,#19
	bic	x28,x23,x21
	ror	x6,x25,#28
	add	x24,x24,x14 // h+=X[i]
	eor	x16,x16,x21,ror#18
	eor	x5,x5,x0,ror#8
	orr	x17,x17,x28 // Ch(e,f,g)
	eor	x28,x25,x26 // a^b, b^c in next round
	eor	x16,x16,x21,ror#41 // Sigma1(e)
	eor	x6,x6,x25,ror#34
	add	x24,x24,x17 // h+=Ch(e,f,g)
	and	x19,x19,x28 // (b^c)&=(a^b)
	eor	x4,x4,x13,ror#61
	eor	x5,x5,x0,lsr#7 // sigma0(X[i+1])
	add	x24,x24,x16 // h+=Sigma1(e)
	eor	x19,x19,x26 // Maj(a,b,c)
	eor	x17,x6,x25,ror#39 // Sigma0(a)
	eor	x4,x4,x13,lsr#6 // sigma1(X[i+14])
	add	x15,x15,x8
	add	x20,x20,x24 // d+=h
	add	x24,x24,x19 // h+=Maj(a,b,c)
	ldr	x19,[x30],#8 // *K++, x28 in next round
	add	x15,x15,x5
	add	x24,x24,x17 // h+=Sigma0(a)
	add	x15,x15,x4
	ldr	x4,[sp,#8]
	str	x7,[sp,#0]
	ror	x16,x20,#14
	add	x23,x23,x19 // h+=K[i]
	ror	x6,x1,#1
	and	x17,x21,x20
	ror	x5,x14,#19
	bic	x19,x22,x20
	ror	x7,x24,#28
	add	x23,x23,x15 // h+=X[i]
	eor	x16,x16,x20,ror#18
	eor	x6,x6,x1,ror#8
	orr	x17,x17,x19 // Ch(e,f,g)
	eor	x19,x24,x25 // a^b, b^c in next round
	eor	x16,x16,x20,ror#41 // Sigma1(e)
	eor	x7,x7,x24,ror#34
	add	x23,x23,x17 // h+=Ch(e,f,g)
	and	x28,x28,x19 // (b^c)&=(a^b)
	eor	x5,x5,x14,ror#61
	eor	x6,x6,x1,lsr#7 // sigma0(X[i+1])
	add	x23,x23,x16 // h+=Sigma1(e)
	eor	x28,x28,x25 // Maj(a,b,c)
	eor	x17,x7,x24,ror#39 // Sigma0(a)
	eor	x5,x5,x14,lsr#6 // sigma1(X[i+14])
	add	x0,x0,x9
	add	x27,x27,x23 // d+=h
	add	x23,x23,x28 // h+=Maj(a,b,c)
	ldr	x28,[x30],#8 // *K++, x19 in next round
	add	x0,x0,x6
	add	x23,x23,x17 // h+=Sigma0(a)
	add	x0,x0,x5
	ldr	x5,[sp,#16]
	str	x8,[sp,#8]
	ror	x16,x27,#14
	add	x22,x22,x28 // h+=K[i]
	ror	x7,x2,#1
	and	x17,x20,x27
	ror	x6,x15,#19
	bic	x28,x21,x27
	ror	x8,x23,#28
	add	x22,x22,x0 // h+=X[i]
	eor	x16,x16,x27,ror#18
	eor	x7,x7,x2,ror#8
	orr	x17,x17,x28 // Ch(e,f,g)
	eor	x28,x23,x24 // a^b, b^c in next round
	eor	x16,x16,x27,ror#41 // Sigma1(e)
	eor	x8,x8,x23,ror#34
	add	x22,x22,x17 // h+=Ch(e,f,g)
	and	x19,x19,x28 // (b^c)&=(a^b)
	eor	x6,x6,x15,ror#61
	eor	x7,x7,x2,lsr#7 // sigma0(X[i+1])
	add	x22,x22,x16 // h+=Sigma1(e)
	eor	x19,x19,x24 // Maj(a,b,c)
	eor	x17,x8,x23,ror#39 // Sigma0(a)
	eor	x6,x6,x15,lsr#6 // sigma1(X[i+14])
	add	x1,x1,x10
	add	x26,x26,x22 // d+=h
	add	x22,x22,x19 // h+=Maj(a,b,c)
	ldr	x19,[x30],#8 // *K++, x28 in next round
	add	x1,x1,x7
	add	x22,x22,x17 // h+=Sigma0(a)
	add	x1,x1,x6
	ldr	x6,[sp,#24]
	str	x9,[sp,#16]
	ror	x16,x26,#14
	add	x21,x21,x19 // h+=K[i]
	ror	x8,x3,#1
	and	x17,x27,x26
	ror	x7,x0,#19
	bic	x19,x20,x26
	ror	x9,x22,#28
	add	x21,x21,x1 // h+=X[i]
	eor	x16,x16,x26,ror#18
	eor	x8,x8,x3,ror#8
	orr	x17,x17,x19 // Ch(e,f,g)
	eor	x19,x22,x23 // a^b, b^c in next round
	eor	x16,x16,x26,ror#41 // Sigma1(e)
	eor	x9,x9,x22,ror#34
	add	x21,x21,x17 // h+=Ch(e,f,g)
	and	x28,x28,x19 // (b^c)&=(a^b)
	eor	x7,x7,x0,ror#61
	eor	x8,x8,x3,lsr#7 // sigma0(X[i+1])
	add	x21,x21,x16 // h+=Sigma1(e)
	eor	x28,x28,x23 // Maj(a,b,c)
	eor	x17,x9,x22,ror#39 // Sigma0(a)
	eor	x7,x7,x0,lsr#6 // sigma1(X[i+14])
	add	x2,x2,x11
	add	x25,x25,x21 // d+=h
	add	x21,x21,x28 // h+=Maj(a,b,c)
	ldr	x28,[x30],#8 // *K++, x19 in next round
	add	x2,x2,x8
	add	x21,x21,x17 // h+=Sigma0(a)
	add	x2,x2,x7
	ldr	x7,[sp,#0]
	str	x10,[sp,#24]
	ror	x16,x25,#14
	add	x20,x20,x28 // h+=K[i]
	ror	x9,x4,#1
	and	x17,x26,x25
	ror	x8,x1,#19
	bic	x28,x27,x25
	ror	x10,x21,#28
	add	x20,x20,x2 // h+=X[i]
	eor	x16,x16,x25,ror#18
	eor	x9,x9,x4,ror#8
	orr	x17,x17,x28 // Ch(e,f,g)
	eor	x28,x21,x22 // a^b, b^c in next round
	eor	x16,x16,x25,ror#41 // Sigma1(e)
	eor	x10,x10,x21,ror#34
	add	x20,x20,x17 // h+=Ch(e,f,g)
	and	x19,x19,x28 // (b^c)&=(a^b)
	eor	x8,x8,x1,ror#61
	eor	x9,x9,x4,lsr#7 // sigma0(X[i+1])
	add	x20,x20,x16 // h+=Sigma1(e)
	eor	x19,x19,x22 // Maj(a,b,c)
	eor	x17,x10,x21,ror#39 // Sigma0(a)
	eor	x8,x8,x1,lsr#6 // sigma1(X[i+14])
	add	x3,x3,x12
	add	x24,x24,x20 // d+=h
	add	x20,x20,x19 // h+=Maj(a,b,c)
	ldr	x19,[x30],#8 // *K++, x28 in next round
	add	x3,x3,x9
	add	x20,x20,x17 // h+=Sigma0(a)
	add	x3,x3,x8
	cbnz	x19,Loop_16_xx

	// Block done: reload the state pointer, end pointer and input
	// pointer, and move x30 back to the start of LK512. 648 bytes is 81
	// eight-byte loads: the 80 constants plus the zero terminator.
	ldp	x0,x2,[x29,#96]
	ldr	x1,[x29,#112]
	sub	x30,x30,#648 // rewind

	// Add the working variables into the stored state and write it back.
	ldp	x3,x4,[x0]
	ldp	x5,x6,[x0,#2*8]
	add	x1,x1,#14*8 // advance input pointer
	ldp	x7,x8,[x0,#4*8]
	add	x20,x20,x3
	ldp	x9,x10,[x0,#6*8]
	add	x21,x21,x4
	add	x22,x22,x5
	add	x23,x23,x6
	stp	x20,x21,[x0]
	add	x24,x24,x7
	add	x25,x25,x8
	stp	x22,x23,[x0,#2*8]
	add	x26,x26,x9
	add	x27,x27,x10
	cmp	x1,x2
	stp	x24,x25,[x0,#4*8]
	stp	x26,x27,[x0,#6*8]
	b.ne	Loop

	// Restore x19..x28 and the frame record, then return.
	ldp	x19,x20,[x29,#16]
	add	sp,sp,#4*8
	ldp	x21,x22,[x29,#32]
	ldp	x23,x24,[x29,#48]
	ldp	x25,x26,[x29,#64]
	ldp	x27,x28,[x29,#80]
	ldp	x29,x30,[sp],#128
	AARCH64_VALIDATE_LINK_REGISTER
	ret


.section	.rodata
.align	6

// Table of 80 64-bit round constants followed by a zero .quad. The zero
// is what ends Loop_16_xx in sha512_block_data_order; the same table is
// also addressed from sha512_block_armv8.
LK512:
.quad	0x428a2f98d728ae22,0x7137449123ef65cd
.quad	0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
.quad	0x3956c25bf348b538,0x59f111f1b605d019
.quad	0x923f82a4af194f9b,0xab1c5ed5da6d8118
.quad	0xd807aa98a3030242,0x12835b0145706fbe
.quad	0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
.quad	0x72be5d74f27b896f,0x80deb1fe3b1696b1
.quad	0x9bdc06a725c71235,0xc19bf174cf692694
.quad	0xe49b69c19ef14ad2,0xefbe4786384f25e3
.quad	0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
.quad	0x2de92c6f592b0275,0x4a7484aa6ea6e483
.quad	0x5cb0a9dcbd41fbd4,0x76f988da831153b5
.quad	0x983e5152ee66dfab,0xa831c66d2db43210
.quad	0xb00327c898fb213f,0xbf597fc7beef0ee4
.quad	0xc6e00bf33da88fc2,0xd5a79147930aa725
.quad	0x06ca6351e003826f,0x142929670a0e6e70
.quad	0x27b70a8546d22ffc,0x2e1b21385c26c926
.quad	0x4d2c6dfc5ac42aed,0x53380d139d95b3df
.quad	0x650a73548baf63de,0x766a0abb3c77b2a8
.quad	0x81c2c92e47edaee6,0x92722c851482353b
.quad	0xa2bfe8a14cf10364,0xa81a664bbc423001
.quad	0xc24b8b70d0f89791,0xc76c51a30654be30
.quad	0xd192e819d6ef5218,0xd69906245565a910
.quad	0xf40e35855771202a,0x106aa07032bbd1b8
.quad	0x19a4c116b8d2d0c8,0x1e376c085141ab53
.quad	0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
.quad	0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
.quad	0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
.quad	0x748f82ee5defb2fc,0x78a5636f43172f60
.quad	0x84c87814a1f0ab72,0x8cc702081a6439ec
.quad	0x90befffa23631e28,0xa4506cebde82bde9
.quad	0xbef9a3f7b2c67915,0xc67178f2e372532b
.quad	0xca273eceea26619c,0xd186b8c721c0c207
.quad	0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
.quad	0x06f067aa72176fba,0x0a637dc5a2c898a6
.quad	0x113f9804bef90dae,0x1b710b35131c471b
.quad	0x28db77f523047d84,0x32caab7b40c72493
.quad	0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
.quad	0x4cc5d4becb3e42b6,0x597f299cfc657e2a
.quad	0x5fcb6fab3ad6faec,0x6c44198c4a475817
.quad	0 // terminator

// NUL-terminated ASCII identification string:
// "SHA512 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
.byte	83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	2
.align	2
.text
#ifndef __KERNEL__
.def sha512_block_armv8
   .type 32
.endef
.align	6
// Lv8_entry is the branch target taken by sha512_block_data_order when
// OPENSSL_armcap_P has ARMV8_SHA512 set; x0, x1 and x2 arrive unchanged
// from that function's caller.
sha512_block_armv8:
Lv8_entry:
	stp	x29,x30,[sp,#-16]!
1096 add x29,sp,#0 1097 1098 ld1 {v16.16b,v17.16b,v18.16b,v19.16b},[x1],#64 // load input 1099 ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64 1100 1101 ld1 {v0.2d,v1.2d,v2.2d,v3.2d},[x0] // load context 1102 adrp x3,LK512 1103 add x3,x3,:lo12:LK512 1104 1105 rev64 v16.16b,v16.16b 1106 rev64 v17.16b,v17.16b 1107 rev64 v18.16b,v18.16b 1108 rev64 v19.16b,v19.16b 1109 rev64 v20.16b,v20.16b 1110 rev64 v21.16b,v21.16b 1111 rev64 v22.16b,v22.16b 1112 rev64 v23.16b,v23.16b 1113 b Loop_hw 1114 1115.align 4 1116Loop_hw: 1117 ld1 {v24.2d},[x3],#16 1118 subs x2,x2,#1 1119 sub x4,x1,#128 1120 orr v26.16b,v0.16b,v0.16b // offload 1121 orr v27.16b,v1.16b,v1.16b 1122 orr v28.16b,v2.16b,v2.16b 1123 orr v29.16b,v3.16b,v3.16b 1124 csel x1,x1,x4,ne // conditional rewind 1125 add v24.2d,v24.2d,v16.2d 1126 ld1 {v25.2d},[x3],#16 1127 ext v24.16b,v24.16b,v24.16b,#8 1128 ext v5.16b,v2.16b,v3.16b,#8 1129 ext v6.16b,v1.16b,v2.16b,#8 1130 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" 1131.long 0xcec08230 //sha512su0 v16.16b,v17.16b 1132 ext v7.16b,v20.16b,v21.16b,#8 1133.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1134.long 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b 1135 add v4.2d,v1.2d,v3.2d // "D + T1" 1136.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1137 add v25.2d,v25.2d,v17.2d 1138 ld1 {v24.2d},[x3],#16 1139 ext v25.16b,v25.16b,v25.16b,#8 1140 ext v5.16b,v4.16b,v2.16b,#8 1141 ext v6.16b,v0.16b,v4.16b,#8 1142 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" 1143.long 0xcec08251 //sha512su0 v17.16b,v18.16b 1144 ext v7.16b,v21.16b,v22.16b,#8 1145.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1146.long 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b 1147 add v1.2d,v0.2d,v2.2d // "D + T1" 1148.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1149 add v24.2d,v24.2d,v18.2d 1150 ld1 {v25.2d},[x3],#16 1151 ext v24.16b,v24.16b,v24.16b,#8 1152 ext v5.16b,v1.16b,v4.16b,#8 1153 ext v6.16b,v3.16b,v1.16b,#8 1154 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" 1155.long 0xcec08272 //sha512su0 
v18.16b,v19.16b 1156 ext v7.16b,v22.16b,v23.16b,#8 1157.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1158.long 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b 1159 add v0.2d,v3.2d,v4.2d // "D + T1" 1160.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1161 add v25.2d,v25.2d,v19.2d 1162 ld1 {v24.2d},[x3],#16 1163 ext v25.16b,v25.16b,v25.16b,#8 1164 ext v5.16b,v0.16b,v1.16b,#8 1165 ext v6.16b,v2.16b,v0.16b,#8 1166 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" 1167.long 0xcec08293 //sha512su0 v19.16b,v20.16b 1168 ext v7.16b,v23.16b,v16.16b,#8 1169.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1170.long 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b 1171 add v3.2d,v2.2d,v1.2d // "D + T1" 1172.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1173 add v24.2d,v24.2d,v20.2d 1174 ld1 {v25.2d},[x3],#16 1175 ext v24.16b,v24.16b,v24.16b,#8 1176 ext v5.16b,v3.16b,v0.16b,#8 1177 ext v6.16b,v4.16b,v3.16b,#8 1178 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" 1179.long 0xcec082b4 //sha512su0 v20.16b,v21.16b 1180 ext v7.16b,v16.16b,v17.16b,#8 1181.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1182.long 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b 1183 add v2.2d,v4.2d,v0.2d // "D + T1" 1184.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1185 add v25.2d,v25.2d,v21.2d 1186 ld1 {v24.2d},[x3],#16 1187 ext v25.16b,v25.16b,v25.16b,#8 1188 ext v5.16b,v2.16b,v3.16b,#8 1189 ext v6.16b,v1.16b,v2.16b,#8 1190 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1191.long 0xcec082d5 //sha512su0 v21.16b,v22.16b 1192 ext v7.16b,v17.16b,v18.16b,#8 1193.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1194.long 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b 1195 add v4.2d,v1.2d,v3.2d // "D + T1" 1196.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1197 add v24.2d,v24.2d,v22.2d 1198 ld1 {v25.2d},[x3],#16 1199 ext v24.16b,v24.16b,v24.16b,#8 1200 ext v5.16b,v4.16b,v2.16b,#8 1201 ext v6.16b,v0.16b,v4.16b,#8 1202 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1203.long 0xcec082f6 //sha512su0 v22.16b,v23.16b 1204 ext 
v7.16b,v18.16b,v19.16b,#8 1205.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1206.long 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b 1207 add v1.2d,v0.2d,v2.2d // "D + T1" 1208.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1209 add v25.2d,v25.2d,v23.2d 1210 ld1 {v24.2d},[x3],#16 1211 ext v25.16b,v25.16b,v25.16b,#8 1212 ext v5.16b,v1.16b,v4.16b,#8 1213 ext v6.16b,v3.16b,v1.16b,#8 1214 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1215.long 0xcec08217 //sha512su0 v23.16b,v16.16b 1216 ext v7.16b,v19.16b,v20.16b,#8 1217.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1218.long 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1219 add v0.2d,v3.2d,v4.2d // "D + T1" 1220.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1221 add v24.2d,v24.2d,v16.2d 1222 ld1 {v25.2d},[x3],#16 1223 ext v24.16b,v24.16b,v24.16b,#8 1224 ext v5.16b,v0.16b,v1.16b,#8 1225 ext v6.16b,v2.16b,v0.16b,#8 1226 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1227.long 0xcec08230 //sha512su0 v16.16b,v17.16b 1228 ext v7.16b,v20.16b,v21.16b,#8 1229.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1230.long 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b 1231 add v3.2d,v2.2d,v1.2d // "D + T1" 1232.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1233 add v25.2d,v25.2d,v17.2d 1234 ld1 {v24.2d},[x3],#16 1235 ext v25.16b,v25.16b,v25.16b,#8 1236 ext v5.16b,v3.16b,v0.16b,#8 1237 ext v6.16b,v4.16b,v3.16b,#8 1238 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1239.long 0xcec08251 //sha512su0 v17.16b,v18.16b 1240 ext v7.16b,v21.16b,v22.16b,#8 1241.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1242.long 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b 1243 add v2.2d,v4.2d,v0.2d // "D + T1" 1244.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1245 add v24.2d,v24.2d,v18.2d 1246 ld1 {v25.2d},[x3],#16 1247 ext v24.16b,v24.16b,v24.16b,#8 1248 ext v5.16b,v2.16b,v3.16b,#8 1249 ext v6.16b,v1.16b,v2.16b,#8 1250 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" 1251.long 0xcec08272 //sha512su0 v18.16b,v19.16b 1252 ext v7.16b,v22.16b,v23.16b,#8 
1253.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1254.long 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b 1255 add v4.2d,v1.2d,v3.2d // "D + T1" 1256.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1257 add v25.2d,v25.2d,v19.2d 1258 ld1 {v24.2d},[x3],#16 1259 ext v25.16b,v25.16b,v25.16b,#8 1260 ext v5.16b,v4.16b,v2.16b,#8 1261 ext v6.16b,v0.16b,v4.16b,#8 1262 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" 1263.long 0xcec08293 //sha512su0 v19.16b,v20.16b 1264 ext v7.16b,v23.16b,v16.16b,#8 1265.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1266.long 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b 1267 add v1.2d,v0.2d,v2.2d // "D + T1" 1268.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1269 add v24.2d,v24.2d,v20.2d 1270 ld1 {v25.2d},[x3],#16 1271 ext v24.16b,v24.16b,v24.16b,#8 1272 ext v5.16b,v1.16b,v4.16b,#8 1273 ext v6.16b,v3.16b,v1.16b,#8 1274 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" 1275.long 0xcec082b4 //sha512su0 v20.16b,v21.16b 1276 ext v7.16b,v16.16b,v17.16b,#8 1277.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1278.long 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b 1279 add v0.2d,v3.2d,v4.2d // "D + T1" 1280.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1281 add v25.2d,v25.2d,v21.2d 1282 ld1 {v24.2d},[x3],#16 1283 ext v25.16b,v25.16b,v25.16b,#8 1284 ext v5.16b,v0.16b,v1.16b,#8 1285 ext v6.16b,v2.16b,v0.16b,#8 1286 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" 1287.long 0xcec082d5 //sha512su0 v21.16b,v22.16b 1288 ext v7.16b,v17.16b,v18.16b,#8 1289.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1290.long 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b 1291 add v3.2d,v2.2d,v1.2d // "D + T1" 1292.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1293 add v24.2d,v24.2d,v22.2d 1294 ld1 {v25.2d},[x3],#16 1295 ext v24.16b,v24.16b,v24.16b,#8 1296 ext v5.16b,v3.16b,v0.16b,#8 1297 ext v6.16b,v4.16b,v3.16b,#8 1298 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" 1299.long 0xcec082f6 //sha512su0 v22.16b,v23.16b 1300 ext v7.16b,v18.16b,v19.16b,#8 1301.long 0xce6680a0 
//sha512h v0.16b,v5.16b,v6.16b 1302.long 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b 1303 add v2.2d,v4.2d,v0.2d // "D + T1" 1304.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1305 add v25.2d,v25.2d,v23.2d 1306 ld1 {v24.2d},[x3],#16 1307 ext v25.16b,v25.16b,v25.16b,#8 1308 ext v5.16b,v2.16b,v3.16b,#8 1309 ext v6.16b,v1.16b,v2.16b,#8 1310 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1311.long 0xcec08217 //sha512su0 v23.16b,v16.16b 1312 ext v7.16b,v19.16b,v20.16b,#8 1313.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1314.long 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1315 add v4.2d,v1.2d,v3.2d // "D + T1" 1316.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1317 add v24.2d,v24.2d,v16.2d 1318 ld1 {v25.2d},[x3],#16 1319 ext v24.16b,v24.16b,v24.16b,#8 1320 ext v5.16b,v4.16b,v2.16b,#8 1321 ext v6.16b,v0.16b,v4.16b,#8 1322 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1323.long 0xcec08230 //sha512su0 v16.16b,v17.16b 1324 ext v7.16b,v20.16b,v21.16b,#8 1325.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1326.long 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b 1327 add v1.2d,v0.2d,v2.2d // "D + T1" 1328.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1329 add v25.2d,v25.2d,v17.2d 1330 ld1 {v24.2d},[x3],#16 1331 ext v25.16b,v25.16b,v25.16b,#8 1332 ext v5.16b,v1.16b,v4.16b,#8 1333 ext v6.16b,v3.16b,v1.16b,#8 1334 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1335.long 0xcec08251 //sha512su0 v17.16b,v18.16b 1336 ext v7.16b,v21.16b,v22.16b,#8 1337.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1338.long 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b 1339 add v0.2d,v3.2d,v4.2d // "D + T1" 1340.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1341 add v24.2d,v24.2d,v18.2d 1342 ld1 {v25.2d},[x3],#16 1343 ext v24.16b,v24.16b,v24.16b,#8 1344 ext v5.16b,v0.16b,v1.16b,#8 1345 ext v6.16b,v2.16b,v0.16b,#8 1346 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1347.long 0xcec08272 //sha512su0 v18.16b,v19.16b 1348 ext v7.16b,v22.16b,v23.16b,#8 1349.long 0xce6680a1 //sha512h 
v1.16b,v5.16b,v6.16b 1350.long 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b 1351 add v3.2d,v2.2d,v1.2d // "D + T1" 1352.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1353 add v25.2d,v25.2d,v19.2d 1354 ld1 {v24.2d},[x3],#16 1355 ext v25.16b,v25.16b,v25.16b,#8 1356 ext v5.16b,v3.16b,v0.16b,#8 1357 ext v6.16b,v4.16b,v3.16b,#8 1358 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1359.long 0xcec08293 //sha512su0 v19.16b,v20.16b 1360 ext v7.16b,v23.16b,v16.16b,#8 1361.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1362.long 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b 1363 add v2.2d,v4.2d,v0.2d // "D + T1" 1364.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1365 add v24.2d,v24.2d,v20.2d 1366 ld1 {v25.2d},[x3],#16 1367 ext v24.16b,v24.16b,v24.16b,#8 1368 ext v5.16b,v2.16b,v3.16b,#8 1369 ext v6.16b,v1.16b,v2.16b,#8 1370 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" 1371.long 0xcec082b4 //sha512su0 v20.16b,v21.16b 1372 ext v7.16b,v16.16b,v17.16b,#8 1373.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1374.long 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b 1375 add v4.2d,v1.2d,v3.2d // "D + T1" 1376.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1377 add v25.2d,v25.2d,v21.2d 1378 ld1 {v24.2d},[x3],#16 1379 ext v25.16b,v25.16b,v25.16b,#8 1380 ext v5.16b,v4.16b,v2.16b,#8 1381 ext v6.16b,v0.16b,v4.16b,#8 1382 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" 1383.long 0xcec082d5 //sha512su0 v21.16b,v22.16b 1384 ext v7.16b,v17.16b,v18.16b,#8 1385.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1386.long 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b 1387 add v1.2d,v0.2d,v2.2d // "D + T1" 1388.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1389 add v24.2d,v24.2d,v22.2d 1390 ld1 {v25.2d},[x3],#16 1391 ext v24.16b,v24.16b,v24.16b,#8 1392 ext v5.16b,v1.16b,v4.16b,#8 1393 ext v6.16b,v3.16b,v1.16b,#8 1394 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" 1395.long 0xcec082f6 //sha512su0 v22.16b,v23.16b 1396 ext v7.16b,v18.16b,v19.16b,#8 1397.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 
1398.long 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b 1399 add v0.2d,v3.2d,v4.2d // "D + T1" 1400.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1401 add v25.2d,v25.2d,v23.2d 1402 ld1 {v24.2d},[x3],#16 1403 ext v25.16b,v25.16b,v25.16b,#8 1404 ext v5.16b,v0.16b,v1.16b,#8 1405 ext v6.16b,v2.16b,v0.16b,#8 1406 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" 1407.long 0xcec08217 //sha512su0 v23.16b,v16.16b 1408 ext v7.16b,v19.16b,v20.16b,#8 1409.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1410.long 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1411 add v3.2d,v2.2d,v1.2d // "D + T1" 1412.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1413 add v24.2d,v24.2d,v16.2d 1414 ld1 {v25.2d},[x3],#16 1415 ext v24.16b,v24.16b,v24.16b,#8 1416 ext v5.16b,v3.16b,v0.16b,#8 1417 ext v6.16b,v4.16b,v3.16b,#8 1418 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" 1419.long 0xcec08230 //sha512su0 v16.16b,v17.16b 1420 ext v7.16b,v20.16b,v21.16b,#8 1421.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1422.long 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b 1423 add v2.2d,v4.2d,v0.2d // "D + T1" 1424.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1425 add v25.2d,v25.2d,v17.2d 1426 ld1 {v24.2d},[x3],#16 1427 ext v25.16b,v25.16b,v25.16b,#8 1428 ext v5.16b,v2.16b,v3.16b,#8 1429 ext v6.16b,v1.16b,v2.16b,#8 1430 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1431.long 0xcec08251 //sha512su0 v17.16b,v18.16b 1432 ext v7.16b,v21.16b,v22.16b,#8 1433.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1434.long 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b 1435 add v4.2d,v1.2d,v3.2d // "D + T1" 1436.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1437 add v24.2d,v24.2d,v18.2d 1438 ld1 {v25.2d},[x3],#16 1439 ext v24.16b,v24.16b,v24.16b,#8 1440 ext v5.16b,v4.16b,v2.16b,#8 1441 ext v6.16b,v0.16b,v4.16b,#8 1442 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1443.long 0xcec08272 //sha512su0 v18.16b,v19.16b 1444 ext v7.16b,v22.16b,v23.16b,#8 1445.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1446.long 0xce678a32 
//sha512su1 v18.16b,v17.16b,v7.16b 1447 add v1.2d,v0.2d,v2.2d // "D + T1" 1448.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1449 add v25.2d,v25.2d,v19.2d 1450 ld1 {v24.2d},[x3],#16 1451 ext v25.16b,v25.16b,v25.16b,#8 1452 ext v5.16b,v1.16b,v4.16b,#8 1453 ext v6.16b,v3.16b,v1.16b,#8 1454 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1455.long 0xcec08293 //sha512su0 v19.16b,v20.16b 1456 ext v7.16b,v23.16b,v16.16b,#8 1457.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1458.long 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b 1459 add v0.2d,v3.2d,v4.2d // "D + T1" 1460.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1461 add v24.2d,v24.2d,v20.2d 1462 ld1 {v25.2d},[x3],#16 1463 ext v24.16b,v24.16b,v24.16b,#8 1464 ext v5.16b,v0.16b,v1.16b,#8 1465 ext v6.16b,v2.16b,v0.16b,#8 1466 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1467.long 0xcec082b4 //sha512su0 v20.16b,v21.16b 1468 ext v7.16b,v16.16b,v17.16b,#8 1469.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1470.long 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b 1471 add v3.2d,v2.2d,v1.2d // "D + T1" 1472.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1473 add v25.2d,v25.2d,v21.2d 1474 ld1 {v24.2d},[x3],#16 1475 ext v25.16b,v25.16b,v25.16b,#8 1476 ext v5.16b,v3.16b,v0.16b,#8 1477 ext v6.16b,v4.16b,v3.16b,#8 1478 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1479.long 0xcec082d5 //sha512su0 v21.16b,v22.16b 1480 ext v7.16b,v17.16b,v18.16b,#8 1481.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1482.long 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b 1483 add v2.2d,v4.2d,v0.2d // "D + T1" 1484.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1485 add v24.2d,v24.2d,v22.2d 1486 ld1 {v25.2d},[x3],#16 1487 ext v24.16b,v24.16b,v24.16b,#8 1488 ext v5.16b,v2.16b,v3.16b,#8 1489 ext v6.16b,v1.16b,v2.16b,#8 1490 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" 1491.long 0xcec082f6 //sha512su0 v22.16b,v23.16b 1492 ext v7.16b,v18.16b,v19.16b,#8 1493.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1494.long 0xce678ab6 //sha512su1 
v22.16b,v21.16b,v7.16b 1495 add v4.2d,v1.2d,v3.2d // "D + T1" 1496.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1497 add v25.2d,v25.2d,v23.2d 1498 ld1 {v24.2d},[x3],#16 1499 ext v25.16b,v25.16b,v25.16b,#8 1500 ext v5.16b,v4.16b,v2.16b,#8 1501 ext v6.16b,v0.16b,v4.16b,#8 1502 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" 1503.long 0xcec08217 //sha512su0 v23.16b,v16.16b 1504 ext v7.16b,v19.16b,v20.16b,#8 1505.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1506.long 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1507 add v1.2d,v0.2d,v2.2d // "D + T1" 1508.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1509 ld1 {v25.2d},[x3],#16 1510 add v24.2d,v24.2d,v16.2d 1511 ld1 {v16.16b},[x1],#16 // load next input 1512 ext v24.16b,v24.16b,v24.16b,#8 1513 ext v5.16b,v1.16b,v4.16b,#8 1514 ext v6.16b,v3.16b,v1.16b,#8 1515 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" 1516.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1517 rev64 v16.16b,v16.16b 1518 add v0.2d,v3.2d,v4.2d // "D + T1" 1519.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1520 ld1 {v24.2d},[x3],#16 1521 add v25.2d,v25.2d,v17.2d 1522 ld1 {v17.16b},[x1],#16 // load next input 1523 ext v25.16b,v25.16b,v25.16b,#8 1524 ext v5.16b,v0.16b,v1.16b,#8 1525 ext v6.16b,v2.16b,v0.16b,#8 1526 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" 1527.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1528 rev64 v17.16b,v17.16b 1529 add v3.2d,v2.2d,v1.2d // "D + T1" 1530.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1531 ld1 {v25.2d},[x3],#16 1532 add v24.2d,v24.2d,v18.2d 1533 ld1 {v18.16b},[x1],#16 // load next input 1534 ext v24.16b,v24.16b,v24.16b,#8 1535 ext v5.16b,v3.16b,v0.16b,#8 1536 ext v6.16b,v4.16b,v3.16b,#8 1537 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" 1538.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1539 rev64 v18.16b,v18.16b 1540 add v2.2d,v4.2d,v0.2d // "D + T1" 1541.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1542 ld1 {v24.2d},[x3],#16 1543 add v25.2d,v25.2d,v19.2d 1544 ld1 {v19.16b},[x1],#16 // load next input 1545 
ext v25.16b,v25.16b,v25.16b,#8 1546 ext v5.16b,v2.16b,v3.16b,#8 1547 ext v6.16b,v1.16b,v2.16b,#8 1548 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1549.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1550 rev64 v19.16b,v19.16b 1551 add v4.2d,v1.2d,v3.2d // "D + T1" 1552.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1553 ld1 {v25.2d},[x3],#16 1554 add v24.2d,v24.2d,v20.2d 1555 ld1 {v20.16b},[x1],#16 // load next input 1556 ext v24.16b,v24.16b,v24.16b,#8 1557 ext v5.16b,v4.16b,v2.16b,#8 1558 ext v6.16b,v0.16b,v4.16b,#8 1559 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1560.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1561 rev64 v20.16b,v20.16b 1562 add v1.2d,v0.2d,v2.2d // "D + T1" 1563.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1564 ld1 {v24.2d},[x3],#16 1565 add v25.2d,v25.2d,v21.2d 1566 ld1 {v21.16b},[x1],#16 // load next input 1567 ext v25.16b,v25.16b,v25.16b,#8 1568 ext v5.16b,v1.16b,v4.16b,#8 1569 ext v6.16b,v3.16b,v1.16b,#8 1570 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1571.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1572 rev64 v21.16b,v21.16b 1573 add v0.2d,v3.2d,v4.2d // "D + T1" 1574.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1575 ld1 {v25.2d},[x3],#16 1576 add v24.2d,v24.2d,v22.2d 1577 ld1 {v22.16b},[x1],#16 // load next input 1578 ext v24.16b,v24.16b,v24.16b,#8 1579 ext v5.16b,v0.16b,v1.16b,#8 1580 ext v6.16b,v2.16b,v0.16b,#8 1581 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1582.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1583 rev64 v22.16b,v22.16b 1584 add v3.2d,v2.2d,v1.2d // "D + T1" 1585.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1586 sub x3,x3,#80*8 // rewind 1587 add v25.2d,v25.2d,v23.2d 1588 ld1 {v23.16b},[x1],#16 // load next input 1589 ext v25.16b,v25.16b,v25.16b,#8 1590 ext v5.16b,v3.16b,v0.16b,#8 1591 ext v6.16b,v4.16b,v3.16b,#8 1592 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1593.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1594 rev64 v23.16b,v23.16b 1595 add v2.2d,v4.2d,v0.2d // "D + T1" 1596.long 0xce618480 
//sha512h2 v0.16b,v4.16b,v1.16b 1597 add v0.2d,v0.2d,v26.2d // accumulate 1598 add v1.2d,v1.2d,v27.2d 1599 add v2.2d,v2.2d,v28.2d 1600 add v3.2d,v3.2d,v29.2d 1601 1602 cbnz x2,Loop_hw 1603 1604 st1 {v0.2d,v1.2d,v2.2d,v3.2d},[x0] // store context 1605 1606 ldr x29,[sp],#16 1607 ret 1608 1609#endif 1610#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(_WIN32) 1611