// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#include <ring-core/asm_base.h>

#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) && defined(__ELF__)
// Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the OpenSSL license (the "License"). You may not use
// this file except in compliance with the License. You can obtain a copy
// in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html

// ====================================================================
// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
// project. The module is, however, dual licensed under OpenSSL and
// CRYPTOGAMS licenses depending on where you obtain it. For further
// details see http://www.openssl.org/~appro/cryptogams/.
//
// Permission to use under GPLv2 terms is granted.
// ====================================================================
//
// SHA256/512 for ARMv8.
//
// Performance in cycles per processed byte and improvement coefficient
// over code generated with "default" compiler:
//
//              SHA256-hw   SHA256(*)       SHA512
// Apple A7     1.97        10.5 (+33%)     6.73 (-1%(**))
// Cortex-A53   2.38        15.5 (+115%)    10.0 (+150%(***))
// Cortex-A57   2.31        11.6 (+86%)     7.51 (+260%(***))
// Denver       2.01        10.5 (+26%)     6.70 (+8%)
// X-Gene                   20.0 (+100%)    12.8 (+300%(***))
// Mongoose     2.36        13.0 (+50%)     8.36 (+33%)
// Kryo         1.92        17.4 (+30%)     11.2 (+8%)
//
// (*)   Software SHA256 results are of lesser relevance, presented
//       mostly for informational purposes.
// (**)  The result is a trade-off: it's possible to improve it by
//       10% (or by 1 cycle per round), but at the cost of 20% loss
//       on Cortex-A53 (or by 4 cycles per round).
// (***) Super-impressive coefficients over gcc-generated code are
//       indication of some compiler "pathology", most notably code
//       generated with -mgeneral-regs-only is significantly faster
//       and the gap is only 40-90%.

#ifndef __KERNEL__
# include <ring-core/arm_arch.h>
#endif

.text


.hidden	OPENSSL_armcap_P
//----------------------------------------------------------------------
// sha512_block_data_order
//
// SHA-512 block function using general-purpose registers only. Outside
// __KERNEL__ builds it first tests the ARMV8_SHA512 bit of
// OPENSSL_armcap_P and branches to .Lv8_entry when that bit is set.
//
// In:   x0 = pointer to the eight 64-bit state words ("context"); they
//            are loaded at entry and written back after every block
//       x1 = input pointer
//       x2 = block count; scaled by 128 (lsl#7) and added to x1 to form
//            the end-of-input pointer
//
// Register roles on the general-register path:
//       x20-x27   working variables a..h (a=x20 ... h=x27 in round 0; the
//                 roles rotate by one register every round)
//       x30       cursor into .LK512 (post-incremented by 8 per round)
//       x19,x28   alternate between "K[i] for the next round" and the
//                 a^b / b^c term used to build Maj(a,b,c)
//       x16,x17   scratch (Sigma1 / Ch / Sigma0)
//       x3-x15,x0,x1,x2
//                 message schedule words X[0..15]; x0-x2 are reused for
//                 X[13..15] once the arguments are saved in the frame
//
// Stack: 128-byte frame addressed through x29
//       [x29,#0]    x29,x30
//       [x29,#16]   x19..x28
//       [x29,#96]   x0 (context pointer), [x29,#104] end-of-input pointer
//       [x29,#112]  input cursor
//       plus 4*8 bytes below the frame ([sp,#0..#24]) used to spill
//       schedule words while their registers serve as temporaries.
//
// NOTE(review): the block loop is bottom-tested (cmp x1,x2 / b.ne .Loop
// after a block has been processed), so a block count of 0 is not
// handled on this path -- confirm callers always pass at least 1.
//----------------------------------------------------------------------
.globl	sha512_block_data_order
.hidden	sha512_block_data_order
.type	sha512_block_data_order,%function
.align	6
sha512_block_data_order:
	AARCH64_VALID_CALL_TARGET
#ifndef __KERNEL__
	// Runtime dispatch: take .Lv8_entry if ARMV8_SHA512 is set in
	// OPENSSL_armcap_P, otherwise fall through.
#if defined(OPENSSL_HWASAN) && __clang_major__ >= 10
	adrp	x16,:pg_hi21_nc:OPENSSL_armcap_P
#else
	adrp	x16,OPENSSL_armcap_P
#endif
	ldr	w16,[x16,:lo12:OPENSSL_armcap_P]
	tst	w16,#ARMV8_SHA512
	b.ne	.Lv8_entry
#endif
	AARCH64_SIGN_LINK_REGISTER
	stp	x29,x30,[sp,#-128]!
	add	x29,sp,#0

	// Save callee-saved x19-x28, then reserve four 8-byte spill slots.
	stp	x19,x20,[sp,#16]
	stp	x21,x22,[sp,#32]
	stp	x23,x24,[sp,#48]
	stp	x25,x26,[sp,#64]
	stp	x27,x28,[sp,#80]
	sub	sp,sp,#4*8

	ldp	x20,x21,[x0]	// load context
	ldp	x22,x23,[x0,#2*8]
	ldp	x24,x25,[x0,#4*8]
	add	x2,x1,x2,lsl#7	// end of input
	ldp	x26,x27,[x0,#6*8]
	adrp	x30,.LK512
	add	x30,x30,:lo12:.LK512
	stp	x0,x2,[x29,#96]

	// One pass through .Loop per 128-byte block. Rounds 0-15 load the
	// input words two at a time and byte-swap them unless __AARCH64EB__
	// is defined. Each round's final "h+=Sigma0(a)" is deferred into the
	// start of the following round (see the commented-out add lines).
.Loop:
	ldp	x3,x4,[x1],#2*8
	ldr	x19,[x30],#8	// *K++
	eor	x28,x21,x22	// magic seed
	str	x1,[x29,#112]
#ifndef __AARCH64EB__
	rev	x3,x3	// 0
#endif
	ror	x16,x24,#14
	add	x27,x27,x19	// h+=K[i]
	eor	x6,x24,x24,ror#23
	and	x17,x25,x24
	bic	x19,x26,x24
	add	x27,x27,x3	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x20,x21	// a^b, b^c in next round
	eor	x16,x16,x6,ror#18	// Sigma1(e)
	ror	x6,x20,#28
	add	x27,x27,x17	// h+=Ch(e,f,g)
	eor	x17,x20,x20,ror#5
	add	x27,x27,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x23,x23,x27	// d+=h
	eor	x28,x28,x21	// Maj(a,b,c)
	eor	x17,x6,x17,ror#34	// Sigma0(a)
	add	x27,x27,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x27,x27,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x4,x4	// 1
#endif
	ldp	x5,x6,[x1],#2*8
	add	x27,x27,x17	// h+=Sigma0(a)
	ror	x16,x23,#14
	add	x26,x26,x28	// h+=K[i]
	eor	x7,x23,x23,ror#23
	and	x17,x24,x23
	bic	x28,x25,x23
	add	x26,x26,x4	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x27,x20	// a^b, b^c in next round
	eor	x16,x16,x7,ror#18	// Sigma1(e)
	ror	x7,x27,#28
	add	x26,x26,x17	// h+=Ch(e,f,g)
	eor	x17,x27,x27,ror#5
	add	x26,x26,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x22,x22,x26	// d+=h
	eor	x19,x19,x20	// Maj(a,b,c)
	eor	x17,x7,x17,ror#34	// Sigma0(a)
	add	x26,x26,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x26,x26,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x5,x5	// 2
#endif
	add	x26,x26,x17	// h+=Sigma0(a)
	ror	x16,x22,#14
	add	x25,x25,x19	// h+=K[i]
	eor	x8,x22,x22,ror#23
	and	x17,x23,x22
	bic	x19,x24,x22
	add	x25,x25,x5	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x26,x27	// a^b, b^c in next round
	eor	x16,x16,x8,ror#18	// Sigma1(e)
	ror	x8,x26,#28
	add	x25,x25,x17	// h+=Ch(e,f,g)
	eor	x17,x26,x26,ror#5
	add	x25,x25,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x21,x21,x25	// d+=h
	eor	x28,x28,x27	// Maj(a,b,c)
	eor	x17,x8,x17,ror#34	// Sigma0(a)
	add	x25,x25,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x25,x25,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x6,x6	// 3
#endif
	ldp	x7,x8,[x1],#2*8
	add	x25,x25,x17	// h+=Sigma0(a)
	ror	x16,x21,#14
	add	x24,x24,x28	// h+=K[i]
	eor	x9,x21,x21,ror#23
	and	x17,x22,x21
	bic	x28,x23,x21
	add	x24,x24,x6	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x25,x26	// a^b, b^c in next round
	eor	x16,x16,x9,ror#18	// Sigma1(e)
	ror	x9,x25,#28
	add	x24,x24,x17	// h+=Ch(e,f,g)
	eor	x17,x25,x25,ror#5
	add	x24,x24,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x20,x20,x24	// d+=h
	eor	x19,x19,x26	// Maj(a,b,c)
	eor	x17,x9,x17,ror#34	// Sigma0(a)
	add	x24,x24,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x24,x24,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x7,x7	// 4
#endif
	add	x24,x24,x17	// h+=Sigma0(a)
	ror	x16,x20,#14
	add	x23,x23,x19	// h+=K[i]
	eor	x10,x20,x20,ror#23
	and	x17,x21,x20
	bic	x19,x22,x20
	add	x23,x23,x7	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x24,x25	// a^b, b^c in next round
	eor	x16,x16,x10,ror#18	// Sigma1(e)
	ror	x10,x24,#28
	add	x23,x23,x17	// h+=Ch(e,f,g)
	eor	x17,x24,x24,ror#5
	add	x23,x23,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x27,x27,x23	// d+=h
	eor	x28,x28,x25	// Maj(a,b,c)
	eor	x17,x10,x17,ror#34	// Sigma0(a)
	add	x23,x23,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x23,x23,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x8,x8	// 5
#endif
	ldp	x9,x10,[x1],#2*8
	add	x23,x23,x17	// h+=Sigma0(a)
	ror	x16,x27,#14
	add	x22,x22,x28	// h+=K[i]
	eor	x11,x27,x27,ror#23
	and	x17,x20,x27
	bic	x28,x21,x27
	add	x22,x22,x8	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x23,x24	// a^b, b^c in next round
	eor	x16,x16,x11,ror#18	// Sigma1(e)
	ror	x11,x23,#28
	add	x22,x22,x17	// h+=Ch(e,f,g)
	eor	x17,x23,x23,ror#5
	add	x22,x22,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x26,x26,x22	// d+=h
	eor	x19,x19,x24	// Maj(a,b,c)
	eor	x17,x11,x17,ror#34	// Sigma0(a)
	add	x22,x22,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x22,x22,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x9,x9	// 6
#endif
	add	x22,x22,x17	// h+=Sigma0(a)
	ror	x16,x26,#14
	add	x21,x21,x19	// h+=K[i]
	eor	x12,x26,x26,ror#23
	and	x17,x27,x26
	bic	x19,x20,x26
	add	x21,x21,x9	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x22,x23	// a^b, b^c in next round
	eor	x16,x16,x12,ror#18	// Sigma1(e)
	ror	x12,x22,#28
	add	x21,x21,x17	// h+=Ch(e,f,g)
	eor	x17,x22,x22,ror#5
	add	x21,x21,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x25,x25,x21	// d+=h
	eor	x28,x28,x23	// Maj(a,b,c)
	eor	x17,x12,x17,ror#34	// Sigma0(a)
	add	x21,x21,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x21,x21,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x10,x10	// 7
#endif
	ldp	x11,x12,[x1],#2*8
	add	x21,x21,x17	// h+=Sigma0(a)
	ror	x16,x25,#14
	add	x20,x20,x28	// h+=K[i]
	eor	x13,x25,x25,ror#23
	and	x17,x26,x25
	bic	x28,x27,x25
	add	x20,x20,x10	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x21,x22	// a^b, b^c in next round
	eor	x16,x16,x13,ror#18	// Sigma1(e)
	ror	x13,x21,#28
	add	x20,x20,x17	// h+=Ch(e,f,g)
	eor	x17,x21,x21,ror#5
	add	x20,x20,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x24,x24,x20	// d+=h
	eor	x19,x19,x22	// Maj(a,b,c)
	eor	x17,x13,x17,ror#34	// Sigma0(a)
	add	x20,x20,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x20,x20,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x11,x11	// 8
#endif
	add	x20,x20,x17	// h+=Sigma0(a)
	ror	x16,x24,#14
	add	x27,x27,x19	// h+=K[i]
	eor	x14,x24,x24,ror#23
	and	x17,x25,x24
	bic	x19,x26,x24
	add	x27,x27,x11	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x20,x21	// a^b, b^c in next round
	eor	x16,x16,x14,ror#18	// Sigma1(e)
	ror	x14,x20,#28
	add	x27,x27,x17	// h+=Ch(e,f,g)
	eor	x17,x20,x20,ror#5
	add	x27,x27,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x23,x23,x27	// d+=h
	eor	x28,x28,x21	// Maj(a,b,c)
	eor	x17,x14,x17,ror#34	// Sigma0(a)
	add	x27,x27,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x27,x27,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x12,x12	// 9
#endif
	ldp	x13,x14,[x1],#2*8
	add	x27,x27,x17	// h+=Sigma0(a)
	ror	x16,x23,#14
	add	x26,x26,x28	// h+=K[i]
	eor	x15,x23,x23,ror#23
	and	x17,x24,x23
	bic	x28,x25,x23
	add	x26,x26,x12	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x27,x20	// a^b, b^c in next round
	eor	x16,x16,x15,ror#18	// Sigma1(e)
	ror	x15,x27,#28
	add	x26,x26,x17	// h+=Ch(e,f,g)
	eor	x17,x27,x27,ror#5
	add	x26,x26,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x22,x22,x26	// d+=h
	eor	x19,x19,x20	// Maj(a,b,c)
	eor	x17,x15,x17,ror#34	// Sigma0(a)
	add	x26,x26,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x26,x26,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x13,x13	// 10
#endif
	add	x26,x26,x17	// h+=Sigma0(a)
	ror	x16,x22,#14
	add	x25,x25,x19	// h+=K[i]
	eor	x0,x22,x22,ror#23
	and	x17,x23,x22
	bic	x19,x24,x22
	add	x25,x25,x13	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x26,x27	// a^b, b^c in next round
	eor	x16,x16,x0,ror#18	// Sigma1(e)
	ror	x0,x26,#28
	add	x25,x25,x17	// h+=Ch(e,f,g)
	eor	x17,x26,x26,ror#5
	add	x25,x25,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x21,x21,x25	// d+=h
	eor	x28,x28,x27	// Maj(a,b,c)
	eor	x17,x0,x17,ror#34	// Sigma0(a)
	add	x25,x25,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x25,x25,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x14,x14	// 11
#endif
	ldp	x15,x0,[x1],#2*8
	add	x25,x25,x17	// h+=Sigma0(a)
	str	x6,[sp,#24]
	ror	x16,x21,#14
	add	x24,x24,x28	// h+=K[i]
	eor	x6,x21,x21,ror#23
	and	x17,x22,x21
	bic	x28,x23,x21
	add	x24,x24,x14	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x25,x26	// a^b, b^c in next round
	eor	x16,x16,x6,ror#18	// Sigma1(e)
	ror	x6,x25,#28
	add	x24,x24,x17	// h+=Ch(e,f,g)
	eor	x17,x25,x25,ror#5
	add	x24,x24,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x20,x20,x24	// d+=h
	eor	x19,x19,x26	// Maj(a,b,c)
	eor	x17,x6,x17,ror#34	// Sigma0(a)
	add	x24,x24,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x24,x24,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x15,x15	// 12
#endif
	add	x24,x24,x17	// h+=Sigma0(a)
	str	x7,[sp,#0]
	ror	x16,x20,#14
	add	x23,x23,x19	// h+=K[i]
	eor	x7,x20,x20,ror#23
	and	x17,x21,x20
	bic	x19,x22,x20
	add	x23,x23,x15	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x24,x25	// a^b, b^c in next round
	eor	x16,x16,x7,ror#18	// Sigma1(e)
	ror	x7,x24,#28
	add	x23,x23,x17	// h+=Ch(e,f,g)
	eor	x17,x24,x24,ror#5
	add	x23,x23,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x27,x27,x23	// d+=h
	eor	x28,x28,x25	// Maj(a,b,c)
	eor	x17,x7,x17,ror#34	// Sigma0(a)
	add	x23,x23,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x23,x23,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x0,x0	// 13
#endif
	// Last two input words: no post-increment, and x1/x2 themselves are
	// overwritten. The input cursor was saved at [x29,#112] and the end
	// pointer at [x29,#104].
	ldp	x1,x2,[x1]
	add	x23,x23,x17	// h+=Sigma0(a)
	str	x8,[sp,#8]
	ror	x16,x27,#14
	add	x22,x22,x28	// h+=K[i]
	eor	x8,x27,x27,ror#23
	and	x17,x20,x27
	bic	x28,x21,x27
	add	x22,x22,x0	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x23,x24	// a^b, b^c in next round
	eor	x16,x16,x8,ror#18	// Sigma1(e)
	ror	x8,x23,#28
	add	x22,x22,x17	// h+=Ch(e,f,g)
	eor	x17,x23,x23,ror#5
	add	x22,x22,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x26,x26,x22	// d+=h
	eor	x19,x19,x24	// Maj(a,b,c)
	eor	x17,x8,x17,ror#34	// Sigma0(a)
	add	x22,x22,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x22,x22,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x1,x1	// 14
#endif
	ldr	x6,[sp,#24]
	add	x22,x22,x17	// h+=Sigma0(a)
	str	x9,[sp,#16]
	ror	x16,x26,#14
	add	x21,x21,x19	// h+=K[i]
	eor	x9,x26,x26,ror#23
	and	x17,x27,x26
	bic	x19,x20,x26
	add	x21,x21,x1	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x22,x23	// a^b, b^c in next round
	eor	x16,x16,x9,ror#18	// Sigma1(e)
	ror	x9,x22,#28
	add	x21,x21,x17	// h+=Ch(e,f,g)
	eor	x17,x22,x22,ror#5
	add	x21,x21,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x25,x25,x21	// d+=h
	eor	x28,x28,x23	// Maj(a,b,c)
	eor	x17,x9,x17,ror#34	// Sigma0(a)
	add	x21,x21,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x21,x21,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x2,x2	// 15
#endif
	ldr	x7,[sp,#0]
	add	x21,x21,x17	// h+=Sigma0(a)
	str	x10,[sp,#24]
	ror	x16,x25,#14
	add	x20,x20,x28	// h+=K[i]
	ror	x9,x4,#1
	and	x17,x26,x25
	ror	x8,x1,#19
	bic	x28,x27,x25
	ror	x10,x21,#28
	add	x20,x20,x2	// h+=X[i]
	eor	x16,x16,x25,ror#18
	eor	x9,x9,x4,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x21,x22	// a^b, b^c in next round
	eor	x16,x16,x25,ror#41	// Sigma1(e)
	eor	x10,x10,x21,ror#34
	add	x20,x20,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x8,x8,x1,ror#61
	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
	add	x20,x20,x16	// h+=Sigma1(e)
	eor	x19,x19,x22	// Maj(a,b,c)
	eor	x17,x10,x21,ror#39	// Sigma0(a)
	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
	add	x3,x3,x12
	add	x24,x24,x20	// d+=h
	add	x20,x20,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x3,x3,x9
	add	x20,x20,x17	// h+=Sigma0(a)
	add	x3,x3,x8
	// Remaining rounds, 16 per pass. Alongside each round the schedule
	// word needed next is updated in place:
	//   X[j] += X[j+9] + sigma0(X[j+1]) + sigma1(X[j+14])   (indices mod 16)
	// The pass repeats until the K word fetched for the next round is the
	// zero terminator at the end of .LK512.
.Loop_16_xx:
	ldr	x8,[sp,#8]
	str	x11,[sp,#0]
	ror	x16,x24,#14
	add	x27,x27,x19	// h+=K[i]
	ror	x10,x5,#1
	and	x17,x25,x24
	ror	x9,x2,#19
	bic	x19,x26,x24
	ror	x11,x20,#28
	add	x27,x27,x3	// h+=X[i]
	eor	x16,x16,x24,ror#18
	eor	x10,x10,x5,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x20,x21	// a^b, b^c in next round
	eor	x16,x16,x24,ror#41	// Sigma1(e)
	eor	x11,x11,x20,ror#34
	add	x27,x27,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x9,x9,x2,ror#61
	eor	x10,x10,x5,lsr#7	// sigma0(X[i+1])
	add	x27,x27,x16	// h+=Sigma1(e)
	eor	x28,x28,x21	// Maj(a,b,c)
	eor	x17,x11,x20,ror#39	// Sigma0(a)
	eor	x9,x9,x2,lsr#6	// sigma1(X[i+14])
	add	x4,x4,x13
	add	x23,x23,x27	// d+=h
	add	x27,x27,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x4,x4,x10
	add	x27,x27,x17	// h+=Sigma0(a)
	add	x4,x4,x9
	ldr	x9,[sp,#16]
	str	x12,[sp,#8]
	ror	x16,x23,#14
	add	x26,x26,x28	// h+=K[i]
	ror	x11,x6,#1
	and	x17,x24,x23
	ror	x10,x3,#19
	bic	x28,x25,x23
	ror	x12,x27,#28
	add	x26,x26,x4	// h+=X[i]
	eor	x16,x16,x23,ror#18
	eor	x11,x11,x6,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x27,x20	// a^b, b^c in next round
	eor	x16,x16,x23,ror#41	// Sigma1(e)
	eor	x12,x12,x27,ror#34
	add	x26,x26,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x10,x10,x3,ror#61
	eor	x11,x11,x6,lsr#7	// sigma0(X[i+1])
	add	x26,x26,x16	// h+=Sigma1(e)
	eor	x19,x19,x20	// Maj(a,b,c)
	eor	x17,x12,x27,ror#39	// Sigma0(a)
	eor	x10,x10,x3,lsr#6	// sigma1(X[i+14])
	add	x5,x5,x14
	add	x22,x22,x26	// d+=h
	add	x26,x26,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x5,x5,x11
	add	x26,x26,x17	// h+=Sigma0(a)
	add	x5,x5,x10
	ldr	x10,[sp,#24]
	str	x13,[sp,#16]
	ror	x16,x22,#14
	add	x25,x25,x19	// h+=K[i]
	ror	x12,x7,#1
	and	x17,x23,x22
	ror	x11,x4,#19
	bic	x19,x24,x22
	ror	x13,x26,#28
	add	x25,x25,x5	// h+=X[i]
	eor	x16,x16,x22,ror#18
	eor	x12,x12,x7,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x26,x27	// a^b, b^c in next round
	eor	x16,x16,x22,ror#41	// Sigma1(e)
	eor	x13,x13,x26,ror#34
	add	x25,x25,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x11,x11,x4,ror#61
	eor	x12,x12,x7,lsr#7	// sigma0(X[i+1])
	add	x25,x25,x16	// h+=Sigma1(e)
	eor	x28,x28,x27	// Maj(a,b,c)
	eor	x17,x13,x26,ror#39	// Sigma0(a)
	eor	x11,x11,x4,lsr#6	// sigma1(X[i+14])
	add	x6,x6,x15
	add	x21,x21,x25	// d+=h
	add	x25,x25,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x6,x6,x12
	add	x25,x25,x17	// h+=Sigma0(a)
	add	x6,x6,x11
	ldr	x11,[sp,#0]
	str	x14,[sp,#24]
	ror	x16,x21,#14
	add	x24,x24,x28	// h+=K[i]
	ror	x13,x8,#1
	and	x17,x22,x21
	ror	x12,x5,#19
	bic	x28,x23,x21
	ror	x14,x25,#28
	add	x24,x24,x6	// h+=X[i]
	eor	x16,x16,x21,ror#18
	eor	x13,x13,x8,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x25,x26	// a^b, b^c in next round
	eor	x16,x16,x21,ror#41	// Sigma1(e)
	eor	x14,x14,x25,ror#34
	add	x24,x24,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x12,x12,x5,ror#61
	eor	x13,x13,x8,lsr#7	// sigma0(X[i+1])
	add	x24,x24,x16	// h+=Sigma1(e)
	eor	x19,x19,x26	// Maj(a,b,c)
	eor	x17,x14,x25,ror#39	// Sigma0(a)
	eor	x12,x12,x5,lsr#6	// sigma1(X[i+14])
	add	x7,x7,x0
	add	x20,x20,x24	// d+=h
	add	x24,x24,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x7,x7,x13
	add	x24,x24,x17	// h+=Sigma0(a)
	add	x7,x7,x12
	ldr	x12,[sp,#8]
	str	x15,[sp,#0]
	ror	x16,x20,#14
	add	x23,x23,x19	// h+=K[i]
	ror	x14,x9,#1
	and	x17,x21,x20
	ror	x13,x6,#19
	bic	x19,x22,x20
	ror	x15,x24,#28
	add	x23,x23,x7	// h+=X[i]
	eor	x16,x16,x20,ror#18
	eor	x14,x14,x9,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x24,x25	// a^b, b^c in next round
	eor	x16,x16,x20,ror#41	// Sigma1(e)
	eor	x15,x15,x24,ror#34
	add	x23,x23,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x13,x13,x6,ror#61
	eor	x14,x14,x9,lsr#7	// sigma0(X[i+1])
	add	x23,x23,x16	// h+=Sigma1(e)
	eor	x28,x28,x25	// Maj(a,b,c)
	eor	x17,x15,x24,ror#39	// Sigma0(a)
	eor	x13,x13,x6,lsr#6	// sigma1(X[i+14])
	add	x8,x8,x1
	add	x27,x27,x23	// d+=h
	add	x23,x23,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x8,x8,x14
	add	x23,x23,x17	// h+=Sigma0(a)
	add	x8,x8,x13
	ldr	x13,[sp,#16]
	str	x0,[sp,#8]
	ror	x16,x27,#14
	add	x22,x22,x28	// h+=K[i]
	ror	x15,x10,#1
	and	x17,x20,x27
	ror	x14,x7,#19
	bic	x28,x21,x27
	ror	x0,x23,#28
	add	x22,x22,x8	// h+=X[i]
	eor	x16,x16,x27,ror#18
	eor	x15,x15,x10,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x23,x24	// a^b, b^c in next round
	eor	x16,x16,x27,ror#41	// Sigma1(e)
	eor	x0,x0,x23,ror#34
	add	x22,x22,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x14,x14,x7,ror#61
	eor	x15,x15,x10,lsr#7	// sigma0(X[i+1])
	add	x22,x22,x16	// h+=Sigma1(e)
	eor	x19,x19,x24	// Maj(a,b,c)
	eor	x17,x0,x23,ror#39	// Sigma0(a)
	eor	x14,x14,x7,lsr#6	// sigma1(X[i+14])
	add	x9,x9,x2
	add	x26,x26,x22	// d+=h
	add	x22,x22,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x9,x9,x15
	add	x22,x22,x17	// h+=Sigma0(a)
	add	x9,x9,x14
	ldr	x14,[sp,#24]
	str	x1,[sp,#16]
	ror	x16,x26,#14
	add	x21,x21,x19	// h+=K[i]
	ror	x0,x11,#1
	and	x17,x27,x26
	ror	x15,x8,#19
	bic	x19,x20,x26
	ror	x1,x22,#28
	add	x21,x21,x9	// h+=X[i]
	eor	x16,x16,x26,ror#18
	eor	x0,x0,x11,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x22,x23	// a^b, b^c in next round
	eor	x16,x16,x26,ror#41	// Sigma1(e)
	eor	x1,x1,x22,ror#34
	add	x21,x21,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x15,x15,x8,ror#61
	eor	x0,x0,x11,lsr#7	// sigma0(X[i+1])
	add	x21,x21,x16	// h+=Sigma1(e)
	eor	x28,x28,x23	// Maj(a,b,c)
	eor	x17,x1,x22,ror#39	// Sigma0(a)
	eor	x15,x15,x8,lsr#6	// sigma1(X[i+14])
	add	x10,x10,x3
	add	x25,x25,x21	// d+=h
	add	x21,x21,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x10,x10,x0
	add	x21,x21,x17	// h+=Sigma0(a)
	add	x10,x10,x15
	ldr	x15,[sp,#0]
	str	x2,[sp,#24]
	ror	x16,x25,#14
	add	x20,x20,x28	// h+=K[i]
	ror	x1,x12,#1
	and	x17,x26,x25
	ror	x0,x9,#19
	bic	x28,x27,x25
	ror	x2,x21,#28
	add	x20,x20,x10	// h+=X[i]
	eor	x16,x16,x25,ror#18
	eor	x1,x1,x12,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x21,x22	// a^b, b^c in next round
	eor	x16,x16,x25,ror#41	// Sigma1(e)
	eor	x2,x2,x21,ror#34
	add	x20,x20,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x0,x0,x9,ror#61
	eor	x1,x1,x12,lsr#7	// sigma0(X[i+1])
	add	x20,x20,x16	// h+=Sigma1(e)
	eor	x19,x19,x22	// Maj(a,b,c)
	eor	x17,x2,x21,ror#39	// Sigma0(a)
	eor	x0,x0,x9,lsr#6	// sigma1(X[i+14])
	add	x11,x11,x4
	add	x24,x24,x20	// d+=h
	add	x20,x20,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x11,x11,x1
	add	x20,x20,x17	// h+=Sigma0(a)
	add	x11,x11,x0
	ldr	x0,[sp,#8]
	str	x3,[sp,#0]
	ror	x16,x24,#14
	add	x27,x27,x19	// h+=K[i]
	ror	x2,x13,#1
	and	x17,x25,x24
	ror	x1,x10,#19
	bic	x19,x26,x24
	ror	x3,x20,#28
	add	x27,x27,x11	// h+=X[i]
	eor	x16,x16,x24,ror#18
	eor	x2,x2,x13,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x20,x21	// a^b, b^c in next round
	eor	x16,x16,x24,ror#41	// Sigma1(e)
	eor	x3,x3,x20,ror#34
	add	x27,x27,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x1,x1,x10,ror#61
	eor	x2,x2,x13,lsr#7	// sigma0(X[i+1])
	add	x27,x27,x16	// h+=Sigma1(e)
	eor	x28,x28,x21	// Maj(a,b,c)
	eor	x17,x3,x20,ror#39	// Sigma0(a)
	eor	x1,x1,x10,lsr#6	// sigma1(X[i+14])
	add	x12,x12,x5
	add	x23,x23,x27	// d+=h
	add	x27,x27,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x12,x12,x2
	add	x27,x27,x17	// h+=Sigma0(a)
	add	x12,x12,x1
	ldr	x1,[sp,#16]
	str	x4,[sp,#8]
	ror	x16,x23,#14
	add	x26,x26,x28	// h+=K[i]
	ror	x3,x14,#1
	and	x17,x24,x23
	ror	x2,x11,#19
	bic	x28,x25,x23
	ror	x4,x27,#28
	add	x26,x26,x12	// h+=X[i]
	eor	x16,x16,x23,ror#18
	eor	x3,x3,x14,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x27,x20	// a^b, b^c in next round
	eor	x16,x16,x23,ror#41	// Sigma1(e)
	eor	x4,x4,x27,ror#34
	add	x26,x26,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x2,x2,x11,ror#61
	eor	x3,x3,x14,lsr#7	// sigma0(X[i+1])
	add	x26,x26,x16	// h+=Sigma1(e)
	eor	x19,x19,x20	// Maj(a,b,c)
	eor	x17,x4,x27,ror#39	// Sigma0(a)
	eor	x2,x2,x11,lsr#6	// sigma1(X[i+14])
	add	x13,x13,x6
	add	x22,x22,x26	// d+=h
	add	x26,x26,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x13,x13,x3
	add	x26,x26,x17	// h+=Sigma0(a)
	add	x13,x13,x2
	ldr	x2,[sp,#24]
	str	x5,[sp,#16]
	ror	x16,x22,#14
	add	x25,x25,x19	// h+=K[i]
	ror	x4,x15,#1
	and	x17,x23,x22
	ror	x3,x12,#19
	bic	x19,x24,x22
	ror	x5,x26,#28
	add	x25,x25,x13	// h+=X[i]
	eor	x16,x16,x22,ror#18
	eor	x4,x4,x15,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x26,x27	// a^b, b^c in next round
	eor	x16,x16,x22,ror#41	// Sigma1(e)
	eor	x5,x5,x26,ror#34
	add	x25,x25,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x3,x3,x12,ror#61
	eor	x4,x4,x15,lsr#7	// sigma0(X[i+1])
	add	x25,x25,x16	// h+=Sigma1(e)
	eor	x28,x28,x27	// Maj(a,b,c)
	eor	x17,x5,x26,ror#39	// Sigma0(a)
	eor	x3,x3,x12,lsr#6	// sigma1(X[i+14])
	add	x14,x14,x7
	add	x21,x21,x25	// d+=h
	add	x25,x25,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x14,x14,x4
	add	x25,x25,x17	// h+=Sigma0(a)
	add	x14,x14,x3
	ldr	x3,[sp,#0]
	str	x6,[sp,#24]
	ror	x16,x21,#14
	add	x24,x24,x28	// h+=K[i]
	ror	x5,x0,#1
	and	x17,x22,x21
	ror	x4,x13,#19
	bic	x28,x23,x21
	ror	x6,x25,#28
	add	x24,x24,x14	// h+=X[i]
	eor	x16,x16,x21,ror#18
	eor	x5,x5,x0,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x25,x26	// a^b, b^c in next round
	eor	x16,x16,x21,ror#41	// Sigma1(e)
	eor	x6,x6,x25,ror#34
	add	x24,x24,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x4,x4,x13,ror#61
	eor	x5,x5,x0,lsr#7	// sigma0(X[i+1])
	add	x24,x24,x16	// h+=Sigma1(e)
	eor	x19,x19,x26	// Maj(a,b,c)
	eor	x17,x6,x25,ror#39	// Sigma0(a)
	eor	x4,x4,x13,lsr#6	// sigma1(X[i+14])
	add	x15,x15,x8
	add	x20,x20,x24	// d+=h
	add	x24,x24,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x15,x15,x5
	add	x24,x24,x17	// h+=Sigma0(a)
	add	x15,x15,x4
	ldr	x4,[sp,#8]
	str	x7,[sp,#0]
	ror	x16,x20,#14
	add	x23,x23,x19	// h+=K[i]
	ror	x6,x1,#1
	and	x17,x21,x20
	ror	x5,x14,#19
	bic	x19,x22,x20
	ror	x7,x24,#28
	add	x23,x23,x15	// h+=X[i]
	eor	x16,x16,x20,ror#18
	eor	x6,x6,x1,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x24,x25	// a^b, b^c in next round
	eor	x16,x16,x20,ror#41	// Sigma1(e)
	eor	x7,x7,x24,ror#34
	add	x23,x23,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x5,x5,x14,ror#61
	eor	x6,x6,x1,lsr#7	// sigma0(X[i+1])
	add	x23,x23,x16	// h+=Sigma1(e)
	eor	x28,x28,x25	// Maj(a,b,c)
	eor	x17,x7,x24,ror#39	// Sigma0(a)
	eor	x5,x5,x14,lsr#6	// sigma1(X[i+14])
	add	x0,x0,x9
	add	x27,x27,x23	// d+=h
	add	x23,x23,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x0,x0,x6
	add	x23,x23,x17	// h+=Sigma0(a)
	add	x0,x0,x5
	ldr	x5,[sp,#16]
	str	x8,[sp,#8]
	ror	x16,x27,#14
	add	x22,x22,x28	// h+=K[i]
	ror	x7,x2,#1
	and	x17,x20,x27
	ror	x6,x15,#19
	bic	x28,x21,x27
	ror	x8,x23,#28
	add	x22,x22,x0	// h+=X[i]
	eor	x16,x16,x27,ror#18
	eor	x7,x7,x2,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x23,x24	// a^b, b^c in next round
	eor	x16,x16,x27,ror#41	// Sigma1(e)
	eor	x8,x8,x23,ror#34
	add	x22,x22,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x6,x6,x15,ror#61
	eor	x7,x7,x2,lsr#7	// sigma0(X[i+1])
	add	x22,x22,x16	// h+=Sigma1(e)
	eor	x19,x19,x24	// Maj(a,b,c)
	eor	x17,x8,x23,ror#39	// Sigma0(a)
	eor	x6,x6,x15,lsr#6	// sigma1(X[i+14])
	add	x1,x1,x10
	add	x26,x26,x22	// d+=h
	add	x22,x22,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x1,x1,x7
	add	x22,x22,x17	// h+=Sigma0(a)
	add	x1,x1,x6
	ldr	x6,[sp,#24]
	str	x9,[sp,#16]
	ror	x16,x26,#14
	add	x21,x21,x19	// h+=K[i]
	ror	x8,x3,#1
	and	x17,x27,x26
	ror	x7,x0,#19
	bic	x19,x20,x26
	ror	x9,x22,#28
	add	x21,x21,x1	// h+=X[i]
	eor	x16,x16,x26,ror#18
	eor	x8,x8,x3,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x22,x23	// a^b, b^c in next round
	eor	x16,x16,x26,ror#41	// Sigma1(e)
	eor	x9,x9,x22,ror#34
	add	x21,x21,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x7,x7,x0,ror#61
	eor	x8,x8,x3,lsr#7	// sigma0(X[i+1])
	add	x21,x21,x16	// h+=Sigma1(e)
	eor	x28,x28,x23	// Maj(a,b,c)
	eor	x17,x9,x22,ror#39	// Sigma0(a)
	eor	x7,x7,x0,lsr#6	// sigma1(X[i+14])
	add	x2,x2,x11
	add	x25,x25,x21	// d+=h
	add	x21,x21,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x2,x2,x8
	add	x21,x21,x17	// h+=Sigma0(a)
	add	x2,x2,x7
	ldr	x7,[sp,#0]
	str	x10,[sp,#24]
	ror	x16,x25,#14
	add	x20,x20,x28	// h+=K[i]
	ror	x9,x4,#1
	and	x17,x26,x25
	ror	x8,x1,#19
	bic	x28,x27,x25
	ror	x10,x21,#28
	add	x20,x20,x2	// h+=X[i]
	eor	x16,x16,x25,ror#18
	eor	x9,x9,x4,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x21,x22	// a^b, b^c in next round
	eor	x16,x16,x25,ror#41	// Sigma1(e)
	eor	x10,x10,x21,ror#34
	add	x20,x20,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x8,x8,x1,ror#61
	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
	add	x20,x20,x16	// h+=Sigma1(e)
	eor	x19,x19,x22	// Maj(a,b,c)
	eor	x17,x10,x21,ror#39	// Sigma0(a)
	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
	add	x3,x3,x12
	add	x24,x24,x20	// d+=h
	add	x20,x20,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x3,x3,x9
	add	x20,x20,x17	// h+=Sigma0(a)
	add	x3,x3,x8
	cbnz	x19,.Loop_16_xx

	// Block finished. Reload the context pointer, end pointer and input
	// cursor from the frame and step x30 back to the start of .LK512:
	// 648 = 81*8, i.e. the 80 constants plus the terminator that was
	// also fetched.
	ldp	x0,x2,[x29,#96]
	ldr	x1,[x29,#112]
	sub	x30,x30,#648	// rewind

	// Add the previous state to a..h and store the sum back to [x0].
	// The saved cursor already points 2*8 bytes into the block, so
	// +14*8 moves it to the next 128-byte block.
	ldp	x3,x4,[x0]
	ldp	x5,x6,[x0,#2*8]
	add	x1,x1,#14*8	// advance input pointer
	ldp	x7,x8,[x0,#4*8]
	add	x20,x20,x3
	ldp	x9,x10,[x0,#6*8]
	add	x21,x21,x4
	add	x22,x22,x5
	add	x23,x23,x6
	stp	x20,x21,[x0]
	add	x24,x24,x7
	add	x25,x25,x8
	stp	x22,x23,[x0,#2*8]
	add	x26,x26,x9
	add	x27,x27,x10
	cmp	x1,x2
	stp	x24,x25,[x0,#4*8]
	stp	x26,x27,[x0,#6*8]
	b.ne	.Loop

	// Epilogue: restore x19-x28, drop the spill area and the frame.
	ldp	x19,x20,[x29,#16]
	add	sp,sp,#4*8
	ldp	x21,x22,[x29,#32]
	ldp	x23,x24,[x29,#48]
	ldp	x25,x26,[x29,#64]
	ldp	x27,x28,[x29,#80]
	ldp	x29,x30,[sp],#128
	AARCH64_VALIDATE_LINK_REGISTER
	ret
.size	sha512_block_data_order,.-sha512_block_data_order

// Round-constant table: 80 64-bit words followed by a zero word. The
// general-register loop above uses that zero as its end-of-rounds
// sentinel (cbnz x19,.Loop_16_xx) and relies on the table length in its
// "rewind" constant.
.section	.rodata
.align	6
.type	.LK512,%object
.LK512:
.quad	0x428a2f98d728ae22,0x7137449123ef65cd
.quad	0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
.quad	0x3956c25bf348b538,0x59f111f1b605d019
.quad	0x923f82a4af194f9b,0xab1c5ed5da6d8118
.quad	0xd807aa98a3030242,0x12835b0145706fbe
.quad	0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
.quad	0x72be5d74f27b896f,0x80deb1fe3b1696b1
.quad	0x9bdc06a725c71235,0xc19bf174cf692694
.quad	0xe49b69c19ef14ad2,0xefbe4786384f25e3
.quad	0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
.quad	0x2de92c6f592b0275,0x4a7484aa6ea6e483
.quad	0x5cb0a9dcbd41fbd4,0x76f988da831153b5
.quad	0x983e5152ee66dfab,0xa831c66d2db43210
.quad	0xb00327c898fb213f,0xbf597fc7beef0ee4
.quad	0xc6e00bf33da88fc2,0xd5a79147930aa725
.quad	0x06ca6351e003826f,0x142929670a0e6e70
.quad	0x27b70a8546d22ffc,0x2e1b21385c26c926
.quad	0x4d2c6dfc5ac42aed,0x53380d139d95b3df
.quad	0x650a73548baf63de,0x766a0abb3c77b2a8
.quad	0x81c2c92e47edaee6,0x92722c851482353b
.quad	0xa2bfe8a14cf10364,0xa81a664bbc423001
.quad	0xc24b8b70d0f89791,0xc76c51a30654be30
.quad	0xd192e819d6ef5218,0xd69906245565a910
.quad	0xf40e35855771202a,0x106aa07032bbd1b8
.quad	0x19a4c116b8d2d0c8,0x1e376c085141ab53
.quad	0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
.quad	0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
.quad	0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
.quad	0x748f82ee5defb2fc,0x78a5636f43172f60
.quad	0x84c87814a1f0ab72,0x8cc702081a6439ec
.quad	0x90befffa23631e28,0xa4506cebde82bde9
.quad	0xbef9a3f7b2c67915,0xc67178f2e372532b
.quad	0xca273eceea26619c,0xd186b8c721c0c207
.quad	0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
.quad	0x06f067aa72176fba,0x0a637dc5a2c898a6
.quad	0x113f9804bef90dae,0x1b710b35131c471b
.quad	0x28db77f523047d84,0x32caab7b40c72493
.quad	0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
.quad	0x4cc5d4becb3e42b6,0x597f299cfc657e2a
.quad	0x5fcb6fab3ad6faec,0x6c44198c4a475817
.quad	0	// terminator
.size	.LK512,.-.LK512
// NUL-terminated ASCII ident string:
// "SHA512 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
.byte	83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	2
.align	2
.text
#ifndef __KERNEL__
// .Lv8_entry is the target of the b.ne in sha512_block_data_order.
.type	sha512_block_armv8,%function
.align	6
sha512_block_armv8:
.Lv8_entry:
	stp	x29,x30,[sp,#-16]!
1092 add x29,sp,#0 1093 1094 ld1 {v16.16b,v17.16b,v18.16b,v19.16b},[x1],#64 // load input 1095 ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64 1096 1097 ld1 {v0.2d,v1.2d,v2.2d,v3.2d},[x0] // load context 1098 adrp x3,.LK512 1099 add x3,x3,:lo12:.LK512 1100 1101 rev64 v16.16b,v16.16b 1102 rev64 v17.16b,v17.16b 1103 rev64 v18.16b,v18.16b 1104 rev64 v19.16b,v19.16b 1105 rev64 v20.16b,v20.16b 1106 rev64 v21.16b,v21.16b 1107 rev64 v22.16b,v22.16b 1108 rev64 v23.16b,v23.16b 1109 b .Loop_hw 1110 1111.align 4 1112.Loop_hw: 1113 ld1 {v24.2d},[x3],#16 1114 subs x2,x2,#1 1115 sub x4,x1,#128 1116 orr v26.16b,v0.16b,v0.16b // offload 1117 orr v27.16b,v1.16b,v1.16b 1118 orr v28.16b,v2.16b,v2.16b 1119 orr v29.16b,v3.16b,v3.16b 1120 csel x1,x1,x4,ne // conditional rewind 1121 add v24.2d,v24.2d,v16.2d 1122 ld1 {v25.2d},[x3],#16 1123 ext v24.16b,v24.16b,v24.16b,#8 1124 ext v5.16b,v2.16b,v3.16b,#8 1125 ext v6.16b,v1.16b,v2.16b,#8 1126 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" 1127.inst 0xcec08230 //sha512su0 v16.16b,v17.16b 1128 ext v7.16b,v20.16b,v21.16b,#8 1129.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1130.inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b 1131 add v4.2d,v1.2d,v3.2d // "D + T1" 1132.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1133 add v25.2d,v25.2d,v17.2d 1134 ld1 {v24.2d},[x3],#16 1135 ext v25.16b,v25.16b,v25.16b,#8 1136 ext v5.16b,v4.16b,v2.16b,#8 1137 ext v6.16b,v0.16b,v4.16b,#8 1138 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" 1139.inst 0xcec08251 //sha512su0 v17.16b,v18.16b 1140 ext v7.16b,v21.16b,v22.16b,#8 1141.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1142.inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b 1143 add v1.2d,v0.2d,v2.2d // "D + T1" 1144.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1145 add v24.2d,v24.2d,v18.2d 1146 ld1 {v25.2d},[x3],#16 1147 ext v24.16b,v24.16b,v24.16b,#8 1148 ext v5.16b,v1.16b,v4.16b,#8 1149 ext v6.16b,v3.16b,v1.16b,#8 1150 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" 1151.inst 0xcec08272 //sha512su0 
v18.16b,v19.16b 1152 ext v7.16b,v22.16b,v23.16b,#8 1153.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1154.inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b 1155 add v0.2d,v3.2d,v4.2d // "D + T1" 1156.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1157 add v25.2d,v25.2d,v19.2d 1158 ld1 {v24.2d},[x3],#16 1159 ext v25.16b,v25.16b,v25.16b,#8 1160 ext v5.16b,v0.16b,v1.16b,#8 1161 ext v6.16b,v2.16b,v0.16b,#8 1162 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" 1163.inst 0xcec08293 //sha512su0 v19.16b,v20.16b 1164 ext v7.16b,v23.16b,v16.16b,#8 1165.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1166.inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b 1167 add v3.2d,v2.2d,v1.2d // "D + T1" 1168.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1169 add v24.2d,v24.2d,v20.2d 1170 ld1 {v25.2d},[x3],#16 1171 ext v24.16b,v24.16b,v24.16b,#8 1172 ext v5.16b,v3.16b,v0.16b,#8 1173 ext v6.16b,v4.16b,v3.16b,#8 1174 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" 1175.inst 0xcec082b4 //sha512su0 v20.16b,v21.16b 1176 ext v7.16b,v16.16b,v17.16b,#8 1177.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1178.inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b 1179 add v2.2d,v4.2d,v0.2d // "D + T1" 1180.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1181 add v25.2d,v25.2d,v21.2d 1182 ld1 {v24.2d},[x3],#16 1183 ext v25.16b,v25.16b,v25.16b,#8 1184 ext v5.16b,v2.16b,v3.16b,#8 1185 ext v6.16b,v1.16b,v2.16b,#8 1186 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1187.inst 0xcec082d5 //sha512su0 v21.16b,v22.16b 1188 ext v7.16b,v17.16b,v18.16b,#8 1189.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1190.inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b 1191 add v4.2d,v1.2d,v3.2d // "D + T1" 1192.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1193 add v24.2d,v24.2d,v22.2d 1194 ld1 {v25.2d},[x3],#16 1195 ext v24.16b,v24.16b,v24.16b,#8 1196 ext v5.16b,v4.16b,v2.16b,#8 1197 ext v6.16b,v0.16b,v4.16b,#8 1198 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1199.inst 0xcec082f6 //sha512su0 v22.16b,v23.16b 1200 ext 
v7.16b,v18.16b,v19.16b,#8 1201.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1202.inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b 1203 add v1.2d,v0.2d,v2.2d // "D + T1" 1204.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1205 add v25.2d,v25.2d,v23.2d 1206 ld1 {v24.2d},[x3],#16 1207 ext v25.16b,v25.16b,v25.16b,#8 1208 ext v5.16b,v1.16b,v4.16b,#8 1209 ext v6.16b,v3.16b,v1.16b,#8 1210 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1211.inst 0xcec08217 //sha512su0 v23.16b,v16.16b 1212 ext v7.16b,v19.16b,v20.16b,#8 1213.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1214.inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1215 add v0.2d,v3.2d,v4.2d // "D + T1" 1216.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1217 add v24.2d,v24.2d,v16.2d 1218 ld1 {v25.2d},[x3],#16 1219 ext v24.16b,v24.16b,v24.16b,#8 1220 ext v5.16b,v0.16b,v1.16b,#8 1221 ext v6.16b,v2.16b,v0.16b,#8 1222 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1223.inst 0xcec08230 //sha512su0 v16.16b,v17.16b 1224 ext v7.16b,v20.16b,v21.16b,#8 1225.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1226.inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b 1227 add v3.2d,v2.2d,v1.2d // "D + T1" 1228.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1229 add v25.2d,v25.2d,v17.2d 1230 ld1 {v24.2d},[x3],#16 1231 ext v25.16b,v25.16b,v25.16b,#8 1232 ext v5.16b,v3.16b,v0.16b,#8 1233 ext v6.16b,v4.16b,v3.16b,#8 1234 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1235.inst 0xcec08251 //sha512su0 v17.16b,v18.16b 1236 ext v7.16b,v21.16b,v22.16b,#8 1237.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1238.inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b 1239 add v2.2d,v4.2d,v0.2d // "D + T1" 1240.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1241 add v24.2d,v24.2d,v18.2d 1242 ld1 {v25.2d},[x3],#16 1243 ext v24.16b,v24.16b,v24.16b,#8 1244 ext v5.16b,v2.16b,v3.16b,#8 1245 ext v6.16b,v1.16b,v2.16b,#8 1246 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" 1247.inst 0xcec08272 //sha512su0 v18.16b,v19.16b 1248 ext v7.16b,v22.16b,v23.16b,#8 
1249.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1250.inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b 1251 add v4.2d,v1.2d,v3.2d // "D + T1" 1252.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1253 add v25.2d,v25.2d,v19.2d 1254 ld1 {v24.2d},[x3],#16 1255 ext v25.16b,v25.16b,v25.16b,#8 1256 ext v5.16b,v4.16b,v2.16b,#8 1257 ext v6.16b,v0.16b,v4.16b,#8 1258 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" 1259.inst 0xcec08293 //sha512su0 v19.16b,v20.16b 1260 ext v7.16b,v23.16b,v16.16b,#8 1261.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1262.inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b 1263 add v1.2d,v0.2d,v2.2d // "D + T1" 1264.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1265 add v24.2d,v24.2d,v20.2d 1266 ld1 {v25.2d},[x3],#16 1267 ext v24.16b,v24.16b,v24.16b,#8 1268 ext v5.16b,v1.16b,v4.16b,#8 1269 ext v6.16b,v3.16b,v1.16b,#8 1270 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" 1271.inst 0xcec082b4 //sha512su0 v20.16b,v21.16b 1272 ext v7.16b,v16.16b,v17.16b,#8 1273.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1274.inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b 1275 add v0.2d,v3.2d,v4.2d // "D + T1" 1276.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1277 add v25.2d,v25.2d,v21.2d 1278 ld1 {v24.2d},[x3],#16 1279 ext v25.16b,v25.16b,v25.16b,#8 1280 ext v5.16b,v0.16b,v1.16b,#8 1281 ext v6.16b,v2.16b,v0.16b,#8 1282 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" 1283.inst 0xcec082d5 //sha512su0 v21.16b,v22.16b 1284 ext v7.16b,v17.16b,v18.16b,#8 1285.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1286.inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b 1287 add v3.2d,v2.2d,v1.2d // "D + T1" 1288.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1289 add v24.2d,v24.2d,v22.2d 1290 ld1 {v25.2d},[x3],#16 1291 ext v24.16b,v24.16b,v24.16b,#8 1292 ext v5.16b,v3.16b,v0.16b,#8 1293 ext v6.16b,v4.16b,v3.16b,#8 1294 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" 1295.inst 0xcec082f6 //sha512su0 v22.16b,v23.16b 1296 ext v7.16b,v18.16b,v19.16b,#8 1297.inst 0xce6680a0 
//sha512h v0.16b,v5.16b,v6.16b 1298.inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b 1299 add v2.2d,v4.2d,v0.2d // "D + T1" 1300.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1301 add v25.2d,v25.2d,v23.2d 1302 ld1 {v24.2d},[x3],#16 1303 ext v25.16b,v25.16b,v25.16b,#8 1304 ext v5.16b,v2.16b,v3.16b,#8 1305 ext v6.16b,v1.16b,v2.16b,#8 1306 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1307.inst 0xcec08217 //sha512su0 v23.16b,v16.16b 1308 ext v7.16b,v19.16b,v20.16b,#8 1309.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1310.inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1311 add v4.2d,v1.2d,v3.2d // "D + T1" 1312.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1313 add v24.2d,v24.2d,v16.2d 1314 ld1 {v25.2d},[x3],#16 1315 ext v24.16b,v24.16b,v24.16b,#8 1316 ext v5.16b,v4.16b,v2.16b,#8 1317 ext v6.16b,v0.16b,v4.16b,#8 1318 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1319.inst 0xcec08230 //sha512su0 v16.16b,v17.16b 1320 ext v7.16b,v20.16b,v21.16b,#8 1321.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1322.inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b 1323 add v1.2d,v0.2d,v2.2d // "D + T1" 1324.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1325 add v25.2d,v25.2d,v17.2d 1326 ld1 {v24.2d},[x3],#16 1327 ext v25.16b,v25.16b,v25.16b,#8 1328 ext v5.16b,v1.16b,v4.16b,#8 1329 ext v6.16b,v3.16b,v1.16b,#8 1330 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1331.inst 0xcec08251 //sha512su0 v17.16b,v18.16b 1332 ext v7.16b,v21.16b,v22.16b,#8 1333.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1334.inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b 1335 add v0.2d,v3.2d,v4.2d // "D + T1" 1336.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1337 add v24.2d,v24.2d,v18.2d 1338 ld1 {v25.2d},[x3],#16 1339 ext v24.16b,v24.16b,v24.16b,#8 1340 ext v5.16b,v0.16b,v1.16b,#8 1341 ext v6.16b,v2.16b,v0.16b,#8 1342 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1343.inst 0xcec08272 //sha512su0 v18.16b,v19.16b 1344 ext v7.16b,v22.16b,v23.16b,#8 1345.inst 0xce6680a1 //sha512h 
v1.16b,v5.16b,v6.16b 1346.inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b 1347 add v3.2d,v2.2d,v1.2d // "D + T1" 1348.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1349 add v25.2d,v25.2d,v19.2d 1350 ld1 {v24.2d},[x3],#16 1351 ext v25.16b,v25.16b,v25.16b,#8 1352 ext v5.16b,v3.16b,v0.16b,#8 1353 ext v6.16b,v4.16b,v3.16b,#8 1354 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1355.inst 0xcec08293 //sha512su0 v19.16b,v20.16b 1356 ext v7.16b,v23.16b,v16.16b,#8 1357.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1358.inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b 1359 add v2.2d,v4.2d,v0.2d // "D + T1" 1360.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1361 add v24.2d,v24.2d,v20.2d 1362 ld1 {v25.2d},[x3],#16 1363 ext v24.16b,v24.16b,v24.16b,#8 1364 ext v5.16b,v2.16b,v3.16b,#8 1365 ext v6.16b,v1.16b,v2.16b,#8 1366 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" 1367.inst 0xcec082b4 //sha512su0 v20.16b,v21.16b 1368 ext v7.16b,v16.16b,v17.16b,#8 1369.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1370.inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b 1371 add v4.2d,v1.2d,v3.2d // "D + T1" 1372.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1373 add v25.2d,v25.2d,v21.2d 1374 ld1 {v24.2d},[x3],#16 1375 ext v25.16b,v25.16b,v25.16b,#8 1376 ext v5.16b,v4.16b,v2.16b,#8 1377 ext v6.16b,v0.16b,v4.16b,#8 1378 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" 1379.inst 0xcec082d5 //sha512su0 v21.16b,v22.16b 1380 ext v7.16b,v17.16b,v18.16b,#8 1381.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1382.inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b 1383 add v1.2d,v0.2d,v2.2d // "D + T1" 1384.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1385 add v24.2d,v24.2d,v22.2d 1386 ld1 {v25.2d},[x3],#16 1387 ext v24.16b,v24.16b,v24.16b,#8 1388 ext v5.16b,v1.16b,v4.16b,#8 1389 ext v6.16b,v3.16b,v1.16b,#8 1390 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" 1391.inst 0xcec082f6 //sha512su0 v22.16b,v23.16b 1392 ext v7.16b,v18.16b,v19.16b,#8 1393.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 
1394.inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b 1395 add v0.2d,v3.2d,v4.2d // "D + T1" 1396.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1397 add v25.2d,v25.2d,v23.2d 1398 ld1 {v24.2d},[x3],#16 1399 ext v25.16b,v25.16b,v25.16b,#8 1400 ext v5.16b,v0.16b,v1.16b,#8 1401 ext v6.16b,v2.16b,v0.16b,#8 1402 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" 1403.inst 0xcec08217 //sha512su0 v23.16b,v16.16b 1404 ext v7.16b,v19.16b,v20.16b,#8 1405.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1406.inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1407 add v3.2d,v2.2d,v1.2d // "D + T1" 1408.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1409 add v24.2d,v24.2d,v16.2d 1410 ld1 {v25.2d},[x3],#16 1411 ext v24.16b,v24.16b,v24.16b,#8 1412 ext v5.16b,v3.16b,v0.16b,#8 1413 ext v6.16b,v4.16b,v3.16b,#8 1414 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" 1415.inst 0xcec08230 //sha512su0 v16.16b,v17.16b 1416 ext v7.16b,v20.16b,v21.16b,#8 1417.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1418.inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b 1419 add v2.2d,v4.2d,v0.2d // "D + T1" 1420.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1421 add v25.2d,v25.2d,v17.2d 1422 ld1 {v24.2d},[x3],#16 1423 ext v25.16b,v25.16b,v25.16b,#8 1424 ext v5.16b,v2.16b,v3.16b,#8 1425 ext v6.16b,v1.16b,v2.16b,#8 1426 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1427.inst 0xcec08251 //sha512su0 v17.16b,v18.16b 1428 ext v7.16b,v21.16b,v22.16b,#8 1429.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1430.inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b 1431 add v4.2d,v1.2d,v3.2d // "D + T1" 1432.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1433 add v24.2d,v24.2d,v18.2d 1434 ld1 {v25.2d},[x3],#16 1435 ext v24.16b,v24.16b,v24.16b,#8 1436 ext v5.16b,v4.16b,v2.16b,#8 1437 ext v6.16b,v0.16b,v4.16b,#8 1438 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1439.inst 0xcec08272 //sha512su0 v18.16b,v19.16b 1440 ext v7.16b,v22.16b,v23.16b,#8 1441.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1442.inst 0xce678a32 
//sha512su1 v18.16b,v17.16b,v7.16b 1443 add v1.2d,v0.2d,v2.2d // "D + T1" 1444.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1445 add v25.2d,v25.2d,v19.2d 1446 ld1 {v24.2d},[x3],#16 1447 ext v25.16b,v25.16b,v25.16b,#8 1448 ext v5.16b,v1.16b,v4.16b,#8 1449 ext v6.16b,v3.16b,v1.16b,#8 1450 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1451.inst 0xcec08293 //sha512su0 v19.16b,v20.16b 1452 ext v7.16b,v23.16b,v16.16b,#8 1453.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1454.inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b 1455 add v0.2d,v3.2d,v4.2d // "D + T1" 1456.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1457 add v24.2d,v24.2d,v20.2d 1458 ld1 {v25.2d},[x3],#16 1459 ext v24.16b,v24.16b,v24.16b,#8 1460 ext v5.16b,v0.16b,v1.16b,#8 1461 ext v6.16b,v2.16b,v0.16b,#8 1462 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1463.inst 0xcec082b4 //sha512su0 v20.16b,v21.16b 1464 ext v7.16b,v16.16b,v17.16b,#8 1465.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1466.inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b 1467 add v3.2d,v2.2d,v1.2d // "D + T1" 1468.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1469 add v25.2d,v25.2d,v21.2d 1470 ld1 {v24.2d},[x3],#16 1471 ext v25.16b,v25.16b,v25.16b,#8 1472 ext v5.16b,v3.16b,v0.16b,#8 1473 ext v6.16b,v4.16b,v3.16b,#8 1474 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1475.inst 0xcec082d5 //sha512su0 v21.16b,v22.16b 1476 ext v7.16b,v17.16b,v18.16b,#8 1477.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1478.inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b 1479 add v2.2d,v4.2d,v0.2d // "D + T1" 1480.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1481 add v24.2d,v24.2d,v22.2d 1482 ld1 {v25.2d},[x3],#16 1483 ext v24.16b,v24.16b,v24.16b,#8 1484 ext v5.16b,v2.16b,v3.16b,#8 1485 ext v6.16b,v1.16b,v2.16b,#8 1486 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" 1487.inst 0xcec082f6 //sha512su0 v22.16b,v23.16b 1488 ext v7.16b,v18.16b,v19.16b,#8 1489.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1490.inst 0xce678ab6 //sha512su1 
v22.16b,v21.16b,v7.16b 1491 add v4.2d,v1.2d,v3.2d // "D + T1" 1492.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1493 add v25.2d,v25.2d,v23.2d 1494 ld1 {v24.2d},[x3],#16 1495 ext v25.16b,v25.16b,v25.16b,#8 1496 ext v5.16b,v4.16b,v2.16b,#8 1497 ext v6.16b,v0.16b,v4.16b,#8 1498 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" 1499.inst 0xcec08217 //sha512su0 v23.16b,v16.16b 1500 ext v7.16b,v19.16b,v20.16b,#8 1501.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1502.inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1503 add v1.2d,v0.2d,v2.2d // "D + T1" 1504.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1505 ld1 {v25.2d},[x3],#16 1506 add v24.2d,v24.2d,v16.2d 1507 ld1 {v16.16b},[x1],#16 // load next input 1508 ext v24.16b,v24.16b,v24.16b,#8 1509 ext v5.16b,v1.16b,v4.16b,#8 1510 ext v6.16b,v3.16b,v1.16b,#8 1511 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" 1512.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1513 rev64 v16.16b,v16.16b 1514 add v0.2d,v3.2d,v4.2d // "D + T1" 1515.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1516 ld1 {v24.2d},[x3],#16 1517 add v25.2d,v25.2d,v17.2d 1518 ld1 {v17.16b},[x1],#16 // load next input 1519 ext v25.16b,v25.16b,v25.16b,#8 1520 ext v5.16b,v0.16b,v1.16b,#8 1521 ext v6.16b,v2.16b,v0.16b,#8 1522 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" 1523.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1524 rev64 v17.16b,v17.16b 1525 add v3.2d,v2.2d,v1.2d // "D + T1" 1526.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1527 ld1 {v25.2d},[x3],#16 1528 add v24.2d,v24.2d,v18.2d 1529 ld1 {v18.16b},[x1],#16 // load next input 1530 ext v24.16b,v24.16b,v24.16b,#8 1531 ext v5.16b,v3.16b,v0.16b,#8 1532 ext v6.16b,v4.16b,v3.16b,#8 1533 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" 1534.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1535 rev64 v18.16b,v18.16b 1536 add v2.2d,v4.2d,v0.2d // "D + T1" 1537.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1538 ld1 {v24.2d},[x3],#16 1539 add v25.2d,v25.2d,v19.2d 1540 ld1 {v19.16b},[x1],#16 // load next input 1541 
ext v25.16b,v25.16b,v25.16b,#8 1542 ext v5.16b,v2.16b,v3.16b,#8 1543 ext v6.16b,v1.16b,v2.16b,#8 1544 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1545.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1546 rev64 v19.16b,v19.16b 1547 add v4.2d,v1.2d,v3.2d // "D + T1" 1548.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1549 ld1 {v25.2d},[x3],#16 1550 add v24.2d,v24.2d,v20.2d 1551 ld1 {v20.16b},[x1],#16 // load next input 1552 ext v24.16b,v24.16b,v24.16b,#8 1553 ext v5.16b,v4.16b,v2.16b,#8 1554 ext v6.16b,v0.16b,v4.16b,#8 1555 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1556.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1557 rev64 v20.16b,v20.16b 1558 add v1.2d,v0.2d,v2.2d // "D + T1" 1559.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1560 ld1 {v24.2d},[x3],#16 1561 add v25.2d,v25.2d,v21.2d 1562 ld1 {v21.16b},[x1],#16 // load next input 1563 ext v25.16b,v25.16b,v25.16b,#8 1564 ext v5.16b,v1.16b,v4.16b,#8 1565 ext v6.16b,v3.16b,v1.16b,#8 1566 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1567.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1568 rev64 v21.16b,v21.16b 1569 add v0.2d,v3.2d,v4.2d // "D + T1" 1570.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1571 ld1 {v25.2d},[x3],#16 1572 add v24.2d,v24.2d,v22.2d 1573 ld1 {v22.16b},[x1],#16 // load next input 1574 ext v24.16b,v24.16b,v24.16b,#8 1575 ext v5.16b,v0.16b,v1.16b,#8 1576 ext v6.16b,v2.16b,v0.16b,#8 1577 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1578.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1579 rev64 v22.16b,v22.16b 1580 add v3.2d,v2.2d,v1.2d // "D + T1" 1581.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1582 sub x3,x3,#80*8 // rewind 1583 add v25.2d,v25.2d,v23.2d 1584 ld1 {v23.16b},[x1],#16 // load next input 1585 ext v25.16b,v25.16b,v25.16b,#8 1586 ext v5.16b,v3.16b,v0.16b,#8 1587 ext v6.16b,v4.16b,v3.16b,#8 1588 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1589.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1590 rev64 v23.16b,v23.16b 1591 add v2.2d,v4.2d,v0.2d // "D + T1" 1592.inst 0xce618480 
//sha512h2 v0.16b,v4.16b,v1.16b 1593 add v0.2d,v0.2d,v26.2d // accumulate 1594 add v1.2d,v1.2d,v27.2d 1595 add v2.2d,v2.2d,v28.2d 1596 add v3.2d,v3.2d,v29.2d 1597 1598 cbnz x2,.Loop_hw 1599 1600 st1 {v0.2d,v1.2d,v2.2d,v3.2d},[x0] // store context 1601 1602 ldr x29,[sp],#16 1603 ret 1604.size sha512_block_armv8,.-sha512_block_armv8 1605#endif 1606#endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__ELF__) 1607