// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Based on CRYPTOGAMS code with the following comment:
// # ====================================================================
// # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
// # project. The module is, however, dual licensed under OpenSSL and
// # CRYPTOGAMS licenses depending on where you obtain it. For further
// # details see http://www.openssl.org/~appro/cryptogams/.
// # ====================================================================

//go:build (ppc64 || ppc64le) && !purego

#include "textflag.h"

// SHA512 block routine. See sha512block.go for Go equivalent.
//
// The algorithm is detailed in FIPS 180-4:
//
//  https://csrc.nist.gov/publications/fips/fips180-4/fips-180-4.pdf
//
// Wt = Mt; for 0 <= t <= 15
// Wt = SIGMA1(Wt-2) + Wt-7 + SIGMA0(Wt-15) + Wt-16; for 16 <= t <= 79
//
// a = H0
// b = H1
// c = H2
// d = H3
// e = H4
// f = H5
// g = H6
// h = H7
//
// for t = 0 to 79 {
//    T1 = h + BIGSIGMA1(e) + Ch(e,f,g) + Kt + Wt
//    T2 = BIGSIGMA0(a) + Maj(a,b,c)
//    h = g
//    g = f
//    f = e
//    e = d + T1
//    d = c
//    c = b
//    b = a
//    a = T1 + T2
// }
//
// H0 = a + H0
// H1 = b + H1
// H2 = c + H2
// H3 = d + H3
// H4 = e + H4
// H5 = f + H5
// H6 = g + H6
// H7 = h + H7

// General purpose register assignments.
#define CTX	R3	// Pointer to the digest state (first argument).
#define INP	R4	// Current read position in the input.
#define END	R5	// One past the last whole block of input.
#define TBL	R6	// Moving pointer into the kcon table.
#define LEN	R9	// Input length, rounded down to whole blocks.
#define TEMP	R12	// Scratch.

#define TBL_STRT	R7	// Pointer to start of kcon table.

// Index registers preloaded with constant byte offsets, used for
// indexed loads from kcon, the input and the digest state.
// R_x000 is R0, which this routine never writes and relies on to
// read as zero.
#define R_x000	R0
#define R_x010	R10
#define R_x020	R25
#define R_x030	R26
#define R_x040	R14
#define R_x050	R15
#define R_x060	R16
#define R_x070	R17
#define R_x080	R18
#define R_x090	R19
#define R_x0a0	R20
#define R_x0b0	R21
#define R_x0c0	R22
#define R_x0d0	R23
#define R_x0e0	R24
#define R_x0f0	R28
#define R_x100	R29
#define R_x110	R27

// V0-V7 are A-H
// V8-V23 are used for the message schedule
#define KI	V24	// Round constant fetched one round ahead.
#define FUNC	V25	// Ch(e,f,g), then Maj(a,b,c).
#define S0	V26	// BIGSIGMA0(a)
#define S1	V27	// BIGSIGMA1(e)
#define s0	V28	// SIGMA0 term of the message schedule.
#define s1	V29	// SIGMA1 term of the message schedule.
#define LEMASK	V31	// Permutation control register for little endian

// VPERM is needed on LE to switch the bytes

#ifdef GOARCH_ppc64le
#define VPERMLE(va,vb,vc,vt) VPERM va, vb, vc, vt
#else
#define VPERMLE(va,vb,vc,vt)
#endif

// 2 copies of each Kt, to fill both doublewords of a vector register
//
// The 80 round constants are followed by two extra 16-byte entries:
//   0x500: zeros; the last round still adds "the next constant" to
//          its g operand, so this entry keeps that addition harmless.
//   0x510: VPERM control vector, loaded by the last round and used to
//          repack the state before it is stored.
DATA  ·kcon+0x000(SB)/8, $0x428a2f98d728ae22
DATA  ·kcon+0x008(SB)/8, $0x428a2f98d728ae22
DATA  ·kcon+0x010(SB)/8, $0x7137449123ef65cd
DATA  ·kcon+0x018(SB)/8, $0x7137449123ef65cd
DATA  ·kcon+0x020(SB)/8, $0xb5c0fbcfec4d3b2f
DATA  ·kcon+0x028(SB)/8, $0xb5c0fbcfec4d3b2f
DATA  ·kcon+0x030(SB)/8, $0xe9b5dba58189dbbc
DATA  ·kcon+0x038(SB)/8, $0xe9b5dba58189dbbc
DATA  ·kcon+0x040(SB)/8, $0x3956c25bf348b538
DATA  ·kcon+0x048(SB)/8, $0x3956c25bf348b538
DATA  ·kcon+0x050(SB)/8, $0x59f111f1b605d019
DATA  ·kcon+0x058(SB)/8, $0x59f111f1b605d019
DATA  ·kcon+0x060(SB)/8, $0x923f82a4af194f9b
DATA  ·kcon+0x068(SB)/8, $0x923f82a4af194f9b
DATA  ·kcon+0x070(SB)/8, $0xab1c5ed5da6d8118
DATA  ·kcon+0x078(SB)/8, $0xab1c5ed5da6d8118
DATA  ·kcon+0x080(SB)/8, $0xd807aa98a3030242
DATA  ·kcon+0x088(SB)/8, $0xd807aa98a3030242
DATA  ·kcon+0x090(SB)/8, $0x12835b0145706fbe
DATA  ·kcon+0x098(SB)/8, $0x12835b0145706fbe
DATA  ·kcon+0x0A0(SB)/8, $0x243185be4ee4b28c
DATA  ·kcon+0x0A8(SB)/8, $0x243185be4ee4b28c
DATA  ·kcon+0x0B0(SB)/8, $0x550c7dc3d5ffb4e2
DATA  ·kcon+0x0B8(SB)/8, $0x550c7dc3d5ffb4e2
DATA  ·kcon+0x0C0(SB)/8, $0x72be5d74f27b896f
DATA  ·kcon+0x0C8(SB)/8, $0x72be5d74f27b896f
DATA  ·kcon+0x0D0(SB)/8, $0x80deb1fe3b1696b1
DATA  ·kcon+0x0D8(SB)/8, $0x80deb1fe3b1696b1
DATA  ·kcon+0x0E0(SB)/8, $0x9bdc06a725c71235
DATA  ·kcon+0x0E8(SB)/8, $0x9bdc06a725c71235
DATA  ·kcon+0x0F0(SB)/8, $0xc19bf174cf692694
DATA  ·kcon+0x0F8(SB)/8, $0xc19bf174cf692694
DATA  ·kcon+0x100(SB)/8, $0xe49b69c19ef14ad2
DATA  ·kcon+0x108(SB)/8, $0xe49b69c19ef14ad2
DATA  ·kcon+0x110(SB)/8, $0xefbe4786384f25e3
DATA  ·kcon+0x118(SB)/8, $0xefbe4786384f25e3
DATA  ·kcon+0x120(SB)/8, $0x0fc19dc68b8cd5b5
DATA  ·kcon+0x128(SB)/8, $0x0fc19dc68b8cd5b5
DATA  ·kcon+0x130(SB)/8, $0x240ca1cc77ac9c65
DATA  ·kcon+0x138(SB)/8, $0x240ca1cc77ac9c65
DATA  ·kcon+0x140(SB)/8, $0x2de92c6f592b0275
DATA  ·kcon+0x148(SB)/8, $0x2de92c6f592b0275
DATA  ·kcon+0x150(SB)/8, $0x4a7484aa6ea6e483
DATA  ·kcon+0x158(SB)/8, $0x4a7484aa6ea6e483
DATA  ·kcon+0x160(SB)/8, $0x5cb0a9dcbd41fbd4
DATA  ·kcon+0x168(SB)/8, $0x5cb0a9dcbd41fbd4
DATA  ·kcon+0x170(SB)/8, $0x76f988da831153b5
DATA  ·kcon+0x178(SB)/8, $0x76f988da831153b5
DATA  ·kcon+0x180(SB)/8, $0x983e5152ee66dfab
DATA  ·kcon+0x188(SB)/8, $0x983e5152ee66dfab
DATA  ·kcon+0x190(SB)/8, $0xa831c66d2db43210
DATA  ·kcon+0x198(SB)/8, $0xa831c66d2db43210
DATA  ·kcon+0x1A0(SB)/8, $0xb00327c898fb213f
DATA  ·kcon+0x1A8(SB)/8, $0xb00327c898fb213f
DATA  ·kcon+0x1B0(SB)/8, $0xbf597fc7beef0ee4
DATA  ·kcon+0x1B8(SB)/8, $0xbf597fc7beef0ee4
DATA  ·kcon+0x1C0(SB)/8, $0xc6e00bf33da88fc2
DATA  ·kcon+0x1C8(SB)/8, $0xc6e00bf33da88fc2
DATA  ·kcon+0x1D0(SB)/8, $0xd5a79147930aa725
DATA  ·kcon+0x1D8(SB)/8, $0xd5a79147930aa725
DATA  ·kcon+0x1E0(SB)/8, $0x06ca6351e003826f
DATA  ·kcon+0x1E8(SB)/8, $0x06ca6351e003826f
DATA  ·kcon+0x1F0(SB)/8, $0x142929670a0e6e70
DATA  ·kcon+0x1F8(SB)/8, $0x142929670a0e6e70
DATA  ·kcon+0x200(SB)/8, $0x27b70a8546d22ffc
DATA  ·kcon+0x208(SB)/8, $0x27b70a8546d22ffc
DATA  ·kcon+0x210(SB)/8, $0x2e1b21385c26c926
DATA  ·kcon+0x218(SB)/8, $0x2e1b21385c26c926
DATA  ·kcon+0x220(SB)/8, $0x4d2c6dfc5ac42aed
DATA  ·kcon+0x228(SB)/8, $0x4d2c6dfc5ac42aed
DATA  ·kcon+0x230(SB)/8, $0x53380d139d95b3df
DATA  ·kcon+0x238(SB)/8, $0x53380d139d95b3df
DATA  ·kcon+0x240(SB)/8, $0x650a73548baf63de
DATA  ·kcon+0x248(SB)/8, $0x650a73548baf63de
DATA  ·kcon+0x250(SB)/8, $0x766a0abb3c77b2a8
DATA  ·kcon+0x258(SB)/8, $0x766a0abb3c77b2a8
DATA  ·kcon+0x260(SB)/8, $0x81c2c92e47edaee6
DATA  ·kcon+0x268(SB)/8, $0x81c2c92e47edaee6
DATA  ·kcon+0x270(SB)/8, $0x92722c851482353b
DATA  ·kcon+0x278(SB)/8, $0x92722c851482353b
DATA  ·kcon+0x280(SB)/8, $0xa2bfe8a14cf10364
DATA  ·kcon+0x288(SB)/8, $0xa2bfe8a14cf10364
DATA  ·kcon+0x290(SB)/8, $0xa81a664bbc423001
DATA  ·kcon+0x298(SB)/8, $0xa81a664bbc423001
DATA  ·kcon+0x2A0(SB)/8, $0xc24b8b70d0f89791
DATA  ·kcon+0x2A8(SB)/8, $0xc24b8b70d0f89791
DATA  ·kcon+0x2B0(SB)/8, $0xc76c51a30654be30
DATA  ·kcon+0x2B8(SB)/8, $0xc76c51a30654be30
DATA  ·kcon+0x2C0(SB)/8, $0xd192e819d6ef5218
DATA  ·kcon+0x2C8(SB)/8, $0xd192e819d6ef5218
DATA  ·kcon+0x2D0(SB)/8, $0xd69906245565a910
DATA  ·kcon+0x2D8(SB)/8, $0xd69906245565a910
DATA  ·kcon+0x2E0(SB)/8, $0xf40e35855771202a
DATA  ·kcon+0x2E8(SB)/8, $0xf40e35855771202a
DATA  ·kcon+0x2F0(SB)/8, $0x106aa07032bbd1b8
DATA  ·kcon+0x2F8(SB)/8, $0x106aa07032bbd1b8
DATA  ·kcon+0x300(SB)/8, $0x19a4c116b8d2d0c8
DATA  ·kcon+0x308(SB)/8, $0x19a4c116b8d2d0c8
DATA  ·kcon+0x310(SB)/8, $0x1e376c085141ab53
DATA  ·kcon+0x318(SB)/8, $0x1e376c085141ab53
DATA  ·kcon+0x320(SB)/8, $0x2748774cdf8eeb99
DATA  ·kcon+0x328(SB)/8, $0x2748774cdf8eeb99
DATA  ·kcon+0x330(SB)/8, $0x34b0bcb5e19b48a8
DATA  ·kcon+0x338(SB)/8, $0x34b0bcb5e19b48a8
DATA  ·kcon+0x340(SB)/8, $0x391c0cb3c5c95a63
DATA  ·kcon+0x348(SB)/8, $0x391c0cb3c5c95a63
DATA  ·kcon+0x350(SB)/8, $0x4ed8aa4ae3418acb
DATA  ·kcon+0x358(SB)/8, $0x4ed8aa4ae3418acb
DATA  ·kcon+0x360(SB)/8, $0x5b9cca4f7763e373
DATA  ·kcon+0x368(SB)/8, $0x5b9cca4f7763e373
DATA  ·kcon+0x370(SB)/8, $0x682e6ff3d6b2b8a3
DATA  ·kcon+0x378(SB)/8, $0x682e6ff3d6b2b8a3
DATA  ·kcon+0x380(SB)/8, $0x748f82ee5defb2fc
DATA  ·kcon+0x388(SB)/8, $0x748f82ee5defb2fc
DATA  ·kcon+0x390(SB)/8, $0x78a5636f43172f60
DATA  ·kcon+0x398(SB)/8, $0x78a5636f43172f60
DATA  ·kcon+0x3A0(SB)/8, $0x84c87814a1f0ab72
DATA  ·kcon+0x3A8(SB)/8, $0x84c87814a1f0ab72
DATA  ·kcon+0x3B0(SB)/8, $0x8cc702081a6439ec
DATA  ·kcon+0x3B8(SB)/8, $0x8cc702081a6439ec
DATA  ·kcon+0x3C0(SB)/8, $0x90befffa23631e28
DATA  ·kcon+0x3C8(SB)/8, $0x90befffa23631e28
DATA  ·kcon+0x3D0(SB)/8, $0xa4506cebde82bde9
DATA  ·kcon+0x3D8(SB)/8, $0xa4506cebde82bde9
DATA  ·kcon+0x3E0(SB)/8, $0xbef9a3f7b2c67915
DATA  ·kcon+0x3E8(SB)/8, $0xbef9a3f7b2c67915
DATA  ·kcon+0x3F0(SB)/8, $0xc67178f2e372532b
DATA  ·kcon+0x3F8(SB)/8, $0xc67178f2e372532b
DATA  ·kcon+0x400(SB)/8, $0xca273eceea26619c
DATA  ·kcon+0x408(SB)/8, $0xca273eceea26619c
DATA  ·kcon+0x410(SB)/8, $0xd186b8c721c0c207
DATA  ·kcon+0x418(SB)/8, $0xd186b8c721c0c207
DATA  ·kcon+0x420(SB)/8, $0xeada7dd6cde0eb1e
DATA  ·kcon+0x428(SB)/8, $0xeada7dd6cde0eb1e
DATA  ·kcon+0x430(SB)/8, $0xf57d4f7fee6ed178
DATA  ·kcon+0x438(SB)/8, $0xf57d4f7fee6ed178
DATA  ·kcon+0x440(SB)/8, $0x06f067aa72176fba
DATA  ·kcon+0x448(SB)/8, $0x06f067aa72176fba
DATA  ·kcon+0x450(SB)/8, $0x0a637dc5a2c898a6
DATA  ·kcon+0x458(SB)/8, $0x0a637dc5a2c898a6
DATA  ·kcon+0x460(SB)/8, $0x113f9804bef90dae
DATA  ·kcon+0x468(SB)/8, $0x113f9804bef90dae
DATA  ·kcon+0x470(SB)/8, $0x1b710b35131c471b
DATA  ·kcon+0x478(SB)/8, $0x1b710b35131c471b
DATA  ·kcon+0x480(SB)/8, $0x28db77f523047d84
DATA  ·kcon+0x488(SB)/8, $0x28db77f523047d84
DATA  ·kcon+0x490(SB)/8, $0x32caab7b40c72493
DATA  ·kcon+0x498(SB)/8, $0x32caab7b40c72493
DATA  ·kcon+0x4A0(SB)/8, $0x3c9ebe0a15c9bebc
DATA  ·kcon+0x4A8(SB)/8, $0x3c9ebe0a15c9bebc
DATA  ·kcon+0x4B0(SB)/8, $0x431d67c49c100d4c
DATA  ·kcon+0x4B8(SB)/8, $0x431d67c49c100d4c
DATA  ·kcon+0x4C0(SB)/8, $0x4cc5d4becb3e42b6
DATA  ·kcon+0x4C8(SB)/8, $0x4cc5d4becb3e42b6
DATA  ·kcon+0x4D0(SB)/8, $0x597f299cfc657e2a
DATA  ·kcon+0x4D8(SB)/8, $0x597f299cfc657e2a
DATA  ·kcon+0x4E0(SB)/8, $0x5fcb6fab3ad6faec
DATA  ·kcon+0x4E8(SB)/8, $0x5fcb6fab3ad6faec
DATA  ·kcon+0x4F0(SB)/8, $0x6c44198c4a475817
DATA  ·kcon+0x4F8(SB)/8, $0x6c44198c4a475817
DATA  ·kcon+0x500(SB)/8, $0x0000000000000000
DATA  ·kcon+0x508(SB)/8, $0x0000000000000000
DATA  ·kcon+0x510(SB)/8, $0x1011121314151617
DATA  ·kcon+0x518(SB)/8, $0x0001020304050607
GLOBL ·kcon(SB), RODATA, $1312

// SHA512ROUND0 performs one compression round for a message word xi
// that is already available (used for the first 15 rounds of a block).
//
// a..h are the working variables for this round; callers rotate the
// register list by one position for each successive round instead of
// moving values between registers. Only d and h are updated:
//   h += xi + Ch(e,f,g) + BIGSIGMA1(e)     (T1; Kt was added to h earlier)
//   d += h                                 (new e)
//   h += BIGSIGMA0(a) + Maj(a,b,c)         (new a)
// The constant currently in KI is added to g, which the next round
// receives as its h, and the following constant is fetched into KI
// from (TBL)(idx).
#define SHA512ROUND0(a, b, c, d, e, f, g, h, xi, idx) \
	VSEL		g, f, e, FUNC; \
	VSHASIGMAD	$15, e, $1, S1; \
	VADDUDM		xi, h, h; \
	VSHASIGMAD	$0, a, $1, S0; \
	VADDUDM		FUNC, h, h; \
	VXOR		b, a, FUNC; \
	VADDUDM		S1, h, h; \
	VSEL		b, c, FUNC, FUNC; \
	VADDUDM		KI, g, g; \
	VADDUDM		h, d, d; \
	VADDUDM		FUNC, S0, S0; \
	LVX		(TBL)(idx), KI; \
	VADDUDM		S0, h, h

// SHA512ROUND1 performs the same round as SHA512ROUND0 and, interleaved
// with it, extends the message schedule in place:
//   xj += xj_9 + SIGMA0(xj_1) + SIGMA1(xj_14)
// where xj holds the word from 16 rounds before the one being produced
// and xj_1, xj_9, xj_14 are the schedule words 1, 9 and 14 positions
// after xj. The updated xj is consumed as xi by the following round.
#define SHA512ROUND1(a, b, c, d, e, f, g, h, xi, xj, xj_1, xj_9, xj_14, idx) \
	VSHASIGMAD	$0, xj_1, $0, s0; \
	VSEL		g, f, e, FUNC; \
	VSHASIGMAD	$15, e, $1, S1; \
	VADDUDM		xi, h, h; \
	VSHASIGMAD	$0, a, $1, S0; \
	VSHASIGMAD	$15, xj_14, $0, s1; \
	VADDUDM		FUNC, h, h; \
	VXOR		b, a, FUNC; \
	VADDUDM		xj_9, xj, xj; \
	VADDUDM		S1, h, h; \
	VSEL		b, c, FUNC, FUNC; \
	VADDUDM		KI, g, g; \
	VADDUDM		h, d, d; \
	VADDUDM		FUNC, S0, S0; \
	VADDUDM		s0, xj, xj; \
	LVX		(TBL)(idx), KI; \
	VADDUDM		S0, h, h; \
	VADDUDM		s1, xj, xj

// func block(dig *digest, p []byte)
//
// block folds every whole 128-byte block of p into the eight 64-bit
// state words stored at the start of *dig. Trailing bytes that do not
// fill a whole block are ignored; if p holds no whole block the state
// is left untouched.
TEXT ·block(SB),0,$0-32
	MOVD	dig+0(FP), CTX
	MOVD	p_base+8(FP), INP
	MOVD	p_len+16(FP), LEN

	// Round LEN down to a multiple of 128 bytes, the amount of input
	// consumed by each iteration of the loop below, so the loop can
	// never read past the end of p.
	SRD	$7, LEN
	SLD	$7, LEN

	ADD	INP, LEN, END

	// Nothing to do without at least one whole block.
	CMP	INP, END
	BEQ	end

	MOVD	$·kcon(SB), TBL_STRT

	// Materialize the constant offsets used as index registers.
	MOVWZ	$0x010, R_x010
	MOVWZ	$0x020, R_x020
	MOVWZ	$0x030, R_x030
	MOVD	$0x040, R_x040
	MOVD	$0x050, R_x050
	MOVD	$0x060, R_x060
	MOVD	$0x070, R_x070
	MOVD	$0x080, R_x080
	MOVD	$0x090, R_x090
	MOVD	$0x0a0, R_x0a0
	MOVD	$0x0b0, R_x0b0
	MOVD	$0x0c0, R_x0c0
	MOVD	$0x0d0, R_x0d0
	MOVD	$0x0e0, R_x0e0
	MOVD	$0x0f0, R_x0f0
	MOVD	$0x100, R_x100
	MOVD	$0x110, R_x110

#ifdef GOARCH_ppc64le
	// Generate the mask used with VPERM for LE
	MOVWZ	$8, TEMP
	LVSL	(TEMP)(R0), LEMASK
	VSPLTISB	$0x0F, KI
	VXOR	KI, LEMASK, LEMASK
#endif

	// Each 16-byte load brings in two state words.
	LXVD2X	(CTX)(R_x000), VS32	// v0 = vs32
	LXVD2X	(CTX)(R_x010), VS34	// v2 = vs34
	LXVD2X	(CTX)(R_x020), VS36	// v4 = vs36

	// unpack the input values into vector registers
	VSLDOI	$8, V0, V0, V1
	LXVD2X	(CTX)(R_x030), VS38	// v6 = vs38
	VSLDOI	$8, V2, V2, V3
	VSLDOI	$8, V4, V4, V5
	VSLDOI	$8, V6, V6, V7

loop:
	// Restart at the head of the constant table for every block.
	MOVD	TBL_STRT, TBL
	LVX	(TBL)(R_x000), KI

	LXVD2X	(INP)(R0), VS40	// load v8 (=vs40) in advance
	ADD	$16, INP

	// Copy V0-V7 to VS24-VS31

	XXLOR	V0, V0, VS24
	XXLOR	V1, V1, VS25
	XXLOR	V2, V2, VS26
	XXLOR	V3, V3, VS27
	XXLOR	V4, V4, VS28
	XXLOR	V5, V5, VS29
	XXLOR	V6, V6, VS30
	XXLOR	V7, V7, VS31

	VADDUDM	KI, V7, V7	// h+K[i]
	LVX	(TBL)(R_x010), KI

	// Rounds 0-15. Every 16-byte load supplies two message words:
	// VPERMLE byte-swaps them on little endian, and VSLDOI rotates
	// the second word into the next schedule register.
	VPERMLE(V8,V8,LEMASK,V8)
	SHA512ROUND0(V0, V1, V2, V3, V4, V5, V6, V7, V8, R_x020)
	LXVD2X	(INP)(R_x000), VS42	// load v10 (=vs42) in advance
	VSLDOI	$8, V8, V8, V9
	SHA512ROUND0(V7, V0, V1, V2, V3, V4, V5, V6, V9, R_x030)
	VPERMLE(V10,V10,LEMASK,V10)
	SHA512ROUND0(V6, V7, V0, V1, V2, V3, V4, V5, V10, R_x040)
	LXVD2X	(INP)(R_x010), VS44	// load v12 (=vs44) in advance
	VSLDOI	$8, V10, V10, V11
	SHA512ROUND0(V5, V6, V7, V0, V1, V2, V3, V4, V11, R_x050)
	VPERMLE(V12,V12,LEMASK,V12)
	SHA512ROUND0(V4, V5, V6, V7, V0, V1, V2, V3, V12, R_x060)
	LXVD2X	(INP)(R_x020), VS46	// load v14 (=vs46) in advance
	VSLDOI	$8, V12, V12, V13
	SHA512ROUND0(V3, V4, V5, V6, V7, V0, V1, V2, V13, R_x070)
	VPERMLE(V14,V14,LEMASK,V14)
	SHA512ROUND0(V2, V3, V4, V5, V6, V7, V0, V1, V14, R_x080)
	LXVD2X	(INP)(R_x030), VS48	// load v16 (=vs48) in advance
	VSLDOI	$8, V14, V14, V15
	SHA512ROUND0(V1, V2, V3, V4, V5, V6, V7, V0, V15, R_x090)
	VPERMLE(V16,V16,LEMASK,V16)
	SHA512ROUND0(V0, V1, V2, V3, V4, V5, V6, V7, V16, R_x0a0)
	LXVD2X	(INP)(R_x040), VS50	// load v18 (=vs50) in advance
	VSLDOI	$8, V16, V16, V17
	SHA512ROUND0(V7, V0, V1, V2, V3, V4, V5, V6, V17, R_x0b0)
	VPERMLE(V18,V18,LEMASK,V18)
	SHA512ROUND0(V6, V7, V0, V1, V2, V3, V4, V5, V18, R_x0c0)
	LXVD2X	(INP)(R_x050), VS52	// load v20 (=vs52) in advance
	VSLDOI	$8, V18, V18, V19
	SHA512ROUND0(V5, V6, V7, V0, V1, V2, V3, V4, V19, R_x0d0)
	VPERMLE(V20,V20,LEMASK,V20)
	SHA512ROUND0(V4, V5, V6, V7, V0, V1, V2, V3, V20, R_x0e0)
	LXVD2X	(INP)(R_x060), VS54	// load v22 (=vs54) in advance
	VSLDOI	$8, V20, V20, V21
	SHA512ROUND0(V3, V4, V5, V6, V7, V0, V1, V2, V21, R_x0f0)
	VPERMLE(V22,V22,LEMASK,V22)
	SHA512ROUND0(V2, V3, V4, V5, V6, V7, V0, V1, V22, R_x100)
	VSLDOI	$8, V22, V22, V23
	SHA512ROUND1(V1, V2, V3, V4, V5, V6, V7, V0, V23, V8, V9, V17, V22, R_x110)

	// Rounds 16-79: four passes over the 16-round body below.
	MOVWZ	$4, TEMP
	MOVWZ	TEMP, CTR
	ADD	$0x120, TBL	// step past the constants fetched so far
	ADD	$0x70, INP	// with the earlier +16: 128 bytes per block

L16_xx:
	SHA512ROUND1(V0, V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V18, V23, R_x000)
	SHA512ROUND1(V7, V0, V1, V2, V3, V4, V5, V6, V9, V10, V11, V19, V8, R_x010)
	SHA512ROUND1(V6, V7, V0, V1, V2, V3, V4, V5, V10, V11, V12, V20, V9, R_x020)
	SHA512ROUND1(V5, V6, V7, V0, V1, V2, V3, V4, V11, V12, V13, V21, V10, R_x030)
	SHA512ROUND1(V4, V5, V6, V7, V0, V1, V2, V3, V12, V13, V14, V22, V11, R_x040)
	SHA512ROUND1(V3, V4, V5, V6, V7, V0, V1, V2, V13, V14, V15, V23, V12, R_x050)
	SHA512ROUND1(V2, V3, V4, V5, V6, V7, V0, V1, V14, V15, V16, V8, V13, R_x060)
	SHA512ROUND1(V1, V2, V3, V4, V5, V6, V7, V0, V15, V16, V17, V9, V14, R_x070)
	SHA512ROUND1(V0, V1, V2, V3, V4, V5, V6, V7, V16, V17, V18, V10, V15, R_x080)
	SHA512ROUND1(V7, V0, V1, V2, V3, V4, V5, V6, V17, V18, V19, V11, V16, R_x090)
	SHA512ROUND1(V6, V7, V0, V1, V2, V3, V4, V5, V18, V19, V20, V12, V17, R_x0a0)
	SHA512ROUND1(V5, V6, V7, V0, V1, V2, V3, V4, V19, V20, V21, V13, V18, R_x0b0)
	SHA512ROUND1(V4, V5, V6, V7, V0, V1, V2, V3, V20, V21, V22, V14, V19, R_x0c0)
	SHA512ROUND1(V3, V4, V5, V6, V7, V0, V1, V2, V21, V22, V23, V15, V20, R_x0d0)
	SHA512ROUND1(V2, V3, V4, V5, V6, V7, V0, V1, V22, V23, V8, V16, V21, R_x0e0)
	SHA512ROUND1(V1, V2, V3, V4, V5, V6, V7, V0, V23, V8, V9, V17, V22, R_x0f0)
	ADD	$0x100, TBL

	BDNZ	L16_xx

	// Add the state saved in VS24-VS31 at the top of the loop back
	// into the working variables (Hi = Hi + a..h).
	XXLOR	VS24, VS24, V10
	XXLOR	VS25, VS25, V11
	XXLOR	VS26, VS26, V12
	XXLOR	VS27, VS27, V13
	XXLOR	VS28, VS28, V14
	XXLOR	VS29, VS29, V15
	XXLOR	VS30, VS30, V16
	XXLOR	VS31, VS31, V17
	VADDUDM	V10, V0, V0
	VADDUDM	V11, V1, V1
	VADDUDM	V12, V2, V2
	VADDUDM	V13, V3, V3
	VADDUDM	V14, V4, V4
	VADDUDM	V15, V5, V5
	VADDUDM	V16, V6, V6
	VADDUDM	V17, V7, V7

	CMPU	INP, END
	BLT	loop

	// Repack pairs of state words into single vectors for the 16-byte
	// stores below. KI holds the control vector from kcon+0x510, which
	// the final round of the last block loaded.
#ifdef GOARCH_ppc64le
	VPERM	V0, V1, KI, V0
	VPERM	V2, V3, KI, V2
	VPERM	V4, V5, KI, V4
	VPERM	V6, V7, KI, V6
#else
	VPERM	V1, V0, KI, V0
	VPERM	V3, V2, KI, V2
	VPERM	V5, V4, KI, V4
	VPERM	V7, V6, KI, V6
#endif
	STXVD2X	VS32, (CTX+R_x000)	// v0 = vs32
	STXVD2X	VS34, (CTX+R_x010)	// v2 = vs34
	STXVD2X	VS36, (CTX+R_x020)	// v4 = vs36
	STXVD2X	VS38, (CTX+R_x030)	// v6 = vs38

end:
	RET