// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//
// WARNING: this file is built by the bootstrap compiler, thus
// it must maintain compatibility with the oldest supported
// bootstrap toolchain.
//

//go:build !purego && (ppc64 || ppc64le)

// Based on CRYPTOGAMS code with the following comment:
// # ====================================================================
// # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
// # project. The module is, however, dual licensed under OpenSSL and
// # CRYPTOGAMS licenses depending on where you obtain it. For further
// # details see http://www.openssl.org/~appro/cryptogams/.
// # ====================================================================

#include "textflag.h"

// SHA256 block routine. See sha256block.go for Go equivalent.
//
// The algorithm is detailed in FIPS 180-4:
//
//  https://csrc.nist.gov/publications/fips/fips180-4/fips-180-4.pdf
//
// Wt = Mt; for 0 <= t <= 15
// Wt = SIGMA1(Wt-2) + Wt-7 + SIGMA0(Wt-15) + Wt-16; for 16 <= t <= 63
//
// a = H0
// b = H1
// c = H2
// d = H3
// e = H4
// f = H5
// g = H6
// h = H7
//
// for t = 0 to 63 {
//    T1 = h + BIGSIGMA1(e) + Ch(e,f,g) + Kt + Wt
//    T2 = BIGSIGMA0(a) + Maj(a,b,c)
//    h = g
//    g = f
//    f = e
//    e = d + T1
//    d = c
//    c = b
//    b = a
//    a = T1 + T2
// }
//
// H0 = a + H0
// H1 = b + H1
// H2 = c + H2
// H3 = d + H3
// H4 = e + H4
// H5 = f + H5
// H6 = g + H6
// H7 = h + H7

// Scalar register aliases used by ·block.
#define CTX R3 // Pointer to the hash state (the dig argument)
#define INP R4 // Current input pointer; advanced by 0x40 per block
#define END R5 // INP + (p_len rounded down to a multiple of 64)
#define TBL R6 // Pointer into kcon table
#define LEN R9 // Input length in bytes
#define TEMP R12 // Scratch (LVSL address, CTR loop count)

#define TBL_STRT R7 // Pointer to start of kcon table.

// Index registers holding the constant byte offsets 0x000..0x110.
// Every one except R_x000 is loaded with its offset near the top of ·block
// and is then used as the index operand of the indexed loads
// (LVX, LXVD2X, LXVW4X). R_x000 is R0, which ·block never writes; the code
// relies on it reading as zero.
// NOTE(review): R0 == 0 is Go's ppc64 register convention - confirm.
#define R_x000 R0
#define R_x010 R8
#define R_x020 R10
#define R_x030 R11
#define R_x040 R14
#define R_x050 R15
#define R_x060 R16
#define R_x070 R17
#define R_x080 R18
#define R_x090 R19
#define R_x0a0 R20
#define R_x0b0 R21
#define R_x0c0 R22
#define R_x0d0 R23
#define R_x0e0 R24
#define R_x0f0 R25
#define R_x100 R26
#define R_x110 R27


// V0-V7 are A-H
// V8-V23 are used for the message schedule
//
// KI holds the round constant vector that was fetched ahead of its use.
// FUNC is scratch for Ch(e,f,g) and then Maj(a,b,c).
// S0/S1 receive BIGSIGMA0(a)/BIGSIGMA1(e) for the round function;
// s0/s1 receive SIGMA0/SIGMA1 for the message schedule.
#define KI V24
#define FUNC V25
#define S0 V26
#define S1 V27
#define s0 V28
#define s1 V29
#define LEMASK V31 // Permutation control register for little endian

// 4 copies of each Kt, to fill all 4 words of a vector register
//
// Layout of kcon (0x440 = 1088 bytes):
//   0x000-0x3FF  the 64 round constants K0..K63, 16 bytes each.
//   0x400-0x40F  an all-zero entry. The round macros always add the constant
//                fetched one round earlier to the register that becomes h in
//                the following round, so round 63 adds this entry; zero makes
//                that trailing add harmless.
//   0x410-0x43F  three permutation control vectors (different for ppc64le
//                and ppc64) consumed by the VPERMs that repack the state at
//                the end of ·block.
DATA ·kcon+0x000(SB)/8, $0x428a2f98428a2f98
DATA ·kcon+0x008(SB)/8, $0x428a2f98428a2f98
DATA ·kcon+0x010(SB)/8, $0x7137449171374491
DATA ·kcon+0x018(SB)/8, $0x7137449171374491
DATA ·kcon+0x020(SB)/8, $0xb5c0fbcfb5c0fbcf
DATA ·kcon+0x028(SB)/8, $0xb5c0fbcfb5c0fbcf
DATA ·kcon+0x030(SB)/8, $0xe9b5dba5e9b5dba5
DATA ·kcon+0x038(SB)/8, $0xe9b5dba5e9b5dba5
DATA ·kcon+0x040(SB)/8, $0x3956c25b3956c25b
DATA ·kcon+0x048(SB)/8, $0x3956c25b3956c25b
DATA ·kcon+0x050(SB)/8, $0x59f111f159f111f1
DATA ·kcon+0x058(SB)/8, $0x59f111f159f111f1
DATA ·kcon+0x060(SB)/8, $0x923f82a4923f82a4
DATA ·kcon+0x068(SB)/8, $0x923f82a4923f82a4
DATA ·kcon+0x070(SB)/8, $0xab1c5ed5ab1c5ed5
DATA ·kcon+0x078(SB)/8, $0xab1c5ed5ab1c5ed5
DATA ·kcon+0x080(SB)/8, $0xd807aa98d807aa98
DATA ·kcon+0x088(SB)/8, $0xd807aa98d807aa98
DATA ·kcon+0x090(SB)/8, $0x12835b0112835b01
DATA ·kcon+0x098(SB)/8, $0x12835b0112835b01
DATA ·kcon+0x0A0(SB)/8, $0x243185be243185be
DATA ·kcon+0x0A8(SB)/8, $0x243185be243185be
DATA ·kcon+0x0B0(SB)/8, $0x550c7dc3550c7dc3
DATA ·kcon+0x0B8(SB)/8, $0x550c7dc3550c7dc3
DATA ·kcon+0x0C0(SB)/8, $0x72be5d7472be5d74
DATA ·kcon+0x0C8(SB)/8, $0x72be5d7472be5d74
DATA ·kcon+0x0D0(SB)/8, $0x80deb1fe80deb1fe
DATA ·kcon+0x0D8(SB)/8, $0x80deb1fe80deb1fe
DATA ·kcon+0x0E0(SB)/8, $0x9bdc06a79bdc06a7
DATA ·kcon+0x0E8(SB)/8, $0x9bdc06a79bdc06a7
DATA ·kcon+0x0F0(SB)/8, $0xc19bf174c19bf174
DATA ·kcon+0x0F8(SB)/8, $0xc19bf174c19bf174
DATA ·kcon+0x100(SB)/8, $0xe49b69c1e49b69c1
DATA ·kcon+0x108(SB)/8, $0xe49b69c1e49b69c1
DATA ·kcon+0x110(SB)/8, $0xefbe4786efbe4786
DATA ·kcon+0x118(SB)/8, $0xefbe4786efbe4786
DATA ·kcon+0x120(SB)/8, $0x0fc19dc60fc19dc6
DATA ·kcon+0x128(SB)/8, $0x0fc19dc60fc19dc6
DATA ·kcon+0x130(SB)/8, $0x240ca1cc240ca1cc
DATA ·kcon+0x138(SB)/8, $0x240ca1cc240ca1cc
DATA ·kcon+0x140(SB)/8, $0x2de92c6f2de92c6f
DATA ·kcon+0x148(SB)/8, $0x2de92c6f2de92c6f
DATA ·kcon+0x150(SB)/8, $0x4a7484aa4a7484aa
DATA ·kcon+0x158(SB)/8, $0x4a7484aa4a7484aa
DATA ·kcon+0x160(SB)/8, $0x5cb0a9dc5cb0a9dc
DATA ·kcon+0x168(SB)/8, $0x5cb0a9dc5cb0a9dc
DATA ·kcon+0x170(SB)/8, $0x76f988da76f988da
DATA ·kcon+0x178(SB)/8, $0x76f988da76f988da
DATA ·kcon+0x180(SB)/8, $0x983e5152983e5152
DATA ·kcon+0x188(SB)/8, $0x983e5152983e5152
DATA ·kcon+0x190(SB)/8, $0xa831c66da831c66d
DATA ·kcon+0x198(SB)/8, $0xa831c66da831c66d
DATA ·kcon+0x1A0(SB)/8, $0xb00327c8b00327c8
DATA ·kcon+0x1A8(SB)/8, $0xb00327c8b00327c8
DATA ·kcon+0x1B0(SB)/8, $0xbf597fc7bf597fc7
DATA ·kcon+0x1B8(SB)/8, $0xbf597fc7bf597fc7
DATA ·kcon+0x1C0(SB)/8, $0xc6e00bf3c6e00bf3
DATA ·kcon+0x1C8(SB)/8, $0xc6e00bf3c6e00bf3
DATA ·kcon+0x1D0(SB)/8, $0xd5a79147d5a79147
DATA ·kcon+0x1D8(SB)/8, $0xd5a79147d5a79147
DATA ·kcon+0x1E0(SB)/8, $0x06ca635106ca6351
DATA ·kcon+0x1E8(SB)/8, $0x06ca635106ca6351
DATA ·kcon+0x1F0(SB)/8, $0x1429296714292967
DATA ·kcon+0x1F8(SB)/8, $0x1429296714292967
DATA ·kcon+0x200(SB)/8, $0x27b70a8527b70a85
DATA ·kcon+0x208(SB)/8, $0x27b70a8527b70a85
DATA ·kcon+0x210(SB)/8, $0x2e1b21382e1b2138
DATA ·kcon+0x218(SB)/8, $0x2e1b21382e1b2138
DATA ·kcon+0x220(SB)/8, $0x4d2c6dfc4d2c6dfc
DATA ·kcon+0x228(SB)/8, $0x4d2c6dfc4d2c6dfc
DATA ·kcon+0x230(SB)/8, $0x53380d1353380d13
DATA ·kcon+0x238(SB)/8, $0x53380d1353380d13
DATA ·kcon+0x240(SB)/8, $0x650a7354650a7354
DATA ·kcon+0x248(SB)/8, $0x650a7354650a7354
DATA ·kcon+0x250(SB)/8, $0x766a0abb766a0abb
DATA ·kcon+0x258(SB)/8, $0x766a0abb766a0abb
DATA ·kcon+0x260(SB)/8, $0x81c2c92e81c2c92e
DATA ·kcon+0x268(SB)/8, $0x81c2c92e81c2c92e
DATA ·kcon+0x270(SB)/8, $0x92722c8592722c85
DATA ·kcon+0x278(SB)/8, $0x92722c8592722c85
DATA ·kcon+0x280(SB)/8, $0xa2bfe8a1a2bfe8a1
DATA ·kcon+0x288(SB)/8, $0xa2bfe8a1a2bfe8a1
DATA ·kcon+0x290(SB)/8, $0xa81a664ba81a664b
DATA ·kcon+0x298(SB)/8, $0xa81a664ba81a664b
DATA ·kcon+0x2A0(SB)/8, $0xc24b8b70c24b8b70
DATA ·kcon+0x2A8(SB)/8, $0xc24b8b70c24b8b70
DATA ·kcon+0x2B0(SB)/8, $0xc76c51a3c76c51a3
DATA ·kcon+0x2B8(SB)/8, $0xc76c51a3c76c51a3
DATA ·kcon+0x2C0(SB)/8, $0xd192e819d192e819
DATA ·kcon+0x2C8(SB)/8, $0xd192e819d192e819
DATA ·kcon+0x2D0(SB)/8, $0xd6990624d6990624
DATA ·kcon+0x2D8(SB)/8, $0xd6990624d6990624
DATA ·kcon+0x2E0(SB)/8, $0xf40e3585f40e3585
DATA ·kcon+0x2E8(SB)/8, $0xf40e3585f40e3585
DATA ·kcon+0x2F0(SB)/8, $0x106aa070106aa070
DATA ·kcon+0x2F8(SB)/8, $0x106aa070106aa070
DATA ·kcon+0x300(SB)/8, $0x19a4c11619a4c116
DATA ·kcon+0x308(SB)/8, $0x19a4c11619a4c116
DATA ·kcon+0x310(SB)/8, $0x1e376c081e376c08
DATA ·kcon+0x318(SB)/8, $0x1e376c081e376c08
DATA ·kcon+0x320(SB)/8, $0x2748774c2748774c
DATA ·kcon+0x328(SB)/8, $0x2748774c2748774c
DATA ·kcon+0x330(SB)/8, $0x34b0bcb534b0bcb5
DATA ·kcon+0x338(SB)/8, $0x34b0bcb534b0bcb5
DATA ·kcon+0x340(SB)/8, $0x391c0cb3391c0cb3
DATA ·kcon+0x348(SB)/8, $0x391c0cb3391c0cb3
DATA ·kcon+0x350(SB)/8, $0x4ed8aa4a4ed8aa4a
DATA ·kcon+0x358(SB)/8, $0x4ed8aa4a4ed8aa4a
DATA ·kcon+0x360(SB)/8, $0x5b9cca4f5b9cca4f
DATA ·kcon+0x368(SB)/8, $0x5b9cca4f5b9cca4f
DATA ·kcon+0x370(SB)/8, $0x682e6ff3682e6ff3
DATA ·kcon+0x378(SB)/8, $0x682e6ff3682e6ff3
DATA ·kcon+0x380(SB)/8, $0x748f82ee748f82ee
DATA ·kcon+0x388(SB)/8, $0x748f82ee748f82ee
DATA ·kcon+0x390(SB)/8, $0x78a5636f78a5636f
DATA ·kcon+0x398(SB)/8, $0x78a5636f78a5636f
DATA ·kcon+0x3A0(SB)/8, $0x84c8781484c87814
DATA ·kcon+0x3A8(SB)/8, $0x84c8781484c87814
DATA ·kcon+0x3B0(SB)/8, $0x8cc702088cc70208
DATA ·kcon+0x3B8(SB)/8, $0x8cc702088cc70208
DATA ·kcon+0x3C0(SB)/8, $0x90befffa90befffa
DATA ·kcon+0x3C8(SB)/8, $0x90befffa90befffa
DATA ·kcon+0x3D0(SB)/8, $0xa4506ceba4506ceb
DATA ·kcon+0x3D8(SB)/8, $0xa4506ceba4506ceb
DATA ·kcon+0x3E0(SB)/8, $0xbef9a3f7bef9a3f7
DATA ·kcon+0x3E8(SB)/8, $0xbef9a3f7bef9a3f7
DATA ·kcon+0x3F0(SB)/8, $0xc67178f2c67178f2
DATA ·kcon+0x3F8(SB)/8, $0xc67178f2c67178f2
DATA ·kcon+0x400(SB)/8, $0x0000000000000000
DATA ·kcon+0x408(SB)/8, $0x0000000000000000

#ifdef GOARCH_ppc64le
DATA ·kcon+0x410(SB)/8, $0x1011121310111213 // permutation control vectors
DATA ·kcon+0x418(SB)/8, $0x1011121300010203
DATA ·kcon+0x420(SB)/8, $0x1011121310111213
DATA ·kcon+0x428(SB)/8, $0x0405060700010203
DATA ·kcon+0x430(SB)/8, $0x1011121308090a0b
DATA ·kcon+0x438(SB)/8, $0x0405060700010203
#else
DATA ·kcon+0x410(SB)/8, $0x1011121300010203
DATA ·kcon+0x418(SB)/8, $0x1011121310111213 // permutation control vectors
DATA ·kcon+0x420(SB)/8, $0x0405060700010203
DATA ·kcon+0x428(SB)/8, $0x1011121310111213
DATA ·kcon+0x430(SB)/8, $0x0001020304050607
DATA ·kcon+0x438(SB)/8, $0x08090a0b10111213
#endif

GLOBL ·kcon(SB), RODATA, $1088

// SHA256ROUND0 is one compression round that consumes message word xi
// without extending the message schedule.
//
// On entry h already includes this round's constant: it was added either by
// the previous round (through that round's g) or by the explicit VADDUWM
// that precedes the first round in ·block. The macro then computes:
//
//   FUNC = Ch(e,f,g)  (VSEL), S1 = BIGSIGMA1(e), S0 = BIGSIGMA0(a)
//   h   += xi + Ch + S1           h is now T1
//   FUNC = Maj(a,b,c) (VXOR followed by VSEL)
//   g   += KI                     pre-adds the next round's constant to the
//                                 register that is passed as h next round
//   d   += h                      d is now d + T1, i.e. the new e
//   h   += S0 + Maj               h is now T1 + T2, i.e. the new a
//   KI   = kcon entry at TBL+idx  constant for the round after next
//
// No state registers are moved. Instead the caller rotates the a..h argument
// list by one position for each successive round. The instruction order is
// hand-scheduled; do not reorder.
#define SHA256ROUND0(a, b, c, d, e, f, g, h, xi, idx) \
	VSEL		g, f, e, FUNC; \
	VSHASIGMAW	$15, e, $1, S1; \
	VADDUWM		xi, h, h; \
	VSHASIGMAW	$0, a, $1, S0; \
	VADDUWM		FUNC, h, h; \
	VXOR		b, a, FUNC; \
	VADDUWM		S1, h, h; \
	VSEL		b, c, FUNC, FUNC; \
	VADDUWM		KI, g, g; \
	VADDUWM		h, d, d; \
	VADDUWM		FUNC, S0, S0; \
	LVX		(TBL)(idx), KI; \
	VADDUWM		S0, h, h

// SHA256ROUND1 is SHA256ROUND0 with one step of the message schedule
// interleaved into it. The schedule step updates xj in place:
//
//   xj += SIGMA0(xj_1) + xj_9 + SIGMA1(xj_14)
//
// At every call site xj, xj_1, xj_9 and xj_14 are the schedule registers
// holding W[j], W[j+1], W[j+9] and W[j+14], so xj becomes W[j+16]
// (Wt = SIGMA1(Wt-2) + Wt-7 + SIGMA0(Wt-15) + Wt-16 with t = j+16).
// xi is the word consumed by the round itself and is not modified.
#define SHA256ROUND1(a, b, c, d, e, f, g, h, xi, xj, xj_1, xj_9, xj_14, idx) \
	VSHASIGMAW	$0, xj_1, $0, s0; \
	VSEL		g, f, e, FUNC; \
	VSHASIGMAW	$15, e, $1, S1; \
	VADDUWM		xi, h, h; \
	VSHASIGMAW	$0, a, $1, S0; \
	VSHASIGMAW	$15, xj_14, $0, s1; \
	VADDUWM		FUNC, h, h; \
	VXOR		b, a, FUNC; \
	VADDUWM		xj_9, xj, xj; \
	VADDUWM		S1, h, h; \
	VSEL		b, c, FUNC, FUNC; \
	VADDUWM		KI, g, g; \
	VADDUWM		h, d, d; \
	VADDUWM		FUNC, S0, S0; \
	VADDUWM		s0, xj, xj; \
	LVX		(TBL)(idx), KI; \
	VADDUWM		S0, h, h; \
	VADDUWM		s1, xj, xj

// VPERMLE is a VPERM on ppc64le and expands to nothing on ppc64. ·block
// applies it, with LEMASK as the control vector, to each message vector
// right after it is loaded (presumably to put the big-endian message words
// into the expected byte order on little-endian hosts).
#ifdef GOARCH_ppc64le
#define VPERMLE(va,vb,vc,vt) VPERM va, vb, vc, vt
#else
#define VPERMLE(va,vb,vc,vt)
#endif

// func block(dig *digest, p []byte)
//
// block runs the SHA-256 compression function over every whole 64-byte
// chunk of p and updates the 32 bytes of state at dig in place (presumably
// the leading eight 32-bit hash words of the Go digest struct - confirm
// against sha256.go). p_len is rounded down to a multiple of 64, so any
// trailing bytes are ignored; if that leaves nothing to process the function
// returns without reading or writing dig.
//
// The frame is $0-32: no locals and 32 bytes of arguments (the dig pointer
// followed by the slice header of p).
TEXT ·block(SB),0,$0-32
	MOVD	dig+0(FP), CTX
	MOVD	p_base+8(FP), INP
	MOVD	p_len+16(FP), LEN

	// Clear the low six bits of LEN (whole blocks only), then form the
	// end-of-input pointer.
	SRD	$6, LEN
	SLD	$6, LEN
	ADD	INP, LEN, END

	// No whole block available: nothing to do.
	CMP	INP, END
	BEQ	end

	MOVD	$·kcon(SB), TBL_STRT
	MOVD	$0x10, R_x010

#ifdef GOARCH_ppc64le
	// LEMASK = (LVSL result for effective address 8) XOR 0x0F in every
	// byte. It is the control vector handed to VPERMLE below.
	MOVWZ	$8, TEMP
	LVSL	(TEMP)(R0), LEMASK
	VSPLTISB	$0x0F, KI
	VXOR	KI, LEMASK, LEMASK
#endif

	// Load the 32 bytes of state: offset 0x00 into V0, offset 0x10 into V4.
	LXVW4X	(CTX)(R_x000), V0
	LXVW4X	(CTX)(R_x010), V4

	// unpack the input values into vector registers
	// (V1..V3 and V5..V7 are V0 and V4 rotated left by 4, 8 and 12 bytes)
	VSLDOI	$4, V0, V0, V1
	VSLDOI	$8, V0, V0, V2
	VSLDOI	$12, V0, V0, V3
	VSLDOI	$4, V4, V4, V5
	VSLDOI	$8, V4, V4, V6
	VSLDOI	$12, V4, V4, V7

	// Materialize the remaining constant offsets used as load indexes.
	MOVD	$0x020, R_x020
	MOVD	$0x030, R_x030
	MOVD	$0x040, R_x040
	MOVD	$0x050, R_x050
	MOVD	$0x060, R_x060
	MOVD	$0x070, R_x070
	MOVD	$0x080, R_x080
	MOVD	$0x090, R_x090
	MOVD	$0x0a0, R_x0a0
	MOVD	$0x0b0, R_x0b0
	MOVD	$0x0c0, R_x0c0
	MOVD	$0x0d0, R_x0d0
	MOVD	$0x0e0, R_x0e0
	MOVD	$0x0f0, R_x0f0
	MOVD	$0x100, R_x100
	MOVD	$0x110, R_x110

	// One iteration of this loop processes one 64-byte block.
loop:
	// Restart at the head of the constant table and fetch K0.
	MOVD	TBL_STRT, TBL
	LVX	(TBL)(R_x000), KI

	LXVD2X	(INP)(R_x000), V8 // load v8 in advance

	// Offload to VSR24-31 (aka FPR24-31)
	// (keeps a copy of the incoming state for the feed-forward add below)
	XXLOR	V0, V0, VS24
	XXLOR	V1, V1, VS25
	XXLOR	V2, V2, VS26
	XXLOR	V3, V3, VS27
	XXLOR	V4, V4, VS28
	XXLOR	V5, V5, VS29
	XXLOR	V6, V6, VS30
	XXLOR	V7, V7, VS31

	VADDUWM	KI, V7, V7 // h+K[i]
	LVX	(TBL)(R_x010), KI

	// Rounds 0-14. Each 16-byte load supplies four message words; VSLDOI $4
	// rotates the previous vector left by 4 bytes to produce the vector for
	// the next word. The a..h arguments rotate by one register per round.
	VPERMLE(V8, V8, LEMASK, V8)
	SHA256ROUND0(V0, V1, V2, V3, V4, V5, V6, V7, V8, R_x020)
	VSLDOI	$4, V8, V8, V9
	SHA256ROUND0(V7, V0, V1, V2, V3, V4, V5, V6, V9, R_x030)
	VSLDOI	$4, V9, V9, V10
	SHA256ROUND0(V6, V7, V0, V1, V2, V3, V4, V5, V10, R_x040)
	LXVD2X	(INP)(R_x010), V12 // load v12 in advance
	VSLDOI	$4, V10, V10, V11
	SHA256ROUND0(V5, V6, V7, V0, V1, V2, V3, V4, V11, R_x050)
	VPERMLE(V12, V12, LEMASK, V12)
	SHA256ROUND0(V4, V5, V6, V7, V0, V1, V2, V3, V12, R_x060)
	VSLDOI	$4, V12, V12, V13
	SHA256ROUND0(V3, V4, V5, V6, V7, V0, V1, V2, V13, R_x070)
	VSLDOI	$4, V13, V13, V14
	SHA256ROUND0(V2, V3, V4, V5, V6, V7, V0, V1, V14, R_x080)
	LXVD2X	(INP)(R_x020), V16 // load v16 in advance
	VSLDOI	$4, V14, V14, V15
	SHA256ROUND0(V1, V2, V3, V4, V5, V6, V7, V0, V15, R_x090)
	VPERMLE(V16, V16, LEMASK, V16)
	SHA256ROUND0(V0, V1, V2, V3, V4, V5, V6, V7, V16, R_x0a0)
	VSLDOI	$4, V16, V16, V17
	SHA256ROUND0(V7, V0, V1, V2, V3, V4, V5, V6, V17, R_x0b0)
	VSLDOI	$4, V17, V17, V18
	SHA256ROUND0(V6, V7, V0, V1, V2, V3, V4, V5, V18, R_x0c0)
	VSLDOI	$4, V18, V18, V19
	LXVD2X	(INP)(R_x030), V20 // load v20 in advance
	SHA256ROUND0(V5, V6, V7, V0, V1, V2, V3, V4, V19, R_x0d0)
	VPERMLE(V20, V20, LEMASK, V20)
	SHA256ROUND0(V4, V5, V6, V7, V0, V1, V2, V3, V20, R_x0e0)
	VSLDOI	$4, V20, V20, V21
	SHA256ROUND0(V3, V4, V5, V6, V7, V0, V1, V2, V21, R_x0f0)
	VSLDOI	$4, V21, V21, V22
	SHA256ROUND0(V2, V3, V4, V5, V6, V7, V0, V1, V22, R_x100)
	VSLDOI	$4, V22, V22, V23
	// Round 15 also starts the message schedule: V8 becomes W[16].
	SHA256ROUND1(V1, V2, V3, V4, V5, V6, V7, V0, V23, V8, V9, V17, V22, R_x110)

	// Rounds 16-63: three passes (CTR = 3) over the 16-round body below.
	// TBL skips the 0x120 bytes of constants already fetched, and INP moves
	// on to the next block.
	MOVD	$3, TEMP
	MOVD	TEMP, CTR
	ADD	$0x120, TBL
	ADD	$0x40, INP

L16_xx:
	SHA256ROUND1(V0, V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V18, V23, R_x000)
	SHA256ROUND1(V7, V0, V1, V2, V3, V4, V5, V6, V9, V10, V11, V19, V8, R_x010)
	SHA256ROUND1(V6, V7, V0, V1, V2, V3, V4, V5, V10, V11, V12, V20, V9, R_x020)
	SHA256ROUND1(V5, V6, V7, V0, V1, V2, V3, V4, V11, V12, V13, V21, V10, R_x030)
	SHA256ROUND1(V4, V5, V6, V7, V0, V1, V2, V3, V12, V13, V14, V22, V11, R_x040)
	SHA256ROUND1(V3, V4, V5, V6, V7, V0, V1, V2, V13, V14, V15, V23, V12, R_x050)
	SHA256ROUND1(V2, V3, V4, V5, V6, V7, V0, V1, V14, V15, V16, V8, V13, R_x060)
	SHA256ROUND1(V1, V2, V3, V4, V5, V6, V7, V0, V15, V16, V17, V9, V14, R_x070)
	SHA256ROUND1(V0, V1, V2, V3, V4, V5, V6, V7, V16, V17, V18, V10, V15, R_x080)
	SHA256ROUND1(V7, V0, V1, V2, V3, V4, V5, V6, V17, V18, V19, V11, V16, R_x090)
	SHA256ROUND1(V6, V7, V0, V1, V2, V3, V4, V5, V18, V19, V20, V12, V17, R_x0a0)
	SHA256ROUND1(V5, V6, V7, V0, V1, V2, V3, V4, V19, V20, V21, V13, V18, R_x0b0)
	SHA256ROUND1(V4, V5, V6, V7, V0, V1, V2, V3, V20, V21, V22, V14, V19, R_x0c0)
	SHA256ROUND1(V3, V4, V5, V6, V7, V0, V1, V2, V21, V22, V23, V15, V20, R_x0d0)
	SHA256ROUND1(V2, V3, V4, V5, V6, V7, V0, V1, V22, V23, V8, V16, V21, R_x0e0)
	SHA256ROUND1(V1, V2, V3, V4, V5, V6, V7, V0, V23, V8, V9, V17, V22, R_x0f0)
	ADD	$0x100, TBL

	BDNZ	L16_xx

	// Feed-forward: add the state saved in VS24-VS31 back into V0-V7.
	// V10-V17 are free to use as temporaries because the schedule is done.
	XXLOR	VS24, VS24, V10

	XXLOR	VS25, VS25, V11
	VADDUWM	V10, V0, V0
	XXLOR	VS26, VS26, V12
	VADDUWM	V11, V1, V1
	XXLOR	VS27, VS27, V13
	VADDUWM	V12, V2, V2
	XXLOR	VS28, VS28, V14
	VADDUWM	V13, V3, V3
	XXLOR	VS29, VS29, V15
	VADDUWM	V14, V4, V4
	XXLOR	VS30, VS30, V16
	VADDUWM	V15, V5, V5
	XXLOR	VS31, VS31, V17
	VADDUWM	V16, V6, V6
	VADDUWM	V17, V7, V7

	// Continue while INP < END (unsigned compare).
	CMPU	INP, END
	BLT	loop

	// All blocks done. TBL is now TBL_STRT+0x420 and KI still holds the
	// kcon entry at 0x410 (fetched by the final round), so KI, V8 and V9 are
	// the three permutation control vectors. The VPERMs merge V0..V3 into V0
	// and V4..V7 into V4, which are then stored back to CTX+0x00 and
	// CTX+0x10.
	LVX	(TBL)(R_x000), V8
	VPERM	V0, V1, KI, V0
	LVX	(TBL)(R_x010), V9
	VPERM	V4, V5, KI, V4
	VPERM	V0, V2, V8, V0
	VPERM	V4, V6, V8, V4
	VPERM	V0, V3, V9, V0
	VPERM	V4, V7, V9, V4
	STXVD2X	V0, (CTX+R_x000)
	STXVD2X	V4, (CTX+R_x010)

end:
	RET
