1// Copyright 2016 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5//go:build (ppc64 || ppc64le) && !purego 6 7// Based on CRYPTOGAMS code with the following comment: 8// # ==================================================================== 9// # Written by Andy Polyakov <[email protected]> for the OpenSSL 10// # project. The module is, however, dual licensed under OpenSSL and 11// # CRYPTOGAMS licenses depending on where you obtain it. For further 12// # details see http://www.openssl.org/~appro/cryptogams/. 13// # ==================================================================== 14 15// Original code can be found at the link below: 16// https://github.com/dot-asm/cryptogams/blob/master/ppc/aesp8-ppc.pl 17 18// Some function names were changed to be consistent with Go function 19// names. For instance, function aes_p8_set_{en,de}crypt_key become 20// set{En,De}cryptKeyAsm. I also split setEncryptKeyAsm in two parts 21// and a new session was created (doEncryptKeyAsm). This was necessary to 22// avoid arguments overwriting when setDecryptKeyAsm calls setEncryptKeyAsm. 23// There were other modifications as well but kept the same functionality. 24 25#include "textflag.h" 26 27// For expandKeyAsm 28#define INP R3 29#define BITS R4 30#define OUTENC R5 // Pointer to next expanded encrypt key 31#define PTR R6 32#define CNT R7 33#define ROUNDS R8 34#define OUTDEC R9 // Pointer to next expanded decrypt key 35#define TEMP R19 36#define ZERO V0 37#define IN0 V1 38#define IN1 V2 39#define KEY V3 40#define RCON V4 41#define MASK V5 42#define TMP V6 43#define STAGE V7 44#define OUTPERM V8 45#define OUTMASK V9 46#define OUTHEAD V10 47#define OUTTAIL V11 48 49// For P9 instruction emulation 50#define ESPERM V21 // Endian swapping permute into BE 51#define TMP2 V22 // Temporary for P8_STXVB16X/P8_STXVB16X 52 53// For {en,de}cryptBlockAsm 54#define BLK_INP R3 55#define BLK_OUT R4 56#define BLK_KEY R5 57#define BLK_ROUNDS R6 58#define BLK_IDX R7 59 60DATA ·rcon+0x00(SB)/8, $0x0f0e0d0c0b0a0908 // Permute for vector doubleword endian swap 61DATA ·rcon+0x08(SB)/8, $0x0706050403020100 62DATA ·rcon+0x10(SB)/8, $0x0100000001000000 // RCON 63DATA ·rcon+0x18(SB)/8, $0x0100000001000000 // RCON 64DATA ·rcon+0x20(SB)/8, $0x1b0000001b000000 65DATA ·rcon+0x28(SB)/8, $0x1b0000001b000000 66DATA ·rcon+0x30(SB)/8, $0x0d0e0f0c0d0e0f0c // MASK 67DATA ·rcon+0x38(SB)/8, $0x0d0e0f0c0d0e0f0c // MASK 68DATA ·rcon+0x40(SB)/8, $0x0000000000000000 69DATA ·rcon+0x48(SB)/8, $0x0000000000000000 70GLOBL ·rcon(SB), RODATA, $80 71 72#ifdef GOARCH_ppc64le 73# ifdef GOPPC64_power9 74#define P8_LXVB16X(RA,RB,VT) LXVB16X (RA+RB), VT 75#define P8_STXVB16X(VS,RA,RB) STXVB16X VS, (RA+RB) 76#define XXBRD_ON_LE(VA,VT) XXBRD VA, VT 77# else 78// On POWER8/ppc64le, emulate the POWER9 instructions by loading unaligned 79// doublewords and byte-swapping each doubleword to emulate BE load/stores. 80#define NEEDS_ESPERM 81#define P8_LXVB16X(RA,RB,VT) \ 82 LXVD2X (RA+RB), VT \ 83 VPERM VT, VT, ESPERM, VT 84 85#define P8_STXVB16X(VS,RA,RB) \ 86 VPERM VS, VS, ESPERM, TMP2 \ 87 STXVD2X TMP2, (RA+RB) 88 89#define XXBRD_ON_LE(VA,VT) \ 90 VPERM VA, VA, ESPERM, VT 91 92# endif // defined(GOPPC64_power9) 93#else 94#define P8_LXVB16X(RA,RB,VT) LXVD2X (RA+RB), VT 95#define P8_STXVB16X(VS,RA,RB) STXVD2X VS, (RA+RB) 96#define XXBRD_ON_LE(VA, VT) 97#endif // defined(GOARCH_ppc64le) 98 99// func setEncryptKeyAsm(nr int, key *byte, enc *uint32, dec *uint32) 100TEXT ·expandKeyAsm(SB), NOSPLIT|NOFRAME, $0 101 // Load the arguments inside the registers 102 MOVD nr+0(FP), ROUNDS 103 MOVD key+8(FP), INP 104 MOVD enc+16(FP), OUTENC 105 MOVD dec+24(FP), OUTDEC 106 107#ifdef NEEDS_ESPERM 108 MOVD $·rcon(SB), PTR // PTR points to rcon addr 109 LVX (PTR), ESPERM 110 ADD $0x10, PTR 111#else 112 MOVD $·rcon+0x10(SB), PTR // PTR points to rcon addr (skipping permute vector) 113#endif 114 115 // Get key from memory and write aligned into VR 116 P8_LXVB16X(INP, R0, IN0) 117 ADD $0x10, INP, INP 118 MOVD $0x20, TEMP 119 120 CMPW ROUNDS, $12 121 LVX (PTR)(R0), RCON // lvx 4,0,6 Load first 16 bytes into RCON 122 LVX (PTR)(TEMP), MASK 123 ADD $0x10, PTR, PTR // addi 6,6,0x10 PTR to next 16 bytes of RCON 124 MOVD $8, CNT // li 7,8 CNT = 8 125 VXOR ZERO, ZERO, ZERO // vxor 0,0,0 Zero to be zero :) 126 MOVD CNT, CTR // mtctr 7 Set the counter to 8 (rounds) 127 128 // The expanded decrypt key is the expanded encrypt key stored in reverse order. 129 // Move OUTDEC to the last key location, and store in descending order. 130 ADD $160, OUTDEC, OUTDEC 131 BLT loop128 132 ADD $32, OUTDEC, OUTDEC 133 BEQ l192 134 ADD $32, OUTDEC, OUTDEC 135 JMP l256 136 137loop128: 138 // Key schedule (Round 1 to 8) 139 VPERM IN0, IN0, MASK, KEY // vperm 3,1,1,5 Rotate-n-splat 140 VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12 141 STXVD2X IN0, (R0+OUTENC) 142 STXVD2X IN0, (R0+OUTDEC) 143 VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4 144 ADD $16, OUTENC, OUTENC 145 ADD $-16, OUTDEC, OUTDEC 146 147 VXOR IN0, TMP, IN0 // vxor 1,1,6 148 VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 149 VXOR IN0, TMP, IN0 // vxor 1,1,6 150 VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 151 VXOR IN0, TMP, IN0 // vxor 1,1,6 152 VADDUWM RCON, RCON, RCON // vadduwm 4,4,4 153 VXOR IN0, KEY, IN0 // vxor 1,1,3 154 BDNZ loop128 155 156 LVX (PTR)(R0), RCON // lvx 4,0,6 Last two round keys 157 158 // Key schedule (Round 9) 159 VPERM IN0, IN0, MASK, KEY // vperm 3,1,1,5 Rotate-n-spat 160 VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12 161 STXVD2X IN0, (R0+OUTENC) 162 STXVD2X IN0, (R0+OUTDEC) 163 VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4 164 ADD $16, OUTENC, OUTENC 165 ADD $-16, OUTDEC, OUTDEC 166 167 // Key schedule (Round 10) 168 VXOR IN0, TMP, IN0 // vxor 1,1,6 169 VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 170 VXOR IN0, TMP, IN0 // vxor 1,1,6 171 VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 172 VXOR IN0, TMP, IN0 // vxor 1,1,6 173 VADDUWM RCON, RCON, RCON // vadduwm 4,4,4 174 VXOR IN0, KEY, IN0 // vxor 1,1,3 175 176 VPERM IN0, IN0, MASK, KEY // vperm 3,1,1,5 Rotate-n-splat 177 VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12 178 STXVD2X IN0, (R0+OUTENC) 179 STXVD2X IN0, (R0+OUTDEC) 180 VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4 181 ADD $16, OUTENC, OUTENC 182 ADD $-16, OUTDEC, OUTDEC 183 184 // Key schedule (Round 11) 185 VXOR IN0, TMP, IN0 // vxor 1,1,6 186 VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 187 VXOR IN0, TMP, IN0 // vxor 1,1,6 188 VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 189 VXOR IN0, TMP, IN0 // vxor 1,1,6 190 VXOR IN0, KEY, IN0 // vxor 1,1,3 191 STXVD2X IN0, (R0+OUTENC) 192 STXVD2X IN0, (R0+OUTDEC) 193 194 RET 195 196l192: 197 LXSDX (INP+R0), IN1 // Load next 8 bytes into upper half of VSR. 198 XXBRD_ON_LE(IN1, IN1) // and convert to BE ordering on LE hosts. 199 MOVD $4, CNT // li 7,4 200 STXVD2X IN0, (R0+OUTENC) 201 STXVD2X IN0, (R0+OUTDEC) 202 ADD $16, OUTENC, OUTENC 203 ADD $-16, OUTDEC, OUTDEC 204 VSPLTISB $8, KEY // vspltisb 3,8 205 MOVD CNT, CTR // mtctr 7 206 VSUBUBM MASK, KEY, MASK // vsububm 5,5,3 207 208loop192: 209 VPERM IN1, IN1, MASK, KEY // vperm 3,2,2,5 210 VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12 211 VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4 212 213 VXOR IN0, TMP, IN0 // vxor 1,1,6 214 VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 215 VXOR IN0, TMP, IN0 // vxor 1,1,6 216 VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 217 VXOR IN0, TMP, IN0 // vxor 1,1,6 218 219 VSLDOI $8, ZERO, IN1, STAGE // vsldoi 7,0,2,8 220 VSPLTW $3, IN0, TMP // vspltw 6,1,3 221 VXOR TMP, IN1, TMP // vxor 6,6,2 222 VSLDOI $12, ZERO, IN1, IN1 // vsldoi 2,0,2,12 223 VADDUWM RCON, RCON, RCON // vadduwm 4,4,4 224 VXOR IN1, TMP, IN1 // vxor 2,2,6 225 VXOR IN0, KEY, IN0 // vxor 1,1,3 226 VXOR IN1, KEY, IN1 // vxor 2,2,3 227 VSLDOI $8, STAGE, IN0, STAGE // vsldoi 7,7,1,8 228 229 VPERM IN1, IN1, MASK, KEY // vperm 3,2,2,5 230 VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12 231 STXVD2X STAGE, (R0+OUTENC) 232 STXVD2X STAGE, (R0+OUTDEC) 233 VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4 234 ADD $16, OUTENC, OUTENC 235 ADD $-16, OUTDEC, OUTDEC 236 237 VSLDOI $8, IN0, IN1, STAGE // vsldoi 7,1,2,8 238 VXOR IN0, TMP, IN0 // vxor 1,1,6 239 VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 240 STXVD2X STAGE, (R0+OUTENC) 241 STXVD2X STAGE, (R0+OUTDEC) 242 VXOR IN0, TMP, IN0 // vxor 1,1,6 243 VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 244 VXOR IN0, TMP, IN0 // vxor 1,1,6 245 ADD $16, OUTENC, OUTENC 246 ADD $-16, OUTDEC, OUTDEC 247 248 VSPLTW $3, IN0, TMP // vspltw 6,1,3 249 VXOR TMP, IN1, TMP // vxor 6,6,2 250 VSLDOI $12, ZERO, IN1, IN1 // vsldoi 2,0,2,12 251 VADDUWM RCON, RCON, RCON // vadduwm 4,4,4 252 VXOR IN1, TMP, IN1 // vxor 2,2,6 253 VXOR IN0, KEY, IN0 // vxor 1,1,3 254 VXOR IN1, KEY, IN1 // vxor 2,2,3 255 STXVD2X IN0, (R0+OUTENC) 256 STXVD2X IN0, (R0+OUTDEC) 257 ADD $16, OUTENC, OUTENC 258 ADD $-16, OUTDEC, OUTDEC 259 BDNZ loop192 260 261 RET 262 263l256: 264 P8_LXVB16X(INP, R0, IN1) 265 MOVD $7, CNT // li 7,7 266 STXVD2X IN0, (R0+OUTENC) 267 STXVD2X IN0, (R0+OUTDEC) 268 ADD $16, OUTENC, OUTENC 269 ADD $-16, OUTDEC, OUTDEC 270 MOVD CNT, CTR // mtctr 7 271 272loop256: 273 VPERM IN1, IN1, MASK, KEY // vperm 3,2,2,5 274 VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12 275 STXVD2X IN1, (R0+OUTENC) 276 STXVD2X IN1, (R0+OUTDEC) 277 VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4 278 ADD $16, OUTENC, OUTENC 279 ADD $-16, OUTDEC, OUTDEC 280 281 VXOR IN0, TMP, IN0 // vxor 1,1,6 282 VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 283 VXOR IN0, TMP, IN0 // vxor 1,1,6 284 VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 285 VXOR IN0, TMP, IN0 // vxor 1,1,6 286 VADDUWM RCON, RCON, RCON // vadduwm 4,4,4 287 VXOR IN0, KEY, IN0 // vxor 1,1,3 288 STXVD2X IN0, (R0+OUTENC) 289 STXVD2X IN0, (R0+OUTDEC) 290 ADD $16, OUTENC, OUTENC 291 ADD $-16, OUTDEC, OUTDEC 292 BDZ done 293 294 VSPLTW $3, IN0, KEY // vspltw 3,1,3 295 VSLDOI $12, ZERO, IN1, TMP // vsldoi 6,0,2,12 296 VSBOX KEY, KEY // vsbox 3,3 297 298 VXOR IN1, TMP, IN1 // vxor 2,2,6 299 VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 300 VXOR IN1, TMP, IN1 // vxor 2,2,6 301 VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 302 VXOR IN1, TMP, IN1 // vxor 2,2,6 303 304 VXOR IN1, KEY, IN1 // vxor 2,2,3 305 JMP loop256 // b .Loop256 306 307done: 308 RET 309 310// func encryptBlockAsm(nr int, xk *uint32, dst, src *byte) 311TEXT ·encryptBlockAsm(SB), NOSPLIT|NOFRAME, $0 312 MOVD nr+0(FP), R6 // Round count/Key size 313 MOVD xk+8(FP), R5 // Key pointer 314 MOVD dst+16(FP), R3 // Dest pointer 315 MOVD src+24(FP), R4 // Src pointer 316#ifdef NEEDS_ESPERM 317 MOVD $·rcon(SB), R7 318 LVX (R7), ESPERM // Permute value for P8_ macros. 319#endif 320 321 // Set CR{1,2,3}EQ to hold the key size information. 322 CMPU R6, $10, CR1 323 CMPU R6, $12, CR2 324 CMPU R6, $14, CR3 325 326 MOVD $16, R6 327 MOVD $32, R7 328 MOVD $48, R8 329 MOVD $64, R9 330 MOVD $80, R10 331 MOVD $96, R11 332 MOVD $112, R12 333 334 // Load text in BE order 335 P8_LXVB16X(R4, R0, V0) 336 337 // V1, V2 will hold keys, V0 is a temp. 338 // At completion, V2 will hold the ciphertext. 339 // Load xk[0:3] and xor with text 340 LXVD2X (R0+R5), V1 341 VXOR V0, V1, V0 342 343 // Load xk[4:11] and cipher 344 LXVD2X (R6+R5), V1 345 LXVD2X (R7+R5), V2 346 VCIPHER V0, V1, V0 347 VCIPHER V0, V2, V0 348 349 // Load xk[12:19] and cipher 350 LXVD2X (R8+R5), V1 351 LXVD2X (R9+R5), V2 352 VCIPHER V0, V1, V0 353 VCIPHER V0, V2, V0 354 355 // Load xk[20:27] and cipher 356 LXVD2X (R10+R5), V1 357 LXVD2X (R11+R5), V2 358 VCIPHER V0, V1, V0 359 VCIPHER V0, V2, V0 360 361 // Increment xk pointer to reuse constant offsets in R6-R12. 362 ADD $112, R5 363 364 // Load xk[28:35] and cipher 365 LXVD2X (R0+R5), V1 366 LXVD2X (R6+R5), V2 367 VCIPHER V0, V1, V0 368 VCIPHER V0, V2, V0 369 370 // Load xk[36:43] and cipher 371 LXVD2X (R7+R5), V1 372 LXVD2X (R8+R5), V2 373 BEQ CR1, Ldec_tail // Key size 10? 374 VCIPHER V0, V1, V0 375 VCIPHER V0, V2, V0 376 377 // Load xk[44:51] and cipher 378 LXVD2X (R9+R5), V1 379 LXVD2X (R10+R5), V2 380 BEQ CR2, Ldec_tail // Key size 12? 381 VCIPHER V0, V1, V0 382 VCIPHER V0, V2, V0 383 384 // Load xk[52:59] and cipher 385 LXVD2X (R11+R5), V1 386 LXVD2X (R12+R5), V2 387 BNE CR3, Linvalid_key_len // Not key size 14? 388 // Fallthrough to final cipher 389 390Ldec_tail: 391 // Cipher last two keys such that key information is 392 // cleared from V1 and V2. 393 VCIPHER V0, V1, V1 394 VCIPHERLAST V1, V2, V2 395 396 // Store the result in BE order. 397 P8_STXVB16X(V2, R3, R0) 398 RET 399 400Linvalid_key_len: 401 // Segfault, this should never happen. Only 3 keys sizes are created/used. 402 MOVD R0, 0(R0) 403 RET 404 405// func decryptBlockAsm(nr int, xk *uint32, dst, src *byte) 406TEXT ·decryptBlockAsm(SB), NOSPLIT|NOFRAME, $0 407 MOVD nr+0(FP), R6 // Round count/Key size 408 MOVD xk+8(FP), R5 // Key pointer 409 MOVD dst+16(FP), R3 // Dest pointer 410 MOVD src+24(FP), R4 // Src pointer 411#ifdef NEEDS_ESPERM 412 MOVD $·rcon(SB), R7 413 LVX (R7), ESPERM // Permute value for P8_ macros. 414#endif 415 416 // Set CR{1,2,3}EQ to hold the key size information. 417 CMPU R6, $10, CR1 418 CMPU R6, $12, CR2 419 CMPU R6, $14, CR3 420 421 MOVD $16, R6 422 MOVD $32, R7 423 MOVD $48, R8 424 MOVD $64, R9 425 MOVD $80, R10 426 MOVD $96, R11 427 MOVD $112, R12 428 429 // Load text in BE order 430 P8_LXVB16X(R4, R0, V0) 431 432 // V1, V2 will hold keys, V0 is a temp. 433 // At completion, V2 will hold the text. 434 // Load xk[0:3] and xor with ciphertext 435 LXVD2X (R0+R5), V1 436 VXOR V0, V1, V0 437 438 // Load xk[4:11] and cipher 439 LXVD2X (R6+R5), V1 440 LXVD2X (R7+R5), V2 441 VNCIPHER V0, V1, V0 442 VNCIPHER V0, V2, V0 443 444 // Load xk[12:19] and cipher 445 LXVD2X (R8+R5), V1 446 LXVD2X (R9+R5), V2 447 VNCIPHER V0, V1, V0 448 VNCIPHER V0, V2, V0 449 450 // Load xk[20:27] and cipher 451 LXVD2X (R10+R5), V1 452 LXVD2X (R11+R5), V2 453 VNCIPHER V0, V1, V0 454 VNCIPHER V0, V2, V0 455 456 // Increment xk pointer to reuse constant offsets in R6-R12. 457 ADD $112, R5 458 459 // Load xk[28:35] and cipher 460 LXVD2X (R0+R5), V1 461 LXVD2X (R6+R5), V2 462 VNCIPHER V0, V1, V0 463 VNCIPHER V0, V2, V0 464 465 // Load xk[36:43] and cipher 466 LXVD2X (R7+R5), V1 467 LXVD2X (R8+R5), V2 468 BEQ CR1, Ldec_tail // Key size 10? 469 VNCIPHER V0, V1, V0 470 VNCIPHER V0, V2, V0 471 472 // Load xk[44:51] and cipher 473 LXVD2X (R9+R5), V1 474 LXVD2X (R10+R5), V2 475 BEQ CR2, Ldec_tail // Key size 12? 476 VNCIPHER V0, V1, V0 477 VNCIPHER V0, V2, V0 478 479 // Load xk[52:59] and cipher 480 LXVD2X (R11+R5), V1 481 LXVD2X (R12+R5), V2 482 BNE CR3, Linvalid_key_len // Not key size 14? 483 // Fallthrough to final cipher 484 485Ldec_tail: 486 // Cipher last two keys such that key information is 487 // cleared from V1 and V2. 488 VNCIPHER V0, V1, V1 489 VNCIPHERLAST V1, V2, V2 490 491 // Store the result in BE order. 492 P8_STXVB16X(V2, R3, R0) 493 RET 494 495Linvalid_key_len: 496 // Segfault, this should never happen. Only 3 keys sizes are created/used. 497 MOVD R0, 0(R0) 498 RET 499 500// Remove defines from above so they can be defined here 501#undef INP 502#undef OUTENC 503#undef ROUNDS 504#undef KEY 505#undef TMP 506 507#define INP R3 508#define OUTP R4 509#define LEN R5 510#define KEYP R6 511#define ROUNDS R7 512#define IVP R8 513#define ENC R9 514 515#define INOUT V2 516#define TMP V3 517#define IVEC V4 518 519// Load the crypt key into VSRs. 520// 521// The expanded key is stored and loaded using 522// STXVD2X/LXVD2X. The in-memory byte ordering 523// depends on the endianness of the machine. The 524// expanded keys are generated by expandKeyAsm above. 525// 526// Rkeyp holds the key pointer. It is clobbered. Once 527// the expanded keys are loaded, it is not needed. 528// 529// R12,R14-R21 are scratch registers. 530// For keyp of 10, V6, V11-V20 hold the expanded key. 531// For keyp of 12, V6, V9-V20 hold the expanded key. 532// For keyp of 14, V6, V7-V20 hold the expanded key. 533#define LOAD_KEY(Rkeyp) \ 534 MOVD $16, R12 \ 535 MOVD $32, R14 \ 536 MOVD $48, R15 \ 537 MOVD $64, R16 \ 538 MOVD $80, R17 \ 539 MOVD $96, R18 \ 540 MOVD $112, R19 \ 541 MOVD $128, R20 \ 542 MOVD $144, R21 \ 543 LXVD2X (R0+Rkeyp), V6 \ 544 ADD $16, Rkeyp \ 545 BEQ CR1, L_start10 \ 546 BEQ CR2, L_start12 \ 547 LXVD2X (R0+Rkeyp), V7 \ 548 LXVD2X (R12+Rkeyp), V8 \ 549 ADD $32, Rkeyp \ 550 L_start12: \ 551 LXVD2X (R0+Rkeyp), V9 \ 552 LXVD2X (R12+Rkeyp), V10 \ 553 ADD $32, Rkeyp \ 554 L_start10: \ 555 LXVD2X (R0+Rkeyp), V11 \ 556 LXVD2X (R12+Rkeyp), V12 \ 557 LXVD2X (R14+Rkeyp), V13 \ 558 LXVD2X (R15+Rkeyp), V14 \ 559 LXVD2X (R16+Rkeyp), V15 \ 560 LXVD2X (R17+Rkeyp), V16 \ 561 LXVD2X (R18+Rkeyp), V17 \ 562 LXVD2X (R19+Rkeyp), V18 \ 563 LXVD2X (R20+Rkeyp), V19 \ 564 LXVD2X (R21+Rkeyp), V20 565 566// Perform aes cipher operation for keysize 10/12/14 using the keys 567// loaded by LOAD_KEY, and key size information held in CR1EQ/CR2EQ. 568// 569// Vxor is ideally V6 (Key[0-3]), but for slightly improved encrypting 570// performance V6 and IVEC can be swapped (xor is both associative and 571// commutative) during encryption: 572// 573// VXOR INOUT, IVEC, INOUT 574// VXOR INOUT, V6, INOUT 575// 576// into 577// 578// VXOR INOUT, V6, INOUT 579// VXOR INOUT, IVEC, INOUT 580// 581#define CIPHER_BLOCK(Vin, Vxor, Vout, vcipher, vciphel, label10, label12) \ 582 VXOR Vin, Vxor, Vout \ 583 BEQ CR1, label10 \ 584 BEQ CR2, label12 \ 585 vcipher Vout, V7, Vout \ 586 vcipher Vout, V8, Vout \ 587 label12: \ 588 vcipher Vout, V9, Vout \ 589 vcipher Vout, V10, Vout \ 590 label10: \ 591 vcipher Vout, V11, Vout \ 592 vcipher Vout, V12, Vout \ 593 vcipher Vout, V13, Vout \ 594 vcipher Vout, V14, Vout \ 595 vcipher Vout, V15, Vout \ 596 vcipher Vout, V16, Vout \ 597 vcipher Vout, V17, Vout \ 598 vcipher Vout, V18, Vout \ 599 vcipher Vout, V19, Vout \ 600 vciphel Vout, V20, Vout \ 601 602#define CLEAR_KEYS() \ 603 VXOR V6, V6, V6 \ 604 VXOR V7, V7, V7 \ 605 VXOR V8, V8, V8 \ 606 VXOR V9, V9, V9 \ 607 VXOR V10, V10, V10 \ 608 VXOR V11, V11, V11 \ 609 VXOR V12, V12, V12 \ 610 VXOR V13, V13, V13 \ 611 VXOR V14, V14, V14 \ 612 VXOR V15, V15, V15 \ 613 VXOR V16, V16, V16 \ 614 VXOR V17, V17, V17 \ 615 VXOR V18, V18, V18 \ 616 VXOR V19, V19, V19 \ 617 VXOR V20, V20, V20 618 619//func cryptBlocksChain(src, dst *byte, length int, key *uint32, iv *byte, enc int, nr int) 620TEXT ·cryptBlocksChain(SB), NOSPLIT|NOFRAME, $0 621 MOVD src+0(FP), INP 622 MOVD dst+8(FP), OUTP 623 MOVD length+16(FP), LEN 624 MOVD key+24(FP), KEYP 625 MOVD iv+32(FP), IVP 626 MOVD enc+40(FP), ENC 627 MOVD nr+48(FP), ROUNDS 628 629#ifdef NEEDS_ESPERM 630 MOVD $·rcon(SB), R11 631 LVX (R11), ESPERM // Permute value for P8_ macros. 632#endif 633 634 // Assume len > 0 && len % blockSize == 0. 635 CMPW ENC, $0 636 P8_LXVB16X(IVP, R0, IVEC) 637 CMPU ROUNDS, $10, CR1 638 CMPU ROUNDS, $12, CR2 // Only sizes 10/12/14 are supported. 639 640 // Setup key in VSRs, and set loop count in CTR. 641 LOAD_KEY(KEYP) 642 SRD $4, LEN 643 MOVD LEN, CTR 644 645 BEQ Lcbc_dec 646 647 PCALIGN $16 648Lcbc_enc: 649 P8_LXVB16X(INP, R0, INOUT) 650 ADD $16, INP 651 VXOR INOUT, V6, INOUT 652 CIPHER_BLOCK(INOUT, IVEC, INOUT, VCIPHER, VCIPHERLAST, Lcbc_enc10, Lcbc_enc12) 653 VOR INOUT, INOUT, IVEC // ciphertext (INOUT) is IVEC for next block. 654 P8_STXVB16X(INOUT, OUTP, R0) 655 ADD $16, OUTP 656 BDNZ Lcbc_enc 657 658 P8_STXVB16X(INOUT, IVP, R0) 659 CLEAR_KEYS() 660 RET 661 662 PCALIGN $16 663Lcbc_dec: 664 P8_LXVB16X(INP, R0, TMP) 665 ADD $16, INP 666 CIPHER_BLOCK(TMP, V6, INOUT, VNCIPHER, VNCIPHERLAST, Lcbc_dec10, Lcbc_dec12) 667 VXOR INOUT, IVEC, INOUT 668 VOR TMP, TMP, IVEC // TMP is IVEC for next block. 669 P8_STXVB16X(INOUT, OUTP, R0) 670 ADD $16, OUTP 671 BDNZ Lcbc_dec 672 673 P8_STXVB16X(IVEC, IVP, R0) 674 CLEAR_KEYS() 675 RET 676