// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build !purego

// AES block encryption, decryption and key expansion for arm64 using the
// ARMv8 Cryptography Extensions (AESE/AESD/AESMC/AESIMC). Round keys are
// held as 16-byte vectors; nr is 10/12/14 for AES-128/192/256.

#include "textflag.h"

// Byte-index tables for TBL, used by the key schedule to feed AESE with a
// pre-permuted word. AESE computes AddRoundKey, then ShiftRows, then
// SubBytes; with an all-zero round key (V0 below) it reduces to
// SubBytes∘ShiftRows. The indices pre-apply the inverse ShiftRows so that
// after AESE the S-boxed word of interest lands in lane S[0].
//
// rotInvSRows additionally rotates the word by one byte, so the result is
// SubWord(RotWord(w)) as required by the FIPS-197 key expansion.
DATA rotInvSRows<>+0x00(SB)/8, $0x080f0205040b0e01
DATA rotInvSRows<>+0x08(SB)/8, $0x00070a0d0c030609
GLOBL rotInvSRows<>(SB), (NOPTR+RODATA), $16
// invSRows is the same trick without the rotation, yielding plain
// SubWord(w); only the AES-256 schedule needs it.
DATA invSRows<>+0x00(SB)/8, $0x0b0e0104070a0d00
DATA invSRows<>+0x08(SB)/8, $0x0306090c0f020508
GLOBL invSRows<>(SB), (NOPTR+RODATA), $16

// func encryptBlockAsm(nr int, xk *uint32, dst, src *byte)
//
// Encrypts one 16-byte block from src into dst using the nr+1 round keys
// at xk (the encryption schedule from expandKeyAsm).
// R9 = nr, R10 = xk (advanced as keys are consumed), R11 = dst, R12 = src.
TEXT ·encryptBlockAsm(SB),NOSPLIT,$0
	MOVD	nr+0(FP), R9
	MOVD	xk+8(FP), R10
	MOVD	dst+16(FP), R11
	MOVD	src+24(FP), R12

	VLD1	(R12), [V0.B16]	// V0 = state (plaintext block)

	// Dispatch on round count: AES-256 runs 4 extra rounds, AES-192 runs
	// 2 extra, then both fall through into the shared 10-round tail.
	CMP	$12, R9
	BLT	enc128	// nr == 10 (AES-128)
	BEQ	enc196	// nr == 12 (AES-192; label is a historical misnomer)
enc256:
	// Rounds 1-2 of AES-256. Each round is AESE (AddRoundKey + ShiftRows
	// + SubBytes) followed by AESMC (MixColumns).
	VLD1.P	32(R10), [V1.B16, V2.B16]
	AESE	V1.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V2.B16, V0.B16
	AESMC	V0.B16, V0.B16
enc196:
	// Two extra rounds shared by AES-192 and AES-256.
	VLD1.P	32(R10), [V3.B16, V4.B16]
	AESE	V3.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V4.B16, V0.B16
	AESMC	V0.B16, V0.B16
enc128:
	// Final 10 rounds, common to all key sizes: load the remaining 11
	// round keys into V5-V15 up front.
	VLD1.P	64(R10), [V5.B16, V6.B16, V7.B16, V8.B16]
	VLD1.P	64(R10), [V9.B16, V10.B16, V11.B16, V12.B16]
	VLD1.P	48(R10), [V13.B16, V14.B16, V15.B16]
	AESE	V5.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V6.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V7.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V8.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V9.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V10.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V11.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V12.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V13.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V14.B16, V0.B16	// last round: no MixColumns
	VEOR	V0.B16, V15.B16, V0.B16	// final AddRoundKey
	VST1	[V0.B16], (R11)
	RET

// func decryptBlockAsm(nr int, xk *uint32, dst, src *byte)
//
// Decrypts one 16-byte block. Mirror of encryptBlockAsm using AESD/AESIMC;
// xk is the decryption schedule produced by expandKeyAsm (keys reversed,
// interior keys passed through InvMixColumns), so it is consumed forward.
// R9 = nr, R10 = xk, R11 = dst, R12 = src.
TEXT ·decryptBlockAsm(SB),NOSPLIT,$0
	MOVD	nr+0(FP), R9
	MOVD	xk+8(FP), R10
	MOVD	dst+16(FP), R11
	MOVD	src+24(FP), R12

	VLD1	(R12), [V0.B16]	// V0 = state (ciphertext block)

	CMP	$12, R9
	BLT	dec128	// nr == 10 (AES-128)
	BEQ	dec196	// nr == 12 (AES-192; label is a historical misnomer)
dec256:
	// Extra rounds for AES-256: AESD (AddRoundKey + InvShiftRows +
	// InvSubBytes) then AESIMC (InvMixColumns).
	VLD1.P	32(R10), [V1.B16, V2.B16]
	AESD	V1.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V2.B16, V0.B16
	AESIMC	V0.B16, V0.B16
dec196:
	// Extra rounds shared by AES-192 and AES-256.
	VLD1.P	32(R10), [V3.B16, V4.B16]
	AESD	V3.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V4.B16, V0.B16
	AESIMC	V0.B16, V0.B16
dec128:
	// Final 10 rounds, common to all key sizes.
	VLD1.P	64(R10), [V5.B16, V6.B16, V7.B16, V8.B16]
	VLD1.P	64(R10), [V9.B16, V10.B16, V11.B16, V12.B16]
	VLD1.P	48(R10), [V13.B16, V14.B16, V15.B16]
	AESD	V5.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V6.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V7.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V8.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V9.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V10.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V11.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V12.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V13.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V14.B16, V0.B16	// last round: no InvMixColumns
	VEOR	V0.B16, V15.B16, V0.B16	// final AddRoundKey
	VST1	[V0.B16], (R11)
	RET

// func expandKeyAsm(nr int, key *byte, enc, dec *uint32)
// Note that round keys are stored in uint128 format, not uint32.
//
// Expands key into the encryption schedule at enc and, when dec is
// non-nil, also derives the decryption schedule at dec:
//   dec[0] = enc[nr], dec[nr] = enc[0],
//   dec[i] = InvMixColumns(enc[nr-i]) for 0 < i < nr.
// Register roles: R8 = nr (reused as loop counter), R9 = key, R10 = enc,
// R11 = dec, R13 = current Rcon, R14 = 0x1b, V3 = rotInvSRows table,
// V0 = zero (so AESE computes SubBytes∘ShiftRows only).
TEXT ·expandKeyAsm(SB),NOSPLIT,$0
	MOVD	nr+0(FP), R8
	MOVD	key+8(FP), R9
	MOVD	enc+16(FP), R10
	MOVD	dec+24(FP), R11
	LDP	rotInvSRows<>(SB), (R0, R1)
	VMOV	R0, V3.D[0]
	VMOV	R1, V3.D[1]
	VEOR	V0.B16, V0.B16, V0.B16 // All zeroes
	MOVW	$1, R13	// Rcon starts at 1
	// nr is 10, 12 or 14: bit 1 is clear only for 12, bit 2 is set only
	// for 14; anything else falls through to the AES-128 path.
	TBZ	$1, R8, ks192
	TBNZ	$2, R8, ks256
	// AES-128: copy the 4 key words, then 10 iterations of 4 words each.
	LDPW	(R9), (R4, R5)
	LDPW	8(R9), (R6, R7)
	STPW.P	(R4, R5), 8(R10)
	STPW.P	(R6, R7), 8(R10)
	MOVW	$0x1b, R14	// Rcon reduction constant (AES polynomial)
ks128Loop:
	// R4-R7 hold the previous 4 round-key words; compute the next 4.
	VMOV	R7, V2.S[0]	// lane 0 = w[i-1]
	WORD	$0x4E030042	// TBL V3.B16, [V2.B16], V2.B16 (RotWord + inv ShiftRows)
	AESE	V0.B16, V2.B16	// Use AES to compute the SBOX
	EORW	R13, R4	// w[i-4] ^= Rcon
	// Next Rcon = Rcon*x in GF(2^8): double, and if bit 8 was set the
	// reduced value is exactly 0x1b.
	LSLW	$1, R13	// Compute next Rcon
	ANDSW	$0x100, R13, ZR
	CSELW	NE, R14, R13, R13	// Fake modulo
	SUBS	$1, R8	// flags feed the BNE below
	VMOV	V2.S[0], R0	// R0 = SubWord(RotWord(w[i-1]))
	EORW	R0, R4
	EORW	R4, R5
	EORW	R5, R6
	EORW	R6, R7
	STPW.P	(R4, R5), 8(R10)
	STPW.P	(R6, R7), 8(R10)
	BNE	ks128Loop
	CBZ	R11, ksDone	// If dec is nil we are done
	// Rewind to the start of the 11 round keys (11*16 = 176 bytes).
	SUB	$176, R10
	// Decryption keys are encryption keys with InverseMixColumns applied,
	// stored in reverse order; the first and last keys are copied as-is.
	VLD1.P	64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
	VMOV	V0.B16, V7.B16	// enc[0] untouched -> dec[10]
	AESIMC	V1.B16, V6.B16
	AESIMC	V2.B16, V5.B16
	AESIMC	V3.B16, V4.B16
	VLD1.P	64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
	AESIMC	V0.B16, V11.B16
	AESIMC	V1.B16, V10.B16
	AESIMC	V2.B16, V9.B16
	AESIMC	V3.B16, V8.B16
	VLD1	(R10), [V0.B16, V1.B16, V2.B16]
	AESIMC	V0.B16, V14.B16
	AESIMC	V1.B16, V13.B16
	VMOV	V2.B16, V12.B16	// enc[10] untouched -> dec[0]
	VST1.P	[V12.B16, V13.B16, V14.B16], 48(R11)
	VST1.P	[V8.B16, V9.B16, V10.B16, V11.B16], 64(R11)
	VST1	[V4.B16, V5.B16, V6.B16, V7.B16], (R11)
	B	ksDone
ks192:
	// AES-192: copy the 6 key words, then 8 iterations of 6 words each.
	LDPW	(R9), (R2, R3)
	LDPW	8(R9), (R4, R5)
	LDPW	16(R9), (R6, R7)
	STPW.P	(R2, R3), 8(R10)
	STPW.P	(R4, R5), 8(R10)
	SUB	$4, R8	// loop count = nr - 4 = 8
ks192Loop:
	STPW.P	(R6, R7), 8(R10)
	VMOV	R7, V2.S[0]
	WORD	$0x4E030042	// TBL V3.B16, [V2.B16], V2.B16 (RotWord + inv ShiftRows)
	AESE	V0.B16, V2.B16
	EORW	R13, R2
	// Only 8 Rcons are needed (max 0x80), so doubling never overflows
	// and no reduction step is required here.
	LSLW	$1, R13
	SUBS	$1, R8
	VMOV	V2.S[0], R0
	EORW	R0, R2
	EORW	R2, R3
	EORW	R3, R4
	EORW	R4, R5
	EORW	R5, R6
	EORW	R6, R7
	STPW.P	(R2, R3), 8(R10)
	STPW.P	(R4, R5), 8(R10)
	BNE	ks192Loop
	CBZ	R11, ksDone
	// Rewind to the start of the 13 round keys (13*16 = 208 bytes) and
	// build the reversed, InvMixColumns'd decryption schedule.
	SUB	$208, R10
	VLD1.P	64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
	VMOV	V0.B16, V7.B16	// enc[0] untouched -> dec[12]
	AESIMC	V1.B16, V6.B16
	AESIMC	V2.B16, V5.B16
	AESIMC	V3.B16, V4.B16
	VLD1.P	64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
	AESIMC	V0.B16, V11.B16
	AESIMC	V1.B16, V10.B16
	AESIMC	V2.B16, V9.B16
	AESIMC	V3.B16, V8.B16
	VLD1.P	64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
	AESIMC	V0.B16, V15.B16
	AESIMC	V1.B16, V14.B16
	AESIMC	V2.B16, V13.B16
	AESIMC	V3.B16, V12.B16
	VLD1	(R10), [V0.B16]
	VST1.P	[V0.B16], 16(R11)	// enc[12] untouched -> dec[0]
	VST1.P	[V12.B16, V13.B16, V14.B16, V15.B16], 64(R11)
	VST1.P	[V8.B16, V9.B16, V10.B16, V11.B16], 64(R11)
	VST1	[V4.B16, V5.B16, V6.B16, V7.B16], (R11)
	B	ksDone
ks256:
	// AES-256: also needs plain SubWord (no RotWord) mid-group, via the
	// invSRows table kept in V4.
	LDP	invSRows<>(SB), (R0, R1)
	VMOV	R0, V4.D[0]
	VMOV	R1, V4.D[1]
	// Copy the 8 key words, then 7 iterations of 8 words each.
	LDPW	(R9), (R0, R1)
	LDPW	8(R9), (R2, R3)
	LDPW	16(R9), (R4, R5)
	LDPW	24(R9), (R6, R7)
	STPW.P	(R0, R1), 8(R10)
	STPW.P	(R2, R3), 8(R10)
	SUB	$7, R8	// loop count = nr - 7 = 7
ks256Loop:
	STPW.P	(R4, R5), 8(R10)
	STPW.P	(R6, R7), 8(R10)
	VMOV	R7, V2.S[0]
	WORD	$0x4E030042	// TBL V3.B16, [V2.B16], V2.B16 (RotWord + inv ShiftRows)
	AESE	V0.B16, V2.B16
	EORW	R13, R0
	// 7 Rcons max (up to 0x40): doubling never overflows, no reduction.
	LSLW	$1, R13
	SUBS	$1, R8
	VMOV	V2.S[0], R9	// R9 (key ptr) reused as scratch: key fully loaded
	EORW	R9, R0
	EORW	R0, R1
	EORW	R1, R2
	EORW	R2, R3
	// Second half of the group: SubWord without RotWord, using V4.
	VMOV	R3, V2.S[0]
	WORD	$0x4E040042	// TBL V4.B16, [V2.B16], V2.B16 (inv ShiftRows only)
	AESE	V0.B16, V2.B16
	VMOV	V2.S[0], R9
	EORW	R9, R4
	EORW	R4, R5
	EORW	R5, R6
	EORW	R6, R7
	STPW.P	(R0, R1), 8(R10)
	STPW.P	(R2, R3), 8(R10)
	BNE	ks256Loop
	CBZ	R11, ksDone
	// Rewind to the start of the 15 round keys (15*16 = 240 bytes) and
	// build the reversed, InvMixColumns'd decryption schedule.
	SUB	$240, R10
	VLD1.P	64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
	VMOV	V0.B16, V7.B16	// enc[0] untouched -> dec[14]
	AESIMC	V1.B16, V6.B16
	AESIMC	V2.B16, V5.B16
	AESIMC	V3.B16, V4.B16
	VLD1.P	64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
	AESIMC	V0.B16, V11.B16
	AESIMC	V1.B16, V10.B16
	AESIMC	V2.B16, V9.B16
	AESIMC	V3.B16, V8.B16
	VLD1.P	64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
	AESIMC	V0.B16, V15.B16
	AESIMC	V1.B16, V14.B16
	AESIMC	V2.B16, V13.B16
	AESIMC	V3.B16, V12.B16
	VLD1	(R10), [V0.B16, V1.B16, V2.B16]
	AESIMC	V0.B16, V18.B16
	AESIMC	V1.B16, V17.B16
	VMOV	V2.B16, V16.B16	// enc[14] untouched -> dec[0]
	VST1.P	[V16.B16, V17.B16, V18.B16], 48(R11)
	VST1.P	[V12.B16, V13.B16, V14.B16, V15.B16], 64(R11)
	VST1.P	[V8.B16, V9.B16, V10.B16, V11.B16], 64(R11)
	VST1	[V4.B16, V5.B16, V6.B16, V7.B16], (R11)
ksDone:
	RET