; AesOpt.asm -- AES optimized code for x86 AES hardware instructions
; 2021-12-25 : Igor Pavlov : Public domain

include 7zAsm.asm

; use_vaes_256 enables the 256-bit (ymm) VAES code path.
; It is defined when __ASMC__ is defined, or when the symbol ymm0 is defined
; (presumably: when the assembler recognizes AVX register names -- confirm).
ifdef __ASMC__
  use_vaes_256 equ 1
else
ifdef ymm0
  use_vaes_256 equ 1
endif
endif


; Build-time diagnostics: report the selected code path, target and ABI.
ifdef use_vaes_256
  ECHO "++ VAES 256"
else
  ECHO "-- NO VAES 256"
endif

ifdef x64
  ECHO "x86-64"
else
  ECHO "x86"
if (IS_CDECL gt 0)
  ECHO "ABI : CDECL"
else
  ECHO "ABI : no CDECL : FASTCALL"
endif
endif

if (IS_LINUX gt 0)
  ECHO "ABI : LINUX"
else
  ECHO "ABI : WINDOWS"
endif

MY_ASM_START

ifndef x64
    .686
    .xmm
endif


; MY_ALIGN EQU ALIGN(64)
MY_ALIGN EQU

SEG_ALIGN EQU MY_ALIGN

; MY_SEG_PROC name, numParams
;   Opens procedure `name` through MY_PROC. The per-procedure code segment
;   lines are commented out, so this currently only forwards to MY_PROC.
MY_SEG_PROC macro name:req, numParams:req
    ; seg_name equ @CatStr(_TEXT$, name)
    ; seg_name SEGMENT SEG_ALIGN 'CODE'
    MY_PROC name, numParams
endm

; MY_SEG_ENDP
;   Counterpart of MY_SEG_PROC. Expands to nothing while the segment lines
;   are commented out.
MY_SEG_ENDP macro
    ; seg_name ENDS
endm


NUM_AES_KEYS_MAX equ 15

; the number of push operators in function PROLOG
if (IS_LINUX eq 0) or (IS_X64 eq 0)
num_regs_push       equ 2
; offset from r4 (after the prolog pushes) to the first byte above the
; return address: return address + num_regs_push saved registers
stack_param_offset  equ (REG_SIZE * (1 + num_regs_push))
endif

; num_param: operand from which MY_PROLOG loads rN (the third argument).
ifdef x64
    num_param   equ REG_ABI_PARAM_2
else
  if (IS_CDECL gt 0)
    ;   size_t     size
    ;   void *     data
    ;   UInt32 *   aes
    ;   ret-ip <- (r4)
    aes_OFFS    equ (stack_param_offset)
    data_OFFS   equ (REG_SIZE + aes_OFFS)
    size_OFFS   equ (REG_SIZE + data_OFFS)
    num_param   equ [r4 + size_OFFS]
  else
    num_param   equ [r4 + stack_param_offset]
  endif
endif

; ---- register roles shared by all procedures in this file ----
keys      equ  REG_PARAM_0  ; r1
rD        equ  REG_PARAM_1  ; r2
rN        equ  r0           ; loaded from num_param by MY_PROLOG

koffs_x   equ  x7           ; running key offset (x7 / r7 views)
koffs_r   equ  r7

ksize_x   equ  x6           ; set by MY_PROLOG: [keys + 16] shl 5
ksize_r   equ  r6

keys2     equ  r3

state     equ  xmm0
key       equ  xmm0         ; same register as state
key_ymm   equ  ymm0
key_ymm_n equ  0            ; register number of key_ymm

; ways: number of vector registers processed together by the WOP macros
ifdef x64
        ways = 11
else
        ways = 4
endif

ways_start_reg equ 1

; iv lives in the first register after the `ways` working registers
iv      equ     @CatStr(xmm, %(ways_start_reg + ways))
iv_ymm  equ     @CatStr(ymm, %(ways_start_reg + ways))


; WOP op, op2
;   Expands `op xmm<ways_start_reg + i>, op2` for i = 0 .. ways-1.
;   `i` is an assembly-time variable that stays visible to op2 and to `op`
;   itself when `op` is a macro, so operands such as [rD + i * 16] select a
;   different address for every register.
WOP macro op, op2
    i = 0
    rept ways
        op      @CatStr(xmm, %(ways_start_reg + i)), op2
        i = i + 1
    endm
endm


ifndef ABI_LINUX
ifdef x64

; we use 32 bytes of home space in stack in WIN64-x64
NUM_HOME_MM_REGS   equ (32 / 16)
; we preserve xmm registers starting from xmm6 in WIN64-x64
MM_START_SAVE_REG  equ 6

; SAVE_XMM num_used_mm_regs
;   Saves xmm6 .. xmm<num_used_mm_regs - 1>.
;   The first NUM_HOME_MM_REGS registers go to [r4 + stack_param_offset ...];
;   before storing the remaining ones r4 is lowered by stack_offset and they
;   are stored at [r4 + 0 ...].
;   Sets the assembly-time variables num_save_mm_regs, num_save_mm_regs2 and
;   stack_offset, which RESTORE_XMM reads later.
SAVE_XMM macro num_used_mm_regs:req
  num_save_mm_regs = num_used_mm_regs - MM_START_SAVE_REG
  if num_save_mm_regs GT 0
    num_save_mm_regs2 = num_save_mm_regs - NUM_HOME_MM_REGS
    ; RSP is (16*x + 8) after entering the function in WIN64-x64
    stack_offset = 16 * num_save_mm_regs2 + (stack_param_offset mod 16)

    i = 0
    rept num_save_mm_regs

      if i eq NUM_HOME_MM_REGS
        sub     r4, stack_offset
      endif

      if i lt NUM_HOME_MM_REGS
        movdqa  [r4 + stack_param_offset + i * 16], @CatStr(xmm, %(MM_START_SAVE_REG + i))
      else
        movdqa  [r4 + (i - NUM_HOME_MM_REGS) * 16], @CatStr(xmm, %(MM_START_SAVE_REG + i))
      endif

      i = i + 1
    endm
  endif
endm

; RESTORE_XMM num_used_mm_regs
;   Reverses SAVE_XMM: reloads the registers stored at [r4 + 0 ...], raises r4
;   by stack_offset, then reloads the registers stored above the return
;   address.
;   The macro argument is not referenced in the body; the macro relies on
;   num_save_mm_regs, num_save_mm_regs2 and stack_offset left behind by the
;   preceding SAVE_XMM expansion.
RESTORE_XMM macro num_used_mm_regs:req
  if num_save_mm_regs GT 0
    i = 0
    if num_save_mm_regs2 GT 0
      rept num_save_mm_regs2
        movdqa  @CatStr(xmm, %(MM_START_SAVE_REG + NUM_HOME_MM_REGS + i)), [r4 + i * 16]
        i = i + 1
      endm
        add     r4, stack_offset
    endif

    ; i is num_save_mm_regs2 here if the loop above ran, otherwise 0
    num_low_regs = num_save_mm_regs - i
    i = 0
      rept num_low_regs
        movdqa  @CatStr(xmm, %(MM_START_SAVE_REG + i)), [r4 + stack_param_offset + i * 16]
        i = i + 1
      endm
  endif
endm

endif ; x64
endif ; ABI_LINUX


; MY_PROLOG num_used_mm_regs
;   Common function entry:
;     - pushes r6 and r7 (skipped only for Linux x64),
;     - loads rN from num_param,
;     - x86 cdecl: loads rD and keys from the stack,
;       x64 Linux: expands MY_ABI_LINUX_TO_WIN_2,
;     - x64 non-Linux: saves xmm registers via SAVE_XMM,
;     - sets ksize_x = [keys + 16] shl 5.
MY_PROLOG macro num_used_mm_regs:req
        ; num_regs_push: must be equal to the number of push operators
        ; push    r3
        ; push    r5
    if (IS_LINUX eq 0) or (IS_X64 eq 0)
        push    r6
        push    r7
    endif

        mov     rN, num_param  ; don't move it; num_param can use stack pointer (r4)

    if (IS_X64 eq 0)
      if (IS_CDECL gt 0)
        mov     rD,   [r4 + data_OFFS]
        mov     keys, [r4 + aes_OFFS]
      endif
    elseif (IS_LINUX gt 0)
        MY_ABI_LINUX_TO_WIN_2
    endif


    ifndef ABI_LINUX
    ifdef x64
        SAVE_XMM num_used_mm_regs
    endif
    endif

        mov     ksize_x, [keys + 16]
        shl     ksize_x, 5
endm


; MY_EPILOG
;   Common function exit: restores xmm registers (x64 non-Linux), pops r7 and
;   r6 in reverse order of the prolog pushes, then expands MY_ENDP.
MY_EPILOG macro
    ifndef ABI_LINUX
    ifdef x64
        RESTORE_XMM num_save_mm_regs
    endif
    endif

    if (IS_LINUX eq 0) or (IS_X64 eq 0)
        pop     r7
        pop     r6
    endif
        ; pop     r5
        ; pop     r3
    MY_ENDP
endm


; OP_KEY op, offs
;   Applies `op` to `state` with the 16-byte memory operand [keys + offs].
OP_KEY macro op:req, offs:req
        op      state, [keys + offs]
endm


; WOP_KEY op, offs
;   Loads [keys + offs] into `key` (xmm0) once, then applies `op` with that
;   key to all `ways` working registers.
WOP_KEY macro op:req, offs:req
        movdqa  key, [keys + offs]
        WOP     op, key
endm


; ---------- AES-CBC Decode ----------


; Per-register helpers for WOP; both use WOP's current `i`.
; The second parameter is a placeholder and is not referenced.
XOR_WITH_DATA macro reg, _ppp_
        pxor    reg, [rD + i * 16]
endm

WRITE_TO_DATA macro reg, _ppp_
        movdqa  [rD + i * 16], reg
endm


; state0    equ  @CatStr(xmm, %(ways_start_reg))

; CBC register allocation: key0 and key_last follow iv
key0            equ  @CatStr(xmm, %(ways_start_reg + ways + 1))
key0_ymm        equ  @CatStr(ymm, %(ways_start_reg + ways + 1))

key_last        equ  @CatStr(xmm, %(ways_start_reg + ways + 2))
key_last_ymm    equ  @CatStr(ymm, %(ways_start_reg + ways + 2))
key_last_ymm_n  equ  (ways_start_reg + ways + 2)

; number of xmm registers used by the CBC decode code (xmm0 .. key_last)
NUM_CBC_REGS    equ  (ways_start_reg + ways + 3)
; AesCbc_Decode_HW (3 parameters: keys, rD, rN -- see MY_PROLOG)
;   In-place AES-CBC decryption of rN 16-byte blocks at rD.
;   [keys]      : IV, read on entry and written back on exit
;   [keys + 16] : value that MY_PROLOG turns into ksize_x
;   [keys + 32] : round keys (keys is advanced by 32 right after the IV load)
;   Main loop decrypts `ways` blocks per iteration; the tail loop decrypts one
;   block per iteration.
;   Labels declared with '::' are jump targets for AesCbc_Decode_HW_256.
MY_SEG_PROC AesCbc_Decode_HW, 3

    AesCbc_Decode_HW_start::
        MY_PROLOG NUM_CBC_REGS

    AesCbc_Decode_HW_start_2::
        movdqa  iv, [keys]
        add     keys, 32

        movdqa  key0, [keys + 1 * ksize_r]      ; key applied first (pxor)
        movdqa  key_last, [keys]                ; key applied last (aesdeclast)
        sub     ksize_x, 16

        jmp     check2
    align 16
    nextBlocks2:
        WOP     movdqa, [rD + i * 16]           ; load `ways` blocks
        mov     koffs_x, ksize_x
        ; WOP_KEY pxor, ksize_r + 16
        WOP     pxor, key0
    ; align 16
    @@:
        ; one aesdec round per iteration, keys at [keys + koffs_r] down to [keys + 16]
        WOP_KEY aesdec, 1 * koffs_r
        sub     koffs_r, 16
        jnz     @B
        ; WOP_KEY aesdeclast, 0
        WOP     aesdeclast, key_last

        ; CBC chaining: first block uses iv, block i uses the stored block i - 1
        pxor    @CatStr(xmm, %(ways_start_reg)), iv
    i = 1
    rept ways - 1
        pxor    @CatStr(xmm, %(ways_start_reg + i)), [rD + i * 16 - 16]
        i = i + 1
    endm
        ; keep the last input block as next iv before the data is overwritten
        movdqa  iv, [rD + ways * 16 - 16]
        WOP     WRITE_TO_DATA

        add     rD, ways * 16
    AesCbc_Decode_HW_start_3::
    check2:
        sub     rN, ways
        jnc     nextBlocks2
        add     rN, ways                        ; undo the failed subtraction

        sub     ksize_x, 16                     ; tail loop handles 2 rounds per pass

        jmp     check
    nextBlock:
        movdqa  state, [rD]
        mov     koffs_x, ksize_x
        ; OP_KEY  pxor, 1 * ksize_r + 32
        pxor    state, key0
        ; movdqa  state0, [rD]
        ; movdqa  state, key0
        ; pxor    state, state0
    @@:
        OP_KEY  aesdec, 1 * koffs_r + 16
        OP_KEY  aesdec, 1 * koffs_r
        sub     koffs_r, 32
        jnz     @B
        OP_KEY  aesdec, 16
        ; OP_KEY  aesdeclast, 0
        aesdeclast state, key_last

        pxor    state, iv
        movdqa  iv, [rD]                        ; input block becomes next iv
        ; movdqa  iv, state0
        movdqa  [rD], state

        add     rD, 16
    check:
        sub     rN, 1
        jnc     nextBlock

        movdqa  [keys - 32], iv                 ; store iv back (keys was advanced by 32)
MY_EPILOG




; ---------- AVX ----------


; AVX__WOP_n op
;   Expands `op <register number>` for each of the `ways` working registers.
;   The argument is the numeric expression (ways_start_reg + i), as needed by
;   the byte-encoded MY_vaes* macros.
AVX__WOP_n macro op
    i = 0
    rept ways
        op      (ways_start_reg + i)
        i = i + 1
    endm
endm

; AVX__WOP op
;   Expands `op ymm<ways_start_reg + i>` for i = 0 .. ways-1.
AVX__WOP macro op
    i = 0
    rept ways
        op      @CatStr(ymm, %(ways_start_reg + i))
        i = i + 1
    endm
endm


; AVX__WOP_KEY op, offs
;   Loads the 32-byte key at [keys2 + offs] into key_ymm, then expands `op`
;   for every working register number.
AVX__WOP_KEY macro op:req, offs:req
        vmovdqa  key_ymm, ymmword ptr [keys2 + offs]
        AVX__WOP_n op
endm


; reg = key0_ymm xor (32 bytes of data for register i)
AVX__CBC_START macro reg
        ; vpxor   reg, key_ymm, ymmword ptr [rD + 32 * i]
        vpxor   reg, key0_ymm, ymmword ptr [rD + 32 * i]
endm

; CBC chaining for register i: register 0 is XORed with iv_ymm, every other
; register with the 32 bytes that start 16 bytes before its own data.
AVX__CBC_END macro reg
    if i eq 0
        vpxor   reg, reg, iv_ymm
    else
        vpxor   reg, reg, ymmword ptr [rD + i * 32 - 16]
    endif
endm


AVX__WRITE_TO_DATA macro reg
        vmovdqu ymmword ptr [rD + 32 * i], reg
endm

AVX__XOR_WITH_DATA macro reg
        vpxor   reg, reg, ymmword ptr [rD + 32 * i]
endm

; Advances iv_ymm by one_ymm (64-bit lane add) and sets reg = iv_ymm xor key0_ymm.
AVX__CTR_START macro reg
        vpaddq  iv_ymm, iv_ymm, one_ymm
        ; vpxor   reg, iv_ymm, key_ymm
        vpxor   reg, iv_ymm, key0_ymm
endm


; MY_VAES_INSTR_2 cmd, dest, a1, a2
;   Emits a register-only 256-bit VEX instruction as raw bytes.
;   cmd is the opcode byte; dest, a1, a2 are ymm register NUMBERS.
;     0C4h    : 3-byte VEX prefix
;     byte 2  : inverted R (dest >= 8) and B (a2 >= 8) bits, X bit left set,
;               opcode map 2 (0F 38)
;     byte 3  : W = 0, inverted vvvv = a1, L = 1 (256-bit), pp = 01 (66h)
;     cmd     : opcode
;     ModRM   : mod = 11b, reg = dest, rm = a2
;   NOTE(review): presumably hand-encoded so the file assembles with
;   assemblers that lack the VAES mnemonics -- confirm.
MY_VAES_INSTR_2 macro cmd, dest, a1, a2
  db 0c4H
  db 2 + 040H + 020h * (1 - (a2) / 8) + 080h * (1 - (dest) / 8)
  db 5 + 8 * ((not (a1)) and 15)
  db cmd
  db 0c0H + 8 * ((dest) and 7) + ((a2) and 7)
endm

; Two-operand form: dest = cmd(dest, a)
MY_VAES_INSTR macro cmd, dest, a
        MY_VAES_INSTR_2  cmd, dest, dest, a
endm

MY_vaesenc macro dest, a
        MY_VAES_INSTR  0dcH, dest, a
endm
MY_vaesenclast macro dest, a
        MY_VAES_INSTR  0ddH, dest, a
endm
MY_vaesdec macro dest, a
        MY_VAES_INSTR  0deH, dest, a
endm
MY_vaesdeclast macro dest, a
        MY_VAES_INSTR  0dfH, dest, a
endm


; Round helpers for AVX__WOP_n; reg is a register number.
AVX__VAES_DEC macro reg
        MY_vaesdec reg, key_ymm_n
endm

AVX__VAES_DEC_LAST_key_last macro reg
        ; MY_vaesdeclast reg, key_ymm_n
        MY_vaesdeclast reg, key_last_ymm_n
endm

AVX__VAES_ENC macro reg
        MY_vaesenc reg, key_ymm_n
endm

AVX__VAES_ENC_LAST macro reg
        MY_vaesenclast reg, key_ymm_n
endm

; Replaces the high 128-bit lane of dest with src.
AVX__vinserti128_TO_HIGH macro dest, src
        vinserti128  dest, dest, src, 1
endm


; AesCbc_Decode_HW_256 (3 parameters, same as AesCbc_Decode_HW)
;   Without use_vaes_256: jumps straight to AesCbc_Decode_HW_start.
;   With use_vaes_256:
;     - fewer than ways * 2 blocks: continues in AesCbc_Decode_HW_start_2,
;     - otherwise broadcasts each 16-byte round key to a 32-byte copy in a
;       32-byte aligned stack buffer at keys2, decrypts ways * 2 blocks per
;       iteration in ymm registers, then releases the buffer and jumps to
;       AesCbc_Decode_HW_start_3 for the remaining blocks and the epilog.
MY_PROC AesCbc_Decode_HW_256, 3
  ifdef use_vaes_256
        MY_PROLOG NUM_CBC_REGS

        cmp     rN, ways * 2
        jb      AesCbc_Decode_HW_start_2

        vmovdqa iv, xmmword ptr [keys]
        add     keys, 32

        vbroadcasti128  key0_ymm, xmmword ptr [keys + 1 * ksize_r]
        vbroadcasti128  key_last_ymm, xmmword ptr [keys]
        sub     ksize_x, 16
        mov     koffs_x, ksize_x
        add     ksize_x, ksize_x                ; offsets into the 32-byte-per-key buffer

        AVX_STACK_SUB = ((NUM_AES_KEYS_MAX + 1 - 2) * 32)
        push    keys2
        sub     r4, AVX_STACK_SUB
        ; sub     r4, 32
        ; sub     r4, ksize_r
        ; lea     keys2, [r4 + 32]
        mov     keys2, r4
        and     keys2, -32                      ; round down to a 32-byte boundary
    broad:
        ; koffs_r is never 0 inside this loop, so the lowest store is at keys2 + 32
        vbroadcasti128  key_ymm, xmmword ptr [keys + 1 * koffs_r]
        vmovdqa         ymmword ptr [keys2 + koffs_r * 2], key_ymm
        sub     koffs_r, 16
        ; jnc     broad
        jnz     broad

        sub     rN, ways * 2

    align 16
    avx_cbcdec_nextBlock2:
        mov     koffs_x, ksize_x
        ; AVX__WOP_KEY    AVX__CBC_START, 1 * koffs_r + 32
        AVX__WOP        AVX__CBC_START
    @@:
        AVX__WOP_KEY    AVX__VAES_DEC, 1 * koffs_r
        sub     koffs_r, 32
        jnz     @B
        ; AVX__WOP_KEY    AVX__VAES_DEC_LAST, 0
        AVX__WOP_n      AVX__VAES_DEC_LAST_key_last

        ; iv_ymm = { low: iv, high: first input block } for register 0
        AVX__vinserti128_TO_HIGH  iv_ymm, xmmword ptr [rD]
        AVX__WOP        AVX__CBC_END

        ; keep the last input block as next iv before the data is overwritten
        vmovdqa         iv, xmmword ptr [rD + ways * 32 - 16]
        AVX__WOP        AVX__WRITE_TO_DATA

        add     rD, ways * 32
        sub     rN, ways * 2
        jnc     avx_cbcdec_nextBlock2
        add     rN, ways * 2                    ; undo the failed subtraction

        shr     ksize_x, 1                      ; back to 16-byte-per-key offsets

        ; lea     r4, [r4 + 1 * ksize_r + 32]
        add     r4, AVX_STACK_SUB
        pop     keys2

        vzeroupper
        jmp     AesCbc_Decode_HW_start_3
  else
        jmp     AesCbc_Decode_HW_start
  endif
MY_ENDP
MY_SEG_ENDP




; ---------- AES-CBC Encode ----------

e0  equ  xmm1

CENC_START_KEY     equ 2                ; first xmm register holding a cached round key
CENC_NUM_REG_KEYS  equ (3 * 2)          ; number of round keys cached in registers
; last_key equ @CatStr(xmm, %(CENC_START_KEY + CENC_NUM_REG_KEYS))

; AesCbc_Encode_HW (3 parameters: keys, rD, rN -- see MY_PROLOG)
;   In-place AES-CBC encryption of rN 16-byte blocks at rD, one block per
;   iteration. `state` holds the chaining value: loaded from [keys] on entry
;   and stored back to the same address on exit.
;   The first CENC_NUM_REG_KEYS round keys are kept in
;   xmm<CENC_START_KEY> .. xmm<CENC_START_KEY + CENC_NUM_REG_KEYS - 1>;
;   the remaining ones are read from memory with a negative offset that counts
;   up to 0.
MY_SEG_PROC AesCbc_Encode_HW, 3
        MY_PROLOG (CENC_START_KEY + CENC_NUM_REG_KEYS + 0)

        movdqa  state, [keys]
        add     keys, 32

        i = 0
        rept CENC_NUM_REG_KEYS
            movdqa  @CatStr(xmm, %(CENC_START_KEY + i)), [keys + i * 16]
            i = i + 1
        endm

        ; keys -> key used by aesenclast; ksize_r -> negative offset of the
        ; first round key that is not cached in a register
        add     keys, ksize_r
        neg     ksize_r
        add     ksize_r, (16 * CENC_NUM_REG_KEYS)
        ; movdqa  last_key, [keys]
        jmp     check_e

    align 16
    nextBlock_e:
        movdqa  e0, [rD]
        mov     koffs_r, ksize_r
        pxor    e0, @CatStr(xmm, %(CENC_START_KEY))     ; data xor first key
        pxor    state, e0                               ; xor with chaining value

        i = 1
        rept (CENC_NUM_REG_KEYS - 1)
            aesenc  state, @CatStr(xmm, %(CENC_START_KEY + i))
            i = i + 1
        endm

    @@:
        OP_KEY  aesenc, 1 * koffs_r
        OP_KEY  aesenc, 1 * koffs_r + 16
        add     koffs_r, 32
        jnz     @B
        OP_KEY  aesenclast, 0
        ; aesenclast state, last_key

        movdqa  [rD], state
        add     rD, 16
    check_e:
        sub     rN, 1
        jnc     nextBlock_e

        ; movdqa  [keys - 32], state
        ; the address below equals the value of `keys` at entry
        movdqa  [keys + 1 * ksize_r - (16 * CENC_NUM_REG_KEYS) - 32], state
MY_EPILOG
MY_SEG_ENDP



; ---------- AES-CTR ----------

ifdef x64
        ; ways = 11
endif


one equ @CatStr(xmm,
%(ways_start_reg + ways + 1)) 590*f6dc9357SAndroid Build Coastguard Workerone_ymm equ @CatStr(ymm, %(ways_start_reg + ways + 1)) 591*f6dc9357SAndroid Build Coastguard Workerkey0 equ @CatStr(xmm, %(ways_start_reg + ways + 2)) 592*f6dc9357SAndroid Build Coastguard Workerkey0_ymm equ @CatStr(ymm, %(ways_start_reg + ways + 2)) 593*f6dc9357SAndroid Build Coastguard WorkerNUM_CTR_REGS equ (ways_start_reg + ways + 3) 594*f6dc9357SAndroid Build Coastguard Worker 595*f6dc9357SAndroid Build Coastguard WorkerINIT_CTR macro reg, _ppp_ 596*f6dc9357SAndroid Build Coastguard Worker paddq iv, one 597*f6dc9357SAndroid Build Coastguard Worker movdqa reg, iv 598*f6dc9357SAndroid Build Coastguard Workerendm 599*f6dc9357SAndroid Build Coastguard Worker 600*f6dc9357SAndroid Build Coastguard Worker 601*f6dc9357SAndroid Build Coastguard WorkerMY_SEG_PROC AesCtr_Code_HW, 3 602*f6dc9357SAndroid Build Coastguard Worker Ctr_start:: 603*f6dc9357SAndroid Build Coastguard Worker MY_PROLOG NUM_CTR_REGS 604*f6dc9357SAndroid Build Coastguard Worker 605*f6dc9357SAndroid Build Coastguard Worker Ctr_start_2:: 606*f6dc9357SAndroid Build Coastguard Worker movdqa iv, [keys] 607*f6dc9357SAndroid Build Coastguard Worker add keys, 32 608*f6dc9357SAndroid Build Coastguard Worker movdqa key0, [keys] 609*f6dc9357SAndroid Build Coastguard Worker 610*f6dc9357SAndroid Build Coastguard Worker add keys, ksize_r 611*f6dc9357SAndroid Build Coastguard Worker neg ksize_r 612*f6dc9357SAndroid Build Coastguard Worker add ksize_r, 16 613*f6dc9357SAndroid Build Coastguard Worker 614*f6dc9357SAndroid Build Coastguard Worker Ctr_start_3:: 615*f6dc9357SAndroid Build Coastguard Worker mov koffs_x, 1 616*f6dc9357SAndroid Build Coastguard Worker movd one, koffs_x 617*f6dc9357SAndroid Build Coastguard Worker jmp check2_c 618*f6dc9357SAndroid Build Coastguard Worker 619*f6dc9357SAndroid Build Coastguard Worker align 16 620*f6dc9357SAndroid Build Coastguard Worker nextBlocks2_c: 621*f6dc9357SAndroid Build Coastguard Worker WOP 
INIT_CTR, 0 622*f6dc9357SAndroid Build Coastguard Worker mov koffs_r, ksize_r 623*f6dc9357SAndroid Build Coastguard Worker ; WOP_KEY pxor, 1 * koffs_r -16 624*f6dc9357SAndroid Build Coastguard Worker WOP pxor, key0 625*f6dc9357SAndroid Build Coastguard Worker @@: 626*f6dc9357SAndroid Build Coastguard Worker WOP_KEY aesenc, 1 * koffs_r 627*f6dc9357SAndroid Build Coastguard Worker add koffs_r, 16 628*f6dc9357SAndroid Build Coastguard Worker jnz @B 629*f6dc9357SAndroid Build Coastguard Worker WOP_KEY aesenclast, 0 630*f6dc9357SAndroid Build Coastguard Worker 631*f6dc9357SAndroid Build Coastguard Worker WOP XOR_WITH_DATA 632*f6dc9357SAndroid Build Coastguard Worker WOP WRITE_TO_DATA 633*f6dc9357SAndroid Build Coastguard Worker add rD, ways * 16 634*f6dc9357SAndroid Build Coastguard Worker check2_c: 635*f6dc9357SAndroid Build Coastguard Worker sub rN, ways 636*f6dc9357SAndroid Build Coastguard Worker jnc nextBlocks2_c 637*f6dc9357SAndroid Build Coastguard Worker add rN, ways 638*f6dc9357SAndroid Build Coastguard Worker 639*f6dc9357SAndroid Build Coastguard Worker sub keys, 16 640*f6dc9357SAndroid Build Coastguard Worker add ksize_r, 16 641*f6dc9357SAndroid Build Coastguard Worker 642*f6dc9357SAndroid Build Coastguard Worker jmp check_c 643*f6dc9357SAndroid Build Coastguard Worker 644*f6dc9357SAndroid Build Coastguard Worker ; align 16 645*f6dc9357SAndroid Build Coastguard Worker nextBlock_c: 646*f6dc9357SAndroid Build Coastguard Worker paddq iv, one 647*f6dc9357SAndroid Build Coastguard Worker ; movdqa state, [keys + 1 * koffs_r - 16] 648*f6dc9357SAndroid Build Coastguard Worker movdqa state, key0 649*f6dc9357SAndroid Build Coastguard Worker mov koffs_r, ksize_r 650*f6dc9357SAndroid Build Coastguard Worker pxor state, iv 651*f6dc9357SAndroid Build Coastguard Worker 652*f6dc9357SAndroid Build Coastguard Worker @@: 653*f6dc9357SAndroid Build Coastguard Worker OP_KEY aesenc, 1 * koffs_r 654*f6dc9357SAndroid Build Coastguard Worker OP_KEY aesenc, 1 * koffs_r + 16 
655*f6dc9357SAndroid Build Coastguard Worker add koffs_r, 32 656*f6dc9357SAndroid Build Coastguard Worker jnz @B 657*f6dc9357SAndroid Build Coastguard Worker OP_KEY aesenc, 0 658*f6dc9357SAndroid Build Coastguard Worker OP_KEY aesenclast, 16 659*f6dc9357SAndroid Build Coastguard Worker 660*f6dc9357SAndroid Build Coastguard Worker pxor state, [rD] 661*f6dc9357SAndroid Build Coastguard Worker movdqa [rD], state 662*f6dc9357SAndroid Build Coastguard Worker add rD, 16 663*f6dc9357SAndroid Build Coastguard Worker check_c: 664*f6dc9357SAndroid Build Coastguard Worker sub rN, 1 665*f6dc9357SAndroid Build Coastguard Worker jnc nextBlock_c 666*f6dc9357SAndroid Build Coastguard Worker 667*f6dc9357SAndroid Build Coastguard Worker ; movdqa [keys - 32], iv 668*f6dc9357SAndroid Build Coastguard Worker movdqa [keys + 1 * ksize_r - 16 - 32], iv 669*f6dc9357SAndroid Build Coastguard WorkerMY_EPILOG 670*f6dc9357SAndroid Build Coastguard Worker 671*f6dc9357SAndroid Build Coastguard Worker 672*f6dc9357SAndroid Build Coastguard WorkerMY_PROC AesCtr_Code_HW_256, 3 673*f6dc9357SAndroid Build Coastguard Worker ifdef use_vaes_256 674*f6dc9357SAndroid Build Coastguard Worker MY_PROLOG NUM_CTR_REGS 675*f6dc9357SAndroid Build Coastguard Worker 676*f6dc9357SAndroid Build Coastguard Worker cmp rN, ways * 2 677*f6dc9357SAndroid Build Coastguard Worker jb Ctr_start_2 678*f6dc9357SAndroid Build Coastguard Worker 679*f6dc9357SAndroid Build Coastguard Worker vbroadcasti128 iv_ymm, xmmword ptr [keys] 680*f6dc9357SAndroid Build Coastguard Worker add keys, 32 681*f6dc9357SAndroid Build Coastguard Worker vbroadcasti128 key0_ymm, xmmword ptr [keys] 682*f6dc9357SAndroid Build Coastguard Worker mov koffs_x, 1 683*f6dc9357SAndroid Build Coastguard Worker vmovd one, koffs_x 684*f6dc9357SAndroid Build Coastguard Worker vpsubq iv_ymm, iv_ymm, one_ymm 685*f6dc9357SAndroid Build Coastguard Worker vpaddq one, one, one 686*f6dc9357SAndroid Build Coastguard Worker AVX__vinserti128_TO_HIGH one_ymm, one 
687*f6dc9357SAndroid Build Coastguard Worker 688*f6dc9357SAndroid Build Coastguard Worker add keys, ksize_r 689*f6dc9357SAndroid Build Coastguard Worker sub ksize_x, 16 690*f6dc9357SAndroid Build Coastguard Worker neg ksize_r 691*f6dc9357SAndroid Build Coastguard Worker mov koffs_r, ksize_r 692*f6dc9357SAndroid Build Coastguard Worker add ksize_r, ksize_r 693*f6dc9357SAndroid Build Coastguard Worker 694*f6dc9357SAndroid Build Coastguard Worker AVX_STACK_SUB = ((NUM_AES_KEYS_MAX + 1 - 1) * 32) 695*f6dc9357SAndroid Build Coastguard Worker push keys2 696*f6dc9357SAndroid Build Coastguard Worker lea keys2, [r4 - 32] 697*f6dc9357SAndroid Build Coastguard Worker sub r4, AVX_STACK_SUB 698*f6dc9357SAndroid Build Coastguard Worker and keys2, -32 699*f6dc9357SAndroid Build Coastguard Worker vbroadcasti128 key_ymm, xmmword ptr [keys] 700*f6dc9357SAndroid Build Coastguard Worker vmovdqa ymmword ptr [keys2], key_ymm 701*f6dc9357SAndroid Build Coastguard Worker @@: 702*f6dc9357SAndroid Build Coastguard Worker vbroadcasti128 key_ymm, xmmword ptr [keys + 1 * koffs_r] 703*f6dc9357SAndroid Build Coastguard Worker vmovdqa ymmword ptr [keys2 + koffs_r * 2], key_ymm 704*f6dc9357SAndroid Build Coastguard Worker add koffs_r, 16 705*f6dc9357SAndroid Build Coastguard Worker jnz @B 706*f6dc9357SAndroid Build Coastguard Worker 707*f6dc9357SAndroid Build Coastguard Worker sub rN, ways * 2 708*f6dc9357SAndroid Build Coastguard Worker 709*f6dc9357SAndroid Build Coastguard Worker align 16 710*f6dc9357SAndroid Build Coastguard Worker avx_ctr_nextBlock2: 711*f6dc9357SAndroid Build Coastguard Worker mov koffs_r, ksize_r 712*f6dc9357SAndroid Build Coastguard Worker AVX__WOP AVX__CTR_START 713*f6dc9357SAndroid Build Coastguard Worker ; AVX__WOP_KEY AVX__CTR_START, 1 * koffs_r - 32 714*f6dc9357SAndroid Build Coastguard Worker @@: 715*f6dc9357SAndroid Build Coastguard Worker AVX__WOP_KEY AVX__VAES_ENC, 1 * koffs_r 716*f6dc9357SAndroid Build Coastguard Worker add koffs_r, 32 717*f6dc9357SAndroid Build 
Coastguard Worker jnz @B 718*f6dc9357SAndroid Build Coastguard Worker AVX__WOP_KEY AVX__VAES_ENC_LAST, 0 719*f6dc9357SAndroid Build Coastguard Worker 720*f6dc9357SAndroid Build Coastguard Worker AVX__WOP AVX__XOR_WITH_DATA 721*f6dc9357SAndroid Build Coastguard Worker AVX__WOP AVX__WRITE_TO_DATA 722*f6dc9357SAndroid Build Coastguard Worker 723*f6dc9357SAndroid Build Coastguard Worker add rD, ways * 32 724*f6dc9357SAndroid Build Coastguard Worker sub rN, ways * 2 725*f6dc9357SAndroid Build Coastguard Worker jnc avx_ctr_nextBlock2 726*f6dc9357SAndroid Build Coastguard Worker add rN, ways * 2 727*f6dc9357SAndroid Build Coastguard Worker 728*f6dc9357SAndroid Build Coastguard Worker vextracti128 iv, iv_ymm, 1 729*f6dc9357SAndroid Build Coastguard Worker sar ksize_r, 1 730*f6dc9357SAndroid Build Coastguard Worker 731*f6dc9357SAndroid Build Coastguard Worker add r4, AVX_STACK_SUB 732*f6dc9357SAndroid Build Coastguard Worker pop keys2 733*f6dc9357SAndroid Build Coastguard Worker 734*f6dc9357SAndroid Build Coastguard Worker vzeroupper 735*f6dc9357SAndroid Build Coastguard Worker jmp Ctr_start_3 736*f6dc9357SAndroid Build Coastguard Worker else 737*f6dc9357SAndroid Build Coastguard Worker jmp Ctr_start 738*f6dc9357SAndroid Build Coastguard Worker endif 739*f6dc9357SAndroid Build Coastguard WorkerMY_ENDP 740*f6dc9357SAndroid Build Coastguard WorkerMY_SEG_ENDP 741*f6dc9357SAndroid Build Coastguard Worker 742*f6dc9357SAndroid Build Coastguard Workerend 743