xref: /aosp_15_r20/external/lzma/Asm/x86/AesOpt.asm (revision f6dc9357d832569d4d1f5d24eacdb3935a1ae8e6)
1*f6dc9357SAndroid Build Coastguard Worker; AesOpt.asm -- AES optimized code for x86 AES hardware instructions
2*f6dc9357SAndroid Build Coastguard Worker; 2021-12-25 : Igor Pavlov : Public domain
3*f6dc9357SAndroid Build Coastguard Worker
4*f6dc9357SAndroid Build Coastguard Workerinclude 7zAsm.asm
5*f6dc9357SAndroid Build Coastguard Worker
; Assembly-time selection of the 256-bit VAES code path.
; use_vaes_256 is defined when __ASMC__ is defined, or otherwise when the
; symbol ymm0 is defined (presumably: the assembler knows YMM registers -
; TODO confirm). The ECHO lines only report the choice; they emit no code.
6*f6dc9357SAndroid Build Coastguard Workerifdef __ASMC__
7*f6dc9357SAndroid Build Coastguard Worker  use_vaes_256 equ 1
8*f6dc9357SAndroid Build Coastguard Workerelse
9*f6dc9357SAndroid Build Coastguard Workerifdef ymm0
10*f6dc9357SAndroid Build Coastguard Worker  use_vaes_256 equ 1
11*f6dc9357SAndroid Build Coastguard Workerendif
12*f6dc9357SAndroid Build Coastguard Workerendif
13*f6dc9357SAndroid Build Coastguard Worker
14*f6dc9357SAndroid Build Coastguard Worker
15*f6dc9357SAndroid Build Coastguard Workerifdef use_vaes_256
16*f6dc9357SAndroid Build Coastguard Worker  ECHO "++ VAES 256"
17*f6dc9357SAndroid Build Coastguard Workerelse
18*f6dc9357SAndroid Build Coastguard Worker  ECHO "-- NO VAES 256"
19*f6dc9357SAndroid Build Coastguard Workerendif
20*f6dc9357SAndroid Build Coastguard Worker
; Assembly-time diagnostics: report the target (x86-64 or x86), for x86 the
; calling convention selected by IS_CDECL, and the OS ABI selected by
; IS_LINUX. x64, IS_CDECL and IS_LINUX are not defined in this file
; (presumably they come from 7zAsm.asm). No code is emitted here.
21*f6dc9357SAndroid Build Coastguard Workerifdef x64
22*f6dc9357SAndroid Build Coastguard Worker  ECHO "x86-64"
23*f6dc9357SAndroid Build Coastguard Workerelse
24*f6dc9357SAndroid Build Coastguard Worker  ECHO "x86"
25*f6dc9357SAndroid Build Coastguard Workerif (IS_CDECL gt 0)
26*f6dc9357SAndroid Build Coastguard Worker  ECHO "ABI : CDECL"
27*f6dc9357SAndroid Build Coastguard Workerelse
28*f6dc9357SAndroid Build Coastguard Worker  ECHO "ABI : no CDECL : FASTCALL"
29*f6dc9357SAndroid Build Coastguard Workerendif
30*f6dc9357SAndroid Build Coastguard Workerendif
31*f6dc9357SAndroid Build Coastguard Worker
32*f6dc9357SAndroid Build Coastguard Workerif (IS_LINUX gt 0)
33*f6dc9357SAndroid Build Coastguard Worker  ECHO "ABI : LINUX"
34*f6dc9357SAndroid Build Coastguard Workerelse
35*f6dc9357SAndroid Build Coastguard Worker  ECHO "ABI : WINDOWS"
36*f6dc9357SAndroid Build Coastguard Workerendif
37*f6dc9357SAndroid Build Coastguard Worker
; Begin the code part of the file. MY_ASM_START is defined outside this file
; (presumably in 7zAsm.asm). For 32-bit builds, additionally enable the .686
; instruction set and XMM register/instruction support in the assembler.
38*f6dc9357SAndroid Build Coastguard WorkerMY_ASM_START
39*f6dc9357SAndroid Build Coastguard Worker
40*f6dc9357SAndroid Build Coastguard Workerifndef x64
41*f6dc9357SAndroid Build Coastguard Worker    .686
42*f6dc9357SAndroid Build Coastguard Worker    .xmm
43*f6dc9357SAndroid Build Coastguard Workerendif
44*f6dc9357SAndroid Build Coastguard Worker
45*f6dc9357SAndroid Build Coastguard Worker
; Alignment text intended for per-function code segments. The 64-byte variant
; is disabled, so MY_ALIGN (and therefore SEG_ALIGN) currently expands to
; nothing. SEG_ALIGN is only referenced from a commented-out SEGMENT line in
; MY_SEG_PROC.
46*f6dc9357SAndroid Build Coastguard Worker; MY_ALIGN EQU ALIGN(64)
47*f6dc9357SAndroid Build Coastguard WorkerMY_ALIGN EQU
48*f6dc9357SAndroid Build Coastguard Worker
49*f6dc9357SAndroid Build Coastguard WorkerSEG_ALIGN EQU MY_ALIGN
50*f6dc9357SAndroid Build Coastguard Worker
; MY_SEG_PROC name, numParams
;   Opens procedure `name` by forwarding both arguments to MY_PROC (defined
;   outside this file). The per-function segment declaration that gave this
;   macro its name is commented out, so no separate segment is created.
51*f6dc9357SAndroid Build Coastguard WorkerMY_SEG_PROC macro name:req, numParams:req
52*f6dc9357SAndroid Build Coastguard Worker    ; seg_name equ @CatStr(_TEXT$, name)
53*f6dc9357SAndroid Build Coastguard Worker    ; seg_name SEGMENT SEG_ALIGN 'CODE'
54*f6dc9357SAndroid Build Coastguard Worker    MY_PROC name, numParams
55*f6dc9357SAndroid Build Coastguard Workerendm
56*f6dc9357SAndroid Build Coastguard Worker
; MY_SEG_ENDP
;   Counterpart of MY_SEG_PROC. It currently expands to nothing because the
;   segment-closing line is commented out; the procedure itself is closed by
;   MY_ENDP / MY_EPILOG at the call sites.
57*f6dc9357SAndroid Build Coastguard WorkerMY_SEG_ENDP macro
58*f6dc9357SAndroid Build Coastguard Worker    ; seg_name ENDS
59*f6dc9357SAndroid Build Coastguard Workerendm
60*f6dc9357SAndroid Build Coastguard Worker
61*f6dc9357SAndroid Build Coastguard Worker
; NUM_AES_KEYS_MAX sizes the on-stack table of broadcast round keys used by
; the 256-bit path (see AVX_STACK_SUB in AesCbc_Decode_HW_256).
62*f6dc9357SAndroid Build Coastguard WorkerNUM_AES_KEYS_MAX equ 15
63*f6dc9357SAndroid Build Coastguard Worker
64*f6dc9357SAndroid Build Coastguard Worker; the number of push operators in function PROLOG
; Defined for every target except Linux x64, matching the condition under
; which MY_PROLOG pushes r6 and r7. stack_param_offset is the distance from
; the stack pointer (after those pushes) to the first slot above the return
; address: (1 return address + num_regs_push saved registers) * REG_SIZE.
65*f6dc9357SAndroid Build Coastguard Workerif (IS_LINUX eq 0) or (IS_X64 eq 0)
66*f6dc9357SAndroid Build Coastguard Workernum_regs_push   equ 2
67*f6dc9357SAndroid Build Coastguard Workerstack_param_offset equ (REG_SIZE * (1 + num_regs_push))
68*f6dc9357SAndroid Build Coastguard Workerendif
69*f6dc9357SAndroid Build Coastguard Worker
; num_param: operand from which MY_PROLOG loads the third argument (the
; block count) into rN.
;   x64          : the register named by REG_ABI_PARAM_2 (defined elsewhere).
;   x86, cdecl   : all three arguments are on the stack; the *_OFFS values
;                  are their offsets from r4 after the prolog pushes.
;   x86, no cdecl: only this argument is read from the stack, at
;                  [r4 + stack_param_offset].
70*f6dc9357SAndroid Build Coastguard Workerifdef x64
71*f6dc9357SAndroid Build Coastguard Worker    num_param   equ REG_ABI_PARAM_2
72*f6dc9357SAndroid Build Coastguard Workerelse
73*f6dc9357SAndroid Build Coastguard Worker  if (IS_CDECL gt 0)
74*f6dc9357SAndroid Build Coastguard Worker    ;   size_t     size
75*f6dc9357SAndroid Build Coastguard Worker    ;   void *     data
76*f6dc9357SAndroid Build Coastguard Worker    ;   UInt32 *   aes
77*f6dc9357SAndroid Build Coastguard Worker    ;   ret-ip <- (r4)
78*f6dc9357SAndroid Build Coastguard Worker    aes_OFFS    equ (stack_param_offset)
79*f6dc9357SAndroid Build Coastguard Worker    data_OFFS   equ (REG_SIZE + aes_OFFS)
80*f6dc9357SAndroid Build Coastguard Worker    size_OFFS   equ (REG_SIZE + data_OFFS)
81*f6dc9357SAndroid Build Coastguard Worker    num_param   equ [r4 + size_OFFS]
82*f6dc9357SAndroid Build Coastguard Worker  else
83*f6dc9357SAndroid Build Coastguard Worker    num_param   equ [r4 + stack_param_offset]
84*f6dc9357SAndroid Build Coastguard Worker  endif
85*f6dc9357SAndroid Build Coastguard Workerendif
86*f6dc9357SAndroid Build Coastguard Worker
; Register roles shared by the AES routines in this file.
;   keys    : pointer to the AES context (IV at [keys], a count at
;             [keys + 16], round keys from [keys + 32] - see MY_PROLOG and
;             AesCbc_Decode_HW for the accesses that establish this layout)
;   rD      : pointer to the data being processed in place
;   rN      : number of 16-byte blocks still to process
;   koffs_* : running byte offset into the round-key table
;   ksize_* : byte size derived from the count at [keys + 16]
;   keys2   : pointer to the on-stack table of broadcast keys (256-bit path)
; The _x / _r suffixes are presumably the 32-bit and native-width views of
; the same register (x6/r6, x7/r7 are defined in 7zAsm.asm) - TODO confirm.
87*f6dc9357SAndroid Build Coastguard Workerkeys    equ  REG_PARAM_0  ; r1
88*f6dc9357SAndroid Build Coastguard WorkerrD      equ  REG_PARAM_1  ; r2
89*f6dc9357SAndroid Build Coastguard WorkerrN      equ  r0
90*f6dc9357SAndroid Build Coastguard Worker
91*f6dc9357SAndroid Build Coastguard Workerkoffs_x equ  x7
92*f6dc9357SAndroid Build Coastguard Workerkoffs_r equ  r7
93*f6dc9357SAndroid Build Coastguard Worker
94*f6dc9357SAndroid Build Coastguard Workerksize_x equ  x6
95*f6dc9357SAndroid Build Coastguard Workerksize_r equ  r6
96*f6dc9357SAndroid Build Coastguard Worker
97*f6dc9357SAndroid Build Coastguard Workerkeys2   equ  r3
98*f6dc9357SAndroid Build Coastguard Worker
; state and key deliberately share xmm0: the single-block loops use it as
; the data register, the multi-block loops use it as the current round key.
; key_ymm_n is the register NUMBER of key_ymm, for the hand-encoded VAES
; macros that take numeric operands.
99*f6dc9357SAndroid Build Coastguard Workerstate   equ  xmm0
100*f6dc9357SAndroid Build Coastguard Workerkey     equ  xmm0
101*f6dc9357SAndroid Build Coastguard Workerkey_ymm equ  ymm0
102*f6dc9357SAndroid Build Coastguard Workerkey_ymm_n equ   0
103*f6dc9357SAndroid Build Coastguard Worker
; ways = number of data registers processed in parallel per loop iteration.
104*f6dc9357SAndroid Build Coastguard Workerifdef x64
105*f6dc9357SAndroid Build Coastguard Worker        ways = 11
106*f6dc9357SAndroid Build Coastguard Workerelse
107*f6dc9357SAndroid Build Coastguard Worker        ways = 4
108*f6dc9357SAndroid Build Coastguard Workerendif
109*f6dc9357SAndroid Build Coastguard Worker
; Data registers are xmm/ymm[ways_start_reg .. ways_start_reg + ways - 1];
; iv is the register immediately after them.
110*f6dc9357SAndroid Build Coastguard Workerways_start_reg equ 1
111*f6dc9357SAndroid Build Coastguard Worker
112*f6dc9357SAndroid Build Coastguard Workeriv      equ     @CatStr(xmm, %(ways_start_reg + ways))
113*f6dc9357SAndroid Build Coastguard Workeriv_ymm  equ     @CatStr(ymm, %(ways_start_reg + ways))
114*f6dc9357SAndroid Build Coastguard Worker
115*f6dc9357SAndroid Build Coastguard Worker
; WOP op, op2
;   Emits `op xmmK, op2` once for each of the `ways` data registers
;   (K = ways_start_reg + i, i = 0 .. ways-1).
;   `op` may be an instruction or a two-argument macro. `op2` may be empty,
;   and may itself reference the assembly-time counter `i`
;   (e.g. [rD + i * 16]), which is advanced after every emitted line.
;   Side effect: leaves the assembly-time variable i equal to ways.
116*f6dc9357SAndroid Build Coastguard WorkerWOP macro op, op2
117*f6dc9357SAndroid Build Coastguard Worker    i = 0
118*f6dc9357SAndroid Build Coastguard Worker    rept ways
119*f6dc9357SAndroid Build Coastguard Worker        op      @CatStr(xmm, %(ways_start_reg + i)), op2
120*f6dc9357SAndroid Build Coastguard Worker        i = i + 1
121*f6dc9357SAndroid Build Coastguard Worker    endm
122*f6dc9357SAndroid Build Coastguard Workerendm
123*f6dc9357SAndroid Build Coastguard Worker
124*f6dc9357SAndroid Build Coastguard Worker
; SAVE_XMM / RESTORE_XMM exist only for the non-Linux x64 build (WIN64),
; where xmm6 and above must be preserved by the callee.
125*f6dc9357SAndroid Build Coastguard Workerifndef ABI_LINUX
126*f6dc9357SAndroid Build Coastguard Workerifdef x64
127*f6dc9357SAndroid Build Coastguard Worker
128*f6dc9357SAndroid Build Coastguard Worker; we use 32 bytes of home space in stack in WIN64-x64
129*f6dc9357SAndroid Build Coastguard WorkerNUM_HOME_MM_REGS   equ (32 / 16)
130*f6dc9357SAndroid Build Coastguard Worker; we preserve xmm registers starting from xmm6 in WIN64-x64
131*f6dc9357SAndroid Build Coastguard WorkerMM_START_SAVE_REG  equ 6
132*f6dc9357SAndroid Build Coastguard Worker
; SAVE_XMM num_used_mm_regs
;   num_used_mm_regs: the function uses xmm0 .. xmm(num_used_mm_regs - 1).
;   Saves xmm6 .. xmm(num_used_mm_regs - 1). The first NUM_HOME_MM_REGS of
;   them go into the caller-provided home space at [r4 + stack_param_offset];
;   for the rest, r4 is lowered by stack_offset and they are stored from
;   [r4] upward.
;   Must be expanded after the prolog pushes: stack_param_offset assumes them.
;   Leaves the assembly-time variables num_save_mm_regs, num_save_mm_regs2
;   and stack_offset set for the matching RESTORE_XMM.
133*f6dc9357SAndroid Build Coastguard WorkerSAVE_XMM macro num_used_mm_regs:req
134*f6dc9357SAndroid Build Coastguard Worker  num_save_mm_regs = num_used_mm_regs - MM_START_SAVE_REG
135*f6dc9357SAndroid Build Coastguard Worker  if num_save_mm_regs GT 0
136*f6dc9357SAndroid Build Coastguard Worker    num_save_mm_regs2 = num_save_mm_regs - NUM_HOME_MM_REGS
137*f6dc9357SAndroid Build Coastguard Worker    ; RSP is (16*x + 8) after entering the function in WIN64-x64
; The (stack_param_offset mod 16) term compensates for that, so that r4 is
; 16-byte aligned after the `sub` and movdqa can be used on the save area.
138*f6dc9357SAndroid Build Coastguard Worker    stack_offset = 16 * num_save_mm_regs2 + (stack_param_offset mod 16)
139*f6dc9357SAndroid Build Coastguard Worker
140*f6dc9357SAndroid Build Coastguard Worker    i = 0
141*f6dc9357SAndroid Build Coastguard Worker    rept num_save_mm_regs
142*f6dc9357SAndroid Build Coastguard Worker
; The stack pointer is lowered exactly once, right after the home-space
; slots have been filled (their addresses are relative to the old r4).
143*f6dc9357SAndroid Build Coastguard Worker      if i eq NUM_HOME_MM_REGS
144*f6dc9357SAndroid Build Coastguard Worker        sub  r4, stack_offset
145*f6dc9357SAndroid Build Coastguard Worker      endif
146*f6dc9357SAndroid Build Coastguard Worker
147*f6dc9357SAndroid Build Coastguard Worker      if i lt NUM_HOME_MM_REGS
148*f6dc9357SAndroid Build Coastguard Worker        movdqa  [r4 + stack_param_offset + i * 16], @CatStr(xmm, %(MM_START_SAVE_REG + i))
149*f6dc9357SAndroid Build Coastguard Worker      else
150*f6dc9357SAndroid Build Coastguard Worker        movdqa  [r4 + (i - NUM_HOME_MM_REGS) * 16], @CatStr(xmm, %(MM_START_SAVE_REG + i))
151*f6dc9357SAndroid Build Coastguard Worker      endif
152*f6dc9357SAndroid Build Coastguard Worker
153*f6dc9357SAndroid Build Coastguard Worker      i = i + 1
154*f6dc9357SAndroid Build Coastguard Worker    endm
155*f6dc9357SAndroid Build Coastguard Worker  endif
156*f6dc9357SAndroid Build Coastguard Workerendm
157*f6dc9357SAndroid Build Coastguard Worker
; RESTORE_XMM num_used_mm_regs
;   Undoes the most recent SAVE_XMM: reloads the registers stored below the
;   home space, releases that stack area, then reloads the registers kept in
;   the home space.
;   NOTE(review): the num_used_mm_regs argument is not referenced in the
;   body; the macro relies entirely on num_save_mm_regs, num_save_mm_regs2
;   and stack_offset left behind by the preceding SAVE_XMM expansion.
158*f6dc9357SAndroid Build Coastguard WorkerRESTORE_XMM macro num_used_mm_regs:req
159*f6dc9357SAndroid Build Coastguard Worker  if num_save_mm_regs GT 0
160*f6dc9357SAndroid Build Coastguard Worker    i = 0
161*f6dc9357SAndroid Build Coastguard Worker    if num_save_mm_regs2 GT 0
162*f6dc9357SAndroid Build Coastguard Worker      rept num_save_mm_regs2
163*f6dc9357SAndroid Build Coastguard Worker        movdqa  @CatStr(xmm, %(MM_START_SAVE_REG + NUM_HOME_MM_REGS + i)), [r4 + i * 16]
164*f6dc9357SAndroid Build Coastguard Worker        i = i + 1
165*f6dc9357SAndroid Build Coastguard Worker      endm
166*f6dc9357SAndroid Build Coastguard Worker        add     r4, stack_offset
167*f6dc9357SAndroid Build Coastguard Worker    endif
168*f6dc9357SAndroid Build Coastguard Worker
; i is the number of registers already restored above (0 when none were
; stored below the home space), so num_low_regs is the home-space count.
169*f6dc9357SAndroid Build Coastguard Worker    num_low_regs = num_save_mm_regs - i
170*f6dc9357SAndroid Build Coastguard Worker    i = 0
171*f6dc9357SAndroid Build Coastguard Worker      rept num_low_regs
172*f6dc9357SAndroid Build Coastguard Worker        movdqa  @CatStr(xmm, %(MM_START_SAVE_REG + i)), [r4 + stack_param_offset + i * 16]
173*f6dc9357SAndroid Build Coastguard Worker        i = i + 1
174*f6dc9357SAndroid Build Coastguard Worker      endm
175*f6dc9357SAndroid Build Coastguard Worker  endif
176*f6dc9357SAndroid Build Coastguard Workerendm
177*f6dc9357SAndroid Build Coastguard Worker
178*f6dc9357SAndroid Build Coastguard Workerendif ; x64
179*f6dc9357SAndroid Build Coastguard Workerendif ; ABI_LINUX
180*f6dc9357SAndroid Build Coastguard Worker
181*f6dc9357SAndroid Build Coastguard Worker
; MY_PROLOG num_used_mm_regs
;   Common function entry for the AES routines.
;   num_used_mm_regs: number of xmm registers the function uses, counted
;   from xmm0; forwarded to SAVE_XMM on WIN64.
;   On exit: rN = block count, rD = data pointer, keys = context pointer,
;   ksize_x = (dword at [keys + 16]) * 32.
182*f6dc9357SAndroid Build Coastguard WorkerMY_PROLOG macro num_used_mm_regs:req
183*f6dc9357SAndroid Build Coastguard Worker        ; num_regs_push: must be equal to the number of push operators
184*f6dc9357SAndroid Build Coastguard Worker        ; push    r3
185*f6dc9357SAndroid Build Coastguard Worker        ; push    r5
; r6 and r7 are saved on every target except Linux x64.
186*f6dc9357SAndroid Build Coastguard Worker    if (IS_LINUX eq 0) or (IS_X64 eq 0)
187*f6dc9357SAndroid Build Coastguard Worker        push    r6
188*f6dc9357SAndroid Build Coastguard Worker        push    r7
189*f6dc9357SAndroid Build Coastguard Worker    endif
190*f6dc9357SAndroid Build Coastguard Worker
191*f6dc9357SAndroid Build Coastguard Worker        mov     rN, num_param  ; don't move it; num_param can use stack pointer (r4)
192*f6dc9357SAndroid Build Coastguard Worker
; x86 cdecl: the remaining two arguments are also fetched from the stack.
; x64 Linux: MY_ABI_LINUX_TO_WIN_2 is defined outside this file; presumably
; it moves the incoming argument registers into the ones this code expects.
193*f6dc9357SAndroid Build Coastguard Worker    if (IS_X64 eq 0)
194*f6dc9357SAndroid Build Coastguard Worker      if (IS_CDECL gt 0)
195*f6dc9357SAndroid Build Coastguard Worker        mov     rD,   [r4 + data_OFFS]
196*f6dc9357SAndroid Build Coastguard Worker        mov     keys, [r4 + aes_OFFS]
197*f6dc9357SAndroid Build Coastguard Worker      endif
198*f6dc9357SAndroid Build Coastguard Worker    elseif (IS_LINUX gt 0)
199*f6dc9357SAndroid Build Coastguard Worker        MY_ABI_LINUX_TO_WIN_2
200*f6dc9357SAndroid Build Coastguard Worker    endif
201*f6dc9357SAndroid Build Coastguard Worker
202*f6dc9357SAndroid Build Coastguard Worker
; WIN64 only: preserve the callee-saved xmm registers this function uses.
; This may lower r4, which is why num_param was read before this point.
203*f6dc9357SAndroid Build Coastguard Worker    ifndef ABI_LINUX
204*f6dc9357SAndroid Build Coastguard Worker    ifdef x64
205*f6dc9357SAndroid Build Coastguard Worker        SAVE_XMM num_used_mm_regs
206*f6dc9357SAndroid Build Coastguard Worker    endif
207*f6dc9357SAndroid Build Coastguard Worker    endif
208*f6dc9357SAndroid Build Coastguard Worker
; ksize = count * 32 bytes. Round keys are 16 bytes each, so the count at
; [keys + 16] presumably is the number of round-key pairs - TODO confirm.
209*f6dc9357SAndroid Build Coastguard Worker        mov     ksize_x, [keys + 16]
210*f6dc9357SAndroid Build Coastguard Worker        shl     ksize_x, 5
211*f6dc9357SAndroid Build Coastguard Workerendm
212*f6dc9357SAndroid Build Coastguard Worker
213*f6dc9357SAndroid Build Coastguard Worker
; MY_EPILOG
;   Common function exit, mirroring MY_PROLOG in reverse order: restore the
;   xmm registers saved by SAVE_XMM (WIN64 only), pop r7 and r6 (all targets
;   except Linux x64), then close the procedure with MY_ENDP (defined outside
;   this file).
;   NOTE(review): the argument passed to RESTORE_XMM is ignored by that
;   macro; the saved-register state comes from the last SAVE_XMM expansion.
214*f6dc9357SAndroid Build Coastguard WorkerMY_EPILOG macro
215*f6dc9357SAndroid Build Coastguard Worker    ifndef ABI_LINUX
216*f6dc9357SAndroid Build Coastguard Worker    ifdef x64
217*f6dc9357SAndroid Build Coastguard Worker        RESTORE_XMM num_save_mm_regs
218*f6dc9357SAndroid Build Coastguard Worker    endif
219*f6dc9357SAndroid Build Coastguard Worker    endif
220*f6dc9357SAndroid Build Coastguard Worker
221*f6dc9357SAndroid Build Coastguard Worker    if (IS_LINUX eq 0) or (IS_X64 eq 0)
222*f6dc9357SAndroid Build Coastguard Worker        pop     r7
223*f6dc9357SAndroid Build Coastguard Worker        pop     r6
224*f6dc9357SAndroid Build Coastguard Worker    endif
225*f6dc9357SAndroid Build Coastguard Worker        ; pop     r5
226*f6dc9357SAndroid Build Coastguard Worker        ; pop     r3
227*f6dc9357SAndroid Build Coastguard Worker    MY_ENDP
228*f6dc9357SAndroid Build Coastguard Workerendm
229*f6dc9357SAndroid Build Coastguard Worker
230*f6dc9357SAndroid Build Coastguard Worker
; OP_KEY op, offs
;   Single-block form: applies instruction `op` to `state` (xmm0) with the
;   16-byte round key read directly from memory at [keys + offs].
231*f6dc9357SAndroid Build Coastguard WorkerOP_KEY macro op:req, offs:req
232*f6dc9357SAndroid Build Coastguard Worker        op      state, [keys + offs]
233*f6dc9357SAndroid Build Coastguard Workerendm
234*f6dc9357SAndroid Build Coastguard Worker
235*f6dc9357SAndroid Build Coastguard Worker
; WOP_KEY op, offs
;   Multi-block form: loads the round key at [keys + offs] into `key` (xmm0)
;   once, then applies `op dataReg, key` to all `ways` data registers.
;   Clobbers xmm0, so it must not be used while `state` holds live data.
236*f6dc9357SAndroid Build Coastguard WorkerWOP_KEY macro op:req, offs:req
237*f6dc9357SAndroid Build Coastguard Worker        movdqa  key, [keys + offs]
238*f6dc9357SAndroid Build Coastguard Worker        WOP     op, key
239*f6dc9357SAndroid Build Coastguard Workerendm
240*f6dc9357SAndroid Build Coastguard Worker
241*f6dc9357SAndroid Build Coastguard Worker
242*f6dc9357SAndroid Build Coastguard Worker; ---------- AES-CBC Decode ----------
243*f6dc9357SAndroid Build Coastguard Worker
244*f6dc9357SAndroid Build Coastguard Worker
; Per-register callbacks intended for WOP. Both address the i-th 16-byte
; block at [rD + i * 16], where i is the assembly-time counter maintained by
; WOP. The second parameter (_ppp_) only absorbs WOP's op2 argument and is
; ignored.
;   XOR_WITH_DATA : reg ^= block i
;   WRITE_TO_DATA : block i = reg (movdqa, so rD must be 16-byte aligned)
245*f6dc9357SAndroid Build Coastguard WorkerXOR_WITH_DATA macro reg, _ppp_
246*f6dc9357SAndroid Build Coastguard Worker        pxor    reg, [rD + i * 16]
247*f6dc9357SAndroid Build Coastguard Workerendm
248*f6dc9357SAndroid Build Coastguard Worker
249*f6dc9357SAndroid Build Coastguard WorkerWRITE_TO_DATA macro reg, _ppp_
250*f6dc9357SAndroid Build Coastguard Worker        movdqa  [rD + i * 16], reg
251*f6dc9357SAndroid Build Coastguard Workerendm
252*f6dc9357SAndroid Build Coastguard Worker
253*f6dc9357SAndroid Build Coastguard Worker
254*f6dc9357SAndroid Build Coastguard Worker; state0    equ  @CatStr(xmm, %(ways_start_reg))
255*f6dc9357SAndroid Build Coastguard Worker
; Extra registers used by CBC decoding, allocated right after iv:
;   key0     : round key that is XORed into the input first
;   key_last : round key used by the final aesdeclast step
; key_last_ymm_n is the register NUMBER of key_last_ymm, for the
; hand-encoded VAES macros. NUM_CBC_REGS is the total number of xmm
; registers in use (xmm0 .. key_last), passed to MY_PROLOG.
256*f6dc9357SAndroid Build Coastguard Workerkey0            equ  @CatStr(xmm, %(ways_start_reg + ways + 1))
257*f6dc9357SAndroid Build Coastguard Workerkey0_ymm        equ  @CatStr(ymm, %(ways_start_reg + ways + 1))
258*f6dc9357SAndroid Build Coastguard Worker
259*f6dc9357SAndroid Build Coastguard Workerkey_last        equ  @CatStr(xmm, %(ways_start_reg + ways + 2))
260*f6dc9357SAndroid Build Coastguard Workerkey_last_ymm    equ  @CatStr(ymm, %(ways_start_reg + ways + 2))
261*f6dc9357SAndroid Build Coastguard Workerkey_last_ymm_n  equ                (ways_start_reg + ways + 2)
262*f6dc9357SAndroid Build Coastguard Worker
263*f6dc9357SAndroid Build Coastguard WorkerNUM_CBC_REGS    equ  (ways_start_reg + ways + 3)
264*f6dc9357SAndroid Build Coastguard Worker
265*f6dc9357SAndroid Build Coastguard Worker
; AesCbc_Decode_HW (3 parameters: context pointer, data pointer, block count)
;   In-place AES-CBC decryption with the AES-NI aesdec/aesdeclast
;   instructions.
;   Context layout as accessed here: IV at [keys], a count at [keys + 16]
;   (read by MY_PROLOG), round keys from [keys + 32].
;   The data is accessed with movdqa, so it must be 16-byte aligned.
;   The updated IV (the last ciphertext block) is written back to the
;   context on exit.
;   The public labels *_start, *_start_2 and *_start_3 are entry points used
;   by AesCbc_Decode_HW_256.
266*f6dc9357SAndroid Build Coastguard WorkerMY_SEG_PROC AesCbc_Decode_HW, 3
267*f6dc9357SAndroid Build Coastguard Worker
268*f6dc9357SAndroid Build Coastguard Worker    AesCbc_Decode_HW_start::
269*f6dc9357SAndroid Build Coastguard Worker        MY_PROLOG NUM_CBC_REGS
270*f6dc9357SAndroid Build Coastguard Worker
; Entry for callers that have already executed MY_PROLOG.
271*f6dc9357SAndroid Build Coastguard Worker    AesCbc_Decode_HW_start_2::
272*f6dc9357SAndroid Build Coastguard Worker        movdqa  iv, [keys]
273*f6dc9357SAndroid Build Coastguard Worker        add     keys, 32
274*f6dc9357SAndroid Build Coastguard Worker
; keys now points at the round-key table. key0 is the key at offset ksize,
; key_last the key at offset 0. After the subtraction ksize is the offset of
; the first key used inside the aesdec loop.
275*f6dc9357SAndroid Build Coastguard Worker        movdqa  key0, [keys + 1 * ksize_r]
276*f6dc9357SAndroid Build Coastguard Worker        movdqa  key_last, [keys]
277*f6dc9357SAndroid Build Coastguard Worker        sub     ksize_x, 16
278*f6dc9357SAndroid Build Coastguard Worker
279*f6dc9357SAndroid Build Coastguard Worker        jmp     check2
280*f6dc9357SAndroid Build Coastguard Worker    align 16
; ---- wide loop: `ways` blocks per iteration ----
281*f6dc9357SAndroid Build Coastguard Worker    nextBlocks2:
282*f6dc9357SAndroid Build Coastguard Worker        WOP     movdqa, [rD + i * 16]
283*f6dc9357SAndroid Build Coastguard Worker        mov     koffs_x, ksize_x
284*f6dc9357SAndroid Build Coastguard Worker        ; WOP_KEY pxor, ksize_r + 16
285*f6dc9357SAndroid Build Coastguard Worker        WOP     pxor, key0
286*f6dc9357SAndroid Build Coastguard Worker    ; align 16
; One aesdec round per pass, walking the key table downward from offset
; ksize to offset 16. The `jnz` tests the flags set by the `sub`.
287*f6dc9357SAndroid Build Coastguard Worker    @@:
288*f6dc9357SAndroid Build Coastguard Worker        WOP_KEY aesdec, 1 * koffs_r
289*f6dc9357SAndroid Build Coastguard Worker        sub     koffs_r, 16
290*f6dc9357SAndroid Build Coastguard Worker        jnz     @B
291*f6dc9357SAndroid Build Coastguard Worker        ; WOP_KEY aesdeclast, 0
292*f6dc9357SAndroid Build Coastguard Worker        WOP     aesdeclast, key_last
293*f6dc9357SAndroid Build Coastguard Worker
; CBC chaining: block 0 is XORed with the IV, block i with ciphertext block
; i-1. The ciphertext is still intact in memory because nothing has been
; written back yet; the last ciphertext block becomes the next IV before the
; plaintext overwrites it.
294*f6dc9357SAndroid Build Coastguard Worker        pxor    @CatStr(xmm, %(ways_start_reg)), iv
295*f6dc9357SAndroid Build Coastguard Worker    i = 1
296*f6dc9357SAndroid Build Coastguard Worker    rept ways - 1
297*f6dc9357SAndroid Build Coastguard Worker        pxor    @CatStr(xmm, %(ways_start_reg + i)), [rD + i * 16 - 16]
298*f6dc9357SAndroid Build Coastguard Worker        i = i + 1
299*f6dc9357SAndroid Build Coastguard Worker    endm
300*f6dc9357SAndroid Build Coastguard Worker        movdqa  iv, [rD + ways * 16 - 16]
301*f6dc9357SAndroid Build Coastguard Worker        WOP     WRITE_TO_DATA
302*f6dc9357SAndroid Build Coastguard Worker
303*f6dc9357SAndroid Build Coastguard Worker        add     rD, ways * 16
; Entry used by the 256-bit routine to finish the remaining blocks. It
; expects iv, key0, key_last, keys (already advanced by 32) and ksize
; (already reduced by 16) to be set up as above.
304*f6dc9357SAndroid Build Coastguard Worker    AesCbc_Decode_HW_start_3::
; Loop while at least `ways` blocks remain: the sub borrows (CF set) when
; rN < ways, in which case the count is restored for the tail loop.
305*f6dc9357SAndroid Build Coastguard Worker    check2:
306*f6dc9357SAndroid Build Coastguard Worker        sub     rN, ways
307*f6dc9357SAndroid Build Coastguard Worker        jnc     nextBlocks2
308*f6dc9357SAndroid Build Coastguard Worker        add     rN, ways
309*f6dc9357SAndroid Build Coastguard Worker
; The tail loop handles two round keys per pass, so its starting offset is
; 16 bytes lower than in the wide loop.
310*f6dc9357SAndroid Build Coastguard Worker        sub     ksize_x, 16
311*f6dc9357SAndroid Build Coastguard Worker
312*f6dc9357SAndroid Build Coastguard Worker        jmp     check
; ---- tail loop: one block per iteration, data held in `state` (xmm0) ----
313*f6dc9357SAndroid Build Coastguard Worker    nextBlock:
314*f6dc9357SAndroid Build Coastguard Worker        movdqa  state, [rD]
315*f6dc9357SAndroid Build Coastguard Worker        mov     koffs_x, ksize_x
316*f6dc9357SAndroid Build Coastguard Worker        ; OP_KEY  pxor, 1 * ksize_r + 32
317*f6dc9357SAndroid Build Coastguard Worker        pxor    state, key0
318*f6dc9357SAndroid Build Coastguard Worker        ; movdqa  state0, [rD]
319*f6dc9357SAndroid Build Coastguard Worker        ; movdqa  state, key0
320*f6dc9357SAndroid Build Coastguard Worker        ; pxor    state, state0
; Two aesdec rounds per pass (keys at koffs + 16 and koffs), stepping koffs
; down by 32 until it reaches zero; the key at offset 16 is applied after
; the loop, followed by aesdeclast with key_last.
321*f6dc9357SAndroid Build Coastguard Worker    @@:
322*f6dc9357SAndroid Build Coastguard Worker        OP_KEY  aesdec, 1 * koffs_r + 16
323*f6dc9357SAndroid Build Coastguard Worker        OP_KEY  aesdec, 1 * koffs_r
324*f6dc9357SAndroid Build Coastguard Worker        sub     koffs_r, 32
325*f6dc9357SAndroid Build Coastguard Worker        jnz     @B
326*f6dc9357SAndroid Build Coastguard Worker        OP_KEY  aesdec, 16
327*f6dc9357SAndroid Build Coastguard Worker        ; OP_KEY  aesdeclast, 0
328*f6dc9357SAndroid Build Coastguard Worker        aesdeclast state, key_last
329*f6dc9357SAndroid Build Coastguard Worker
; The ciphertext block must be captured as the next IV before the plaintext
; is stored over it.
330*f6dc9357SAndroid Build Coastguard Worker        pxor    state, iv
331*f6dc9357SAndroid Build Coastguard Worker        movdqa  iv, [rD]
332*f6dc9357SAndroid Build Coastguard Worker        ; movdqa  iv, state0
333*f6dc9357SAndroid Build Coastguard Worker        movdqa  [rD], state
334*f6dc9357SAndroid Build Coastguard Worker
335*f6dc9357SAndroid Build Coastguard Worker        add     rD, 16
336*f6dc9357SAndroid Build Coastguard Worker    check:
337*f6dc9357SAndroid Build Coastguard Worker        sub     rN, 1
338*f6dc9357SAndroid Build Coastguard Worker        jnc     nextBlock
339*f6dc9357SAndroid Build Coastguard Worker
; keys was advanced by 32 above, so [keys - 32] is the IV slot of the
; context.
340*f6dc9357SAndroid Build Coastguard Worker        movdqa  [keys - 32], iv
341*f6dc9357SAndroid Build Coastguard WorkerMY_EPILOG
342*f6dc9357SAndroid Build Coastguard Worker
343*f6dc9357SAndroid Build Coastguard Worker
344*f6dc9357SAndroid Build Coastguard Worker
345*f6dc9357SAndroid Build Coastguard Worker
346*f6dc9357SAndroid Build Coastguard Worker; ---------- AVX ----------
347*f6dc9357SAndroid Build Coastguard Worker
348*f6dc9357SAndroid Build Coastguard Worker
; AVX__WOP_n op
;   Invokes macro `op` once per data register, passing the register NUMBER
;   (ways_start_reg + i) as an expression. Used with the hand-encoded VAES
;   macros, which need numeric operands.
;   Leaves the assembly-time variable i equal to ways.
349*f6dc9357SAndroid Build Coastguard WorkerAVX__WOP_n macro op
350*f6dc9357SAndroid Build Coastguard Worker    i = 0
351*f6dc9357SAndroid Build Coastguard Worker    rept ways
352*f6dc9357SAndroid Build Coastguard Worker        op      (ways_start_reg + i)
353*f6dc9357SAndroid Build Coastguard Worker        i = i + 1
354*f6dc9357SAndroid Build Coastguard Worker    endm
355*f6dc9357SAndroid Build Coastguard Workerendm
356*f6dc9357SAndroid Build Coastguard Worker
; AVX__WOP op
;   Invokes macro `op` once per data register, passing the register NAME
;   ymmK (K = ways_start_reg + i). `op` may reference i to address the
;   matching 32-byte slice of data.
;   Leaves the assembly-time variable i equal to ways.
357*f6dc9357SAndroid Build Coastguard WorkerAVX__WOP macro op
358*f6dc9357SAndroid Build Coastguard Worker    i = 0
359*f6dc9357SAndroid Build Coastguard Worker    rept ways
360*f6dc9357SAndroid Build Coastguard Worker        op      @CatStr(ymm, %(ways_start_reg + i))
361*f6dc9357SAndroid Build Coastguard Worker        i = i + 1
362*f6dc9357SAndroid Build Coastguard Worker    endm
363*f6dc9357SAndroid Build Coastguard Workerendm
364*f6dc9357SAndroid Build Coastguard Worker
365*f6dc9357SAndroid Build Coastguard Worker
; AVX__WOP_KEY op, offs
;   Loads a 32-byte round key (already broadcast to both 128-bit lanes) from
;   the table at [keys2 + offs] into key_ymm (ymm0), then applies `op` to
;   every data register by number.
;   vmovdqa requires [keys2 + offs] to be 32-byte aligned.
366*f6dc9357SAndroid Build Coastguard WorkerAVX__WOP_KEY macro op:req, offs:req
367*f6dc9357SAndroid Build Coastguard Worker        vmovdqa  key_ymm, ymmword ptr [keys2 + offs]
368*f6dc9357SAndroid Build Coastguard Worker        AVX__WOP_n op
369*f6dc9357SAndroid Build Coastguard Workerendm
370*f6dc9357SAndroid Build Coastguard Worker
371*f6dc9357SAndroid Build Coastguard Worker
; Per-register callbacks for AVX__WOP. `i` is the assembly-time index of the
; register; register i covers the two blocks at [rD + 32 * i].

; AVX__CBC_START reg : reg = key0 XOR the i-th 32-byte slice of input.
372*f6dc9357SAndroid Build Coastguard WorkerAVX__CBC_START macro reg
373*f6dc9357SAndroid Build Coastguard Worker        ; vpxor   reg, key_ymm, ymmword ptr [rD + 32 * i]
374*f6dc9357SAndroid Build Coastguard Worker        vpxor   reg, key0_ymm, ymmword ptr [rD + 32 * i]
375*f6dc9357SAndroid Build Coastguard Workerendm
376*f6dc9357SAndroid Build Coastguard Worker
; AVX__CBC_END reg : CBC chaining XOR. Register 0 uses iv_ymm (the caller
; fills it with the IV plus the first ciphertext block); every other
; register uses the 32 bytes starting one block earlier in memory, i.e. the
; two preceding ciphertext blocks.
377*f6dc9357SAndroid Build Coastguard WorkerAVX__CBC_END macro reg
378*f6dc9357SAndroid Build Coastguard Worker    if i eq 0
379*f6dc9357SAndroid Build Coastguard Worker        vpxor   reg, reg, iv_ymm
380*f6dc9357SAndroid Build Coastguard Worker    else
381*f6dc9357SAndroid Build Coastguard Worker        vpxor   reg, reg, ymmword ptr [rD + i * 32 - 16]
382*f6dc9357SAndroid Build Coastguard Worker    endif
383*f6dc9357SAndroid Build Coastguard Workerendm
384*f6dc9357SAndroid Build Coastguard Worker
385*f6dc9357SAndroid Build Coastguard Worker
; AVX__WRITE_TO_DATA reg : store reg to the i-th 32-byte slice (unaligned
; store).
386*f6dc9357SAndroid Build Coastguard WorkerAVX__WRITE_TO_DATA macro reg
387*f6dc9357SAndroid Build Coastguard Worker        vmovdqu ymmword ptr [rD + 32 * i], reg
388*f6dc9357SAndroid Build Coastguard Workerendm
389*f6dc9357SAndroid Build Coastguard Worker
; AVX__XOR_WITH_DATA reg : reg ^= the i-th 32-byte slice.
390*f6dc9357SAndroid Build Coastguard WorkerAVX__XOR_WITH_DATA macro reg
391*f6dc9357SAndroid Build Coastguard Worker        vpxor   reg, reg, ymmword ptr [rD + 32 * i]
392*f6dc9357SAndroid Build Coastguard Workerendm
393*f6dc9357SAndroid Build Coastguard Worker
; AVX__CTR_START reg : advance the counter held in iv_ymm by one_ymm
; (defined outside the visible part of this file), then set
; reg = counter XOR key0.
394*f6dc9357SAndroid Build Coastguard WorkerAVX__CTR_START macro reg
395*f6dc9357SAndroid Build Coastguard Worker        vpaddq  iv_ymm, iv_ymm, one_ymm
396*f6dc9357SAndroid Build Coastguard Worker        ; vpxor   reg, iv_ymm, key_ymm
397*f6dc9357SAndroid Build Coastguard Worker        vpxor   reg, iv_ymm, key0_ymm
398*f6dc9357SAndroid Build Coastguard Workerendm
399*f6dc9357SAndroid Build Coastguard Worker
400*f6dc9357SAndroid Build Coastguard Worker
; MY_VAES_INSTR_2 cmd, dest, a1, a2
;   Emits the raw bytes of a 256-bit register-to-register VAES instruction
;   `dest = cmd(a1, a2)`, so the file assembles even when the assembler has
;   no VAES mnemonics.
;   cmd         : opcode byte (see the wrappers below)
;   dest, a1, a2: ymm register NUMBERS (0..15), not register names
;   Byte layout (3-byte VEX form):
;     0C4h
;     map select 2, X bit left set (unused), inverted high bit of a2 and of
;       dest
;     5 = 256-bit vector length + 66h prefix selector, with the inverted
;       number of a1 in the vvvv field
;     cmd
;     ModRM, register-direct: reg = dest, r/m = a2
401*f6dc9357SAndroid Build Coastguard WorkerMY_VAES_INSTR_2 macro cmd, dest, a1, a2
402*f6dc9357SAndroid Build Coastguard Worker  db 0c4H
403*f6dc9357SAndroid Build Coastguard Worker  db 2 + 040H + 020h * (1 - (a2) / 8) + 080h * (1 - (dest) / 8)
404*f6dc9357SAndroid Build Coastguard Worker  db 5 + 8 * ((not (a1)) and 15)
405*f6dc9357SAndroid Build Coastguard Worker  db cmd
406*f6dc9357SAndroid Build Coastguard Worker  db 0c0H + 8 * ((dest) and 7) + ((a2) and 7)
407*f6dc9357SAndroid Build Coastguard Workerendm
408*f6dc9357SAndroid Build Coastguard Worker
; MY_VAES_INSTR cmd, dest, a : two-operand convenience form,
; dest = cmd(dest, a).
409*f6dc9357SAndroid Build Coastguard WorkerMY_VAES_INSTR macro cmd, dest, a
410*f6dc9357SAndroid Build Coastguard Worker        MY_VAES_INSTR_2  cmd, dest, dest, a
411*f6dc9357SAndroid Build Coastguard Workerendm
412*f6dc9357SAndroid Build Coastguard Worker
; Named wrappers; the opcode bytes 0DCh..0DFh select
; vaesenc / vaesenclast / vaesdec / vaesdeclast respectively.
413*f6dc9357SAndroid Build Coastguard WorkerMY_vaesenc macro dest, a
414*f6dc9357SAndroid Build Coastguard Worker        MY_VAES_INSTR  0dcH, dest, a
415*f6dc9357SAndroid Build Coastguard Workerendm
416*f6dc9357SAndroid Build Coastguard WorkerMY_vaesenclast macro dest, a
417*f6dc9357SAndroid Build Coastguard Worker        MY_VAES_INSTR  0ddH, dest, a
418*f6dc9357SAndroid Build Coastguard Workerendm
419*f6dc9357SAndroid Build Coastguard WorkerMY_vaesdec macro dest, a
420*f6dc9357SAndroid Build Coastguard Worker        MY_VAES_INSTR  0deH, dest, a
421*f6dc9357SAndroid Build Coastguard Workerendm
422*f6dc9357SAndroid Build Coastguard WorkerMY_vaesdeclast macro dest, a
423*f6dc9357SAndroid Build Coastguard Worker        MY_VAES_INSTR  0dfH, dest, a
424*f6dc9357SAndroid Build Coastguard Workerendm
425*f6dc9357SAndroid Build Coastguard Worker
426*f6dc9357SAndroid Build Coastguard Worker
; Per-register callbacks for AVX__WOP_n; `reg` is a ymm register NUMBER.
; Each applies one AES round to `reg`, using the round key currently held in
; key_ymm (register number key_ymm_n) - except the decode-last variant,
; which uses the dedicated key_last register.
427*f6dc9357SAndroid Build Coastguard WorkerAVX__VAES_DEC macro reg
428*f6dc9357SAndroid Build Coastguard Worker        MY_vaesdec reg, key_ymm_n
429*f6dc9357SAndroid Build Coastguard Workerendm
430*f6dc9357SAndroid Build Coastguard Worker
431*f6dc9357SAndroid Build Coastguard WorkerAVX__VAES_DEC_LAST_key_last macro reg
432*f6dc9357SAndroid Build Coastguard Worker        ; MY_vaesdeclast reg, key_ymm_n
433*f6dc9357SAndroid Build Coastguard Worker        MY_vaesdeclast reg, key_last_ymm_n
434*f6dc9357SAndroid Build Coastguard Workerendm
435*f6dc9357SAndroid Build Coastguard Worker
436*f6dc9357SAndroid Build Coastguard WorkerAVX__VAES_ENC macro reg
437*f6dc9357SAndroid Build Coastguard Worker        MY_vaesenc reg, key_ymm_n
438*f6dc9357SAndroid Build Coastguard Workerendm
439*f6dc9357SAndroid Build Coastguard Worker
440*f6dc9357SAndroid Build Coastguard WorkerAVX__VAES_ENC_LAST macro reg
441*f6dc9357SAndroid Build Coastguard Worker        MY_vaesenclast reg, key_ymm_n
442*f6dc9357SAndroid Build Coastguard Workerendm
443*f6dc9357SAndroid Build Coastguard Worker
; AVX__vinserti128_TO_HIGH dest, src
;   Replaces the upper 128 bits of ymm register `dest` with the 128-bit
;   operand `src`; the lower 128 bits are kept.
444*f6dc9357SAndroid Build Coastguard WorkerAVX__vinserti128_TO_HIGH macro dest, src
445*f6dc9357SAndroid Build Coastguard Worker        vinserti128  dest, dest, src, 1
446*f6dc9357SAndroid Build Coastguard Workerendm
447*f6dc9357SAndroid Build Coastguard Worker
448*f6dc9357SAndroid Build Coastguard Worker
; AesCbc_Decode_HW_256 (3 parameters, same as AesCbc_Decode_HW)
;   In-place AES-CBC decryption using 256-bit VAES: each ymm register holds
;   two consecutive blocks, so one loop iteration handles 2 * ways blocks.
;   Inputs with fewer than 2 * ways blocks, and the remainder after the wide
;   loop, are handled by jumping into AesCbc_Decode_HW, whose epilog also
;   returns for this routine.
;   Without use_vaes_256 the routine is only a jump to AesCbc_Decode_HW.
449*f6dc9357SAndroid Build Coastguard WorkerMY_PROC AesCbc_Decode_HW_256, 3
450*f6dc9357SAndroid Build Coastguard Worker  ifdef use_vaes_256
451*f6dc9357SAndroid Build Coastguard Worker        MY_PROLOG NUM_CBC_REGS
452*f6dc9357SAndroid Build Coastguard Worker
; Too little data for even one wide iteration: continue in the 128-bit code
; after its prolog (the prolog has already run here).
453*f6dc9357SAndroid Build Coastguard Worker        cmp    rN, ways * 2
454*f6dc9357SAndroid Build Coastguard Worker        jb     AesCbc_Decode_HW_start_2
455*f6dc9357SAndroid Build Coastguard Worker
456*f6dc9357SAndroid Build Coastguard Worker        vmovdqa iv, xmmword ptr [keys]
457*f6dc9357SAndroid Build Coastguard Worker        add     keys, 32
458*f6dc9357SAndroid Build Coastguard Worker
; key0 and key_last are replicated into both 128-bit lanes. Then:
;   koffs = ksize - 16  (offset of the highest key copied by `broad`)
;   ksize = 2 * (ksize - 16), because each table entry below is 32 bytes.
459*f6dc9357SAndroid Build Coastguard Worker        vbroadcasti128  key0_ymm, xmmword ptr [keys + 1 * ksize_r]
460*f6dc9357SAndroid Build Coastguard Worker        vbroadcasti128  key_last_ymm, xmmword ptr [keys]
461*f6dc9357SAndroid Build Coastguard Worker        sub     ksize_x, 16
462*f6dc9357SAndroid Build Coastguard Worker        mov     koffs_x, ksize_x
463*f6dc9357SAndroid Build Coastguard Worker        add     ksize_x, ksize_x
464*f6dc9357SAndroid Build Coastguard Worker
; Reserve stack space for the table of broadcast round keys. keys2 (r3) is
; saved first and then set to r4 rounded DOWN to a 32-byte boundary. Table
; entries are written at [keys2 + 2 * koffs] with koffs >= 16, i.e. at
; offset 32 and above, so the stores stay at or above r4.
465*f6dc9357SAndroid Build Coastguard Worker        AVX_STACK_SUB = ((NUM_AES_KEYS_MAX + 1 - 2) * 32)
466*f6dc9357SAndroid Build Coastguard Worker        push    keys2
467*f6dc9357SAndroid Build Coastguard Worker        sub     r4, AVX_STACK_SUB
468*f6dc9357SAndroid Build Coastguard Worker        ; sub     r4, 32
469*f6dc9357SAndroid Build Coastguard Worker        ; sub     r4, ksize_r
470*f6dc9357SAndroid Build Coastguard Worker        ; lea     keys2, [r4 + 32]
471*f6dc9357SAndroid Build Coastguard Worker        mov     keys2, r4
472*f6dc9357SAndroid Build Coastguard Worker        and     keys2, -32
; Copy each 16-byte round key at [keys + koffs] to a 32-byte, lane-duplicated
; entry at [keys2 + 2 * koffs], for koffs from ksize - 16 down to 16.
473*f6dc9357SAndroid Build Coastguard Worker    broad:
474*f6dc9357SAndroid Build Coastguard Worker        vbroadcasti128  key_ymm, xmmword ptr [keys + 1 * koffs_r]
475*f6dc9357SAndroid Build Coastguard Worker        vmovdqa         ymmword ptr [keys2 + koffs_r * 2], key_ymm
476*f6dc9357SAndroid Build Coastguard Worker        sub     koffs_r, 16
477*f6dc9357SAndroid Build Coastguard Worker        ; jnc     broad
478*f6dc9357SAndroid Build Coastguard Worker        jnz     broad
479*f6dc9357SAndroid Build Coastguard Worker
; Pre-subtract one iteration's worth of blocks so the loop can end on the
; borrow of the `sub` at its bottom.
480*f6dc9357SAndroid Build Coastguard Worker        sub     rN, ways * 2
481*f6dc9357SAndroid Build Coastguard Worker
482*f6dc9357SAndroid Build Coastguard Worker    align 16
; ---- wide loop: 2 * ways blocks per iteration ----
483*f6dc9357SAndroid Build Coastguard Worker    avx_cbcdec_nextBlock2:
484*f6dc9357SAndroid Build Coastguard Worker        mov     koffs_x, ksize_x
485*f6dc9357SAndroid Build Coastguard Worker        ; AVX__WOP_KEY    AVX__CBC_START, 1 * koffs_r + 32
486*f6dc9357SAndroid Build Coastguard Worker        AVX__WOP    AVX__CBC_START
; One vaesdec round per pass, reading the broadcast keys from the stack
; table, from offset ksize down to 32.
487*f6dc9357SAndroid Build Coastguard Worker    @@:
488*f6dc9357SAndroid Build Coastguard Worker        AVX__WOP_KEY    AVX__VAES_DEC, 1 * koffs_r
489*f6dc9357SAndroid Build Coastguard Worker        sub     koffs_r, 32
490*f6dc9357SAndroid Build Coastguard Worker        jnz     @B
491*f6dc9357SAndroid Build Coastguard Worker        ; AVX__WOP_KEY    AVX__VAES_DEC_LAST, 0
492*f6dc9357SAndroid Build Coastguard Worker        AVX__WOP_n   AVX__VAES_DEC_LAST_key_last
493*f6dc9357SAndroid Build Coastguard Worker
; Build the chaining value for the first register: low lane = current IV,
; high lane = first ciphertext block (still unmodified in memory).
494*f6dc9357SAndroid Build Coastguard Worker        AVX__vinserti128_TO_HIGH  iv_ymm, xmmword ptr [rD]
495*f6dc9357SAndroid Build Coastguard Worker        AVX__WOP        AVX__CBC_END
496*f6dc9357SAndroid Build Coastguard Worker
; The last ciphertext block of this batch becomes the next IV; it is read
; before the plaintext is written over it.
497*f6dc9357SAndroid Build Coastguard Worker        vmovdqa         iv, xmmword ptr [rD + ways * 32 - 16]
498*f6dc9357SAndroid Build Coastguard Worker        AVX__WOP        AVX__WRITE_TO_DATA
499*f6dc9357SAndroid Build Coastguard Worker
500*f6dc9357SAndroid Build Coastguard Worker        add     rD, ways * 32
501*f6dc9357SAndroid Build Coastguard Worker        sub     rN, ways * 2
502*f6dc9357SAndroid Build Coastguard Worker        jnc     avx_cbcdec_nextBlock2
503*f6dc9357SAndroid Build Coastguard Worker        add     rN, ways * 2
504*f6dc9357SAndroid Build Coastguard Worker
; Restore ksize to (original - 16), the value the 128-bit code expects at
; AesCbc_Decode_HW_start_3.
505*f6dc9357SAndroid Build Coastguard Worker        shr     ksize_x, 1
506*f6dc9357SAndroid Build Coastguard Worker
507*f6dc9357SAndroid Build Coastguard Worker        ; lea     r4, [r4 + 1 * ksize_r + 32]
508*f6dc9357SAndroid Build Coastguard Worker        add     r4, AVX_STACK_SUB
509*f6dc9357SAndroid Build Coastguard Worker        pop     keys2
510*f6dc9357SAndroid Build Coastguard Worker
; Clear the upper ymm halves before continuing in legacy-SSE code; the low
; 128 bits (iv, key0, key_last) are preserved. The remaining rN blocks are
; then finished by the 128-bit loops.
511*f6dc9357SAndroid Build Coastguard Worker        vzeroupper
512*f6dc9357SAndroid Build Coastguard Worker        jmp     AesCbc_Decode_HW_start_3
513*f6dc9357SAndroid Build Coastguard Worker  else
514*f6dc9357SAndroid Build Coastguard Worker        jmp     AesCbc_Decode_HW_start
515*f6dc9357SAndroid Build Coastguard Worker  endif
516*f6dc9357SAndroid Build Coastguard WorkerMY_ENDP
517*f6dc9357SAndroid Build Coastguard WorkerMY_SEG_ENDP
518*f6dc9357SAndroid Build Coastguard Worker
519*f6dc9357SAndroid Build Coastguard Worker
520*f6dc9357SAndroid Build Coastguard Worker
521*f6dc9357SAndroid Build Coastguard Worker
522*f6dc9357SAndroid Build Coastguard Worker; ---------- AES-CBC Encode ----------
523*f6dc9357SAndroid Build Coastguard Worker
; Register and constant assignments used by AesCbc_Encode_HW.
; e0: xmm register that receives each 16-byte data block loaded from [rD].
524*f6dc9357SAndroid Build Coastguard Workere0  equ  xmm1
525*f6dc9357SAndroid Build Coastguard Worker
; AesCbc_Encode_HW keeps CENC_NUM_REG_KEYS round keys resident in consecutive
; xmm registers, starting at register index CENC_START_KEY (xmm2..xmm7 with
; the values below). Later round keys are applied through OP_KEY using
; offsets relative to `keys`.
526*f6dc9357SAndroid Build Coastguard WorkerCENC_START_KEY     equ 2
527*f6dc9357SAndroid Build Coastguard WorkerCENC_NUM_REG_KEYS  equ (3 * 2)
; Disabled: alias for one more register that would cache the final round key.
528*f6dc9357SAndroid Build Coastguard Worker; last_key equ @CatStr(xmm, %(CENC_START_KEY + CENC_NUM_REG_KEYS))
529*f6dc9357SAndroid Build Coastguard Worker
; AesCbc_Encode_HW
;   CBC-encrypts rN 16-byte blocks in place at rD, strictly one block at a
;   time (each block is XORed with the previous result before encryption).
;
;   rD, rN, keys, ksize_r, koffs_r, state, MY_PROLOG, MY_EPILOG and OP_KEY
;   are defined outside this section. NOTE(review): MY_PROLOG is assumed to
;   load the arguments into rD / rN / keys and to set ksize_r to a byte size
;   derived from the key structure - confirm against its definition.
;
;   Memory read relative to the entry value of `keys`:
;     [keys +  0]            chaining value; loaded into `state` on entry and
;                            written back on exit
;     [keys + 32 + 16 * n]   round key n
;     [keys + 32 + ksize_r]  last round key (used with aesenclast)
;
;   All vector loads/stores use movdqa, so rD and keys must be 16-byte
;   aligned.
530*f6dc9357SAndroid Build Coastguard WorkerMY_SEG_PROC AesCbc_Encode_HW, 3
        ; The argument evaluates to 8 with the current constants; presumably
        ; the number of xmm registers used here - confirm against MY_PROLOG.
531*f6dc9357SAndroid Build Coastguard Worker        MY_PROLOG (CENC_START_KEY + CENC_NUM_REG_KEYS + 0)
532*f6dc9357SAndroid Build Coastguard Worker
        ; state = chaining value; then step keys to the first round key.
533*f6dc9357SAndroid Build Coastguard Worker        movdqa  state, [keys]
534*f6dc9357SAndroid Build Coastguard Worker        add     keys, 32
535*f6dc9357SAndroid Build Coastguard Worker
    ; Assembly-time loop: cache the first CENC_NUM_REG_KEYS round keys in
    ; xmm2..xmm7.
536*f6dc9357SAndroid Build Coastguard Worker    i = 0
537*f6dc9357SAndroid Build Coastguard Worker    rept CENC_NUM_REG_KEYS
538*f6dc9357SAndroid Build Coastguard Worker        movdqa  @CatStr(xmm, %(CENC_START_KEY + i)), [keys + i * 16]
539*f6dc9357SAndroid Build Coastguard Worker        i = i + 1
540*f6dc9357SAndroid Build Coastguard Worker    endm
541*f6dc9357SAndroid Build Coastguard Worker
        ; keys now points at the last round key. ksize_r becomes the negative
        ; byte offset from there back to the first round key that is NOT
        ; cached in a register (offset 16 * CENC_NUM_REG_KEYS from key 0).
542*f6dc9357SAndroid Build Coastguard Worker        add     keys, ksize_r
543*f6dc9357SAndroid Build Coastguard Worker        neg     ksize_r
544*f6dc9357SAndroid Build Coastguard Worker        add     ksize_r, (16 * CENC_NUM_REG_KEYS)
545*f6dc9357SAndroid Build Coastguard Worker        ; movdqa  last_key, [keys]
        ; Enter at the loop test so that rN == 0 processes no blocks.
546*f6dc9357SAndroid Build Coastguard Worker        jmp     check_e
547*f6dc9357SAndroid Build Coastguard Worker
548*f6dc9357SAndroid Build Coastguard Worker    align 16
549*f6dc9357SAndroid Build Coastguard Worker    nextBlock_e:
        ; state ^= data ^ key[0]: CBC chaining XOR and the initial round-key
        ; XOR. XORing key[0] into e0 first keeps that step off the
        ; dependency chain through `state`.
550*f6dc9357SAndroid Build Coastguard Worker        movdqa  e0, [rD]
551*f6dc9357SAndroid Build Coastguard Worker        mov     koffs_r, ksize_r
552*f6dc9357SAndroid Build Coastguard Worker        pxor    e0, @CatStr(xmm, %(CENC_START_KEY))
553*f6dc9357SAndroid Build Coastguard Worker        pxor    state, e0
554*f6dc9357SAndroid Build Coastguard Worker
    ; Rounds that use the register-resident keys 1 .. CENC_NUM_REG_KEYS - 1
    ; (xmm3..xmm7).
555*f6dc9357SAndroid Build Coastguard Worker    i = 1
556*f6dc9357SAndroid Build Coastguard Worker    rept (CENC_NUM_REG_KEYS - 1)
557*f6dc9357SAndroid Build Coastguard Worker        aesenc  state, @CatStr(xmm, %(CENC_START_KEY + i))
558*f6dc9357SAndroid Build Coastguard Worker        i = i + 1
559*f6dc9357SAndroid Build Coastguard Worker    endm
560*f6dc9357SAndroid Build Coastguard Worker
    ; Remaining middle rounds, two per iteration. OP_KEY presumably applies
    ; the instruction to `state` with the key at [keys + offset] - confirm
    ; against its definition. koffs_r climbs from ksize_r to exactly 0, so
    ; ksize_r must be a negative multiple of 32 here for the loop to end.
561*f6dc9357SAndroid Build Coastguard Worker    @@:
562*f6dc9357SAndroid Build Coastguard Worker        OP_KEY  aesenc, 1 * koffs_r
563*f6dc9357SAndroid Build Coastguard Worker        OP_KEY  aesenc, 1 * koffs_r + 16
564*f6dc9357SAndroid Build Coastguard Worker        add     koffs_r, 32
565*f6dc9357SAndroid Build Coastguard Worker        jnz     @B
        ; Final round with the last round key (offset 0 from keys).
566*f6dc9357SAndroid Build Coastguard Worker        OP_KEY  aesenclast, 0
567*f6dc9357SAndroid Build Coastguard Worker        ; aesenclast state, last_key
568*f6dc9357SAndroid Build Coastguard Worker
        ; Store the ciphertext block; `state` stays live as the chaining
        ; value for the next block.
569*f6dc9357SAndroid Build Coastguard Worker        movdqa  [rD], state
570*f6dc9357SAndroid Build Coastguard Worker        add     rD, 16
571*f6dc9357SAndroid Build Coastguard Worker    check_e:
        ; CF is set only when rN was 0 before the subtract, so the loop body
        ; runs exactly rN times.
572*f6dc9357SAndroid Build Coastguard Worker        sub     rN, 1
573*f6dc9357SAndroid Build Coastguard Worker        jnc     nextBlock_e
574*f6dc9357SAndroid Build Coastguard Worker
        ; Write the final chaining value back to where it was loaded from.
        ; keys and ksize_r were both rebased above; the expression below
        ; cancels those adjustments and resolves to the entry value of keys.
575*f6dc9357SAndroid Build Coastguard Worker        ; movdqa  [keys - 32], state
576*f6dc9357SAndroid Build Coastguard Worker        movdqa  [keys + 1 * ksize_r - (16 * CENC_NUM_REG_KEYS) - 32], state
577*f6dc9357SAndroid Build Coastguard WorkerMY_EPILOG
578*f6dc9357SAndroid Build Coastguard WorkerMY_SEG_ENDP
579*f6dc9357SAndroid Build Coastguard Worker
580*f6dc9357SAndroid Build Coastguard Worker
581*f6dc9357SAndroid Build Coastguard Worker
582*f6dc9357SAndroid Build Coastguard Worker; ---------- AES-CTR ----------
583*f6dc9357SAndroid Build Coastguard Worker
; The x64-only override of `ways` is commented out, so this conditional
; currently assembles to nothing.
584*f6dc9357SAndroid Build Coastguard Workerifdef x64
585*f6dc9357SAndroid Build Coastguard Worker        ; ways = 11
586*f6dc9357SAndroid Build Coastguard Workerendif
587*f6dc9357SAndroid Build Coastguard Worker
588*f6dc9357SAndroid Build Coastguard Worker
; Extra registers for the CTR routines, named by register index relative to
; ways_start_reg and ways (both defined earlier in the file):
;   one  / one_ymm   index ways_start_reg + ways + 1 : counter increment
;   key0 / key0_ymm  index ways_start_reg + ways + 2 : first round key,
;                    loaded from [keys + 32] and kept in a register
; NUM_CTR_REGS is one past the index of key0; both CTR routines pass it to
; MY_PROLOG.
589*f6dc9357SAndroid Build Coastguard Workerone             equ  @CatStr(xmm, %(ways_start_reg + ways + 1))
590*f6dc9357SAndroid Build Coastguard Workerone_ymm         equ  @CatStr(ymm, %(ways_start_reg + ways + 1))
591*f6dc9357SAndroid Build Coastguard Workerkey0            equ  @CatStr(xmm, %(ways_start_reg + ways + 2))
592*f6dc9357SAndroid Build Coastguard Workerkey0_ymm        equ  @CatStr(ymm, %(ways_start_reg + ways + 2))
593*f6dc9357SAndroid Build Coastguard WorkerNUM_CTR_REGS    equ  (ways_start_reg + ways + 3)
594*f6dc9357SAndroid Build Coastguard Worker
; INIT_CTR reg, _ppp_
;   Advances the counter in `iv` by `one` (paddq adds each 64-bit lane
;   independently, with no carry between lanes) and then copies the updated
;   counter into `reg`.
;   `_ppp_` is not referenced in the body. The macro is invoked as
;   `WOP INIT_CTR, 0`, so the parameter presumably only absorbs the extra
;   argument that WOP forwards - confirm against the WOP definition.
595*f6dc9357SAndroid Build Coastguard WorkerINIT_CTR macro reg, _ppp_
596*f6dc9357SAndroid Build Coastguard Worker        paddq   iv, one
597*f6dc9357SAndroid Build Coastguard Worker        movdqa  reg, iv
598*f6dc9357SAndroid Build Coastguard Workerendm
599*f6dc9357SAndroid Build Coastguard Worker
600*f6dc9357SAndroid Build Coastguard Worker
; AesCtr_Code_HW
;   CTR-mode processing of rN 16-byte blocks in place at rD. For each block
;   the counter in `iv` is incremented by `one` BEFORE use, the incremented
;   counter is encrypted, and the result is XORed with the data block.
;
;   Phase 1 (nextBlocks2_c): `ways` blocks per iteration.
;   Phase 2 (nextBlock_c):   the remaining blocks, one at a time.
;
;   rD, rN, keys, ksize_r, koffs_r, koffs_x, iv, state, ways and the
;   MY_PROLOG / MY_EPILOG / OP_KEY / WOP / WOP_KEY / XOR_WITH_DATA /
;   WRITE_TO_DATA macros are defined outside this section.
;   NOTE(review): WOP and WOP_KEY presumably expand their argument once per
;   working register (`ways` times), WOP_KEY with the round key at
;   [keys + offset] - confirm against their definitions.
;
;   Memory read relative to the entry value of `keys`:
;     [keys +  0]            counter block; loaded into `iv` on entry and
;                            written back on exit
;     [keys + 32]            first round key (kept in key0)
;     [keys + 32 + ksize_r]  last round key
;
;   The scalar loads/stores use movdqa, so rD and keys must be 16-byte
;   aligned.
;
;   Ctr_start, Ctr_start_2 and Ctr_start_3 are declared with `::` because
;   AesCtr_Code_HW_256 jumps to them.
601*f6dc9357SAndroid Build Coastguard WorkerMY_SEG_PROC AesCtr_Code_HW, 3
602*f6dc9357SAndroid Build Coastguard Worker    Ctr_start::
603*f6dc9357SAndroid Build Coastguard Worker        MY_PROLOG NUM_CTR_REGS
604*f6dc9357SAndroid Build Coastguard Worker
    ; Entry for callers that have already executed MY_PROLOG.
605*f6dc9357SAndroid Build Coastguard Worker    Ctr_start_2::
606*f6dc9357SAndroid Build Coastguard Worker        movdqa  iv, [keys]
607*f6dc9357SAndroid Build Coastguard Worker        add     keys, 32
608*f6dc9357SAndroid Build Coastguard Worker        movdqa  key0, [keys]
609*f6dc9357SAndroid Build Coastguard Worker
        ; keys now points at the last round key. ksize_r becomes the negative
        ; byte offset from there back to the round key that follows key0.
610*f6dc9357SAndroid Build Coastguard Worker        add     keys, ksize_r
611*f6dc9357SAndroid Build Coastguard Worker        neg     ksize_r
612*f6dc9357SAndroid Build Coastguard Worker        add     ksize_r, 16
613*f6dc9357SAndroid Build Coastguard Worker
    ; Entry for callers that arrive with iv, key0, keys and ksize_r already
    ; in the state produced by the code above.
614*f6dc9357SAndroid Build Coastguard Worker    Ctr_start_3::
        ; one = 1 in the lowest dword, all other bits zero (movd
        ; zero-extends).
615*f6dc9357SAndroid Build Coastguard Worker        mov     koffs_x, 1
616*f6dc9357SAndroid Build Coastguard Worker        movd    one, koffs_x
        ; Enter at the loop test so that fewer than `ways` blocks skip
        ; phase 1.
617*f6dc9357SAndroid Build Coastguard Worker        jmp     check2_c
618*f6dc9357SAndroid Build Coastguard Worker
619*f6dc9357SAndroid Build Coastguard Worker    align 16
620*f6dc9357SAndroid Build Coastguard Worker    nextBlocks2_c:
        ; Give each working register the next counter value, then XOR in the
        ; first round key.
621*f6dc9357SAndroid Build Coastguard Worker        WOP     INIT_CTR, 0
622*f6dc9357SAndroid Build Coastguard Worker        mov     koffs_r, ksize_r
623*f6dc9357SAndroid Build Coastguard Worker        ; WOP_KEY pxor, 1 * koffs_r -16
624*f6dc9357SAndroid Build Coastguard Worker        WOP     pxor, key0
    ; Middle rounds, one round key per iteration; koffs_r climbs from
    ; ksize_r to exactly 0.
625*f6dc9357SAndroid Build Coastguard Worker    @@:
626*f6dc9357SAndroid Build Coastguard Worker        WOP_KEY aesenc, 1 * koffs_r
627*f6dc9357SAndroid Build Coastguard Worker        add     koffs_r, 16
628*f6dc9357SAndroid Build Coastguard Worker        jnz     @B
629*f6dc9357SAndroid Build Coastguard Worker        WOP_KEY aesenclast, 0
630*f6dc9357SAndroid Build Coastguard Worker
631*f6dc9357SAndroid Build Coastguard Worker        WOP     XOR_WITH_DATA
632*f6dc9357SAndroid Build Coastguard Worker        WOP     WRITE_TO_DATA
633*f6dc9357SAndroid Build Coastguard Worker        add     rD, ways * 16
634*f6dc9357SAndroid Build Coastguard Worker    check2_c:
        ; Loop while at least `ways` blocks remain. The trailing add undoes
        ; the final borrowing subtract, leaving the leftover block count.
635*f6dc9357SAndroid Build Coastguard Worker        sub     rN, ways
636*f6dc9357SAndroid Build Coastguard Worker        jnc     nextBlocks2_c
637*f6dc9357SAndroid Build Coastguard Worker        add     rN, ways
638*f6dc9357SAndroid Build Coastguard Worker
        ; Rebias for the single-block loop, which consumes two round keys per
        ; iteration: keys now points at the second-to-last round key, and
        ; ksize_r is adjusted so [keys + ksize_r] is still the key after
        ; key0. ksize_r must be a negative multiple of 32 from here on for
        ; that loop to end on zero.
639*f6dc9357SAndroid Build Coastguard Worker        sub     keys, 16
640*f6dc9357SAndroid Build Coastguard Worker        add     ksize_r, 16
641*f6dc9357SAndroid Build Coastguard Worker
642*f6dc9357SAndroid Build Coastguard Worker        jmp     check_c
643*f6dc9357SAndroid Build Coastguard Worker
644*f6dc9357SAndroid Build Coastguard Worker    ; align 16
645*f6dc9357SAndroid Build Coastguard Worker    nextBlock_c:
        ; state = (iv + one) ^ key0
646*f6dc9357SAndroid Build Coastguard Worker        paddq   iv, one
647*f6dc9357SAndroid Build Coastguard Worker        ; movdqa  state, [keys + 1 * koffs_r - 16]
648*f6dc9357SAndroid Build Coastguard Worker        movdqa  state, key0
649*f6dc9357SAndroid Build Coastguard Worker        mov     koffs_r, ksize_r
650*f6dc9357SAndroid Build Coastguard Worker        pxor    state, iv
651*f6dc9357SAndroid Build Coastguard Worker
652*f6dc9357SAndroid Build Coastguard Worker    @@:
653*f6dc9357SAndroid Build Coastguard Worker        OP_KEY  aesenc, 1 * koffs_r
654*f6dc9357SAndroid Build Coastguard Worker        OP_KEY  aesenc, 1 * koffs_r + 16
655*f6dc9357SAndroid Build Coastguard Worker        add     koffs_r, 32
656*f6dc9357SAndroid Build Coastguard Worker        jnz     @B
        ; The last two round keys sit at offsets 0 and 16 after the rebias
        ; above.
657*f6dc9357SAndroid Build Coastguard Worker        OP_KEY  aesenc, 0
658*f6dc9357SAndroid Build Coastguard Worker        OP_KEY  aesenclast, 16
659*f6dc9357SAndroid Build Coastguard Worker
        ; data ^= encrypted counter, in place.
660*f6dc9357SAndroid Build Coastguard Worker        pxor    state, [rD]
661*f6dc9357SAndroid Build Coastguard Worker        movdqa  [rD], state
662*f6dc9357SAndroid Build Coastguard Worker        add     rD, 16
663*f6dc9357SAndroid Build Coastguard Worker    check_c:
        ; CF is set only when rN was 0 before the subtract, so the loop body
        ; runs exactly rN times.
664*f6dc9357SAndroid Build Coastguard Worker        sub     rN, 1
665*f6dc9357SAndroid Build Coastguard Worker        jnc     nextBlock_c
666*f6dc9357SAndroid Build Coastguard Worker
        ; Write the updated counter back to where it was loaded from; the
        ; expression cancels the rebasing of keys and ksize_r and resolves to
        ; the entry value of keys.
667*f6dc9357SAndroid Build Coastguard Worker        ; movdqa  [keys - 32], iv
668*f6dc9357SAndroid Build Coastguard Worker        movdqa  [keys + 1 * ksize_r - 16 - 32], iv
669*f6dc9357SAndroid Build Coastguard WorkerMY_EPILOG
670*f6dc9357SAndroid Build Coastguard Worker
671*f6dc9357SAndroid Build Coastguard Worker
; AesCtr_Code_HW_256
;   CTR-mode entry point with a 256-bit (ymm) main loop that handles
;   2 * ways blocks per iteration. The ymm code is assembled only when
;   use_vaes_256 is defined; otherwise the routine is a bare jump to
;   Ctr_start in AesCtr_Code_HW.
;
;   With use_vaes_256:
;     - fewer than 2 * ways blocks: jump to Ctr_start_2 (MY_PROLOG has
;       already run here, so the prolog is not repeated);
;     - otherwise: copy every round key, duplicated to 32 bytes, into an
;       aligned stack buffer, run the ymm loop, then jump to Ctr_start_3 so
;       AesCtr_Code_HW processes the remaining blocks and writes the counter
;       back.
;
;   NOTE(review): r4 is presumably the stack-pointer alias from 7zAsm.asm
;   (it is adjusted around push/pop below). The AVX__* macros, key_ymm,
;   iv_ymm, keys2 and NUM_AES_KEYS_MAX are defined outside this section.
672*f6dc9357SAndroid Build Coastguard WorkerMY_PROC AesCtr_Code_HW_256, 3
673*f6dc9357SAndroid Build Coastguard Worker  ifdef use_vaes_256
674*f6dc9357SAndroid Build Coastguard Worker        MY_PROLOG NUM_CTR_REGS
675*f6dc9357SAndroid Build Coastguard Worker
        ; Too few blocks for even one ymm iteration: use the 128-bit code.
676*f6dc9357SAndroid Build Coastguard Worker        cmp    rN, ways * 2
677*f6dc9357SAndroid Build Coastguard Worker        jb     Ctr_start_2
678*f6dc9357SAndroid Build Coastguard Worker
        ; Both 128-bit lanes of iv_ymm = counter block; both lanes of
        ; key0_ymm = first round key.
679*f6dc9357SAndroid Build Coastguard Worker        vbroadcasti128  iv_ymm, xmmword ptr [keys]
680*f6dc9357SAndroid Build Coastguard Worker        add     keys, 32
681*f6dc9357SAndroid Build Coastguard Worker        vbroadcasti128  key0_ymm, xmmword ptr [keys]
        ; vmovd zeroes everything above the low dword, so one_ymm holds 1 in
        ; the low lane only. The subtract leaves iv_ymm = (low lane:
        ; counter - 1, high lane: counter).
682*f6dc9357SAndroid Build Coastguard Worker        mov     koffs_x, 1
683*f6dc9357SAndroid Build Coastguard Worker        vmovd           one, koffs_x
684*f6dc9357SAndroid Build Coastguard Worker        vpsubq  iv_ymm, iv_ymm, one_ymm
        ; one = 2, then replicate it into the high lane. AVX__CTR_START
        ; presumably adds one_ymm to iv_ymm, giving two consecutive counter
        ; values per ymm register - confirm against the macro definitions.
685*f6dc9357SAndroid Build Coastguard Worker        vpaddq  one, one, one
686*f6dc9357SAndroid Build Coastguard Worker        AVX__vinserti128_TO_HIGH     one_ymm, one
687*f6dc9357SAndroid Build Coastguard Worker
        ; keys now points at the last round key.
        ; koffs_r = 16 - (entry ksize_r): negative byte offset from keys back
        ;           to the round key that follows key0 (source stride 16).
        ; ksize_r = 2 * koffs_r: the same span measured in the stack copy,
        ;           where every key occupies 32 bytes.
688*f6dc9357SAndroid Build Coastguard Worker        add     keys, ksize_r
689*f6dc9357SAndroid Build Coastguard Worker        sub     ksize_x, 16
690*f6dc9357SAndroid Build Coastguard Worker        neg     ksize_r
691*f6dc9357SAndroid Build Coastguard Worker        mov     koffs_r, ksize_r
692*f6dc9357SAndroid Build Coastguard Worker        add     ksize_r, ksize_r
693*f6dc9357SAndroid Build Coastguard Worker
        ; Stack buffer for the duplicated round keys. keys2 is rounded down
        ; to a 32-byte boundary (required by the vmovdqa stores) and
        ; addresses the copy of the LAST round key; earlier keys are stored
        ; at negative offsets from it. NOTE(review): the reservation must
        ; cover keys2 + 2 * koffs_r at its most negative plus the alignment
        ; slack; this depends on NUM_AES_KEYS_MAX - confirm.
694*f6dc9357SAndroid Build Coastguard Worker        AVX_STACK_SUB = ((NUM_AES_KEYS_MAX + 1 - 1) * 32)
695*f6dc9357SAndroid Build Coastguard Worker        push    keys2
696*f6dc9357SAndroid Build Coastguard Worker        lea     keys2, [r4 - 32]
697*f6dc9357SAndroid Build Coastguard Worker        sub     r4, AVX_STACK_SUB
698*f6dc9357SAndroid Build Coastguard Worker        and     keys2, -32
        ; The last round key goes to offset 0 of the buffer.
699*f6dc9357SAndroid Build Coastguard Worker        vbroadcasti128  key_ymm, xmmword ptr [keys]
700*f6dc9357SAndroid Build Coastguard Worker        vmovdqa         ymmword ptr [keys2], key_ymm
     ; Copy the remaining keys: source offset koffs_r, destination offset
     ; 2 * koffs_r, until koffs_r reaches 0.
701*f6dc9357SAndroid Build Coastguard Worker     @@:
702*f6dc9357SAndroid Build Coastguard Worker        vbroadcasti128  key_ymm, xmmword ptr [keys + 1 * koffs_r]
703*f6dc9357SAndroid Build Coastguard Worker        vmovdqa         ymmword ptr [keys2 + koffs_r * 2], key_ymm
704*f6dc9357SAndroid Build Coastguard Worker        add     koffs_r, 16
705*f6dc9357SAndroid Build Coastguard Worker        jnz     @B
706*f6dc9357SAndroid Build Coastguard Worker
        ; Pre-subtract the first batch; the jb above guarantees this cannot
        ; borrow.
707*f6dc9357SAndroid Build Coastguard Worker        sub     rN, ways * 2
708*f6dc9357SAndroid Build Coastguard Worker
709*f6dc9357SAndroid Build Coastguard Worker    align 16
710*f6dc9357SAndroid Build Coastguard Worker    avx_ctr_nextBlock2:
711*f6dc9357SAndroid Build Coastguard Worker        mov             koffs_r, ksize_r
712*f6dc9357SAndroid Build Coastguard Worker        AVX__WOP        AVX__CTR_START
713*f6dc9357SAndroid Build Coastguard Worker        ; AVX__WOP_KEY    AVX__CTR_START, 1 * koffs_r - 32
    ; Middle rounds: one 32-byte key slot per iteration, koffs_r climbing
    ; from ksize_r to exactly 0. The key operand is presumably taken from
    ; the keys2 buffer inside AVX__WOP_KEY - confirm.
714*f6dc9357SAndroid Build Coastguard Worker    @@:
715*f6dc9357SAndroid Build Coastguard Worker        AVX__WOP_KEY    AVX__VAES_ENC, 1 * koffs_r
716*f6dc9357SAndroid Build Coastguard Worker        add     koffs_r, 32
717*f6dc9357SAndroid Build Coastguard Worker        jnz     @B
718*f6dc9357SAndroid Build Coastguard Worker        AVX__WOP_KEY    AVX__VAES_ENC_LAST, 0
719*f6dc9357SAndroid Build Coastguard Worker
720*f6dc9357SAndroid Build Coastguard Worker        AVX__WOP        AVX__XOR_WITH_DATA
721*f6dc9357SAndroid Build Coastguard Worker        AVX__WOP        AVX__WRITE_TO_DATA
722*f6dc9357SAndroid Build Coastguard Worker
        ; Each ymm register covers two blocks, so one iteration advances by
        ; ways * 32 bytes. Loop while another full batch remains; the final
        ; add restores the leftover block count for the 128-bit tail.
723*f6dc9357SAndroid Build Coastguard Worker        add     rD, ways * 32
724*f6dc9357SAndroid Build Coastguard Worker        sub     rN, ways * 2
725*f6dc9357SAndroid Build Coastguard Worker        jnc     avx_ctr_nextBlock2
726*f6dc9357SAndroid Build Coastguard Worker        add     rN, ways * 2
727*f6dc9357SAndroid Build Coastguard Worker
        ; Hand over to the 128-bit code: iv = high lane of iv_ymm, and
        ; ksize_r is halved back to the 16-byte-stride offset that
        ; Ctr_start_3 expects. keys and key0 already hold the values
        ; Ctr_start_2 would have produced.
728*f6dc9357SAndroid Build Coastguard Worker        vextracti128    iv, iv_ymm, 1
729*f6dc9357SAndroid Build Coastguard Worker        sar     ksize_r, 1
730*f6dc9357SAndroid Build Coastguard Worker
        ; Release the key buffer and restore keys2.
731*f6dc9357SAndroid Build Coastguard Worker        add     r4, AVX_STACK_SUB
732*f6dc9357SAndroid Build Coastguard Worker        pop     keys2
733*f6dc9357SAndroid Build Coastguard Worker
        ; Clear the upper ymm halves before continuing in legacy-SSE code.
734*f6dc9357SAndroid Build Coastguard Worker        vzeroupper
735*f6dc9357SAndroid Build Coastguard Worker        jmp     Ctr_start_3
736*f6dc9357SAndroid Build Coastguard Worker  else
        ; No VAES support assembled in: MY_PROLOG has not run on this path,
        ; so enter AesCtr_Code_HW at its very beginning.
737*f6dc9357SAndroid Build Coastguard Worker        jmp     Ctr_start
738*f6dc9357SAndroid Build Coastguard Worker  endif
739*f6dc9357SAndroid Build Coastguard WorkerMY_ENDP
; Closes the segment opened by `MY_SEG_PROC AesCtr_Code_HW`.
740*f6dc9357SAndroid Build Coastguard WorkerMY_SEG_ENDP
741*f6dc9357SAndroid Build Coastguard Worker
742*f6dc9357SAndroid Build Coastguard Workerend
743