xref: /aosp_15_r20/external/lzma/Asm/x86/Sha256Opt.asm (revision f6dc9357d832569d4d1f5d24eacdb3935a1ae8e6)
; Sha256Opt.asm -- SHA-256 optimized code for SHA-256 x86 hardware instructions
; 2024-06-16 : Igor Pavlov : Public domain
3*f6dc9357SAndroid Build Coastguard Worker
4*f6dc9357SAndroid Build Coastguard Workerinclude 7zAsm.asm
5*f6dc9357SAndroid Build Coastguard Worker
6*f6dc9357SAndroid Build Coastguard WorkerMY_ASM_START
7*f6dc9357SAndroid Build Coastguard Worker
8*f6dc9357SAndroid Build Coastguard Worker; .data
9*f6dc9357SAndroid Build Coastguard Worker; public K
10*f6dc9357SAndroid Build Coastguard Worker
; We could use the external SHA256_K_ARRAY defined in Sha256.c instead,
; but then we must guarantee that SHA256_K_ARRAY is 16-byte aligned.
13*f6dc9357SAndroid Build Coastguard Worker
14*f6dc9357SAndroid Build Coastguard WorkerCOMMENT @
15*f6dc9357SAndroid Build Coastguard Workerifdef x64
16*f6dc9357SAndroid Build Coastguard WorkerK_CONST equ SHA256_K_ARRAY
17*f6dc9357SAndroid Build Coastguard Workerelse
18*f6dc9357SAndroid Build Coastguard WorkerK_CONST equ _SHA256_K_ARRAY
19*f6dc9357SAndroid Build Coastguard Workerendif
20*f6dc9357SAndroid Build Coastguard WorkerEXTRN   K_CONST:xmmword
21*f6dc9357SAndroid Build Coastguard Worker@
22*f6dc9357SAndroid Build Coastguard Worker
; Read-only data used by Sha256_UpdateBlocks_HW. Each table starts on a
; 16-byte boundary because it is read with movdqa.
CONST   SEGMENT READONLY

; pshufb control bytes: reverse the byte order within every 32-bit lane
; (applied to the freshly loaded message words in LOAD_W).
ALIGN 16
Reverse_Endian_Mask DB  3,  2,  1,  0,   7,  6,  5,  4,  11, 10,  9,  8,  15, 14, 13, 12

; The 64 SHA-256 round constants, four per row; RND4 n reads row n.
; NOTE(review): presumably re-enabling the "COMMENT @" / "@" pair below
; disables this local table in favour of an external array -- confirm
; before relying on it.
; COMMENT @
ALIGN 16
K_CONST DD 0428a2f98H, 071374491H, 0b5c0fbcfH, 0e9b5dba5H     ; K[ 0.. 3]
        DD 03956c25bH, 059f111f1H, 0923f82a4H, 0ab1c5ed5H     ; K[ 4.. 7]
        DD 0d807aa98H, 012835b01H, 0243185beH, 0550c7dc3H     ; K[ 8..11]
        DD 072be5d74H, 080deb1feH, 09bdc06a7H, 0c19bf174H     ; K[12..15]
        DD 0e49b69c1H, 0efbe4786H, 00fc19dc6H, 0240ca1ccH     ; K[16..19]
        DD 02de92c6fH, 04a7484aaH, 05cb0a9dcH, 076f988daH     ; K[20..23]
        DD 0983e5152H, 0a831c66dH, 0b00327c8H, 0bf597fc7H     ; K[24..27]
        DD 0c6e00bf3H, 0d5a79147H, 006ca6351H, 014292967H     ; K[28..31]
        DD 027b70a85H, 02e1b2138H, 04d2c6dfcH, 053380d13H     ; K[32..35]
        DD 0650a7354H, 0766a0abbH, 081c2c92eH, 092722c85H     ; K[36..39]
        DD 0a2bfe8a1H, 0a81a664bH, 0c24b8b70H, 0c76c51a3H     ; K[40..43]
        DD 0d192e819H, 0d6990624H, 0f40e3585H, 0106aa070H     ; K[44..47]
        DD 019a4c116H, 01e376c08H, 02748774cH, 034b0bcb5H     ; K[48..51]
        DD 0391c0cb3H, 04ed8aa4aH, 05b9cca4fH, 0682e6ff3H     ; K[52..55]
        DD 0748f82eeH, 078a5636fH, 084c87814H, 08cc70208H     ; K[56..59]
        DD 090befffaH, 0a4506cebH, 0bef9a3f7H, 0c67178f2H     ; K[60..63]
; @

CONST   ENDS
50*f6dc9357SAndroid Build Coastguard Worker
51*f6dc9357SAndroid Build Coastguard Worker; _TEXT$SHA256OPT SEGMENT 'CODE'
52*f6dc9357SAndroid Build Coastguard Worker
; 32-bit builds only: select the .686 instruction set and enable XMM operands.
IFNDEF x64
    .686
    .xmm
ENDIF

; rTable is a register rather than the K_CONST symbol itself:
; jwasm-based assemblers for linux, combined with the linker from new
; versions of binutils, can generate incorrect code for
; load [ARRAY + offset] instructions. Since 22.00 the address of K_CONST
; is loaded into rTable to avoid that jwasm+binutils problem.
        rTable  EQU r0
        ; rTable  EQU K_CONST

; rNum       : block counter register.
; LOCAL_SIZE : bytes of stack scratch reserved by MY_PROLOG. It is left
;              undefined for x64 Linux, where MY_PROLOG reserves nothing.
IFDEF x64
        rNum    EQU REG_ABI_PARAM_2
    IF (IS_LINUX EQ 0)
        ; two 16-byte slots: xmm8 and xmm9 are saved there
        LOCAL_SIZE EQU (16 * 2)
    ENDIF
ELSE
        rNum    EQU r3
        ; one 16-byte slot: state0_save lives at [r4]
        LOCAL_SIZE EQU (16 * 1)
ENDIF

; rState : pointer to the hash state (read at entry, written at exit).
; rData  : pointer to the input, advanced by 64 per processed block.
rState  EQU REG_ABI_PARAM_0
rData   EQU REG_ABI_PARAM_1
76*f6dc9357SAndroid Build Coastguard Worker
77*f6dc9357SAndroid Build Coastguard Worker
78*f6dc9357SAndroid Build Coastguard Worker
79*f6dc9357SAndroid Build Coastguard Worker
80*f6dc9357SAndroid Build Coastguard Worker
81*f6dc9357SAndroid Build Coastguard Worker
; The SHA-256 instructions are emitted as raw bytes instead of mnemonics.
; NOTE(review): presumably so that assemblers without SHA support can still
; build this file -- the source does not state the reason.

; Third opcode byte of each instruction (it follows 0F 38).
cmd_sha256rnds2 EQU 0cbH
cmd_sha256msg1  EQU 0ccH
cmd_sha256msg2  EQU 0cdH

; MY_SHA_INSTR cmd, a1, a2
;   Emits the four bytes 0F 38 <cmd> <modrm>, modrm = 0C0h + a1 * 8 + a2.
;   That is the register-to-register form: a1 is the number of the first
;   (destination) xmm operand, a2 the number of the second (source) one.
;   No REX prefix is emitted, so both numbers have to be in 0..7.
MY_SHA_INSTR MACRO cmd, a1, a2
        DB 0fH, 038H, cmd, (0c0H + a1 * 8 + a2)
ENDM

; sha256rnds2 xmm<a1>, xmm<a2>
MY_sha256rnds2 MACRO a1, a2
        MY_SHA_INSTR  cmd_sha256rnds2, a1, a2
ENDM

; sha256msg1 xmm<a1>, xmm<a2>
MY_sha256msg1 MACRO a1, a2
        MY_SHA_INSTR  cmd_sha256msg1, a1, a2
ENDM

; sha256msg2 xmm<a1>, xmm<a2>
MY_sha256msg2 MACRO a1, a2
        MY_SHA_INSTR  cmd_sha256msg2, a1, a2
ENDM
101*f6dc9357SAndroid Build Coastguard Worker
; MY_PROLOG -- entry sequence of Sha256_UpdateBlocks_HW.
;
;   x64, IS_LINUX = 0  : stores xmm6/xmm7 at [r4 + 8] and [r4 + 24], lowers
;                        r4 by LOCAL_SIZE + 8, then stores xmm8/xmm9 at the
;                        new [r4] and [r4 + 16].
;   x64, IS_LINUX != 0 : expands to nothing.
;   x86                : saves r3 and r5, keeps the incoming r4 in r5, reads
;                        the stack arguments, then rounds r4 down to a
;                        multiple of 16 and reserves LOCAL_SIZE bytes.
;
; NOTE(review): the movdqa stores need 16-byte aligned addresses. That holds
; if r4 is 16n + 8 at entry and the 32 bytes above the return address may be
; used by this function (Microsoft x64 home area) -- confirm that MY_PROC
; pushes nothing before this macro runs.
; NOTE(review): r4 is changed and xmm6-xmm9 are saved without any unwind
; directives visible in this file -- check whether that matters for callers.
MY_PROLOG MACRO
    IFDEF x64
      IF (IS_LINUX EQ 0)
        ; xmm6 / xmm7 go into the 32 bytes just above the return address
        movdqa  [r4 + 8], xmm6
        movdqa  [r4 + 8 + 16], xmm7
        ; the extra 8 turns r4 = 16n + 8 into a 16-byte aligned r4
        sub     r4, LOCAL_SIZE + 8
        movdqa  [r4     ], xmm8
        movdqa  [r4 + 16], xmm9
      ENDIF
    ELSE ; x86
        ; r3 is rNum in this build; r5 keeps the unaligned r4 for MY_EPILOG
        push    r3
        push    r5
        mov     r5, r4
        ; Offset of the first stack argument: the two pushes above plus one
        ; more slot (presumably the return address).
        NUM_PUSH_REGS   EQU 2
        PARAM_OFFSET    EQU (REG_SIZE * (1 + NUM_PUSH_REGS))
      IF (IS_CDECL GT 0)
        ; cdecl: all three arguments are read from the stack
        mov     rState, [r4 + PARAM_OFFSET]
        mov     rData,  [r4 + PARAM_OFFSET + REG_SIZE * 1]
        mov     rNum,   [r4 + PARAM_OFFSET + REG_SIZE * 2]
      ELSE ; fastcall
        ; fastcall: only rNum is read from the stack here
        mov     rNum,   [r4 + PARAM_OFFSET]
      ENDIF
        ; aligned scratch area; [r4] is used as state0_save
        and     r4, -16
        sub     r4, LOCAL_SIZE
    ENDIF
ENDM
128*f6dc9357SAndroid Build Coastguard Worker
; MY_EPILOG -- exit sequence of Sha256_UpdateBlocks_HW; undoes MY_PROLOG in
; reverse order and finishes with MY_ENDP.
;
;   x64, IS_LINUX = 0  : reloads xmm8/xmm9 from [r4] and [r4 + 16], raises r4
;                        by LOCAL_SIZE + 8, reloads xmm6/xmm7 from [r4 + 8]
;                        and [r4 + 24].
;   x64, IS_LINUX != 0 : only MY_ENDP.
;   x86                : restores r4 from r5, then pops r5 and r3.
;
; NOTE(review): MY_ENDP is defined outside this file; presumably it emits
; the return instruction and closes the procedure -- confirm in 7zAsm.asm.
MY_EPILOG MACRO
    IFDEF x64
      IF (IS_LINUX EQ 0)
        movdqa  xmm8, [r4]
        movdqa  xmm9, [r4 + 16]
        add     r4, LOCAL_SIZE + 8
        movdqa  xmm6, [r4 + 8]
        movdqa  xmm7, [r4 + 8 + 16]
      ENDIF
    ELSE ; x86
        ; r5 still holds the r4 value from before the alignment
        mov     r4, r5
        pop     r5
        pop     r3
    ENDIF
    MY_ENDP
ENDM
145*f6dc9357SAndroid Build Coastguard Worker
146*f6dc9357SAndroid Build Coastguard Worker
; XMM register assignment
;   xmm0        msg and tmp (and mask2 on x86)
;   xmm1        state1_save
;   xmm2, xmm3  state0, state1
;   xmm4..xmm7  the four message registers xmm(w_regs + n), n = 0..3
;   xmm8, xmm9  state0_save and mask2 (x64 only)

; msg and tmp share xmm0: msg holds K + W for the round instructions,
; tmp is scratch for the code in between.
msg         EQU xmm0
tmp         EQU xmm0

; Register numbers as plain integers, as needed by the MY_sha256* encoders.
state0_N    EQU 2
state1_N    EQU 3
w_regs      EQU 4

; Register names built from the numbers above.
state0      EQU @CatStr(xmm, %state0_N)
state1      EQU @CatStr(xmm, %state1_N)

state1_save EQU xmm1

IFDEF x64
        ; dedicated registers for the saved state0 and for the byte-swap mask
        state0_save  EQU  xmm8
        mask2        EQU  xmm9
ELSE
        ; x86 has only xmm0-xmm7: state0 is saved in the aligned stack slot
        ; and the mask shares xmm0, so it is reloaded for every block
        state0_save  EQU  [r4]
        mask2        EQU  xmm0
ENDIF
166*f6dc9357SAndroid Build Coastguard Worker
; LOAD_MASK
;   Loads the 16-byte Reverse_Endian_Mask table into mask2.
;   The table is 16-byte aligned, so an aligned load is used.
LOAD_MASK MACRO
        movdqa  mask2, XMMWORD PTR Reverse_Endian_Mask
ENDM
170*f6dc9357SAndroid Build Coastguard Worker
; LOAD_W k        (k is mandatory)
;   Reads the 16 input bytes at [rData + 16 * k] into xmm(w_regs + k) with an
;   unaligned load, then reorders the bytes with pshufb and mask2.
;   mask2 must already hold Reverse_Endian_Mask (see LOAD_MASK).
LOAD_W MACRO k:req
        movdqu  @CatStr(xmm, %(w_regs + k)), [rData + (16 * (k))]
        pshufb  @CatStr(xmm, %(w_regs + k)), mask2
ENDM
175*f6dc9357SAndroid Build Coastguard Worker
176*f6dc9357SAndroid Build Coastguard Worker
; Look-ahead distances, counted in groups of four rounds, of the two
; message-schedule steps inside RND4: while group k runs, sha256msg1 is
; issued for group k + pre1 and palignr / paddd / sha256msg2 for group
; k + pre2.
; Required: pre1 <= 4 && pre2 >= 1 && pre1 > pre2 && (pre1 - pre2) <= 1
pre1 EQU 3
pre2 EQU 2


; RND4 k
;   Code for one group of four rounds, k = 0..15.
;   Message group n lives in xmm(w_regs + (n mod 4)).
;   Uses xmm0 as msg and, after the second round instruction, as tmp.
;   The two conditional parts build message groups 4..15 ahead of their use;
;   for the other values of k they expand to nothing.
RND4 MACRO k
        ; msg = row k of the constant table + message group k
        movdqa  msg, xmmword ptr [rTable + (k) * 16]
        paddd   msg, @CatStr(xmm, %(w_regs + ((k + 0) mod 4)))
        MY_sha256rnds2 state0_N, state1_N
        ; move the upper two dwords of msg into the lower two
        pshufd  msg, msg, 0eH

    IF (k GE (4 - pre1)) AND (k LT (16 - pre1))
        ; first schedule step for group k + pre1:
        ; w4[0] = msg1(w4[-4], w4[-3])
        MY_sha256msg1 (w_regs + ((k + pre1) mod 4)), (w_regs + ((k + pre1 - 3) mod 4))
    ENDIF

        MY_sha256rnds2 state1_N, state0_N

    IF (k GE (4 - pre2)) AND (k LT (16 - pre2))
        ; second schedule step for group k + pre2:
        ; tmp = the 16 bytes starting 4 bytes into group (k + pre2 - 2),
        ;       continuing into group (k + pre2 - 1)
        movdqa  tmp, @CatStr(xmm, %(w_regs + ((k + pre2 - 1) mod 4)))
        palignr tmp, @CatStr(xmm, %(w_regs + ((k + pre2 - 2) mod 4))), 4
        paddd   @CatStr(xmm, %(w_regs + ((k + pre2) mod 4))), tmp
        ; w4[0] = msg2(w4[0], w4[-1])
        MY_sha256msg2 %(w_regs + ((k + pre2) mod 4)), %(w_regs + ((k + pre2 - 1) mod 4))
    ENDIF
ENDM
204*f6dc9357SAndroid Build Coastguard Worker
205*f6dc9357SAndroid Build Coastguard Worker
206*f6dc9357SAndroid Build Coastguard Worker
207*f6dc9357SAndroid Build Coastguard Worker
208*f6dc9357SAndroid Build Coastguard Worker
; REVERSE_STATE
;   Regroups the eight state words held in state0 / state1.
;   Lane comments list the four dwords from the highest lane to the lowest.
;     in  : state0 = dcba, state1 = hgfe
;     out : state0 = cdgh, state1 = abef
;   Running the macro on its own output gives the input layout back, so the
;   same macro is used after loading and before storing the state.
;   Clobbers tmp (xmm0).
REVERSE_STATE MACRO
        pshufd      tmp,    state0, 01bH    ; tmp    = abcd  (state0 reversed)
        pshufd      state0, state1, 01bH    ; state0 = efgh  (state1 reversed)
        movdqa      state1, state0          ; state1 = efgh
        punpcklqdq  state0, tmp             ; state0 = cdgh  (low halves)
        punpckhqdq  state1, tmp             ; state1 = abef  (high halves)
ENDM
218*f6dc9357SAndroid Build Coastguard Worker
219*f6dc9357SAndroid Build Coastguard Worker
;-----------------------------------------------------------------------
; Sha256_UpdateBlocks_HW  (3 arguments)
;
; Updates a SHA-256 state with whole 64-byte blocks, using the SHA
; instructions emitted by the MY_sha256* macros.
;
; In:    rState = pointer to the 32-byte state; accessed with movdqu, so no
;                 alignment is needed
;        rData  = pointer to the input; 64 bytes are read per block, with
;                 unaligned loads
;        rNum   = number of blocks; 0 leaves the state untouched
; Out:   the state at [rState] is replaced by the updated state
; Uses:  rTable, rData, rNum, xmm0-xmm7 and flags; on x64 also xmm8 and xmm9.
;        MY_PROLOG / MY_EPILOG save and restore xmm6-xmm9 on non-Linux x64,
;        and r3 / r5 on x86.
;-----------------------------------------------------------------------
MY_PROC Sha256_UpdateBlocks_HW, 3
    MY_PROLOG

        ; nothing to do for zero blocks: go straight to the epilog
        test    rNum, rNum
        jz      end_c

        ; only the round loop reads the constant table
        lea     rTable, [K_CONST]

        movdqu  state0, [rState]        ; dcba
        movdqu  state1, [rState + 16]   ; hgfe

        ; -> state0 = cdgh, state1 = abef
        REVERSE_STATE

        ; x64: mask2 has a register of its own, so one load is enough
        IFDEF x64
        LOAD_MASK
        ENDIF

    ALIGN 16
    nextBlock:
        ; keep the state from before this block for the final addition
        movdqa  state0_save, state0
        movdqa  state1_save, state1

        ; x86: mask2 is xmm0, which RND4 overwrites, so reload it per block
        IFNDEF x64
        LOAD_MASK
        ENDIF

        ; load the 64-byte block as four byte-swapped message registers
        LOAD_W 0
        LOAD_W 1
        LOAD_W 2
        LOAD_W 3


        ; 16 groups of four rounds, fully unrolled at assembly time
        k = 0
        REPT 16
          RND4 k
          k = k + 1
        ENDM

        ; add the state saved at the top of the loop
        paddd   state0, state0_save
        paddd   state1, state1_save

        add     rData, 64
        sub     rNum, 1
        jnz     nextBlock

        ; back to the memory layout (dcba / hgfe) before storing
        REVERSE_STATE

        movdqu  [rState], state0
        movdqu  [rState + 16], state1

  end_c:
MY_EPILOG
272*f6dc9357SAndroid Build Coastguard Worker
273*f6dc9357SAndroid Build Coastguard Worker; _TEXT$SHA256OPT ENDS
274*f6dc9357SAndroid Build Coastguard Worker
275*f6dc9357SAndroid Build Coastguard Workerend
276