xref: /aosp_15_r20/external/lzma/Asm/x86/XzCrc64Opt.asm (revision f6dc9357d832569d4d1f5d24eacdb3935a1ae8e6)
1*f6dc9357SAndroid Build Coastguard Worker; XzCrc64Opt.asm -- CRC64 calculation : optimized version
2*f6dc9357SAndroid Build Coastguard Worker; 2023-12-08 : Igor Pavlov : Public domain
3*f6dc9357SAndroid Build Coastguard Worker
; Build configuration for this file.
; r0/x0-style register names and the MY_* / REG_ABI_* helpers used below are not
; defined in this file; presumably they come from 7zAsm.asm (confirm there).
4*f6dc9357SAndroid Build Coastguard Workerinclude 7zAsm.asm
5*f6dc9357SAndroid Build Coastguard Worker
6*f6dc9357SAndroid Build Coastguard WorkerMY_ASM_START
7*f6dc9357SAndroid Build Coastguard Worker
; NUM_WORDS: number of 32-bit words consumed per main-loop step (the loops
; advance by NUM_WORDS * 4 bytes). It also forms the exported procedure name,
; XzCrc64UpdateT<NUM_WORDS * 4>.
8*f6dc9357SAndroid Build Coastguard WorkerNUM_WORDS       equ     3
9*f6dc9357SAndroid Build Coastguard Worker
; Reject NUM_WORDS values outside 1..64 at assembly time.
10*f6dc9357SAndroid Build Coastguard Workerif (NUM_WORDS lt 1) or (NUM_WORDS gt 64)
11*f6dc9357SAndroid Build Coastguard Worker.err <num_words_IS_INCORRECT>
12*f6dc9357SAndroid Build Coastguard Workerendif
13*f6dc9357SAndroid Build Coastguard Worker
; NUM_SKIP_BYTES: bytes per step beyond the first two words; used below as the
; base sub-table index for the lookups on the current CRC state.
14*f6dc9357SAndroid Build Coastguard WorkerNUM_SKIP_BYTES  equ     ((NUM_WORDS - 2) * 4)
15*f6dc9357SAndroid Build Coastguard Worker
16*f6dc9357SAndroid Build Coastguard Worker
; MOVZXLO dest, src
;   Zero-extends the register named <src>_L into dest.
;   (<src>_L is presumably the low-byte alias of src from 7zAsm.asm - confirm.)
17*f6dc9357SAndroid Build Coastguard WorkerMOVZXLO macro dest:req, src:req
18*f6dc9357SAndroid Build Coastguard Worker        movzx   dest, @CatStr(src, _L)
19*f6dc9357SAndroid Build Coastguard Workerendm
20*f6dc9357SAndroid Build Coastguard Worker
; MOVZXHI dest, src
;   Zero-extends the register named <src>_H into dest.
;   (<src>_H is presumably the second-byte alias of src from 7zAsm.asm - confirm.)
21*f6dc9357SAndroid Build Coastguard WorkerMOVZXHI macro dest:req, src:req
22*f6dc9357SAndroid Build Coastguard Worker        movzx   dest, @CatStr(src, _H)
23*f6dc9357SAndroid Build Coastguard Workerendm
24*f6dc9357SAndroid Build Coastguard Worker
25*f6dc9357SAndroid Build Coastguard Worker
; ------------------------------------------------------------------
; x64 build. Working registers used by the macros and procedure below:
;   rD = data pointer (re-used as a running offset inside the main loop)
;   rN = byte count   (re-used as the loop base inside the main loop)
;   rT = base address of the lookup tables
26*f6dc9357SAndroid Build Coastguard Workerifdef x64
27*f6dc9357SAndroid Build Coastguard Worker
28*f6dc9357SAndroid Build Coastguard WorkerrD      equ  r11
29*f6dc9357SAndroid Build Coastguard WorkerrN      equ  r10
30*f6dc9357SAndroid Build Coastguard WorkerrT      equ  r9
31*f6dc9357SAndroid Build Coastguard Worker
; CRC_OP op, dest, src, t
;   Emits `op dest, <table entry>` where the entry is the QWORD at
;   rT + <src>_R * 8 + 0800h * t.
;   0800h bytes = 256 eight-byte entries, so t selects a sub-table and <src>_R
;   is the index inside it; the caller must have reduced src to a byte value.
32*f6dc9357SAndroid Build Coastguard WorkerCRC_OP macro op:req, dest:req, src:req, t:req
33*f6dc9357SAndroid Build Coastguard Worker        op      dest, QWORD PTR [rT + @CatStr(src, _R) * 8 + 0800h * (t)]
34*f6dc9357SAndroid Build Coastguard Workerendm
35*f6dc9357SAndroid Build Coastguard Worker
; CRC_XOR dest, src, t
;   dest ^= entry <src> of sub-table t (see CRC_OP).
36*f6dc9357SAndroid Build Coastguard WorkerCRC_XOR macro dest:req, src:req, t:req
37*f6dc9357SAndroid Build Coastguard Worker        CRC_OP  xor, dest, src, t
38*f6dc9357SAndroid Build Coastguard Workerendm
39*f6dc9357SAndroid Build Coastguard Worker
; CRC_MOV dest, src, t
;   dest = entry <src> of sub-table t (see CRC_OP); starts a new accumulator.
40*f6dc9357SAndroid Build Coastguard WorkerCRC_MOV macro dest:req, src:req, t:req
41*f6dc9357SAndroid Build Coastguard Worker        CRC_OP  mov, dest, src, t
42*f6dc9357SAndroid Build Coastguard Workerendm
43*f6dc9357SAndroid Build Coastguard Worker
; CRC1b (x64): process one input byte.
;   Reads the byte at [rD] and advances rD, XORs it with the low byte of the
;   state, shifts the state in r0 right by 8 and XORs in the sub-table 0 entry.
;   Uses x6 and x3 as scratch.
;   The final `dec rN` leaves ZF set when the byte count reaches zero; the
;   tail loop in the procedure below branches on that flag.
44*f6dc9357SAndroid Build Coastguard WorkerCRC1b macro
45*f6dc9357SAndroid Build Coastguard Worker        movzx   x6, BYTE PTR [rD]
46*f6dc9357SAndroid Build Coastguard Worker        inc     rD
47*f6dc9357SAndroid Build Coastguard Worker        MOVZXLO x3, x0
48*f6dc9357SAndroid Build Coastguard Worker        xor     x6, x3
49*f6dc9357SAndroid Build Coastguard Worker        shr     r0, 8
50*f6dc9357SAndroid Build Coastguard Worker        CRC_XOR r0, x6, 0
51*f6dc9357SAndroid Build Coastguard Worker        dec     rN
52*f6dc9357SAndroid Build Coastguard Workerendm
53*f6dc9357SAndroid Build Coastguard Worker
54*f6dc9357SAndroid Build Coastguard Worker
55*f6dc9357SAndroid Build Coastguard Worker; ALIGN_MASK is 3 (odd NUM_WORDS) or 7 (even NUM_WORDS): the low pointer bits that must be clear before the main loop starts
56*f6dc9357SAndroid Build Coastguard WorkerALIGN_MASK      equ  (7 - (NUM_WORDS and 1) * 4)
57*f6dc9357SAndroid Build Coastguard Worker
; Single-word configuration: the state is kept 4 bytes ahead of the data.
58*f6dc9357SAndroid Build Coastguard Workerif NUM_WORDS eq 1
59*f6dc9357SAndroid Build Coastguard Worker
; src_rN_offset: distance between the current position and the address formed
; by rN + rD inside the main loop.
60*f6dc9357SAndroid Build Coastguard Workersrc_rN_offset   equ  4
61*f6dc9357SAndroid Build Coastguard Worker; + 4 for prefetching next 4-bytes after current iteration
62*f6dc9357SAndroid Build Coastguard WorkerNUM_BYTES_LIMIT equ  (NUM_WORDS * 4 + 4)
; SRCDAT4: the 32-bit word read on each main-loop step.
63*f6dc9357SAndroid Build Coastguard WorkerSRCDAT4         equ  DWORD PTR [rN + rD * 1]
64*f6dc9357SAndroid Build Coastguard Worker
; XOR_NEXT (NUM_WORDS == 1): XOR the 32-bit word at [rD] into the low half of
; the state. The load goes through x1 (a 32-bit load, per the tips' rule that
; 32-bit writes zero the upper half) before the 64-bit XOR. Clobbers r1.
65*f6dc9357SAndroid Build Coastguard WorkerXOR_NEXT macro
66*f6dc9357SAndroid Build Coastguard Worker        mov     x1, [rD]
67*f6dc9357SAndroid Build Coastguard Worker        xor     r0, r1
68*f6dc9357SAndroid Build Coastguard Workerendm
69*f6dc9357SAndroid Build Coastguard Worker
70*f6dc9357SAndroid Build Coastguard Workerelse ; NUM_WORDS > 1
71*f6dc9357SAndroid Build Coastguard Worker
; Multi-word configuration: the state is kept 8 bytes ahead of the data, so
; rN + rD addresses the position 8 bytes past the current one inside the loop.
72*f6dc9357SAndroid Build Coastguard Workersrc_rN_offset   equ 8
73*f6dc9357SAndroid Build Coastguard Worker; + 8 for prefetching next 8-bytes after current iteration
74*f6dc9357SAndroid Build Coastguard WorkerNUM_BYTES_LIMIT equ (NUM_WORDS * 4 + 8)
75*f6dc9357SAndroid Build Coastguard Worker
; XOR_NEXT (NUM_WORDS > 1): XOR the 8 bytes at [rD] into the 64-bit state.
76*f6dc9357SAndroid Build Coastguard WorkerXOR_NEXT macro
77*f6dc9357SAndroid Build Coastguard Worker        xor     r0, QWORD PTR [rD] ; 64-bit read, can be unaligned
78*f6dc9357SAndroid Build Coastguard Workerendm
79*f6dc9357SAndroid Build Coastguard Worker
; LOAD_SRC_MULT4 dest, word_index
;   Loads the input word number word_index, counted in 4-byte units from the
;   current position: rN + rD is src_rN_offset bytes ahead, hence the subtraction.
;   The width of dest selects the access size:
80*f6dc9357SAndroid Build Coastguard Worker; 32-bit or 64-bit
81*f6dc9357SAndroid Build Coastguard WorkerLOAD_SRC_MULT4 macro dest:req, word_index:req
82*f6dc9357SAndroid Build Coastguard Worker        mov     dest, [rN + rD * 1 + 4 * (word_index) - src_rN_offset];
83*f6dc9357SAndroid Build Coastguard Workerendm
84*f6dc9357SAndroid Build Coastguard Worker
85*f6dc9357SAndroid Build Coastguard Workerendif
86*f6dc9357SAndroid Build Coastguard Worker
87*f6dc9357SAndroid Build Coastguard Worker
88*f6dc9357SAndroid Build Coastguard Worker
; XzCrc64UpdateT<NUM_WORDS * 4> - x64 build, declared through MY_PROC with 4 parameters.
; Parameters, in ABI order, as loaded below:
;   param 0 -> r0 : 64-bit CRC state
;   param 1 -> rD : pointer to the input bytes
;   param 2 -> rN : number of input bytes
;   param 3 -> rT : base of the lookup tables
; The updated state is in r0 when the epilogue macro runs (presumably the
; return register - confirm against 7zAsm.asm).
; Scratch registers: r1, r2, r3, r5, r6, r7 plus rD/rN/rT.
; Structure: byte loop until rD is aligned -> main loop of NUM_WORDS * 4 bytes
; per step -> byte loop for the remaining tail.
89*f6dc9357SAndroid Build Coastguard WorkerMY_PROC @CatStr(XzCrc64UpdateT, %(NUM_WORDS * 4)), 4
90*f6dc9357SAndroid Build Coastguard Worker        MY_PUSH_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11
91*f6dc9357SAndroid Build Coastguard Worker
92*f6dc9357SAndroid Build Coastguard Worker        mov     r0, REG_ABI_PARAM_0   ; r0  <- r1 / r7
93*f6dc9357SAndroid Build Coastguard Worker        mov     rD, REG_ABI_PARAM_1   ; r11 <- r2 / r6
94*f6dc9357SAndroid Build Coastguard Worker        mov     rN, REG_ABI_PARAM_2   ; r10 <- r8 / r2
; Per the register note below, param 3 already arrives in r9 (= rT) on the
; non-Linux ABI, so the move is only emitted for Linux.
95*f6dc9357SAndroid Build Coastguard Workerif  (IS_LINUX gt 0)
96*f6dc9357SAndroid Build Coastguard Worker        mov     rT, REG_ABI_PARAM_3   ; r9  <- r9 / r1
97*f6dc9357SAndroid Build Coastguard Workerendif
98*f6dc9357SAndroid Build Coastguard Worker
; Inputs too short for alignment plus one main-loop step (with its look-ahead)
; are handled entirely by the byte loop at crc_end.
99*f6dc9357SAndroid Build Coastguard Worker        cmp     rN, NUM_BYTES_LIMIT + ALIGN_MASK
100*f6dc9357SAndroid Build Coastguard Worker        jb      crc_end
; Consume single bytes until rD has no ALIGN_MASK bits set.
101*f6dc9357SAndroid Build Coastguard Worker@@:
102*f6dc9357SAndroid Build Coastguard Worker        test    rD, ALIGN_MASK
103*f6dc9357SAndroid Build Coastguard Worker        jz      @F
104*f6dc9357SAndroid Build Coastguard Worker        CRC1b
105*f6dc9357SAndroid Build Coastguard Worker        jmp     @B
106*f6dc9357SAndroid Build Coastguard Worker@@:
; Fold the first word(s) at rD into the state, then re-base the registers:
;   rN = rD + rN - (NUM_BYTES_LIMIT - 1)   (loop limit address)
;   rD = rD - rN                           (offset from that limit)
;   rN = rN + src_rN_offset
; Inside the loop [rN + rD] addresses the data src_rN_offset bytes past the
; current position, and the loop ends when adding the step to rD carries.
107*f6dc9357SAndroid Build Coastguard Worker        XOR_NEXT
108*f6dc9357SAndroid Build Coastguard Worker        lea     rN, [rD + rN * 1 - (NUM_BYTES_LIMIT - 1)]
109*f6dc9357SAndroid Build Coastguard Worker        sub     rD, rN
110*f6dc9357SAndroid Build Coastguard Worker        add     rN, src_rN_offset
111*f6dc9357SAndroid Build Coastguard Worker
112*f6dc9357SAndroid Build Coastguard Workeralign 16
113*f6dc9357SAndroid Build Coastguard Worker@@:
114*f6dc9357SAndroid Build Coastguard Worker
115*f6dc9357SAndroid Build Coastguard Workerif NUM_WORDS eq 1
116*f6dc9357SAndroid Build Coastguard Worker
; One word per step: the four low bytes of r0 index sub-tables 3..0; the high
; half of r0 is shifted down and XORed with the next input word (SRCDAT4).
117*f6dc9357SAndroid Build Coastguard Worker        mov     x1, x0
118*f6dc9357SAndroid Build Coastguard Worker        shr     x1, 8
119*f6dc9357SAndroid Build Coastguard Worker        MOVZXLO x3, x1
120*f6dc9357SAndroid Build Coastguard Worker        MOVZXLO x2, x0
121*f6dc9357SAndroid Build Coastguard Worker        shr     x1, 8
122*f6dc9357SAndroid Build Coastguard Worker        shr     r0, 32
123*f6dc9357SAndroid Build Coastguard Worker        xor     x0, SRCDAT4
124*f6dc9357SAndroid Build Coastguard Worker        CRC_XOR r0, x2, 3
125*f6dc9357SAndroid Build Coastguard Worker        CRC_XOR r0, x3, 2
126*f6dc9357SAndroid Build Coastguard Worker        MOVZXLO x2, x1
127*f6dc9357SAndroid Build Coastguard Worker        shr     x1, 8
128*f6dc9357SAndroid Build Coastguard Worker        CRC_XOR r0, x2, 1
129*f6dc9357SAndroid Build Coastguard Worker        CRC_XOR r0, x1, 0
130*f6dc9357SAndroid Build Coastguard Worker
131*f6dc9357SAndroid Build Coastguard Workerelse ; NUM_WORDS > 1
132*f6dc9357SAndroid Build Coastguard Worker
; Words 2 .. NUM_WORDS-1 of this step are read directly from memory and looked
; up byte by byte; results are spread over two accumulators, r6 and r7.
; On the first pass (k == 2) the accumulators are seeded with the first 8 bytes
; of the next step (words NUM_WORDS and NUM_WORDS + 1).
; crc_op1 / crc_op2 choose `mov` for the first write to r7 when it was not
; seeded by a load, and `xor` otherwise.
133*f6dc9357SAndroid Build Coastguard Workerif NUM_WORDS ne 2
134*f6dc9357SAndroid Build Coastguard Worker  k = 2
135*f6dc9357SAndroid Build Coastguard Worker  while k lt NUM_WORDS
136*f6dc9357SAndroid Build Coastguard Worker
137*f6dc9357SAndroid Build Coastguard Worker        LOAD_SRC_MULT4  x1, k
138*f6dc9357SAndroid Build Coastguard Worker    crc_op1  textequ <xor>
139*f6dc9357SAndroid Build Coastguard Worker
140*f6dc9357SAndroid Build Coastguard Worker    if k eq 2
141*f6dc9357SAndroid Build Coastguard Worker      if (NUM_WORDS and 1)
142*f6dc9357SAndroid Build Coastguard Worker        LOAD_SRC_MULT4  x7, NUM_WORDS       ; aligned 32-bit
143*f6dc9357SAndroid Build Coastguard Worker        LOAD_SRC_MULT4  x6, NUM_WORDS + 1   ; aligned 32-bit
144*f6dc9357SAndroid Build Coastguard Worker        shl     r6, 32
145*f6dc9357SAndroid Build Coastguard Worker      else
146*f6dc9357SAndroid Build Coastguard Worker        LOAD_SRC_MULT4  r6, NUM_WORDS       ; aligned 64-bit
147*f6dc9357SAndroid Build Coastguard Worker        crc_op1  textequ <mov>
148*f6dc9357SAndroid Build Coastguard Worker      endif
149*f6dc9357SAndroid Build Coastguard Worker    endif
; `table` is the sub-table base for word k: later words use lower sub-tables.
150*f6dc9357SAndroid Build Coastguard Worker        table = 4 * (NUM_WORDS - 1 - k)
151*f6dc9357SAndroid Build Coastguard Worker        MOVZXLO x3, x1
152*f6dc9357SAndroid Build Coastguard Worker        CRC_OP crc_op1, r7, x3, 3 + table
153*f6dc9357SAndroid Build Coastguard Worker        MOVZXHI x3, x1
154*f6dc9357SAndroid Build Coastguard Worker        shr     x1, 16
155*f6dc9357SAndroid Build Coastguard Worker        CRC_XOR r6, x3, 2 + table
156*f6dc9357SAndroid Build Coastguard Worker        MOVZXLO x3, x1
157*f6dc9357SAndroid Build Coastguard Worker        shr     x1, 8
158*f6dc9357SAndroid Build Coastguard Worker        CRC_XOR r7, x3, 1 + table
159*f6dc9357SAndroid Build Coastguard Worker        CRC_XOR r6, x1, 0 + table
160*f6dc9357SAndroid Build Coastguard Worker        k = k + 1
161*f6dc9357SAndroid Build Coastguard Worker  endm
162*f6dc9357SAndroid Build Coastguard Worker        crc_op2  textequ <xor>
163*f6dc9357SAndroid Build Coastguard Worker
164*f6dc9357SAndroid Build Coastguard Workerelse ; NUM_WORDS == 2
165*f6dc9357SAndroid Build Coastguard Worker        LOAD_SRC_MULT4  r6, NUM_WORDS       ; aligned 64-bit
166*f6dc9357SAndroid Build Coastguard Worker        crc_op2  textequ <mov>
167*f6dc9357SAndroid Build Coastguard Workerendif ; NUM_WORDS == 2
168*f6dc9357SAndroid Build Coastguard Worker
; Look up the 8 bytes of the current state (r0): the low byte uses sub-table
; NUM_SKIP_BYTES + 7, the top byte uses sub-table NUM_SKIP_BYTES + 0.
; r1 receives the high 32 bits; the new state is rebuilt in r0 from the last
; lookup and the two accumulators.
169*f6dc9357SAndroid Build Coastguard Worker        MOVZXHI x3, x0
170*f6dc9357SAndroid Build Coastguard Worker        MOVZXLO x2, x0
171*f6dc9357SAndroid Build Coastguard Worker        mov     r1, r0
172*f6dc9357SAndroid Build Coastguard Worker        shr     r1, 32
173*f6dc9357SAndroid Build Coastguard Worker        shr     x0, 16
174*f6dc9357SAndroid Build Coastguard Worker        CRC_XOR r6, x2, NUM_SKIP_BYTES + 7
175*f6dc9357SAndroid Build Coastguard Worker        CRC_OP  crc_op2, r7, x3, NUM_SKIP_BYTES + 6
176*f6dc9357SAndroid Build Coastguard Worker        MOVZXLO x2, x0
177*f6dc9357SAndroid Build Coastguard Worker        MOVZXHI x5, x1
178*f6dc9357SAndroid Build Coastguard Worker        MOVZXLO x3, x1
179*f6dc9357SAndroid Build Coastguard Worker        shr     x0, 8
180*f6dc9357SAndroid Build Coastguard Worker        shr     x1, 16
181*f6dc9357SAndroid Build Coastguard Worker        CRC_XOR r7, x2, NUM_SKIP_BYTES + 5
182*f6dc9357SAndroid Build Coastguard Worker        CRC_XOR r6, x3, NUM_SKIP_BYTES + 3
183*f6dc9357SAndroid Build Coastguard Worker        CRC_XOR r7, x0, NUM_SKIP_BYTES + 4
184*f6dc9357SAndroid Build Coastguard Worker        CRC_XOR r6, x5, NUM_SKIP_BYTES + 2
185*f6dc9357SAndroid Build Coastguard Worker        MOVZXLO x2, x1
186*f6dc9357SAndroid Build Coastguard Worker        shr     x1, 8
187*f6dc9357SAndroid Build Coastguard Worker        CRC_XOR r7, x2, NUM_SKIP_BYTES + 1
188*f6dc9357SAndroid Build Coastguard Worker        CRC_MOV r0, x1, NUM_SKIP_BYTES + 0
189*f6dc9357SAndroid Build Coastguard Worker        xor     r0, r6
190*f6dc9357SAndroid Build Coastguard Worker        xor     r0, r7
191*f6dc9357SAndroid Build Coastguard Worker
192*f6dc9357SAndroid Build Coastguard Workerendif ; NUM_WORDS > 1
; Advance the offset; a carry out means the loop limit has been reached.
193*f6dc9357SAndroid Build Coastguard Worker        add     rD, NUM_WORDS * 4
194*f6dc9357SAndroid Build Coastguard Worker        jnc     @B
195*f6dc9357SAndroid Build Coastguard Worker
; Undo the re-basing: rD becomes a pointer again and rN ends up as
; (end of input) - rD, the number of bytes left for the byte loop.
; NOTE(review): the second XOR_NEXT presumably cancels the look-ahead word(s)
; already folded into r0, so the byte loop can consume them - confirm.
196*f6dc9357SAndroid Build Coastguard Worker        sub     rN, src_rN_offset
197*f6dc9357SAndroid Build Coastguard Worker        add     rD, rN
198*f6dc9357SAndroid Build Coastguard Worker        XOR_NEXT
199*f6dc9357SAndroid Build Coastguard Worker        add     rN, NUM_BYTES_LIMIT - 1
200*f6dc9357SAndroid Build Coastguard Worker        sub     rN, rD
201*f6dc9357SAndroid Build Coastguard Worker
; Tail: one byte per pass until rN reaches zero (ZF comes from the `dec rN`
; that ends CRC1b).
202*f6dc9357SAndroid Build Coastguard Workercrc_end:
203*f6dc9357SAndroid Build Coastguard Worker        test    rN, rN
204*f6dc9357SAndroid Build Coastguard Worker        jz      func_end
205*f6dc9357SAndroid Build Coastguard Worker@@:
206*f6dc9357SAndroid Build Coastguard Worker        CRC1b
207*f6dc9357SAndroid Build Coastguard Worker        jnz      @B
208*f6dc9357SAndroid Build Coastguard Workerfunc_end:
209*f6dc9357SAndroid Build Coastguard Worker        MY_POP_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11
210*f6dc9357SAndroid Build Coastguard WorkerMY_ENDP
211*f6dc9357SAndroid Build Coastguard Worker
212*f6dc9357SAndroid Build Coastguard Worker
213*f6dc9357SAndroid Build Coastguard Worker
214*f6dc9357SAndroid Build Coastguard Workerelse
215*f6dc9357SAndroid Build Coastguard Worker; ==================================================================
216*f6dc9357SAndroid Build Coastguard Worker; x86 (32-bit)
217*f6dc9357SAndroid Build Coastguard Worker
; Working registers for the 32-bit build:
;   rD = data pointer, rN = byte count, rT = base of the lookup tables.
; The 64-bit CRC state is held as a register pair (x0 = low dword, x2 = high
; dword, as loaded in MY_PROLOG_BASE).
218*f6dc9357SAndroid Build Coastguard WorkerrD      equ  r7
219*f6dc9357SAndroid Build Coastguard WorkerrN      equ  r1
220*f6dc9357SAndroid Build Coastguard WorkerrT      equ  r5
221*f6dc9357SAndroid Build Coastguard Worker
; xA is the scratch register that carries a byte-sized table index;
; xA_R is the name CRC_1 forms from it (src with the _R suffix).
222*f6dc9357SAndroid Build Coastguard WorkerxA      equ  x6
223*f6dc9357SAndroid Build Coastguard WorkerxA_R    equ  r6
224*f6dc9357SAndroid Build Coastguard Worker
; Locations of the arguments and of the loop-limit variable (num_VAR).
; NOTE: this section sits inside the `else` branch of the outer `ifdef x64`,
; so the `ifdef x64` branch just below is never assembled in this file.
225*f6dc9357SAndroid Build Coastguard Workerifdef x64
226*f6dc9357SAndroid Build Coastguard Worker    num_VAR     equ  r8
227*f6dc9357SAndroid Build Coastguard Workerelse
228*f6dc9357SAndroid Build Coastguard Worker
; Offset of the crc argument from r4 after the prologue: five REG_SIZE slots
; (presumably the four registers pushed by MY_PUSH_4_REGS plus the return
; address - confirm against 7zAsm.asm).
229*f6dc9357SAndroid Build Coastguard Workercrc_OFFS  equ  (REG_SIZE * 5)
230*f6dc9357SAndroid Build Coastguard Worker
231*f6dc9357SAndroid Build Coastguard Workerif (IS_CDECL gt 0) or (IS_LINUX gt 0)
232*f6dc9357SAndroid Build Coastguard Worker    ; cdecl or (GNU fastcall) stack:
233*f6dc9357SAndroid Build Coastguard Worker    ;   (UInt32 *) table
234*f6dc9357SAndroid Build Coastguard Worker    ;   size_t     size
235*f6dc9357SAndroid Build Coastguard Worker    ;   void *     data
236*f6dc9357SAndroid Build Coastguard Worker    ;   (UInt64)   crc
237*f6dc9357SAndroid Build Coastguard Worker    ;   ret-ip <-(r4)
    ; All arguments are on the stack; the `size` slot doubles as num_VAR.
238*f6dc9357SAndroid Build Coastguard Worker    data_OFFS   equ  (8 + crc_OFFS)
239*f6dc9357SAndroid Build Coastguard Worker    size_OFFS   equ  (REG_SIZE + data_OFFS)
240*f6dc9357SAndroid Build Coastguard Worker    table_OFFS  equ  (REG_SIZE + size_OFFS)
241*f6dc9357SAndroid Build Coastguard Worker    num_VAR     equ  [r4 + size_OFFS]
242*f6dc9357SAndroid Build Coastguard Worker    table_VAR   equ  [r4 + table_OFFS]
243*f6dc9357SAndroid Build Coastguard Workerelse
244*f6dc9357SAndroid Build Coastguard Worker    ; Windows fastcall:
245*f6dc9357SAndroid Build Coastguard Worker    ;   r1 = data, r2 = size
246*f6dc9357SAndroid Build Coastguard Worker    ; stack:
247*f6dc9357SAndroid Build Coastguard Worker    ;   (UInt32 *) table
248*f6dc9357SAndroid Build Coastguard Worker    ;   (UInt64)   crc
249*f6dc9357SAndroid Build Coastguard Worker    ;   ret-ip <-(r4)
    ; There is no `size` stack slot here, so num_VAR re-uses the `table` slot.
    ; The prologue copies the table pointer into rT before num_VAR is written.
250*f6dc9357SAndroid Build Coastguard Worker    table_OFFS  equ  (8 + crc_OFFS)
251*f6dc9357SAndroid Build Coastguard Worker    table_VAR   equ  [r4 + table_OFFS]
252*f6dc9357SAndroid Build Coastguard Worker    num_VAR     equ  table_VAR
253*f6dc9357SAndroid Build Coastguard Workerendif
254*f6dc9357SAndroid Build Coastguard Workerendif ; x64
255*f6dc9357SAndroid Build Coastguard Worker
; SRCDAT4: 32-bit input word at rN + rD, read by the single-word main loop
; (where rD is an offset and rN the re-based loop base).
256*f6dc9357SAndroid Build Coastguard WorkerSRCDAT4         equ     DWORD PTR [rN + rD * 1]
257*f6dc9357SAndroid Build Coastguard Worker
; CRC_1 op, dest, src, t, word_index
;   Emits `op dest, <dword>` where the dword is half of an 8-byte table entry:
;   entry <src>_R of sub-table t (0800h bytes per sub-table), at byte offset
;   word_index * 4 inside the entry.
258*f6dc9357SAndroid Build Coastguard WorkerCRC_1 macro op:req, dest:req, src:req, t:req, word_index:req
259*f6dc9357SAndroid Build Coastguard Worker        op      dest, DWORD PTR [rT + @CatStr(src, _R) * 8 + 0800h * (t) + (word_index) * 4]
260*f6dc9357SAndroid Build Coastguard Workerendm
261*f6dc9357SAndroid Build Coastguard Worker
; CRC op0, op1, dest0, dest1, src, t
;   Applies one 64-bit table entry to a register pair: op0 combines dest0 with
;   the dword at offset 0 of the entry, op1 combines dest1 with the dword at
;   offset 4.
262*f6dc9357SAndroid Build Coastguard WorkerCRC macro op0:req, op1:req, dest0:req, dest1:req, src:req, t:req
263*f6dc9357SAndroid Build Coastguard Worker        CRC_1   op0, dest0, src, t, 0
264*f6dc9357SAndroid Build Coastguard Worker        CRC_1   op1, dest1, src, t, 1
265*f6dc9357SAndroid Build Coastguard Workerendm
266*f6dc9357SAndroid Build Coastguard Worker
; CRC_XOR dest0, dest1, src, t
;   XORs entry <src> of sub-table t into the pair dest0 (offset-0 dword) and
;   dest1 (offset-4 dword).
267*f6dc9357SAndroid Build Coastguard WorkerCRC_XOR macro dest0:req, dest1:req, src:req, t:req
268*f6dc9357SAndroid Build Coastguard Worker        CRC xor, xor, dest0, dest1, src, t
269*f6dc9357SAndroid Build Coastguard Workerendm
270*f6dc9357SAndroid Build Coastguard Worker
271*f6dc9357SAndroid Build Coastguard Worker
; CRC1b (x86): process one input byte with the state held in x2:x0.
;   Reads the byte at [rD] and advances rD, XORs it with the low byte of x0,
;   shifts the 64-bit pair right by 8 (shrd moves the low byte of x2 into the
;   top of x0, then x2 is shifted), and XORs in the sub-table 0 entry.
;   Uses xA and x3 as scratch.
;   The final `dec rN` leaves ZF set when the byte count reaches zero; the
;   tail loop in MY_EPILOG_BASE branches on that flag.
272*f6dc9357SAndroid Build Coastguard WorkerCRC1b macro
273*f6dc9357SAndroid Build Coastguard Worker        movzx   xA, BYTE PTR [rD]
274*f6dc9357SAndroid Build Coastguard Worker        inc     rD
275*f6dc9357SAndroid Build Coastguard Worker        MOVZXLO x3, x0
276*f6dc9357SAndroid Build Coastguard Worker        xor     xA, x3
277*f6dc9357SAndroid Build Coastguard Worker        shrd    x0, x2, 8
278*f6dc9357SAndroid Build Coastguard Worker        shr     x2, 8
279*f6dc9357SAndroid Build Coastguard Worker        CRC_XOR x0, x2, xA, 0
280*f6dc9357SAndroid Build Coastguard Worker        dec     rN
281*f6dc9357SAndroid Build Coastguard Workerendm
282*f6dc9357SAndroid Build Coastguard Worker
283*f6dc9357SAndroid Build Coastguard Worker
; MY_PROLOG_BASE: common procedure entry for the 32-bit procedures.
;   Saves registers with MY_PUSH_4_REGS, then loads
;     rD = data pointer, rN = byte count, rT = table base,
;     x0 = low dword of the CRC state, x2 = high dword.
;   The `ifdef x64` branch splits a 64-bit state into the same x0 / x2 layout;
;   this macro sits in the non-x64 part of the file, so in this file only the
;   `else` branch is assembled.
284*f6dc9357SAndroid Build Coastguard WorkerMY_PROLOG_BASE macro
285*f6dc9357SAndroid Build Coastguard Worker        MY_PUSH_4_REGS
286*f6dc9357SAndroid Build Coastguard Workerifdef x64
287*f6dc9357SAndroid Build Coastguard Worker        mov     r0, REG_ABI_PARAM_0     ; r0 <- r1 / r7
288*f6dc9357SAndroid Build Coastguard Worker        mov     rT, REG_ABI_PARAM_3     ; r5 <- r9 / r1
289*f6dc9357SAndroid Build Coastguard Worker        mov     rN, REG_ABI_PARAM_2     ; r1 <- r8 / r2
290*f6dc9357SAndroid Build Coastguard Worker        mov     rD, REG_ABI_PARAM_1     ; r7 <- r2 / r6
291*f6dc9357SAndroid Build Coastguard Worker        mov     r2, r0
292*f6dc9357SAndroid Build Coastguard Worker        shr     r2, 32
293*f6dc9357SAndroid Build Coastguard Worker        mov     x0, x0
294*f6dc9357SAndroid Build Coastguard Workerelse
    ; cdecl / Linux: size and data come from the stack.
    ; Windows fastcall: they arrive in the first two parameter registers.
295*f6dc9357SAndroid Build Coastguard Worker    if (IS_CDECL gt 0) or (IS_LINUX gt 0)
296*f6dc9357SAndroid Build Coastguard Worker        proc_numParams = proc_numParams + 2 ; for ABI_LINUX
297*f6dc9357SAndroid Build Coastguard Worker        mov     rN, [r4 + size_OFFS]
298*f6dc9357SAndroid Build Coastguard Worker        mov     rD, [r4 + data_OFFS]
299*f6dc9357SAndroid Build Coastguard Worker    else
300*f6dc9357SAndroid Build Coastguard Worker        mov     rD, REG_ABI_PARAM_0     ; r7 <- r1 : (data)
301*f6dc9357SAndroid Build Coastguard Worker        mov     rN, REG_ABI_PARAM_1     ; r1 <- r2 : (size)
302*f6dc9357SAndroid Build Coastguard Worker    endif
    ; The 64-bit crc argument is read from the stack as two dwords.
303*f6dc9357SAndroid Build Coastguard Worker        mov     x0, [r4 + crc_OFFS]
304*f6dc9357SAndroid Build Coastguard Worker        mov     x2, [r4 + crc_OFFS + 4]
305*f6dc9357SAndroid Build Coastguard Worker        mov     rT, table_VAR
306*f6dc9357SAndroid Build Coastguard Workerendif
307*f6dc9357SAndroid Build Coastguard Workerendm
308*f6dc9357SAndroid Build Coastguard Worker
309*f6dc9357SAndroid Build Coastguard Worker
; MY_EPILOG_BASE crc_end, func_end
;   Common procedure exit. Defines the two labels passed in by the caller:
;     crc_end  : processes the remaining rN bytes one at a time with CRC1b
;                (skipped when rN is zero);
;     func_end : restores the registers saved by the prologue.
;   The label names are parameters, so each procedure supplies its own pair.
;   The `ifdef x64` part recombines x2:x0 into r0; it is not assembled in this
;   file's non-x64 section.
310*f6dc9357SAndroid Build Coastguard WorkerMY_EPILOG_BASE macro crc_end:req, func_end:req
311*f6dc9357SAndroid Build Coastguard Workercrc_end:
312*f6dc9357SAndroid Build Coastguard Worker        test    rN, rN
313*f6dc9357SAndroid Build Coastguard Worker        jz      func_end
314*f6dc9357SAndroid Build Coastguard Worker@@:
315*f6dc9357SAndroid Build Coastguard Worker        CRC1b
316*f6dc9357SAndroid Build Coastguard Worker        jnz      @B
317*f6dc9357SAndroid Build Coastguard Workerfunc_end:
318*f6dc9357SAndroid Build Coastguard Workerifdef x64
319*f6dc9357SAndroid Build Coastguard Worker        shl     r2, 32
320*f6dc9357SAndroid Build Coastguard Worker        xor     r0, r2
321*f6dc9357SAndroid Build Coastguard Workerendif
322*f6dc9357SAndroid Build Coastguard Worker        MY_POP_4_REGS
323*f6dc9357SAndroid Build Coastguard Workerendm
324*f6dc9357SAndroid Build Coastguard Worker
325*f6dc9357SAndroid Build Coastguard Worker
326*f6dc9357SAndroid Build Coastguard Worker; ALIGN_MASK is 3 (odd NUM_WORDS) or 7 (even NUM_WORDS): the low pointer bits that must be clear before the main loop starts
327*f6dc9357SAndroid Build Coastguard WorkerALIGN_MASK  equ     (7 - (NUM_WORDS and 1) * 4)
328*f6dc9357SAndroid Build Coastguard Worker
329*f6dc9357SAndroid Build Coastguard Workerif (NUM_WORDS eq 1)
330*f6dc9357SAndroid Build Coastguard Worker
; Minimum bytes needed past the aligned pointer for the single-word main loop:
; one step of NUM_WORDS * 4 bytes plus the 4 bytes read ahead of it.
331*f6dc9357SAndroid Build Coastguard WorkerNUM_BYTES_LIMIT_T4 equ (NUM_WORDS * 4 + 4)
332*f6dc9357SAndroid Build Coastguard Worker
; XzCrc64UpdateT<NUM_WORDS * 4> - 32-bit build, single-word variant
; (assembled only when NUM_WORDS == 1). Declared through MY_PROC with 5
; parameters. After MY_PROLOG_BASE: rD = data, rN = size, rT = tables,
; x2:x0 = 64-bit CRC state.
; Structure: byte loop until rD is aligned -> 4 bytes per main-loop step ->
; byte loop for the tail (inside MY_EPILOG_BASE).
333*f6dc9357SAndroid Build Coastguard WorkerMY_PROC @CatStr(XzCrc64UpdateT, %(NUM_WORDS * 4)), 5
334*f6dc9357SAndroid Build Coastguard Worker        MY_PROLOG_BASE
335*f6dc9357SAndroid Build Coastguard Worker
; Short inputs are handled entirely by the byte loop.
336*f6dc9357SAndroid Build Coastguard Worker        cmp     rN, NUM_BYTES_LIMIT_T4 + ALIGN_MASK
337*f6dc9357SAndroid Build Coastguard Worker        jb      crc_end_4
; Consume single bytes until rD has no ALIGN_MASK bits set.
338*f6dc9357SAndroid Build Coastguard Worker@@:
339*f6dc9357SAndroid Build Coastguard Worker        test    rD, ALIGN_MASK
340*f6dc9357SAndroid Build Coastguard Worker        jz      @F
341*f6dc9357SAndroid Build Coastguard Worker        CRC1b
342*f6dc9357SAndroid Build Coastguard Worker        jmp     @B
343*f6dc9357SAndroid Build Coastguard Worker@@:
; Fold the first word into x0, then re-base: rN becomes the loop base and rD an
; offset from it, so SRCDAT4 ([rN + rD]) is the word 4 bytes past the current
; position and the loop ends when `add rD, 4` carries.
344*f6dc9357SAndroid Build Coastguard Worker        xor     x0, [rD]
345*f6dc9357SAndroid Build Coastguard Worker        lea     rN, [rD + rN * 1 - (NUM_BYTES_LIMIT_T4 - 1)]
346*f6dc9357SAndroid Build Coastguard Worker        sub     rD, rN
347*f6dc9357SAndroid Build Coastguard Worker        add     rN, 4
348*f6dc9357SAndroid Build Coastguard Worker
; xA enters each iteration holding the low byte of x0.
349*f6dc9357SAndroid Build Coastguard Worker        MOVZXLO xA, x0
350*f6dc9357SAndroid Build Coastguard Workeralign 16
351*f6dc9357SAndroid Build Coastguard Worker@@:
; x3 = next input word XOR old high dword: it becomes the new low dword once
; the four table entries are XORed in. x2 is overwritten by the first lookup
; (`mov`) and accumulates the new high dword.
352*f6dc9357SAndroid Build Coastguard Worker        mov     x3, SRCDAT4
353*f6dc9357SAndroid Build Coastguard Worker        xor     x3, x2
354*f6dc9357SAndroid Build Coastguard Worker        shr     x0, 8
355*f6dc9357SAndroid Build Coastguard Worker        CRC xor, mov, x3, x2, xA, 3
356*f6dc9357SAndroid Build Coastguard Worker        MOVZXLO xA, x0
357*f6dc9357SAndroid Build Coastguard Worker        shr     x0, 8
358*f6dc9357SAndroid Build Coastguard Worker        ; MOVZXHI  xA, x0
359*f6dc9357SAndroid Build Coastguard Worker        ; shr     x0, 16
360*f6dc9357SAndroid Build Coastguard Worker        CRC_XOR x3, x2, xA, 2
361*f6dc9357SAndroid Build Coastguard Worker
362*f6dc9357SAndroid Build Coastguard Worker        MOVZXLO xA, x0
363*f6dc9357SAndroid Build Coastguard Worker        shr     x0, 8
364*f6dc9357SAndroid Build Coastguard Worker        CRC_XOR x3, x2, xA, 1
365*f6dc9357SAndroid Build Coastguard Worker        CRC_XOR x3, x2, x0, 0
; Prepare the next iteration: xA = low byte of the new low dword, x0 = new low dword.
366*f6dc9357SAndroid Build Coastguard Worker        MOVZXLO xA, x3
367*f6dc9357SAndroid Build Coastguard Worker        mov     x0, x3
368*f6dc9357SAndroid Build Coastguard Worker
369*f6dc9357SAndroid Build Coastguard Worker        add     rD, 4
370*f6dc9357SAndroid Build Coastguard Worker        jnc     @B
371*f6dc9357SAndroid Build Coastguard Worker
; Undo the re-basing: rD becomes a pointer again and rN ends up as
; (end of input) - rD.
; NOTE(review): the second `xor x0, [rD]` presumably cancels the look-ahead
; word already folded into x0, so the byte loop can consume it - confirm.
372*f6dc9357SAndroid Build Coastguard Worker        sub     rN, 4
373*f6dc9357SAndroid Build Coastguard Worker        add     rD, rN
374*f6dc9357SAndroid Build Coastguard Worker        xor     x0, [rD]
375*f6dc9357SAndroid Build Coastguard Worker        add     rN, NUM_BYTES_LIMIT_T4 - 1
376*f6dc9357SAndroid Build Coastguard Worker        sub     rN, rD
377*f6dc9357SAndroid Build Coastguard Worker        MY_EPILOG_BASE crc_end_4, func_end_4
378*f6dc9357SAndroid Build Coastguard WorkerMY_ENDP
379*f6dc9357SAndroid Build Coastguard Worker
380*f6dc9357SAndroid Build Coastguard Workerelse ; NUM_WORDS > 1
381*f6dc9357SAndroid Build Coastguard Worker
; SHR_X x, imm
;   Logical right shift of x by imm bits; a thin wrapper around `shr` used by
;   the ITER_* macros.
382*f6dc9357SAndroid Build Coastguard WorkerSHR_X macro x, imm
383*f6dc9357SAndroid Build Coastguard Worker        shr x, imm
384*f6dc9357SAndroid Build Coastguard Workerendm
385*f6dc9357SAndroid Build Coastguard Worker
386*f6dc9357SAndroid Build Coastguard Worker
; ITER_1 v0, v1, a, off
;   Consumes the low byte of a: copies it to xA, shifts a right by 8, and XORs
;   entry xA of sub-table `off` into the pair v0 / v1.
387*f6dc9357SAndroid Build Coastguard WorkerITER_1 macro v0, v1, a, off
388*f6dc9357SAndroid Build Coastguard Worker        MOVZXLO xA, a
389*f6dc9357SAndroid Build Coastguard Worker        SHR_X   a, 8
390*f6dc9357SAndroid Build Coastguard Worker        CRC_XOR v0, v1, xA, off
391*f6dc9357SAndroid Build Coastguard Workerendm
392*f6dc9357SAndroid Build Coastguard Worker
393*f6dc9357SAndroid Build Coastguard Worker
; ITER_4 v0, v1, a, off
;   Consumes all four bytes of the dword a, XORing one table entry per byte
;   into the pair v0 / v1: the lowest byte uses sub-table off + 3, the highest
;   byte uses sub-table off. a is destroyed.
;   Three alternative instruction sequences are kept; `if 0 eq 0` is always
;   true, so only the first one is assembled.
394*f6dc9357SAndroid Build Coastguard WorkerITER_4 macro v0, v1, a, off
395*f6dc9357SAndroid Build Coastguard Workerif 0 eq 0
396*f6dc9357SAndroid Build Coastguard Worker        ITER_1  v0, v1, a, off + 3
397*f6dc9357SAndroid Build Coastguard Worker        ITER_1  v0, v1, a, off + 2
398*f6dc9357SAndroid Build Coastguard Worker        ITER_1  v0, v1, a, off + 1
399*f6dc9357SAndroid Build Coastguard Worker        CRC_XOR v0, v1, a, off
400*f6dc9357SAndroid Build Coastguard Workerelseif 0 eq 0
        ; Alternative (not assembled): uses a 16-bit rotate to reach the upper bytes.
401*f6dc9357SAndroid Build Coastguard Worker        MOVZXLO xA, a
402*f6dc9357SAndroid Build Coastguard Worker        CRC_XOR v0, v1, xA, off + 3
403*f6dc9357SAndroid Build Coastguard Worker        mov     xA, a
404*f6dc9357SAndroid Build Coastguard Worker        ror     a, 16   ; 32-bit ror
405*f6dc9357SAndroid Build Coastguard Worker        shr     xA, 24
406*f6dc9357SAndroid Build Coastguard Worker        CRC_XOR v0, v1, xA, off
407*f6dc9357SAndroid Build Coastguard Worker        MOVZXLO xA, a
408*f6dc9357SAndroid Build Coastguard Worker        SHR_X   a, 24
409*f6dc9357SAndroid Build Coastguard Worker        CRC_XOR v0, v1, xA, off + 1
410*f6dc9357SAndroid Build Coastguard Worker        CRC_XOR v0, v1, a, off + 2
411*f6dc9357SAndroid Build Coastguard Workerelse
412*f6dc9357SAndroid Build Coastguard Worker        ; MOVZXHI provides smaller code, but MOVZX_HI_BYTE is not a fast instruction
413*f6dc9357SAndroid Build Coastguard Worker        MOVZXLO xA, a
414*f6dc9357SAndroid Build Coastguard Worker        CRC_XOR v0, v1, xA, off + 3
415*f6dc9357SAndroid Build Coastguard Worker        MOVZXHI xA, a
416*f6dc9357SAndroid Build Coastguard Worker        SHR_X   a, 16
417*f6dc9357SAndroid Build Coastguard Worker        CRC_XOR v0, v1, xA, off + 2
418*f6dc9357SAndroid Build Coastguard Worker        MOVZXLO xA, a
419*f6dc9357SAndroid Build Coastguard Worker        SHR_X   a, 8
420*f6dc9357SAndroid Build Coastguard Worker        CRC_XOR v0, v1, xA, off + 1
421*f6dc9357SAndroid Build Coastguard Worker        CRC_XOR v0, v1, a, off
422*f6dc9357SAndroid Build Coastguard Workerendif
423*f6dc9357SAndroid Build Coastguard Workerendm
424*f6dc9357SAndroid Build Coastguard Worker
425*f6dc9357SAndroid Build Coastguard Worker
426*f6dc9357SAndroid Build Coastguard Worker
; ITER_1_PAIR v0, v1, a0, a1, off
;   One ITER_1 step on each of two dwords: the low byte of a0 uses sub-table
;   off + 4 and the low byte of a1 uses sub-table off. Used only by the
;   interleaved variant inside ITER_12.
427*f6dc9357SAndroid Build Coastguard WorkerITER_1_PAIR macro v0, v1, a0, a1, off
428*f6dc9357SAndroid Build Coastguard Worker        ITER_1 v0, v1, a0, off + 4
429*f6dc9357SAndroid Build Coastguard Worker        ITER_1 v0, v1, a1, off
430*f6dc9357SAndroid Build Coastguard Workerendm
431*f6dc9357SAndroid Build Coastguard Worker
; src_rD_offset: inside the multi-word main loop rD points this many bytes past
;                the current position (the first 8 bytes are already folded
;                into the state).
; STEP_SIZE:     bytes consumed by one ITER_12 step.
432*f6dc9357SAndroid Build Coastguard Workersrc_rD_offset equ 8
433*f6dc9357SAndroid Build Coastguard WorkerSTEP_SIZE       equ     (NUM_WORDS * 4)
434*f6dc9357SAndroid Build Coastguard Worker
; ITER_12_NEXT op, index, v0, v1
;   Combines (op = mov or xor) v0 / v1 with the two dwords that begin step
;   index + 1, i.e. the first 8 bytes of the following step. Addresses are
;   relative to rD, which is src_rD_offset bytes past the current position.
435*f6dc9357SAndroid Build Coastguard WorkerITER_12_NEXT macro op, index, v0, v1
436*f6dc9357SAndroid Build Coastguard Worker        op     v0, DWORD PTR [rD + (index + 1) * STEP_SIZE     - src_rD_offset]
437*f6dc9357SAndroid Build Coastguard Worker        op     v1, DWORD PTR [rD + (index + 1) * STEP_SIZE + 4 - src_rD_offset]
438*f6dc9357SAndroid Build Coastguard Workerendm
439*f6dc9357SAndroid Build Coastguard Worker
; ITER_12 index, a0, a1, v0, v1
;   One main-loop step of STEP_SIZE bytes for unrolled step number `index`.
;     a0 / a1 : current state (low / high dword); destroyed.
;     v0 / v1 : receive the new state (low / high dword).
;   1. v0 / v1 are seeded from the input bytes that follow the 8 bytes already
;      folded into the state: when NUM_SKIP_BYTES is 0 they are simply loaded
;      from the first 8 bytes of the next step; otherwise each of the
;      NUM_SKIP_BYTES bytes is looked up (first lookup with `mov`, the rest
;      with `xor`) and the next step's first 8 bytes are XORed in.
;   2. The 8 state bytes in a0 / a1 are looked up in sub-tables
;      NUM_SKIP_BYTES + 7 .. NUM_SKIP_BYTES + 0 and XORed into v0 / v1.
;   Uses xA as scratch.
440*f6dc9357SAndroid Build Coastguard WorkerITER_12 macro index, a0, a1, v0, v1
441*f6dc9357SAndroid Build Coastguard Worker
442*f6dc9357SAndroid Build Coastguard Worker  if NUM_SKIP_BYTES  eq 0
443*f6dc9357SAndroid Build Coastguard Worker        ITER_12_NEXT mov, index, v0, v1
444*f6dc9357SAndroid Build Coastguard Worker  else
445*f6dc9357SAndroid Build Coastguard Worker    k = 0
446*f6dc9357SAndroid Build Coastguard Worker    while k lt NUM_SKIP_BYTES
447*f6dc9357SAndroid Build Coastguard Worker        movzx   xA, BYTE PTR [rD + (index) * STEP_SIZE + k + 8 - src_rD_offset]
448*f6dc9357SAndroid Build Coastguard Worker      if k eq 0
449*f6dc9357SAndroid Build Coastguard Worker        CRC mov, mov,   v0, v1, xA, NUM_SKIP_BYTES - 1 - k
450*f6dc9357SAndroid Build Coastguard Worker      else
451*f6dc9357SAndroid Build Coastguard Worker        CRC_XOR         v0, v1, xA, NUM_SKIP_BYTES - 1 - k
452*f6dc9357SAndroid Build Coastguard Worker      endif
453*f6dc9357SAndroid Build Coastguard Worker      k = k + 1
454*f6dc9357SAndroid Build Coastguard Worker    endm
455*f6dc9357SAndroid Build Coastguard Worker        ITER_12_NEXT xor, index, v0, v1
456*f6dc9357SAndroid Build Coastguard Worker  endif
457*f6dc9357SAndroid Build Coastguard Worker
; `if 0 eq 0` is always true: the non-interleaved form is the one assembled.
458*f6dc9357SAndroid Build Coastguard Workerif 0 eq 0
459*f6dc9357SAndroid Build Coastguard Worker        ITER_4  v0, v1, a0, NUM_SKIP_BYTES + 4
460*f6dc9357SAndroid Build Coastguard Worker        ITER_4  v0, v1, a1, NUM_SKIP_BYTES
461*f6dc9357SAndroid Build Coastguard Workerelse ; interleave version is faster/slower for different processors
462*f6dc9357SAndroid Build Coastguard Worker        ITER_1_PAIR v0, v1, a0, a1, NUM_SKIP_BYTES + 3
463*f6dc9357SAndroid Build Coastguard Worker        ITER_1_PAIR v0, v1, a0, a1, NUM_SKIP_BYTES + 2
464*f6dc9357SAndroid Build Coastguard Worker        ITER_1_PAIR v0, v1, a0, a1, NUM_SKIP_BYTES + 1
465*f6dc9357SAndroid Build Coastguard Worker        CRC_XOR     v0, v1, a0,     NUM_SKIP_BYTES + 4
466*f6dc9357SAndroid Build Coastguard Worker        CRC_XOR     v0, v1, a1,     NUM_SKIP_BYTES
467*f6dc9357SAndroid Build Coastguard Workerendif
468*f6dc9357SAndroid Build Coastguard Workerendm
469*f6dc9357SAndroid Build Coastguard Worker
470*f6dc9357SAndroid Build Coastguard Worker; we use (UNROLL_CNT > 1) to reduce read ports pressure (num_VAR reads)
; UNROLL_CNT:      ITER_12 steps emitted per pass of the main loop.
; NUM_BYTES_LIMIT: bytes needed past the aligned pointer for one full pass:
;                  UNROLL_CNT steps plus the 8 bytes read ahead.
471*f6dc9357SAndroid Build Coastguard WorkerUNROLL_CNT      equ     (2 * 1)
472*f6dc9357SAndroid Build Coastguard WorkerNUM_BYTES_LIMIT equ     (STEP_SIZE * UNROLL_CNT + 8)
473*f6dc9357SAndroid Build Coastguard Worker
; XzCrc64UpdateT<NUM_WORDS * 4> - 32-bit build, multi-word variant
; (assembled when NUM_WORDS > 1). Declared through MY_PROC with 5 parameters.
; After MY_PROLOG_BASE: rD = data, rN = size, rT = tables,
; x2:x0 = 64-bit CRC state. x1 / x3 form the alternate state pair used by the
; unrolled steps.
; Structure: byte loop until rD is aligned -> main loop of
; STEP_SIZE * UNROLL_CNT bytes per pass -> byte loop for the tail (inside
; MY_EPILOG_BASE).
474*f6dc9357SAndroid Build Coastguard WorkerMY_PROC @CatStr(XzCrc64UpdateT, %(NUM_WORDS * 4)), 5
475*f6dc9357SAndroid Build Coastguard Worker        MY_PROLOG_BASE
476*f6dc9357SAndroid Build Coastguard Worker
; Short inputs are handled entirely by the byte loop.
477*f6dc9357SAndroid Build Coastguard Worker        cmp     rN, NUM_BYTES_LIMIT + ALIGN_MASK
478*f6dc9357SAndroid Build Coastguard Worker        jb      crc_end_12
; Consume single bytes until rD has no ALIGN_MASK bits set.
479*f6dc9357SAndroid Build Coastguard Worker@@:
480*f6dc9357SAndroid Build Coastguard Worker        test    rD, ALIGN_MASK
481*f6dc9357SAndroid Build Coastguard Worker        jz      @F
482*f6dc9357SAndroid Build Coastguard Worker        CRC1b
483*f6dc9357SAndroid Build Coastguard Worker        jmp     @B
484*f6dc9357SAndroid Build Coastguard Worker@@:
; Fold the first 8 bytes into the state and move rD past them. The loop limit
; rD + rN - (NUM_BYTES_LIMIT - 1) is kept in num_VAR (a stack slot), because
; rN's register is needed as scratch inside the loop.
485*f6dc9357SAndroid Build Coastguard Worker        xor     x0, [rD]
486*f6dc9357SAndroid Build Coastguard Worker        xor     x2, [rD + 4]
487*f6dc9357SAndroid Build Coastguard Worker        add     rD, src_rD_offset
488*f6dc9357SAndroid Build Coastguard Worker        lea     rN, [rD + rN * 1 - (NUM_BYTES_LIMIT - 1)]
489*f6dc9357SAndroid Build Coastguard Worker        mov     num_VAR, rN
490*f6dc9357SAndroid Build Coastguard Worker
491*f6dc9357SAndroid Build Coastguard Workeralign 16
492*f6dc9357SAndroid Build Coastguard Worker@@:
; Emit UNROLL_CNT steps. Successive steps swap the source and destination
; pairs (x0,x2 -> x1,x3 -> x0,x2 ...), so no register copy is needed between them.
493*f6dc9357SAndroid Build Coastguard Worker    i = 0
494*f6dc9357SAndroid Build Coastguard Worker    rept UNROLL_CNT
495*f6dc9357SAndroid Build Coastguard Worker      if (i and 1) eq 0
496*f6dc9357SAndroid Build Coastguard Worker        ITER_12     i, x0, x2,  x1, x3
497*f6dc9357SAndroid Build Coastguard Worker      else
498*f6dc9357SAndroid Build Coastguard Worker        ITER_12     i, x1, x3,  x0, x2
499*f6dc9357SAndroid Build Coastguard Worker      endif
500*f6dc9357SAndroid Build Coastguard Worker      i = i + 1
501*f6dc9357SAndroid Build Coastguard Worker    endm
502*f6dc9357SAndroid Build Coastguard Worker
; With an odd UNROLL_CNT the result ends in x1 / x3 and is copied back.
503*f6dc9357SAndroid Build Coastguard Worker    if (UNROLL_CNT and 1)
504*f6dc9357SAndroid Build Coastguard Worker        mov     x0, x1
505*f6dc9357SAndroid Build Coastguard Worker        mov     x2, x3
506*f6dc9357SAndroid Build Coastguard Worker    endif
507*f6dc9357SAndroid Build Coastguard Worker        add     rD, STEP_SIZE * UNROLL_CNT
508*f6dc9357SAndroid Build Coastguard Worker        cmp     rD, num_VAR
509*f6dc9357SAndroid Build Coastguard Worker        jb      @B
510*f6dc9357SAndroid Build Coastguard Worker
; Recover the remaining byte count, rN = (end of input) - (rD - src_rD_offset),
; and step rD back to the current position.
; NOTE(review): the two XORs presumably cancel the 8 look-ahead bytes already
; folded into x2:x0, so the byte loop can consume them - confirm.
511*f6dc9357SAndroid Build Coastguard Worker        mov     rN, num_VAR
512*f6dc9357SAndroid Build Coastguard Worker        add     rN, NUM_BYTES_LIMIT - 1
513*f6dc9357SAndroid Build Coastguard Worker        sub     rN, rD
514*f6dc9357SAndroid Build Coastguard Worker        sub     rD, src_rD_offset
515*f6dc9357SAndroid Build Coastguard Worker        xor     x0, [rD]
516*f6dc9357SAndroid Build Coastguard Worker        xor     x2, [rD + 4]
517*f6dc9357SAndroid Build Coastguard Worker
518*f6dc9357SAndroid Build Coastguard Worker        MY_EPILOG_BASE crc_end_12, func_end_12
519*f6dc9357SAndroid Build Coastguard WorkerMY_ENDP
520*f6dc9357SAndroid Build Coastguard Worker
521*f6dc9357SAndroid Build Coastguard Workerendif ; (NUM_WORDS > 1)
522*f6dc9357SAndroid Build Coastguard Workerendif ; ! x64
523*f6dc9357SAndroid Build Coastguard Workerend
524