; XzCrc64Opt.asm -- CRC64 calculation : optimized version
; 2023-12-08 : Igor Pavlov : Public domain
;
; Assembles one procedure, XzCrc64UpdateT<NUM_WORDS * 4>, in one of two
; variants selected by the x64 symbol:
;   - x64 defined     : 64-bit implementation (CRC state kept in r0)
;   - x64 not defined : 32-bit implementation (CRC state kept in x0 : x2)
;
; Arguments, as described by the stack-layout comments and the prolog
; register moves below: (UInt64 crc, void *data, size_t size, table).
;
; Register aliases (r0, x0, ...), the _L / _H / _R alias suffixes,
; REG_ABI_PARAM_n, REG_SIZE, IS_LINUX, IS_CDECL and the MY_* macros are not
; defined in this file; they are expected to come from 7zAsm.asm.

include 7zAsm.asm

MY_ASM_START

; Number of 32-bit words consumed per step of the main loop.
; With NUM_WORDS = 3 the procedure name expands to XzCrc64UpdateT12.
NUM_WORDS       equ     3

if (NUM_WORDS lt 1) or (NUM_WORDS gt 64)
.err <num_words_IS_INCORRECT>
endif

; Bytes per step beyond the first two words (8 bytes) of the step.
; Used below as a table-index offset.
NUM_SKIP_BYTES  equ     ((NUM_WORDS - 2) * 4)


; movzx dest from the alias named <src>_L
; (presumably the low byte of src - confirm in 7zAsm.asm).
MOVZXLO macro dest:req, src:req
        movzx   dest, @CatStr(src, _L)
endm

; movzx dest from the alias named <src>_H
; (presumably bits 8..15 of src - confirm in 7zAsm.asm).
MOVZXHI macro dest:req, src:req
        movzx   dest, @CatStr(src, _H)
endm


ifdef x64
; ==================================================================
; x64 (64-bit)

rD      equ     r11             ; data pointer (loaded from REG_ABI_PARAM_1)
rN      equ     r10             ; size / loop bound (loaded from REG_ABI_PARAM_2)
rT      equ     r9              ; table base

; op dest, table[t][src]
; Sub-table (t) starts at byte offset 0800h * t (256 entries of 8 bytes);
; <src>_R is used as the index register and is scaled by 8.
CRC_OP macro op:req, dest:req, src:req, t:req
        op      dest, QWORD PTR [rT + @CatStr(src, _R) * 8 + 0800h * (t)]
endm

CRC_XOR macro dest:req, src:req, t:req
        CRC_OP  xor, dest, src, t
endm

CRC_MOV macro dest:req, src:req, t:req
        CRC_OP  mov, dest, src, t
endm

; Processes one input byte with sub-table 0:
;   r0 = (r0 >> 8) ^ table[0][(low byte of r0) ^ *rD], then rD++.
; The final "dec rN" leaves ZF set when rN reaches zero; the tail loops
; below branch on it directly. Clobbers x6 and x3.
CRC1b macro
        movzx   x6, BYTE PTR [rD]
        inc     rD
        MOVZXLO x3, x0
        xor     x6, x3
        shr     r0, 8
        CRC_XOR r0, x6, 0
        dec     rN
endm


; ALIGN_MASK is 3 or 7 bytes alignment:
; (3 when NUM_WORDS is odd, 7 when NUM_WORDS is even)
ALIGN_MASK      equ     (7 - (NUM_WORDS and 1) * 4)

if NUM_WORDS eq 1

src_rN_offset   equ     4
; + 4 for prefetching next 4-bytes after current iteration
NUM_BYTES_LIMIT equ     (NUM_WORDS * 4 + 4)
SRCDAT4         equ     DWORD PTR [rN + rD * 1]

; XORs the 4 bytes at [rD] into the low half of r0.
; NOTE(review): relies on "mov x1, ..." also clearing the upper half of r1
; (32-bit destination write in 64-bit mode).
XOR_NEXT macro
        mov     x1, [rD]
        xor     r0, r1
endm

else ; NUM_WORDS > 1

src_rN_offset   equ     8
; + 8 for prefetching next 8-bytes after current iteration
NUM_BYTES_LIMIT equ     (NUM_WORDS * 4 + 8)

; XORs the 8 bytes at [rD] into r0.
XOR_NEXT macro
        xor     r0, QWORD PTR [rD] ; 64-bit read, can be unaligned
endm

; Loads the source word with the given index, relative to the current step.
; 32-bit or 64-bit (the width follows the size of dest)
LOAD_SRC_MULT4 macro dest:req, word_index:req
        mov     dest, [rN + rD * 1 + 4 * (word_index) - src_rN_offset];
endm

endif



MY_PROC @CatStr(XzCrc64UpdateT, %(NUM_WORDS * 4)), 4
        MY_PUSH_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11

        mov     r0, REG_ABI_PARAM_0   ; r0  <- r1 / r7
        mov     rD, REG_ABI_PARAM_1   ; r11 <- r2 / r6
        mov     rN, REG_ABI_PARAM_2   ; r10 <- r8 / r2
if  (IS_LINUX gt 0)
        ; Per the register note on the next line, the non-Linux ABI already
        ; passes this argument in r9 (= rT), so the move is Linux-only.
        mov     rT, REG_ABI_PARAM_3   ; r9  <- r9 / r1
endif

        ; Inputs too short for alignment + one full step + prefetch are
        ; handled entirely by the byte-wise tail loop.
        cmp     rN, NUM_BYTES_LIMIT + ALIGN_MASK
        jb      crc_end
@@:
        ; Byte-wise processing until rD satisfies ALIGN_MASK.
        test    rD, ALIGN_MASK
        jz      @F
        CRC1b
        jmp     @B
@@:
        ; Pre-fold the first data bytes into the CRC state, then switch to
        ; (limit, negative offset) addressing:
        ;   rN = limit address (+ src_rN_offset), rD = current - limit.
        ; "add rD, step / jnc" below keeps looping until rD wraps past zero.
        XOR_NEXT
        lea     rN, [rD + rN * 1 - (NUM_BYTES_LIMIT - 1)]
        sub     rD, rN
        add     rN, src_rN_offset

align 16
@@:

if NUM_WORDS eq 1

        mov     x1, x0
        shr     x1, 8
        MOVZXLO x3, x1
        MOVZXLO x2, x0
        shr     x1, 8
        shr     r0, 32
        xor     x0, SRCDAT4
        CRC_XOR r0, x2, 3
        CRC_XOR r0, x3, 2
        MOVZXLO x2, x1
        shr     x1, 8
        CRC_XOR r0, x2, 1
        CRC_XOR r0, x1, 0

else ; NUM_WORDS > 1

if NUM_WORDS ne 2
  ; Words 2 .. NUM_WORDS-1 of the step are looked up byte by byte.
  ; Results are accumulated in r6 and r7.
  k = 2
  while k lt NUM_WORDS

        LOAD_SRC_MULT4  x1, k
    crc_op1  textequ <xor>

    if k eq 2
      ; First pass: seed r6 / r7 with the data prefetched for the next step.
      if (NUM_WORDS and 1)
        LOAD_SRC_MULT4  x7, NUM_WORDS       ; aligned 32-bit
        LOAD_SRC_MULT4  x6, NUM_WORDS + 1   ; aligned 32-bit
        shl     r6, 32
      else
        LOAD_SRC_MULT4  r6, NUM_WORDS       ; aligned 64-bit
        ; r7 has no seed in this case, so its first table access is a mov
        crc_op1  textequ <mov>
      endif
    endif
        table = 4 * (NUM_WORDS - 1 - k)
        MOVZXLO x3, x1
        CRC_OP crc_op1, r7, x3, 3 + table
        MOVZXHI x3, x1
        shr     x1, 16
        CRC_XOR r6, x3, 2 + table
        MOVZXLO x3, x1
        shr     x1, 8
        CRC_XOR r7, x3, 1 + table
        CRC_XOR r6, x1, 0 + table
        k = k + 1
  endm
        crc_op2  textequ <xor>

else ; NUM_WORDS == 2
        LOAD_SRC_MULT4  r6, NUM_WORDS       ; aligned 64-bit
        ; r7 is not initialized yet, so its first table access is a mov
        crc_op2  textequ <mov>
endif ; NUM_WORDS == 2

        ; The 8 bytes of the current state (r0) use sub-tables
        ; NUM_SKIP_BYTES + 7 .. NUM_SKIP_BYTES + 0, lowest byte first.
        MOVZXHI x3, x0
        MOVZXLO x2, x0
        mov     r1, r0
        shr     r1, 32
        shr     x0, 16
        CRC_XOR r6, x2, NUM_SKIP_BYTES + 7
        CRC_OP  crc_op2, r7, x3, NUM_SKIP_BYTES + 6
        MOVZXLO x2, x0
        MOVZXHI x5, x1
        MOVZXLO x3, x1
        shr     x0, 8
        shr     x1, 16
        CRC_XOR r7, x2, NUM_SKIP_BYTES + 5
        CRC_XOR r6, x3, NUM_SKIP_BYTES + 3
        CRC_XOR r7, x0, NUM_SKIP_BYTES + 4
        CRC_XOR r6, x5, NUM_SKIP_BYTES + 2
        MOVZXLO x2, x1
        shr     x1, 8
        CRC_XOR r7, x2, NUM_SKIP_BYTES + 1
        CRC_MOV r0, x1, NUM_SKIP_BYTES + 0
        xor     r0, r6
        xor     r0, r7

endif ; NUM_WORDS > 1
        add     rD, NUM_WORDS * 4
        jnc     @B

        ; Back to (pointer, count) form for the tail loop. The second
        ; XOR_NEXT cancels the data that was pre-folded into r0 for a step
        ; that will not run (XOR is its own inverse).
        sub     rN, src_rN_offset
        add     rD, rN
        XOR_NEXT
        add     rN, NUM_BYTES_LIMIT - 1
        sub     rN, rD

crc_end:
        ; Byte-wise tail: rN = number of remaining bytes.
        test    rN, rN
        jz      func_end
@@:
        CRC1b
        jnz     @B
func_end:
        MY_POP_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11
MY_ENDP



else
; ==================================================================
; x86 (32-bit)
;
; NOTE: this section is the "else" branch of the outer "ifdef x64", so the
; nested "ifdef x64" branches below are not assembled in this file as
; written.

rD      equ     r7              ; data pointer
rN      equ     r1              ; size / loop bound
rT      equ     r5              ; table base

xA      equ     x6              ; scratch table index
xA_R    equ     r6              ; lets @CatStr(xA, _R) resolve to r6

ifdef x64
    num_VAR     equ     r8
else

; Offset of the crc argument from r4 after MY_PUSH_4_REGS
; (presumably 4 saved registers + return address - confirm in 7zAsm.asm).
crc_OFFS        equ     (REG_SIZE * 5)

if (IS_CDECL gt 0) or (IS_LINUX gt 0)
    ; cdecl or (GNU fastcall) stack:
    ;   (UInt32 *) table
    ;   size_t     size
    ;   void *     data
    ;   (UInt64)   crc
    ;   ret-ip <-(r4)
    data_OFFS   equ     (8 + crc_OFFS)
    size_OFFS   equ     (REG_SIZE + data_OFFS)
    table_OFFS  equ     (REG_SIZE + size_OFFS)
    num_VAR     equ     [r4 + size_OFFS]
    table_VAR   equ     [r4 + table_OFFS]
else
    ; Windows fastcall:
    ;   r1 = data, r2 = size
    ; stack:
    ;   (UInt32 *) table
    ;   (UInt64)   crc
    ;   ret-ip <-(r4)
    table_OFFS  equ     (8 + crc_OFFS)
    table_VAR   equ     [r4 + table_OFFS]
    ; The table argument slot is reused for the loop bound: rT is loaded
    ; from it in MY_PROLOG_BASE before num_VAR is first written.
    num_VAR     equ     table_VAR
endif
endif ; x64

SRCDAT4         equ     DWORD PTR [rN + rD * 1]

; op dest, one 32-bit half of table[t][src]
; word_index = 0 selects the low half of the 8-byte entry, 1 the high half.
CRC_1 macro op:req, dest:req, src:req, t:req, word_index:req
        op      dest, DWORD PTR [rT + @CatStr(src, _R) * 8 + 0800h * (t) + (word_index) * 4]
endm

; Applies op0 / op1 to (dest0, dest1) with the low / high half of table[t][src].
CRC macro op0:req, op1:req, dest0:req, dest1:req, src:req, t:req
        CRC_1   op0, dest0, src, t, 0
        CRC_1   op1, dest1, src, t, 1
endm

CRC_XOR macro dest0:req, dest1:req, src:req, t:req
        CRC xor, xor, dest0, dest1, src, t
endm


; Processes one input byte with sub-table 0. The 64-bit state is held in
; x0 (low half) : x2 (high half) and shifted right by 8 via shrd / shr.
; The final "dec rN" leaves ZF set when rN reaches zero.
; Clobbers xA and x3.
CRC1b macro
        movzx   xA, BYTE PTR [rD]
        inc     rD
        MOVZXLO x3, x0
        xor     xA, x3
        shrd    x0, x2, 8
        shr     x2, 8
        CRC_XOR x0, x2, xA, 0
        dec     rN
endm


; Common prolog: saves registers and loads the arguments so that
; x0 : x2 = crc (low : high), rD = data, rN = size, rT = table.
MY_PROLOG_BASE macro
        MY_PUSH_4_REGS
ifdef x64
        mov     r0, REG_ABI_PARAM_0     ; r0 <- r1 / r7
        mov     rT, REG_ABI_PARAM_3     ; r5 <- r9 / r1
        mov     rN, REG_ABI_PARAM_2     ; r1 <- r8 / r2
        mov     rD, REG_ABI_PARAM_1     ; r7 <- r2 / r6
        mov     r2, r0
        shr     r2, 32
        mov     x0, x0
else
    if (IS_CDECL gt 0) or (IS_LINUX gt 0)
        proc_numParams = proc_numParams + 2 ; for ABI_LINUX
        mov     rN, [r4 + size_OFFS]
        mov     rD, [r4 + data_OFFS]
    else
        mov     rD, REG_ABI_PARAM_0     ; r7 <- r1 : (data)
        mov     rN, REG_ABI_PARAM_1     ; r1 <- r2 : (size)
    endif
        mov     x0, [r4 + crc_OFFS]
        mov     x2, [r4 + crc_OFFS + 4]
        mov     rT, table_VAR
endif
endm


; Common epilog: byte-wise tail loop over the remaining rN bytes, then
; register restore. crc_end / func_end are the label names to define, so
; each procedure can pass its own unique names.
MY_EPILOG_BASE macro crc_end:req, func_end:req
crc_end:
        test    rN, rN
        jz      func_end
@@:
        CRC1b
        jnz     @B
func_end:
ifdef x64
        shl     r2, 32
        xor     r0, r2
endif
        MY_POP_4_REGS
endm


; ALIGN_MASK is 3 or 7 bytes alignment:
; (3 when NUM_WORDS is odd, 7 when NUM_WORDS is even)
ALIGN_MASK      equ     (7 - (NUM_WORDS and 1) * 4)

if (NUM_WORDS eq 1)

NUM_BYTES_LIMIT_T4 equ (NUM_WORDS * 4 + 4)

MY_PROC @CatStr(XzCrc64UpdateT, %(NUM_WORDS * 4)), 5
        MY_PROLOG_BASE

        ; Short inputs go straight to the byte-wise tail loop.
        cmp     rN, NUM_BYTES_LIMIT_T4 + ALIGN_MASK
        jb      crc_end_4
@@:
        ; Byte-wise processing until rD satisfies ALIGN_MASK.
        test    rD, ALIGN_MASK
        jz      @F
        CRC1b
        jmp     @B
@@:
        ; Pre-fold the first 4 data bytes, then switch to
        ; (limit, negative offset) addressing as used by SRCDAT4.
        xor     x0, [rD]
        lea     rN, [rD + rN * 1 - (NUM_BYTES_LIMIT_T4 - 1)]
        sub     rD, rN
        add     rN, 4

        MOVZXLO xA, x0
align 16
@@:
        mov     x3, SRCDAT4
        xor     x3, x2
        shr     x0, 8
        CRC xor, mov, x3, x2, xA, 3
        MOVZXLO xA, x0
        shr     x0, 8
        ; MOVZXHI  xA, x0
        ; shr     x0, 16
        CRC_XOR x3, x2, xA, 2

        MOVZXLO xA, x0
        shr     x0, 8
        CRC_XOR x3, x2, xA, 1
        CRC_XOR x3, x2, x0, 0
        MOVZXLO xA, x3
        mov     x0, x3

        add     rD, 4
        jnc     @B

        ; Back to (pointer, count) form. The second xor cancels the data
        ; that was pre-folded into x0 for a step that will not run.
        sub     rN, 4
        add     rD, rN
        xor     x0, [rD]
        add     rN, NUM_BYTES_LIMIT_T4 - 1
        sub     rN, rD
        MY_EPILOG_BASE crc_end_4, func_end_4
MY_ENDP

else ; NUM_WORDS > 1

SHR_X macro x, imm
        shr     x, imm
endm


; Folds the low byte of (a) through sub-table (off) into v0 : v1 and shifts
; (a) right by 8.
ITER_1 macro v0, v1, a, off
        MOVZXLO xA, a
        SHR_X   a, 8
        CRC_XOR v0, v1, xA, off
endm


; Folds all 4 bytes of (a) into v0 : v1 using sub-tables off + 3 .. off
; (lowest byte first). Only the first variant is assembled ("if 0 eq 0" is
; always true); the other two are kept as alternatives.
ITER_4 macro v0, v1, a, off
if 0 eq 0
        ITER_1  v0, v1, a, off + 3
        ITER_1  v0, v1, a, off + 2
        ITER_1  v0, v1, a, off + 1
        CRC_XOR v0, v1, a, off
elseif 0 eq 0
        MOVZXLO xA, a
        CRC_XOR v0, v1, xA, off + 3
        mov     xA, a
        ror     a, 16   ; 32-bit ror
        shr     xA, 24
        CRC_XOR v0, v1, xA, off
        MOVZXLO xA, a
        SHR_X   a, 24
        CRC_XOR v0, v1, xA, off + 1
        CRC_XOR v0, v1, a, off + 2
else
        ; MOVZXHI provides smaller code, but MOVZX_HI_BYTE is not fast instruction
        MOVZXLO xA, a
        CRC_XOR v0, v1, xA, off + 3
        MOVZXHI xA, a
        SHR_X   a, 16
        CRC_XOR v0, v1, xA, off + 2
        MOVZXLO xA, a
        SHR_X   a, 8
        CRC_XOR v0, v1, xA, off + 1
        CRC_XOR v0, v1, a, off
endif
endm



ITER_1_PAIR macro v0, v1, a0, a1, off
        ITER_1  v0, v1, a0, off + 4
        ITER_1  v0, v1, a1, off
endm

; rD runs src_rD_offset bytes ahead of the data for the current step,
; because the first 8 bytes of each step are already folded into the state.
src_rD_offset   equ     8
STEP_SIZE       equ     (NUM_WORDS * 4)

; Applies op to (v0, v1) with the first 8 bytes of step (index + 1).
ITER_12_NEXT macro op, index, v0, v1
        op     v0, DWORD PTR [rD + (index + 1) * STEP_SIZE     - src_rD_offset]
        op     v1, DWORD PTR [rD + (index + 1) * STEP_SIZE + 4 - src_rD_offset]
endm

; One step of the main loop: consumes the state in a0 : a1 and produces the
; new state in v0 : v1.
;   1. bytes 8 .. STEP_SIZE - 1 of the step are looked up one by one
;      (sub-tables NUM_SKIP_BYTES - 1 .. 0),
;   2. the first 8 bytes of the following step are XORed in (ITER_12_NEXT),
;   3. the 8 state bytes are folded with sub-tables NUM_SKIP_BYTES + 7 ..
;      NUM_SKIP_BYTES.
ITER_12 macro index, a0, a1, v0, v1

  if NUM_SKIP_BYTES  eq 0
        ITER_12_NEXT mov, index, v0, v1
  else
    k = 0
    while k lt NUM_SKIP_BYTES
        movzx   xA, BYTE PTR [rD + (index) * STEP_SIZE + k + 8 - src_rD_offset]
      if k eq 0
        CRC mov, mov,   v0, v1, xA, NUM_SKIP_BYTES - 1 - k
      else
        CRC_XOR         v0, v1, xA, NUM_SKIP_BYTES - 1 - k
      endif
      k = k + 1
    endm
        ITER_12_NEXT xor, index, v0, v1
  endif

if 0 eq 0
        ITER_4  v0, v1, a0, NUM_SKIP_BYTES + 4
        ITER_4  v0, v1, a1, NUM_SKIP_BYTES
else ; interleave version is faster/slower for different processors
        ITER_1_PAIR v0, v1, a0, a1, NUM_SKIP_BYTES + 3
        ITER_1_PAIR v0, v1, a0, a1, NUM_SKIP_BYTES + 2
        ITER_1_PAIR v0, v1, a0, a1, NUM_SKIP_BYTES + 1
        CRC_XOR v0, v1, a0, NUM_SKIP_BYTES + 4
        CRC_XOR v0, v1, a1, NUM_SKIP_BYTES
endif
endm

; we use (UNROLL_CNT > 1) to reduce read ports pressure (num_VAR reads)
UNROLL_CNT      equ     (2 * 1)
NUM_BYTES_LIMIT equ     (STEP_SIZE * UNROLL_CNT + 8)

MY_PROC @CatStr(XzCrc64UpdateT, %(NUM_WORDS * 4)), 5
        MY_PROLOG_BASE

        ; Short inputs go straight to the byte-wise tail loop.
        cmp     rN, NUM_BYTES_LIMIT + ALIGN_MASK
        jb      crc_end_12
@@:
        ; Byte-wise processing until rD satisfies ALIGN_MASK.
        test    rD, ALIGN_MASK
        jz      @F
        CRC1b
        jmp     @B
@@:
        ; Pre-fold the first 8 data bytes into x0 : x2 and compute the loop
        ; bound. The bound is kept in num_VAR rather than in rN; presumably
        ; because x1 (the 32-bit alias of rN = r1) serves as an accumulator
        ; inside the loop - confirm the alias in 7zAsm.asm.
        xor     x0, [rD]
        xor     x2, [rD + 4]
        add     rD, src_rD_offset
        lea     rN, [rD + rN * 1 - (NUM_BYTES_LIMIT - 1)]
        mov     num_VAR, rN

align 16
@@:
    ; Unrolled steps alternate the role of the register pairs:
    ; even i: state x0 : x2 -> x1 : x3, odd i: state x1 : x3 -> x0 : x2.
    i = 0
    rept UNROLL_CNT
      if (i and 1) eq 0
        ITER_12     i, x0, x2,  x1, x3
      else
        ITER_12     i, x1, x3,  x0, x2
      endif
      i = i + 1
    endm

    if (UNROLL_CNT and 1)
        ; odd unroll count: move the result back to x0 : x2
        mov     x0, x1
        mov     x2, x3
    endif
        add     rD, STEP_SIZE * UNROLL_CNT
        cmp     rD, num_VAR
        jb      @B

        ; Back to (pointer, count) form. The two xors cancel the data that
        ; was pre-folded into x0 : x2 for a step that will not run.
        mov     rN, num_VAR
        add     rN, NUM_BYTES_LIMIT - 1
        sub     rN, rD
        sub     rD, src_rD_offset
        xor     x0, [rD]
        xor     x2, [rD + 4]

        MY_EPILOG_BASE crc_end_12, func_end_12
MY_ENDP

endif ; (NUM_WORDS > 1)
endif ; ! x64
end