Lines Matching +full:- +full:qq
1 ; 7zCrcOpt.asm -- CRC32 calculation : optimized version
2 ; 2023-12-08 : Igor Pavlov : Public domain
58 ; movzx x0, x0_L - is slow in some cpus (ivb), if same register for src and dest
74 movzx dest, byte ptr [SRCDAT_1 (4 * (NUM_WORDS - 1 - t + iter * NUM_WORDS) + index)]
78 movzx dest, word ptr [SRCDAT_1 (4 * (NUM_WORDS - 1 - t + iter * NUM_WORDS) + index)]
83 ; paired memory loads give 1-3% speed gain, but it uses more registers
115 ; 32-bit load is better if there is only one read port (core2)
117 mov x3, dword ptr [SRCDAT_1 (4 * (NUM_WORDS - 1 - t + iter * NUM_WORDS) + 0)]
131 LAST equ (4 * (NUM_WORDS - 1))
133 CRC_ITER macro qq, nn, iter
137 rept NUM_WORDS - 1
142 MOVZXLO x6, qq
143 mov x3, qq
147 ror qq, 16
148 MOVZXLO x6, qq
149 shr qq, 24
151 if ((UNROLL_CNT and 1) eq 1) and (iter eq (UNROLL_CNT - 1))
152 CRC_MOV qq, qq, LAST + 2
153 xor qq, nn
155 CRC_XOR nn, qq, LAST + 2
160 ; + 4 for prefetching next 4-bytes after current iteration
196 lea rN, [rD + rN * 1 - (NUM_BYTES_LIMIT - 1)]
218 add rN, NUM_BYTES_LIMIT - 1
220 ; 4-byte version
242 add rN, 4 - 1