/* adler32.c -- compute the Adler-32 checksum of a data stream
 * Copyright (C) 1995-2011, 2016 Mark Adler
 * For conditions of distribution and use, see copyright notice in zlib.h
 */

/* @(#) $Id$ */

#include "zutil.h"

#define BASE 65521U     /* largest prime smaller than 65536 */
#define NMAX 5552
/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */

/* Unrolled inner-loop steps: DO1 folds one byte into the low-order sum
   (adler) and then the running low sum into the high-order sum (sum2).
   DO16 processes 16 consecutive bytes with no intermediate modulo -- the
   caller is responsible for reducing before the sums can overflow (NMAX). */
#define DO1(buf,i) {adler += (buf)[i]; sum2 += adler;}
#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1);
#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2);
#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4);
#define DO16(buf) DO8(buf,0); DO8(buf,8);

/* use NO_DIVIDE if your processor does not do division in hardware --
   try it both ways to see which is faster */
#ifdef NO_DIVIDE
/* note that this assumes BASE is 65521, where 65536 % 65521 == 15
   (thank you to John Reiser for pointing this out) */
/* CHOP folds the bits above 16 back into the low 16 bits using
   65536 == 15 (mod BASE), i.e. a = (a & 0xffff) + (a >> 16) * 15.
   The result may still be >= BASE; callers reduce further as needed. */
#  define CHOP(a) \
    do { \
        unsigned long tmp = a >> 16; \
        a &= 0xffffUL; \
        a += (tmp << 4) - tmp; \
    } while (0)
/* MOD28: full reduction mod BASE for a < 2^28 (one CHOP suffices). */
#  define MOD28(a) \
    do { \
        CHOP(a); \
        if (a >= BASE) a -= BASE; \
    } while (0)
/* MOD: full reduction mod BASE for any 32-bit a (two CHOPs). */
#  define MOD(a) \
    do { \
        CHOP(a); \
        MOD28(a); \
    } while (0)
/* MOD63: reduction mod BASE for a 63-bit non-negative value, using
   2^32 == 2^8 - 2^5 + 1 (mod BASE) to fold the high 32 bits first. */
#  define MOD63(a) \
    do { /* this assumes a is not negative */ \
        z_off64_t tmp = a >> 32; \
        a &= 0xffffffffL; \
        a += (tmp << 8) - (tmp << 5) + tmp; \
        tmp = a >> 16; \
        a &= 0xffffL; \
        a += (tmp << 4) - tmp; \
        tmp = a >> 16; \
        a &= 0xffffL; \
        a += (tmp << 4) - tmp; \
        if (a >= BASE) a -= BASE; \
    } while (0)
#else
#  define MOD(a) a %= BASE
#  define MOD28(a) a %= BASE
#  define MOD63(a) a %= BASE
#endif

#include "cpu_features.h"
#if defined(ADLER32_SIMD_SSSE3) || defined(ADLER32_SIMD_NEON) || defined(ADLER32_SIMD_RVV)
#include "adler32_simd.h"
#endif

65 /* ========================================================================= */
adler32_z(uLong adler,const Bytef * buf,z_size_t len)66 uLong ZEXPORT adler32_z(uLong adler, const Bytef *buf, z_size_t len) {
67 unsigned long sum2;
68 unsigned n;
69 /* TODO(cavalcantii): verify if this lengths are optimal for current CPUs. */
70 #if defined(ADLER32_SIMD_SSSE3) || defined(ADLER32_SIMD_NEON) \
71 || defined(ADLER32_SIMD_RVV)
72 #if defined(ADLER32_SIMD_SSSE3)
73 if (buf != Z_NULL && len >= 64 && x86_cpu_enable_ssse3)
74 #elif defined(ADLER32_SIMD_NEON)
75 if (buf != Z_NULL && len >= 64)
76 #elif defined(ADLER32_SIMD_RVV)
77 if (buf != Z_NULL && len >= 32 && riscv_cpu_enable_rvv)
78 #endif
79 return adler32_simd_(adler, buf, len);
80 #endif
81
82 /* split Adler-32 into component sums */
83 sum2 = (adler >> 16) & 0xffff;
84 adler &= 0xffff;
85
86 /* in case user likes doing a byte at a time, keep it fast */
87 if (len == 1) {
88 adler += buf[0];
89 if (adler >= BASE)
90 adler -= BASE;
91 sum2 += adler;
92 if (sum2 >= BASE)
93 sum2 -= BASE;
94 return adler | (sum2 << 16);
95 }
96
97 #if defined(ADLER32_SIMD_SSSE3) || defined(ADLER32_SIMD_NEON) \
98 || defined(RISCV_RVV)
99 /*
100 * Use SIMD to compute the adler32. Since this function can be
101 * freely used, check CPU features here. zlib convention is to
102 * call adler32(0, NULL, 0), before making calls to adler32().
103 * So this is a good early (and infrequent) place to cache CPU
104 * features for those later, more interesting adler32() calls.
105 */
106 if (buf == Z_NULL) {
107 if (!len) /* Assume user is calling adler32(0, NULL, 0); */
108 cpu_check_features();
109 return 1L;
110 }
111 #else
112 /* initial Adler-32 value (deferred check for len == 1 speed) */
113 if (buf == Z_NULL)
114 return 1L;
115 #endif
116
117 /* in case short lengths are provided, keep it somewhat fast */
118 if (len < 16) {
119 while (len--) {
120 adler += *buf++;
121 sum2 += adler;
122 }
123 if (adler >= BASE)
124 adler -= BASE;
125 MOD28(sum2); /* only added so many BASE's */
126 return adler | (sum2 << 16);
127 }
128
129 /* do length NMAX blocks -- requires just one modulo operation */
130 while (len >= NMAX) {
131 len -= NMAX;
132 n = NMAX / 16; /* NMAX is divisible by 16 */
133 do {
134 DO16(buf); /* 16 sums unrolled */
135 buf += 16;
136 } while (--n);
137 MOD(adler);
138 MOD(sum2);
139 }
140
141 /* do remaining bytes (less than NMAX, still just one modulo) */
142 if (len) { /* avoid modulos if none remaining */
143 while (len >= 16) {
144 len -= 16;
145 DO16(buf);
146 buf += 16;
147 }
148 while (len--) {
149 adler += *buf++;
150 sum2 += adler;
151 }
152 MOD(adler);
153 MOD(sum2);
154 }
155
156 /* return recombined sums */
157 return adler | (sum2 << 16);
158 }
159
160 /* ========================================================================= */
adler32(uLong adler,const Bytef * buf,uInt len)161 uLong ZEXPORT adler32(uLong adler, const Bytef *buf, uInt len) {
162 return adler32_z(adler, buf, len);
163 }

/* ========================================================================= */
/* Combine two Adler-32 checksums: given adler1 over a first sequence and
   adler2 over a second sequence of length len2, return the checksum of the
   two sequences concatenated, in O(1) time independent of len2. */
local uLong adler32_combine_(uLong adler1, uLong adler2, z_off64_t len2) {
    unsigned long sum1;
    unsigned long sum2;
    unsigned rem;

    /* for negative len, return invalid adler32 as a clue for debugging */
    if (len2 < 0)
        return 0xffffffffUL;

    /* the derivation of this formula is left as an exercise for the reader */
    MOD63(len2);                /* assumes len2 >= 0 */
    rem = (unsigned)len2;       /* len2 mod BASE, now fits in 16 bits */
    sum1 = adler1 & 0xffff;
    sum2 = rem * sum1;          /* adler1's low sum contributed rem times */
    MOD(sum2);
    /* each sum below may exceed BASE by up to 2*BASE before reduction,
       hence the repeated conditional subtractions instead of one MOD */
    sum1 += (adler2 & 0xffff) + BASE - 1;
    sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem;
    if (sum1 >= BASE) sum1 -= BASE;
    if (sum1 >= BASE) sum1 -= BASE;
    if (sum2 >= ((unsigned long)BASE << 1)) sum2 -= ((unsigned long)BASE << 1);
    if (sum2 >= BASE) sum2 -= BASE;
    return sum1 | (sum2 << 16);
}

190 /* ========================================================================= */
adler32_combine(uLong adler1,uLong adler2,z_off_t len2)191 uLong ZEXPORT adler32_combine(uLong adler1, uLong adler2, z_off_t len2) {
192 return adler32_combine_(adler1, adler2, len2);
193 }
194
adler32_combine64(uLong adler1,uLong adler2,z_off64_t len2)195 uLong ZEXPORT adler32_combine64(uLong adler1, uLong adler2, z_off64_t len2) {
196 return adler32_combine_(adler1, adler2, len2);
197 }
198