/*
xxHash - Fast Hash algorithm
Copyright (C) 2012-2015, Yann Collet

BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

You can contact the author at :
- xxHash source repository : https://github.com/Cyan4973/xxHash
*/


/**************************************
*  Tuning parameters
**************************************/
/* XXH_FORCE_MEMORY_ACCESS
 * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
 * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
 * The switch below allows selecting a different access method for improved performance.
 * Method 0 (default) : use `memcpy()`. Safe and portable.
 * Method 1 : `__packed` statement. It depends on a compiler extension (i.e., not portable).
 *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
 * Method 2 : direct access. This method is portable but violates the C standard.
 *            It can generate buggy code on targets where the generated assembly depends on alignment.
 *            However, in some circumstances, it is the only known way to get the best performance (e.g. GCC + ARMv6).
 * See http://stackoverflow.com/a/32095106/646947 for details.
 * Prefer these methods in priority order (0 > 1 > 2).
 */
#ifndef XXH_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
#  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
#    define XXH_FORCE_MEMORY_ACCESS 2
#  elif defined(__INTEL_COMPILER) || \
  (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
#    define XXH_FORCE_MEMORY_ACCESS 1
#  endif
#endif
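
/* Illustrative note (an addition, not upstream xxHash text) : because the block
 * above is guarded by #ifndef, the access method can also be forced from the
 * build line, e.g. a hypothetical invocation such as :
 *     cc -O2 -DXXH_FORCE_MEMORY_ACCESS=2 -c xxhash.c
 * Method 2 should only be forced on targets known to tolerate unaligned loads. */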

/* XXH_ACCEPT_NULL_INPUT_POINTER :
 * If the input pointer is a null pointer, xxHash's default behavior is to trigger a memory access error, since it is a bad pointer.
 * When this option is enabled, xxHash output for a null input pointer will be the same as for a zero-length input.
 * By default, this option is disabled. To enable it, uncomment the define below :
 */
/* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */

/* XXH_FORCE_NATIVE_FORMAT :
 * By default, the xxHash library provides endian-independent hash values, based on the little-endian convention.
 * Results are therefore identical for little-endian and big-endian CPUs.
 * This comes at a performance cost for big-endian CPUs, since some byte swapping is required to emulate the little-endian format.
 * Should endian-independence be of no importance for your application, you may set the #define below to 1,
 * to improve speed on big-endian CPUs.
 * This option has no impact on little-endian CPUs.
 */
#define XXH_FORCE_NATIVE_FORMAT 0

/* XXH_USELESS_ALIGN_BRANCH :
 * This is a minor performance trick, only useful with lots of very small keys.
 * It means : don't test between aligned/unaligned, because performance will be the same either way.
 * It saves one initial branch per hash.
 */
#if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
#  define XXH_USELESS_ALIGN_BRANCH 1
#endif


/**************************************
*  Compiler Specific Options
***************************************/
#ifdef _MSC_VER    /* Visual Studio */
#  pragma warning(disable : 4127)      /* disable: C4127: conditional expression is constant */
#  define FORCE_INLINE static __forceinline
#else
#  if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
#    ifdef __GNUC__
#      define FORCE_INLINE static inline __attribute__((always_inline))
#    else
#      define FORCE_INLINE static inline
#    endif
#  else
#    define FORCE_INLINE static
#  endif /* __STDC_VERSION__ */
#endif


/**************************************
*  Includes & Memory related functions
***************************************/
#include "xxhash.h"
/* Modify the local functions below should you wish to use some other memory routines */
/* for malloc(), free() */
#include <stdlib.h>
static void* XXH_malloc(size_t s) { return malloc(s); }
static void  XXH_free  (void* p)  { free(p); }
/* for memcpy() */
#include <string.h>
static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); }


/**************************************
*  Basic Types
***************************************/
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
# include <stdint.h>
  typedef uint8_t  BYTE;
  typedef uint16_t U16;
  typedef uint32_t U32;
  typedef  int32_t S32;
  typedef uint64_t U64;
#else
  typedef unsigned char      BYTE;
  typedef unsigned short     U16;
  typedef unsigned int       U32;
  typedef   signed int       S32;
  typedef unsigned long long U64;
#endif


#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))

/* Force direct memory access. Only works on CPUs which support unaligned memory access in hardware */
static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; }
static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; }

#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))

/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
/* currently only defined for gcc and icc */
typedef union { U32 u32; U64 u64; } __packed unalign;

static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
static U64 XXH_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }

#else

/* portable and safe solution. Generally efficient.
 * see : http://stackoverflow.com/a/32095106/646947
 */

static U32 XXH_read32(const void* memPtr)
{
    U32 val;
    memcpy(&val, memPtr, sizeof(val));
    return val;
}

static U64 XXH_read64(const void* memPtr)
{
    U64 val;
    memcpy(&val, memPtr, sizeof(val));
    return val;
}

#endif   /* XXH_FORCE_MEMORY_ACCESS */


/******************************************
*  Compiler-specific Functions and Macros
******************************************/
#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)

/* Note : although _rotl exists for MinGW (GCC under Windows), performance seems poor */
#if defined(_MSC_VER)
#  define XXH_rotl32(x,r) _rotl(x,r)
#  define XXH_rotl64(x,r) _rotl64(x,r)
#else
#  define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r)))
#  define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r)))
#endif

#if defined(_MSC_VER)     /* Visual Studio */
#  define XXH_swap32 _byteswap_ulong
#  define XXH_swap64 _byteswap_uint64
#elif GCC_VERSION >= 403
#  define XXH_swap32 __builtin_bswap32
#  define XXH_swap64 __builtin_bswap64
#else
static U32 XXH_swap32 (U32 x)
{
    return  ((x << 24) & 0xff000000 ) |
            ((x <<  8) & 0x00ff0000 ) |
            ((x >>  8) & 0x0000ff00 ) |
            ((x >> 24) & 0x000000ff );
}
static U64 XXH_swap64 (U64 x)
{
    return  ((x << 56) & 0xff00000000000000ULL) |
            ((x << 40) & 0x00ff000000000000ULL) |
            ((x << 24) & 0x0000ff0000000000ULL) |
            ((x << 8)  & 0x000000ff00000000ULL) |
            ((x >> 8)  & 0x00000000ff000000ULL) |
            ((x >> 24) & 0x0000000000ff0000ULL) |
            ((x >> 40) & 0x000000000000ff00ULL) |
            ((x >> 56) & 0x00000000000000ffULL);
}
#endif


/***************************************
*  Architecture Macros
***************************************/
typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianness;

/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */
#ifndef XXH_CPU_LITTLE_ENDIAN
    static const int one = 1;
#   define XXH_CPU_LITTLE_ENDIAN   (*(const char*)(&one))
#endif


/*****************************
*  Memory reads
*****************************/
typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;

FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianness endian, XXH_alignment align)
{
    if (align==XXH_unaligned)
        return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));
    else
        return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr);
}

FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianness endian)
{
    return XXH_readLE32_align(ptr, endian, XXH_unaligned);
}

FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianness endian, XXH_alignment align)
{
    if (align==XXH_unaligned)
        return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
    else
        return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr);
}

FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianness endian)
{
    return XXH_readLE64_align(ptr, endian, XXH_unaligned);
}


/***************************************
*  Macros
***************************************/
#define XXH_STATIC_ASSERT(c)   { enum { XXH_static_assert = 1/(!!(c)) }; }    /* use only *after* variable declarations */


/***************************************
*  Constants
***************************************/
#define PRIME32_1   2654435761U
#define PRIME32_2   2246822519U
#define PRIME32_3   3266489917U
#define PRIME32_4    668265263U
#define PRIME32_5    374761393U

#define PRIME64_1 11400714785074694791ULL
#define PRIME64_2 14029467366897019727ULL
#define PRIME64_3  1609587929392839161ULL
#define PRIME64_4  9650029242287828579ULL
#define PRIME64_5  2870177450012600261ULL


/*****************************
*  Simple Hash Functions
*****************************/
FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianness endian, XXH_alignment align)
{
    const BYTE* p = (const BYTE*)input;
    const BYTE* bEnd = p + len;
    U32 h32;
#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align)

#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
    if (p==NULL)
    {
        len=0;
        bEnd=p=(const BYTE*)(size_t)16;
    }
#endif

    if (len>=16)
    {
        const BYTE* const limit = bEnd - 16;
        U32 v1 = seed + PRIME32_1 + PRIME32_2;
        U32 v2 = seed + PRIME32_2;
        U32 v3 = seed + 0;
        U32 v4 = seed - PRIME32_1;

        do
        {
            v1 += XXH_get32bits(p) * PRIME32_2;
            v1 = XXH_rotl32(v1, 13);
            v1 *= PRIME32_1;
            p+=4;
            v2 += XXH_get32bits(p) * PRIME32_2;
            v2 = XXH_rotl32(v2, 13);
            v2 *= PRIME32_1;
            p+=4;
            v3 += XXH_get32bits(p) * PRIME32_2;
            v3 = XXH_rotl32(v3, 13);
            v3 *= PRIME32_1;
            p+=4;
            v4 += XXH_get32bits(p) * PRIME32_2;
            v4 = XXH_rotl32(v4, 13);
            v4 *= PRIME32_1;
            p+=4;
        }
        while (p<=limit);

        h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
    }
    else
    {
        h32  = seed + PRIME32_5;
    }

    h32 += (U32) len;

    while (p+4<=bEnd)
    {
        h32 += XXH_get32bits(p) * PRIME32_3;
        h32  = XXH_rotl32(h32, 17) * PRIME32_4 ;
        p+=4;
    }

    while (p<bEnd)
    {
        h32 += (*p) * PRIME32_5;
        h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
        p++;
    }

    h32 ^= h32 >> 15;
    h32 *= PRIME32_2;
    h32 ^= h32 >> 13;
    h32 *= PRIME32_3;
    h32 ^= h32 >> 16;

    return h32;
}


unsigned int XXH32 (const void* input, size_t len, unsigned int seed)
{
#if 0
    /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
    XXH32_state_t state;
    XXH32_reset(&state, seed);
    XXH32_update(&state, input, len);
    return XXH32_digest(&state);
#else
    XXH_endianness endian_detected = (XXH_endianness)XXH_CPU_LITTLE_ENDIAN;

#  if !defined(XXH_USELESS_ALIGN_BRANCH)
    if ((((size_t)input) & 3) == 0)   /* Input is 4-byte aligned, leverage the speed benefit */
    {
        if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
            return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
        else
            return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
    }
#  endif

    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
        return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
    else
        return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
#endif
}
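
/* Illustrative one-shot usage sketch (an addition, not upstream xxHash text) :
 * hashing a memory buffer with the single-call API above. Buffer contents and
 * the seed value are arbitrary examples.
 *
 *     const char data[] = "sample";
 *     unsigned int h32 = XXH32(data, sizeof(data) - 1, 0);   with seed 0
 *
 * The 64-bit variant XXH64() defined below follows the same calling convention
 * and returns an unsigned long long. */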

FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianness endian, XXH_alignment align)
{
    const BYTE* p = (const BYTE*)input;
    const BYTE* bEnd = p + len;
    U64 h64;
#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align)

#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
    if (p==NULL)
    {
        len=0;
        bEnd=p=(const BYTE*)(size_t)32;
    }
#endif

    if (len>=32)
    {
        const BYTE* const limit = bEnd - 32;
        U64 v1 = seed + PRIME64_1 + PRIME64_2;
        U64 v2 = seed + PRIME64_2;
        U64 v3 = seed + 0;
        U64 v4 = seed - PRIME64_1;

        do
        {
            v1 += XXH_get64bits(p) * PRIME64_2;
            p+=8;
            v1 = XXH_rotl64(v1, 31);
            v1 *= PRIME64_1;
            v2 += XXH_get64bits(p) * PRIME64_2;
            p+=8;
            v2 = XXH_rotl64(v2, 31);
            v2 *= PRIME64_1;
            v3 += XXH_get64bits(p) * PRIME64_2;
            p+=8;
            v3 = XXH_rotl64(v3, 31);
            v3 *= PRIME64_1;
            v4 += XXH_get64bits(p) * PRIME64_2;
            p+=8;
            v4 = XXH_rotl64(v4, 31);
            v4 *= PRIME64_1;
        }
        while (p<=limit);

        h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);

        v1 *= PRIME64_2;
        v1 = XXH_rotl64(v1, 31);
        v1 *= PRIME64_1;
        h64 ^= v1;
        h64 = h64 * PRIME64_1 + PRIME64_4;

        v2 *= PRIME64_2;
        v2 = XXH_rotl64(v2, 31);
        v2 *= PRIME64_1;
        h64 ^= v2;
        h64 = h64 * PRIME64_1 + PRIME64_4;

        v3 *= PRIME64_2;
        v3 = XXH_rotl64(v3, 31);
        v3 *= PRIME64_1;
        h64 ^= v3;
        h64 = h64 * PRIME64_1 + PRIME64_4;

        v4 *= PRIME64_2;
        v4 = XXH_rotl64(v4, 31);
        v4 *= PRIME64_1;
        h64 ^= v4;
        h64 = h64 * PRIME64_1 + PRIME64_4;
    }
    else
    {
        h64  = seed + PRIME64_5;
    }

    h64 += (U64) len;

    while (p+8<=bEnd)
    {
        U64 k1 = XXH_get64bits(p);
        k1 *= PRIME64_2;
        k1 = XXH_rotl64(k1,31);
        k1 *= PRIME64_1;
        h64 ^= k1;
        h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
        p+=8;
    }

    if (p+4<=bEnd)
    {
        h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1;
        h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
        p+=4;
    }

    while (p<bEnd)
    {
        h64 ^= (*p) * PRIME64_5;
        h64 = XXH_rotl64(h64, 11) * PRIME64_1;
        p++;
    }

    h64 ^= h64 >> 33;
    h64 *= PRIME64_2;
    h64 ^= h64 >> 29;
    h64 *= PRIME64_3;
    h64 ^= h64 >> 32;

    return h64;
}


unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed)
{
#if 0
    /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
    XXH64_state_t state;
    XXH64_reset(&state, seed);
    XXH64_update(&state, input, len);
    return XXH64_digest(&state);
#else
    XXH_endianness endian_detected = (XXH_endianness)XXH_CPU_LITTLE_ENDIAN;

#  if !defined(XXH_USELESS_ALIGN_BRANCH)
    if ((((size_t)input) & 7)==0)   /* Input is aligned, let's leverage the speed advantage */
    {
        if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
            return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
        else
            return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
    }
#  endif

    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
        return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
    else
        return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
#endif
}

/****************************************************
*  Advanced Hash Functions
****************************************************/

/*** Allocation ***/
typedef struct
{
    U64 total_len;
    U32 seed;
    U32 v1;
    U32 v2;
    U32 v3;
    U32 v4;
    U32 mem32[4];   /* defined as U32 for alignment */
    U32 memsize;
} XXH_istate32_t;

typedef struct
{
    U64 total_len;
    U64 seed;
    U64 v1;
    U64 v2;
    U64 v3;
    U64 v4;
    U64 mem64[4];   /* defined as U64 for alignment */
    U32 memsize;
} XXH_istate64_t;


XXH32_state_t* XXH32_createState(void)
{
    XXH_STATIC_ASSERT(sizeof(XXH32_state_t) >= sizeof(XXH_istate32_t));   /* A compilation error here means XXH32_state_t is not large enough */
    return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
}
XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
{
    XXH_free(statePtr);
    return XXH_OK;
}

XXH64_state_t* XXH64_createState(void)
{
    XXH_STATIC_ASSERT(sizeof(XXH64_state_t) >= sizeof(XXH_istate64_t));   /* A compilation error here means XXH64_state_t is not large enough */
    return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
}
XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
{
    XXH_free(statePtr);
    return XXH_OK;
}


/*** Hash feed ***/

XXH_errorcode XXH32_reset(XXH32_state_t* state_in, unsigned int seed)
{
    XXH_istate32_t* state = (XXH_istate32_t*) state_in;
    state->seed = seed;
    state->v1 = seed + PRIME32_1 + PRIME32_2;
    state->v2 = seed + PRIME32_2;
    state->v3 = seed + 0;
    state->v4 = seed - PRIME32_1;
    state->total_len = 0;
    state->memsize = 0;
    return XXH_OK;
}

XXH_errorcode XXH64_reset(XXH64_state_t* state_in, unsigned long long seed)
{
    XXH_istate64_t* state = (XXH_istate64_t*) state_in;
    state->seed = seed;
    state->v1 = seed + PRIME64_1 + PRIME64_2;
    state->v2 = seed + PRIME64_2;
    state->v3 = seed + 0;
    state->v4 = seed - PRIME64_1;
    state->total_len = 0;
    state->memsize = 0;
    return XXH_OK;
}


FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state_in, const void* input, size_t len, XXH_endianness endian)
{
    XXH_istate32_t* state = (XXH_istate32_t *) state_in;
    const BYTE* p = (const BYTE*)input;
    const BYTE* const bEnd = p + len;

#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
    if (input==NULL) return XXH_ERROR;
#endif

    state->total_len += len;

    if (state->memsize + len < 16)   /* fill in tmp buffer */
    {
        XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len);
        state->memsize += (U32)len;
        return XXH_OK;
    }

    if (state->memsize)   /* some data left from previous update */
    {
        XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize);
        {
            const U32* p32 = state->mem32;
            state->v1 += XXH_readLE32(p32, endian) * PRIME32_2;
            state->v1 = XXH_rotl32(state->v1, 13);
            state->v1 *= PRIME32_1;
            p32++;
            state->v2 += XXH_readLE32(p32, endian) * PRIME32_2;
            state->v2 = XXH_rotl32(state->v2, 13);
            state->v2 *= PRIME32_1;
            p32++;
            state->v3 += XXH_readLE32(p32, endian) * PRIME32_2;
            state->v3 = XXH_rotl32(state->v3, 13);
            state->v3 *= PRIME32_1;
            p32++;
            state->v4 += XXH_readLE32(p32, endian) * PRIME32_2;
            state->v4 = XXH_rotl32(state->v4, 13);
            state->v4 *= PRIME32_1;
            p32++;
        }
        p += 16-state->memsize;
        state->memsize = 0;
    }

    if (p <= bEnd-16)
    {
        const BYTE* const limit = bEnd - 16;
        U32 v1 = state->v1;
        U32 v2 = state->v2;
        U32 v3 = state->v3;
        U32 v4 = state->v4;

        do
        {
            v1 += XXH_readLE32(p, endian) * PRIME32_2;
            v1 = XXH_rotl32(v1, 13);
            v1 *= PRIME32_1;
            p+=4;
            v2 += XXH_readLE32(p, endian) * PRIME32_2;
            v2 = XXH_rotl32(v2, 13);
            v2 *= PRIME32_1;
            p+=4;
            v3 += XXH_readLE32(p, endian) * PRIME32_2;
            v3 = XXH_rotl32(v3, 13);
            v3 *= PRIME32_1;
            p+=4;
            v4 += XXH_readLE32(p, endian) * PRIME32_2;
            v4 = XXH_rotl32(v4, 13);
            v4 *= PRIME32_1;
            p+=4;
        }
        while (p<=limit);

        state->v1 = v1;
        state->v2 = v2;
        state->v3 = v3;
        state->v4 = v4;
    }

    if (p < bEnd)
    {
        XXH_memcpy(state->mem32, p, bEnd-p);
        state->memsize = (int)(bEnd-p);
    }

    return XXH_OK;
}

XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len)
{
    XXH_endianness endian_detected = (XXH_endianness)XXH_CPU_LITTLE_ENDIAN;

    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
        return XXH32_update_endian(state_in, input, len, XXH_littleEndian);
    else
        return XXH32_update_endian(state_in, input, len, XXH_bigEndian);
}



FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state_in, XXH_endianness endian)
{
    const XXH_istate32_t* state = (const XXH_istate32_t*) state_in;
    const BYTE * p = (const BYTE*)state->mem32;
    const BYTE* bEnd = (const BYTE*)(state->mem32) + state->memsize;
    U32 h32;

    if (state->total_len >= 16)
    {
        h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18);
    }
    else
    {
        h32  = state->seed + PRIME32_5;
    }

    h32 += (U32) state->total_len;

    while (p+4<=bEnd)
    {
        h32 += XXH_readLE32(p, endian) * PRIME32_3;
        h32  = XXH_rotl32(h32, 17) * PRIME32_4;
        p+=4;
    }

    while (p<bEnd)
    {
        h32 += (*p) * PRIME32_5;
        h32 = XXH_rotl32(h32, 11) * PRIME32_1;
        p++;
    }

    h32 ^= h32 >> 15;
    h32 *= PRIME32_2;
    h32 ^= h32 >> 13;
    h32 *= PRIME32_3;
    h32 ^= h32 >> 16;

    return h32;
}


unsigned int XXH32_digest (const XXH32_state_t* state_in)
{
    XXH_endianness endian_detected = (XXH_endianness)XXH_CPU_LITTLE_ENDIAN;

    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
        return XXH32_digest_endian(state_in, XXH_littleEndian);
    else
        return XXH32_digest_endian(state_in, XXH_bigEndian);
}
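
/* Illustrative streaming usage sketch (an addition, not upstream xxHash text) :
 * hashing data that arrives in chunks with the 32-bit state API. Error handling
 * is omitted and the chunk names are hypothetical.
 *
 *     XXH32_state_t* state = XXH32_createState();
 *     XXH32_reset(state, 0);                        reset with a seed of 0
 *     XXH32_update(state, chunk1, chunk1_len);
 *     XXH32_update(state, chunk2, chunk2_len);
 *     unsigned int h32 = XXH32_digest(state);
 *     XXH32_freeState(state);
 *
 * The result matches a single XXH32() call over the concatenated input. The
 * 64-bit API (XXH64_createState/reset/update/digest/freeState) is analogous. */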


FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state_in, const void* input, size_t len, XXH_endianness endian)
{
    XXH_istate64_t * state = (XXH_istate64_t *) state_in;
    const BYTE* p = (const BYTE*)input;
    const BYTE* const bEnd = p + len;

#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
    if (input==NULL) return XXH_ERROR;
#endif

    state->total_len += len;

    if (state->memsize + len < 32)   /* fill in tmp buffer */
    {
        XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len);
        state->memsize += (U32)len;
        return XXH_OK;
    }

    if (state->memsize)   /* some data left from previous update */
    {
        XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize);
        {
            const U64* p64 = state->mem64;
            state->v1 += XXH_readLE64(p64, endian) * PRIME64_2;
            state->v1 = XXH_rotl64(state->v1, 31);
            state->v1 *= PRIME64_1;
            p64++;
            state->v2 += XXH_readLE64(p64, endian) * PRIME64_2;
            state->v2 = XXH_rotl64(state->v2, 31);
            state->v2 *= PRIME64_1;
            p64++;
            state->v3 += XXH_readLE64(p64, endian) * PRIME64_2;
            state->v3 = XXH_rotl64(state->v3, 31);
            state->v3 *= PRIME64_1;
            p64++;
            state->v4 += XXH_readLE64(p64, endian) * PRIME64_2;
            state->v4 = XXH_rotl64(state->v4, 31);
            state->v4 *= PRIME64_1;
            p64++;
        }
        p += 32-state->memsize;
        state->memsize = 0;
    }

    if (p+32 <= bEnd)
    {
        const BYTE* const limit = bEnd - 32;
        U64 v1 = state->v1;
        U64 v2 = state->v2;
        U64 v3 = state->v3;
        U64 v4 = state->v4;

        do
        {
            v1 += XXH_readLE64(p, endian) * PRIME64_2;
            v1 = XXH_rotl64(v1, 31);
            v1 *= PRIME64_1;
            p+=8;
            v2 += XXH_readLE64(p, endian) * PRIME64_2;
            v2 = XXH_rotl64(v2, 31);
            v2 *= PRIME64_1;
            p+=8;
            v3 += XXH_readLE64(p, endian) * PRIME64_2;
            v3 = XXH_rotl64(v3, 31);
            v3 *= PRIME64_1;
            p+=8;
            v4 += XXH_readLE64(p, endian) * PRIME64_2;
            v4 = XXH_rotl64(v4, 31);
            v4 *= PRIME64_1;
            p+=8;
        }
        while (p<=limit);

        state->v1 = v1;
        state->v2 = v2;
        state->v3 = v3;
        state->v4 = v4;
    }

    if (p < bEnd)
    {
        XXH_memcpy(state->mem64, p, bEnd-p);
        state->memsize = (int)(bEnd-p);
    }

    return XXH_OK;
}

XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len)
{
    XXH_endianness endian_detected = (XXH_endianness)XXH_CPU_LITTLE_ENDIAN;

    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
        return XXH64_update_endian(state_in, input, len, XXH_littleEndian);
    else
        return XXH64_update_endian(state_in, input, len, XXH_bigEndian);
}



FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state_in, XXH_endianness endian)
{
    const XXH_istate64_t * state = (const XXH_istate64_t *) state_in;
    const BYTE * p = (const BYTE*)state->mem64;
    const BYTE* bEnd = (const BYTE*)state->mem64 + state->memsize;
    U64 h64;

    if (state->total_len >= 32)
    {
        U64 v1 = state->v1;
        U64 v2 = state->v2;
        U64 v3 = state->v3;
        U64 v4 = state->v4;

        h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);

        v1 *= PRIME64_2;
        v1 = XXH_rotl64(v1, 31);
        v1 *= PRIME64_1;
        h64 ^= v1;
        h64 = h64*PRIME64_1 + PRIME64_4;

        v2 *= PRIME64_2;
        v2 = XXH_rotl64(v2, 31);
        v2 *= PRIME64_1;
        h64 ^= v2;
        h64 = h64*PRIME64_1 + PRIME64_4;

        v3 *= PRIME64_2;
        v3 = XXH_rotl64(v3, 31);
        v3 *= PRIME64_1;
        h64 ^= v3;
        h64 = h64*PRIME64_1 + PRIME64_4;

        v4 *= PRIME64_2;
        v4 = XXH_rotl64(v4, 31);
        v4 *= PRIME64_1;
        h64 ^= v4;
        h64 = h64*PRIME64_1 + PRIME64_4;
    }
    else
    {
        h64  = state->seed + PRIME64_5;
    }

    h64 += (U64) state->total_len;

    while (p+8<=bEnd)
    {
        U64 k1 = XXH_readLE64(p, endian);
        k1 *= PRIME64_2;
        k1 = XXH_rotl64(k1,31);
        k1 *= PRIME64_1;
        h64 ^= k1;
        h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
        p+=8;
    }

    if (p+4<=bEnd)
    {
        h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1;
        h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
        p+=4;
    }

    while (p<bEnd)
    {
        h64 ^= (*p) * PRIME64_5;
        h64 = XXH_rotl64(h64, 11) * PRIME64_1;
        p++;
    }

    h64 ^= h64 >> 33;
    h64 *= PRIME64_2;
    h64 ^= h64 >> 29;
    h64 *= PRIME64_3;
    h64 ^= h64 >> 32;

    return h64;
}


unsigned long long XXH64_digest (const XXH64_state_t* state_in)
{
    XXH_endianness endian_detected = (XXH_endianness)XXH_CPU_LITTLE_ENDIAN;

    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
        return XXH64_digest_endian(state_in, XXH_littleEndian);
    else
        return XXH64_digest_endian(state_in, XXH_bigEndian);
}