/* xref: /aosp_15_r20/external/zlib/slide_hash_simd.h (revision 86ee64e75fa5f8bce2c8c356138035642429cd05) */
/* slide_hash_simd.h
 *
 * Copyright 2022 The Chromium Authors
 * Use of this source code is governed by a BSD-style license that can be
 * found in the Chromium source repository LICENSE file.
 */
7*86ee64e7SAndroid Build Coastguard Worker 
8*86ee64e7SAndroid Build Coastguard Worker #ifndef SLIDE_HASH_SIMD_H
9*86ee64e7SAndroid Build Coastguard Worker #define SLIDE_HASH_SIMD_H
10*86ee64e7SAndroid Build Coastguard Worker 
11*86ee64e7SAndroid Build Coastguard Worker #include "deflate.h"
12*86ee64e7SAndroid Build Coastguard Worker 
/*
 * Fallback spelling for INLINE when nothing included earlier defined it.
 * A compiler that defines _MSC_VER without also defining __clang__ gets the
 * __inline keyword; every other compiler gets the standard inline keyword.
 */
#if !defined(INLINE)
#  if !defined(_MSC_VER) || defined(__clang__)
#    define INLINE inline
#  else
#    define INLINE __inline
#  endif
#endif
20*86ee64e7SAndroid Build Coastguard Worker 
21*86ee64e7SAndroid Build Coastguard Worker #if defined(CPU_NO_SIMD)
22*86ee64e7SAndroid Build Coastguard Worker 
23*86ee64e7SAndroid Build Coastguard Worker #error SIMD has been disabled for your build target
24*86ee64e7SAndroid Build Coastguard Worker 
25*86ee64e7SAndroid Build Coastguard Worker #elif defined(DEFLATE_SLIDE_HASH_SSE2)
26*86ee64e7SAndroid Build Coastguard Worker 
27*86ee64e7SAndroid Build Coastguard Worker #include <emmintrin.h>  /* SSE2 */
28*86ee64e7SAndroid Build Coastguard Worker 
29*86ee64e7SAndroid Build Coastguard Worker #define Z_SLIDE_INIT_SIMD(wsize) _mm_set1_epi16((ush)(wsize))
30*86ee64e7SAndroid Build Coastguard Worker 
31*86ee64e7SAndroid Build Coastguard Worker #define Z_SLIDE_HASH_SIMD(table, size, vector_wsize) \
32*86ee64e7SAndroid Build Coastguard Worker     for (const Posf* const end = table + size; table != end;) { \
33*86ee64e7SAndroid Build Coastguard Worker         __m128i vO = _mm_loadu_si128((__m128i *)(table + 0)); \
34*86ee64e7SAndroid Build Coastguard Worker         vO = _mm_subs_epu16(vO, vector_wsize); \
35*86ee64e7SAndroid Build Coastguard Worker         _mm_storeu_si128((__m128i *)(table + 0), vO); \
36*86ee64e7SAndroid Build Coastguard Worker         table += 8; \
37*86ee64e7SAndroid Build Coastguard Worker     }
38*86ee64e7SAndroid Build Coastguard Worker 
39*86ee64e7SAndroid Build Coastguard Worker typedef __m128i z_vec128i_u16x8_t;
40*86ee64e7SAndroid Build Coastguard Worker 
41*86ee64e7SAndroid Build Coastguard Worker #elif defined(DEFLATE_SLIDE_HASH_NEON)
42*86ee64e7SAndroid Build Coastguard Worker 
43*86ee64e7SAndroid Build Coastguard Worker #include <arm_neon.h>  /* NEON */
44*86ee64e7SAndroid Build Coastguard Worker 
45*86ee64e7SAndroid Build Coastguard Worker #define Z_SLIDE_INIT_SIMD(wsize) vdupq_n_u16((ush)(wsize))
46*86ee64e7SAndroid Build Coastguard Worker 
47*86ee64e7SAndroid Build Coastguard Worker #define Z_SLIDE_HASH_SIMD(table, size, vector_wsize) \
48*86ee64e7SAndroid Build Coastguard Worker     for (const Posf* const end = table + size; table != end;) { \
49*86ee64e7SAndroid Build Coastguard Worker         uint16x8_t vO = vld1q_u16(table + 0); \
50*86ee64e7SAndroid Build Coastguard Worker         uint16x8_t v8 = vld1q_u16(table + 8); \
51*86ee64e7SAndroid Build Coastguard Worker         vO = vqsubq_u16(vO, vector_wsize); \
52*86ee64e7SAndroid Build Coastguard Worker         v8 = vqsubq_u16(v8, vector_wsize); \
53*86ee64e7SAndroid Build Coastguard Worker         vst1q_u16(table + 0, vO); \
54*86ee64e7SAndroid Build Coastguard Worker         vst1q_u16(table + 8, v8); \
55*86ee64e7SAndroid Build Coastguard Worker         table += 8 + 8; \
56*86ee64e7SAndroid Build Coastguard Worker     }
57*86ee64e7SAndroid Build Coastguard Worker 
58*86ee64e7SAndroid Build Coastguard Worker typedef uint16x8_t z_vec128i_u16x8_t;
59*86ee64e7SAndroid Build Coastguard Worker 
60*86ee64e7SAndroid Build Coastguard Worker #else
61*86ee64e7SAndroid Build Coastguard Worker 
62*86ee64e7SAndroid Build Coastguard Worker #error slide_hash_simd is not defined for your build target
63*86ee64e7SAndroid Build Coastguard Worker 
64*86ee64e7SAndroid Build Coastguard Worker #endif
65*86ee64e7SAndroid Build Coastguard Worker 
66*86ee64e7SAndroid Build Coastguard Worker /* ===========================================================================
67*86ee64e7SAndroid Build Coastguard Worker  * Slide the hash table when sliding the window down (could be avoided with 32
68*86ee64e7SAndroid Build Coastguard Worker  * bit values at the expense of memory usage). We slide even when level == 0 to
69*86ee64e7SAndroid Build Coastguard Worker  * keep the hash table consistent if we switch back to level > 0 later.
70*86ee64e7SAndroid Build Coastguard Worker  */
slide_hash_simd(Posf * head,Posf * prev,const uInt w_size,const uInt hash_size)71*86ee64e7SAndroid Build Coastguard Worker local INLINE void slide_hash_simd(
72*86ee64e7SAndroid Build Coastguard Worker     Posf *head, Posf *prev, const uInt w_size, const uInt hash_size) {
73*86ee64e7SAndroid Build Coastguard Worker     /*
74*86ee64e7SAndroid Build Coastguard Worker      * The SIMD implementation of the hash table slider assumes:
75*86ee64e7SAndroid Build Coastguard Worker      *
76*86ee64e7SAndroid Build Coastguard Worker      * 1. hash chain offset is 2 bytes. Should be true as Pos is "ush" type.
77*86ee64e7SAndroid Build Coastguard Worker      */
78*86ee64e7SAndroid Build Coastguard Worker     Assert(sizeof(Pos) == 2, "Pos type size error: should be 2 bytes");
79*86ee64e7SAndroid Build Coastguard Worker     Assert(sizeof(ush) == 2, "ush type size error: should be 2 bytes");
80*86ee64e7SAndroid Build Coastguard Worker 
81*86ee64e7SAndroid Build Coastguard Worker     Assert(hash_size <= (1 << 16), "Hash table maximum size error");
82*86ee64e7SAndroid Build Coastguard Worker     Assert(hash_size >= (1 << 8), "Hash table minimum size error");
83*86ee64e7SAndroid Build Coastguard Worker     Assert(w_size == (ush)w_size, "Prev table size error");
84*86ee64e7SAndroid Build Coastguard Worker 
85*86ee64e7SAndroid Build Coastguard Worker     /*
86*86ee64e7SAndroid Build Coastguard Worker      * 2. The hash & prev table sizes are a multiple of 32 bytes (256 bits),
87*86ee64e7SAndroid Build Coastguard Worker      * since the NEON table slider moves two 128-bit items per loop (loop is
88*86ee64e7SAndroid Build Coastguard Worker      * unrolled on NEON for performance, see http://crbug.com/863257).
89*86ee64e7SAndroid Build Coastguard Worker      */
90*86ee64e7SAndroid Build Coastguard Worker     Assert(!((hash_size * sizeof(head[0])) & (32 - 1)),
91*86ee64e7SAndroid Build Coastguard Worker         "Hash table size error: should be a multiple of 32 bytes");
92*86ee64e7SAndroid Build Coastguard Worker     Assert(!((w_size * sizeof(prev[0])) & (32 - 1)),
93*86ee64e7SAndroid Build Coastguard Worker         "Prev table size error: should be a multiple of 32 bytes");
94*86ee64e7SAndroid Build Coastguard Worker 
95*86ee64e7SAndroid Build Coastguard Worker     /*
96*86ee64e7SAndroid Build Coastguard Worker      * Duplicate (ush)w_size in each uint16_t component of a 128-bit vector.
97*86ee64e7SAndroid Build Coastguard Worker      */
98*86ee64e7SAndroid Build Coastguard Worker     const z_vec128i_u16x8_t vec_wsize = Z_SLIDE_INIT_SIMD(w_size);
99*86ee64e7SAndroid Build Coastguard Worker 
100*86ee64e7SAndroid Build Coastguard Worker     /*
101*86ee64e7SAndroid Build Coastguard Worker      * Slide {head,prev} hash chain values: subtracts (ush)w_size from every
102*86ee64e7SAndroid Build Coastguard Worker      * value with a saturating SIMD subtract, to clamp the result to 0(NIL),
103*86ee64e7SAndroid Build Coastguard Worker      * to implement slide_hash() `(m >= wsize ? m - wsize : NIL);` code.
104*86ee64e7SAndroid Build Coastguard Worker      */
105*86ee64e7SAndroid Build Coastguard Worker     Z_SLIDE_HASH_SIMD(head, hash_size, vec_wsize);
106*86ee64e7SAndroid Build Coastguard Worker #ifndef FASTEST
107*86ee64e7SAndroid Build Coastguard Worker     Z_SLIDE_HASH_SIMD(prev, w_size, vec_wsize);
108*86ee64e7SAndroid Build Coastguard Worker #endif
109*86ee64e7SAndroid Build Coastguard Worker 
110*86ee64e7SAndroid Build Coastguard Worker }
111*86ee64e7SAndroid Build Coastguard Worker 
/*
 * Drop the helper macros so they do not leak into files that include this
 * header. z_vec128i_u16x8_t is a typedef, not a macro, so an #undef naming
 * it would simply be ignored; the type name remains visible to includers.
 */
#undef Z_SLIDE_HASH_SIMD
#undef Z_SLIDE_INIT_SIMD
115*86ee64e7SAndroid Build Coastguard Worker 
116*86ee64e7SAndroid Build Coastguard Worker #endif  /* SLIDE_HASH_SIMD_H */
117