1 /* chunkset_neon.c -- NEON inline functions to copy small data chunks.
2  * For conditions of distribution and use, see copyright notice in zlib.h
3  */
4 
5 #ifdef ARM_NEON_CHUNKSET
6 #ifdef _M_ARM64
7 #  include <arm64_neon.h>
8 #else
9 #  include <arm_neon.h>
10 #endif
11 #include "../../zbuild.h"
12 
/* A chunk is one 128-bit NEON register viewed as sixteen unsigned bytes. */
typedef uint8x16_t chunk_t;

/* Size of chunk_t in bytes (16 lanes x 1 byte). */
#define CHUNK_SIZE 16

/* Advertise that this file supplies chunkmemset_2/_4/_8 below.
 * NOTE(review): presumably tested by chunkset_tpl.h to select the
 * specialised pattern-fill paths -- confirm against the template. */
#define HAVE_CHUNKMEMSET_2
#define HAVE_CHUNKMEMSET_4
#define HAVE_CHUNKMEMSET_8
20 
/* Fill *chunk with a repeating 2-byte pattern taken from `from`.
 *
 *   from  - source of the pattern; fetched through zmemcpy_2 rather than a
 *           typed pointer dereference.
 *   chunk - destination; every 16-bit lane receives the same value.
 */
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
    uint16_t pattern;
    uint16x8_t lanes;

    zmemcpy_2(&pattern, from);
    /* Broadcast the 16-bit value to all eight lanes, then view it as bytes. */
    lanes = vdupq_n_u16(pattern);
    *chunk = vreinterpretq_u8_u16(lanes);
}
26 
/* Fill *chunk with a repeating 4-byte pattern taken from `from`.
 *
 *   from  - source of the pattern; fetched through zmemcpy_4 rather than a
 *           typed pointer dereference.
 *   chunk - destination; every 32-bit lane receives the same value.
 */
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
    uint32_t pattern;
    uint32x4_t lanes;

    zmemcpy_4(&pattern, from);
    /* Broadcast the 32-bit value to all four lanes, then view it as bytes. */
    lanes = vdupq_n_u32(pattern);
    *chunk = vreinterpretq_u8_u32(lanes);
}
32 
/* Fill *chunk with a repeating 8-byte pattern taken from `from`.
 *
 *   from  - source of the pattern; fetched through zmemcpy_8 rather than a
 *           typed pointer dereference.
 *   chunk - destination; both 64-bit lanes receive the same value.
 */
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
    uint64_t pattern;
    uint64x2_t lanes;

    zmemcpy_8(&pattern, from);
    /* Broadcast the 64-bit value to both lanes, then view it as bytes. */
    lanes = vdupq_n_u64(pattern);
    *chunk = vreinterpretq_u8_u64(lanes);
}
38 
/* Map the generic entry-point names onto NEON-suffixed identifiers.
 * NOTE(review): presumably chunkset_tpl.h (included below) defines functions
 * under these macro names, so each backend gets uniquely named symbols --
 * confirm against the template. */
#define CHUNKSIZE        chunksize_neon
#define CHUNKCOPY        chunkcopy_neon
#define CHUNKCOPY_SAFE   chunkcopy_safe_neon
#define CHUNKUNROLL      chunkunroll_neon
#define CHUNKMEMSET      chunkmemset_neon
#define CHUNKMEMSET_SAFE chunkmemset_safe_neon
45 
/* Read sixteen consecutive bytes starting at `s` into *chunk.
 *
 *   s     - source bytes; read with vld1q_u8.
 *   chunk - receives the loaded vector.
 */
static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
    chunk_t loaded = vld1q_u8(s);

    *chunk = loaded;
}
49 
/* Write the sixteen bytes held in *chunk to memory starting at `out`.
 *
 *   out   - destination bytes; written with vst1q_u8.
 *   chunk - vector to store; only read, never modified here.
 */
static inline void storechunk(uint8_t *out, chunk_t *chunk) {
    chunk_t value = *chunk;

    vst1q_u8(out, value);
}
53 
54 #include "chunkset_tpl.h"
55 
56 #endif
57