// Copyright 2022 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef ABSL_CRC_INTERNAL_NON_TEMPORAL_ARM_INTRINSICS_H_
#define ABSL_CRC_INTERNAL_NON_TEMPORAL_ARM_INTRINSICS_H_

#include "absl/base/config.h"

#ifdef __aarch64__
#include <arm_neon.h>

typedef int64x2_t __m128i; /* 128-bit vector containing integers */
#define vreinterpretq_m128i_s32(x) vreinterpretq_s64_s32(x)
#define vreinterpretq_s64_m128i(x) (x)

// Guarantees that every preceding store is globally visible before any
// subsequent store.
// https://msdn.microsoft.com/en-us/library/5h2w73d1%28v=vs.90%29.aspx
static inline __attribute__((always_inline)) void _mm_sfence(void) {
  __sync_synchronize();
}

// Load 128-bits of integer data from unaligned memory into dst. This intrinsic
// may perform better than _mm_loadu_si128 when the data crosses a cache line
// boundary.
//
//   dst[127:0] := MEM[mem_addr+127:mem_addr]
//
// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_lddqu_si128
#define _mm_lddqu_si128 _mm_loadu_si128

// Loads 128-bit value.
// https://msdn.microsoft.com/zh-cn/library/f4k12ae8(v=vs.90).aspx
static inline __attribute__((always_inline)) __m128i _mm_loadu_si128(
    const __m128i *p) {
  return vreinterpretq_m128i_s32(vld1q_s32((const int32_t *)p));
}

// Stores the data in a to the address p without polluting the caches. If the
// cache line containing address p is already in the cache, the cache will be
// updated.
// https://msdn.microsoft.com/en-us/library/ba08y07y%28v=vs.90%29.aspx
static inline __attribute__((always_inline)) void _mm_stream_si128(__m128i *p,
                                                                   __m128i a) {
#if ABSL_HAVE_BUILTIN(__builtin_nontemporal_store)
  __builtin_nontemporal_store(a, p);
#else
  vst1q_s64((int64_t *)p, vreinterpretq_s64_m128i(a));
#endif
}
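
// Illustrative sketch (added here; not part of the original header): a
// hypothetical helper showing how the shims above compose into a non-temporal
// block copy. Each 16-byte block is read with _mm_loadu_si128 and written
// with _mm_stream_si128, then _mm_sfence orders the streamed stores before
// any subsequent stores. The helper name and parameters are assumptions made
// for this example only.
static inline __attribute__((always_inline)) void example_non_temporal_copy(
    __m128i *dst, const __m128i *src, int n_blocks) {
  for (int i = 0; i < n_blocks; ++i) {
    // Unaligned 16-byte load from the source, then a store that bypasses the
    // cache when __builtin_nontemporal_store is available.
    _mm_stream_si128(dst + i, _mm_loadu_si128(src + i));
  }
  // Make the streamed stores globally visible before any later stores.
  _mm_sfence();
}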

// Sets the 16 signed 8-bit integer values.
// https://msdn.microsoft.com/en-us/library/x0cx8zd3(v=vs.90).aspx
static inline __attribute__((always_inline)) __m128i _mm_set_epi8(
    signed char b15, signed char b14, signed char b13, signed char b12,
    signed char b11, signed char b10, signed char b9, signed char b8,
    signed char b7, signed char b6, signed char b5, signed char b4,
    signed char b3, signed char b2, signed char b1, signed char b0) {
  int8_t __attribute__((aligned(16)))
      data[16] = {(int8_t)b0,  (int8_t)b1,  (int8_t)b2,  (int8_t)b3,
                  (int8_t)b4,  (int8_t)b5,  (int8_t)b6,  (int8_t)b7,
                  (int8_t)b8,  (int8_t)b9,  (int8_t)b10, (int8_t)b11,
                  (int8_t)b12, (int8_t)b13, (int8_t)b14, (int8_t)b15};
  return (__m128i)vld1q_s8(data);
}
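
// Usage note (added here; not part of the original header): as with the x86
// intrinsic, the arguments run from the most significant byte (b15) down to
// the least significant byte (b0). For example,
//
//   __m128i v = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8,
//                            7, 6, 5, 4, 3, 2, 1, 0);
//
// produces a vector whose lowest byte is 0 and whose highest byte is 15.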
#endif  // __aarch64__

#endif  // ABSL_CRC_INTERNAL_NON_TEMPORAL_ARM_INTRINSICS_H_