/*===---- ammintrin.h - SSE4a intrinsics -----------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
9*bed243d3SAndroid Build Coastguard Worker
10*bed243d3SAndroid Build Coastguard Worker #ifndef __AMMINTRIN_H
11*bed243d3SAndroid Build Coastguard Worker #define __AMMINTRIN_H
12*bed243d3SAndroid Build Coastguard Worker
13*bed243d3SAndroid Build Coastguard Worker #if !defined(__i386__) && !defined(__x86_64__)
14*bed243d3SAndroid Build Coastguard Worker #error "This header is only meant to be used on x86 and x64 architecture"
15*bed243d3SAndroid Build Coastguard Worker #endif
16*bed243d3SAndroid Build Coastguard Worker
17*bed243d3SAndroid Build Coastguard Worker #include <pmmintrin.h>
18*bed243d3SAndroid Build Coastguard Worker
/* Attribute set applied to every intrinsic function defined in this header:
 * always inlined, no debug info, compiled with the "sse4a" target feature
 * enabled, and a minimum vector width of 128. */
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__, __target__("sse4a"),          \
                 __min_vector_width__(128)))
21*bed243d3SAndroid Build Coastguard Worker
/// Extracts a bit field from the lower 64 bits of the 128-bit integer vector
///    operand. The field starts at the index \a idx and has the length
///    \a len.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_extracti_si64(__m128i x, const int len, const int idx);
/// \endcode
///
/// This intrinsic corresponds to the <c> EXTRQ </c> instruction.
///
/// \param x
///    The value from which bits are extracted.
/// \param len
///    Bits [5:0] specify the length; the other bits are ignored. If bits [5:0]
///    are zero, the length is interpreted as 64.
/// \param idx
///    Bits [5:0] specify the index of the least significant bit; the other
///    bits are ignored. If the sum of the index and length is greater than 64,
///    the result is undefined. If the length and index are both zero, bits
///    [63:0] of parameter \a x are extracted. If the length is zero but the
///    index is non-zero, the result is undefined.
/// \returns A 128-bit integer vector whose lower 64 bits contain the bits
///    extracted from the source operand.
// Kept as a macro so that len and idx are handed to the builtin exactly as
// written by the caller (the documented prototype declares both const int).
#define _mm_extracti_si64(x, len, idx)                                         \
  ((__m128i)__builtin_ia32_extrqi((__v2di)(__m128i)(x), (char)(len),           \
                                  (char)(idx)))
49*bed243d3SAndroid Build Coastguard Worker
50*bed243d3SAndroid Build Coastguard Worker /// Extracts the specified bits from the lower 64 bits of the 128-bit
51*bed243d3SAndroid Build Coastguard Worker /// integer vector operand at the index and of the length specified by
52*bed243d3SAndroid Build Coastguard Worker /// \a __y.
53*bed243d3SAndroid Build Coastguard Worker ///
54*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
55*bed243d3SAndroid Build Coastguard Worker ///
56*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> EXTRQ </c> instruction.
57*bed243d3SAndroid Build Coastguard Worker ///
58*bed243d3SAndroid Build Coastguard Worker /// \param __x
59*bed243d3SAndroid Build Coastguard Worker /// The value from which bits are extracted.
60*bed243d3SAndroid Build Coastguard Worker /// \param __y
61*bed243d3SAndroid Build Coastguard Worker /// Specifies the index of the least significant bit at [13:8] and the
62*bed243d3SAndroid Build Coastguard Worker /// length at [5:0]; all other bits are ignored. If bits [5:0] are zero, the
63*bed243d3SAndroid Build Coastguard Worker /// length is interpreted as 64. If the sum of the index and length is
64*bed243d3SAndroid Build Coastguard Worker /// greater than 64, the result is undefined. If the length and index are
65*bed243d3SAndroid Build Coastguard Worker /// both zero, bits [63:0] of parameter \a __x are extracted. If the length
66*bed243d3SAndroid Build Coastguard Worker /// is zero but the index is non-zero, the result is undefined.
67*bed243d3SAndroid Build Coastguard Worker /// \returns A 128-bit vector whose lower 64 bits contain the bits extracted
68*bed243d3SAndroid Build Coastguard Worker /// from the source operand.
69*bed243d3SAndroid Build Coastguard Worker static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_extract_si64(__m128i __x,__m128i __y)70*bed243d3SAndroid Build Coastguard Worker _mm_extract_si64(__m128i __x, __m128i __y)
71*bed243d3SAndroid Build Coastguard Worker {
72*bed243d3SAndroid Build Coastguard Worker return (__m128i)__builtin_ia32_extrq((__v2di)__x, (__v16qi)__y);
73*bed243d3SAndroid Build Coastguard Worker }
74*bed243d3SAndroid Build Coastguard Worker
/// Inserts a bit field taken from the source integer vector \a y into the
///    lower 64 bits of the destination integer vector \a x. The field is
///    placed at the index \a idx and has the length \a len.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_inserti_si64(__m128i x, __m128i y, const int len,
/// const int idx);
/// \endcode
///
/// This intrinsic corresponds to the <c> INSERTQ </c> instruction.
///
/// \param x
///    The destination operand where bits will be inserted. The inserted bits
///    are defined by the length \a len and by the index \a idx specifying the
///    least significant bit.
/// \param y
///    The source operand containing the bits to be extracted. The extracted
///    bits are the least significant bits of operand \a y of length \a len.
/// \param len
///    Bits [5:0] specify the length; the other bits are ignored. If bits [5:0]
///    are zero, the length is interpreted as 64.
/// \param idx
///    Bits [5:0] specify the index of the least significant bit; the other
///    bits are ignored. If the sum of the index and length is greater than 64,
///    the result is undefined. If the length and index are both zero, bits
///    [63:0] of parameter \a y are inserted into parameter \a x. If the length
///    is zero but the index is non-zero, the result is undefined.
/// \returns A 128-bit integer vector containing the original lower 64-bits of
///    destination operand \a x with the specified bitfields replaced by the
///    lower bits of source operand \a y. The upper 64 bits of the return value
///    are undefined.
// Kept as a macro so that len and idx are handed to the builtin exactly as
// written by the caller (the documented prototype declares both const int).
#define _mm_inserti_si64(x, y, len, idx)                                       \
  ((__m128i)__builtin_ia32_insertqi((__v2di)(__m128i)(x),                      \
                                    (__v2di)(__m128i)(y), (char)(len),         \
                                    (char)(idx)))
112*bed243d3SAndroid Build Coastguard Worker
113*bed243d3SAndroid Build Coastguard Worker /// Inserts bits of a specified length from the source integer vector
114*bed243d3SAndroid Build Coastguard Worker /// \a __y into the lower 64 bits of the destination integer vector \a __x
115*bed243d3SAndroid Build Coastguard Worker /// at the index and of the length specified by \a __y.
116*bed243d3SAndroid Build Coastguard Worker ///
117*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
118*bed243d3SAndroid Build Coastguard Worker ///
119*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> INSERTQ </c> instruction.
120*bed243d3SAndroid Build Coastguard Worker ///
121*bed243d3SAndroid Build Coastguard Worker /// \param __x
122*bed243d3SAndroid Build Coastguard Worker /// The destination operand where bits will be inserted. The inserted bits
123*bed243d3SAndroid Build Coastguard Worker /// are defined by the length and by the index of the least significant bit
124*bed243d3SAndroid Build Coastguard Worker /// specified by operand \a __y.
125*bed243d3SAndroid Build Coastguard Worker /// \param __y
126*bed243d3SAndroid Build Coastguard Worker /// The source operand containing the bits to be extracted. The extracted
127*bed243d3SAndroid Build Coastguard Worker /// bits are the least significant bits of operand \a __y with length
128*bed243d3SAndroid Build Coastguard Worker /// specified by bits [69:64]. These are inserted into the destination at the
129*bed243d3SAndroid Build Coastguard Worker /// index specified by bits [77:72]; all other bits are ignored. If bits
130*bed243d3SAndroid Build Coastguard Worker /// [69:64] are zero, the length is interpreted as 64. If the sum of the
131*bed243d3SAndroid Build Coastguard Worker /// index and length is greater than 64, the result is undefined. If the
132*bed243d3SAndroid Build Coastguard Worker /// length and index are both zero, bits [63:0] of parameter \a __y are
133*bed243d3SAndroid Build Coastguard Worker /// inserted into parameter \a __x. If the length is zero but the index is
134*bed243d3SAndroid Build Coastguard Worker /// non-zero, the result is undefined.
135*bed243d3SAndroid Build Coastguard Worker /// \returns A 128-bit integer vector containing the original lower 64-bits of
136*bed243d3SAndroid Build Coastguard Worker /// destination operand \a __x with the specified bitfields replaced by the
137*bed243d3SAndroid Build Coastguard Worker /// lower bits of source operand \a __y. The upper 64 bits of the return
138*bed243d3SAndroid Build Coastguard Worker /// value are undefined.
139*bed243d3SAndroid Build Coastguard Worker static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_insert_si64(__m128i __x,__m128i __y)140*bed243d3SAndroid Build Coastguard Worker _mm_insert_si64(__m128i __x, __m128i __y)
141*bed243d3SAndroid Build Coastguard Worker {
142*bed243d3SAndroid Build Coastguard Worker return (__m128i)__builtin_ia32_insertq((__v2di)__x, (__v2di)__y);
143*bed243d3SAndroid Build Coastguard Worker }
144*bed243d3SAndroid Build Coastguard Worker
145*bed243d3SAndroid Build Coastguard Worker /// Stores a 64-bit double-precision value in a 64-bit memory location.
146*bed243d3SAndroid Build Coastguard Worker /// To minimize caching, the data is flagged as non-temporal (unlikely to be
147*bed243d3SAndroid Build Coastguard Worker /// used again soon).
148*bed243d3SAndroid Build Coastguard Worker ///
149*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
150*bed243d3SAndroid Build Coastguard Worker ///
151*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> MOVNTSD </c> instruction.
152*bed243d3SAndroid Build Coastguard Worker ///
153*bed243d3SAndroid Build Coastguard Worker /// \param __p
154*bed243d3SAndroid Build Coastguard Worker /// The 64-bit memory location used to store the register value.
155*bed243d3SAndroid Build Coastguard Worker /// \param __a
156*bed243d3SAndroid Build Coastguard Worker /// The 64-bit double-precision floating-point register value to be stored.
157*bed243d3SAndroid Build Coastguard Worker static __inline__ void __DEFAULT_FN_ATTRS
_mm_stream_sd(void * __p,__m128d __a)158*bed243d3SAndroid Build Coastguard Worker _mm_stream_sd(void *__p, __m128d __a)
159*bed243d3SAndroid Build Coastguard Worker {
160*bed243d3SAndroid Build Coastguard Worker __builtin_ia32_movntsd((double *)__p, (__v2df)__a);
161*bed243d3SAndroid Build Coastguard Worker }
162*bed243d3SAndroid Build Coastguard Worker
163*bed243d3SAndroid Build Coastguard Worker /// Stores a 32-bit single-precision floating-point value in a 32-bit
164*bed243d3SAndroid Build Coastguard Worker /// memory location. To minimize caching, the data is flagged as
165*bed243d3SAndroid Build Coastguard Worker /// non-temporal (unlikely to be used again soon).
166*bed243d3SAndroid Build Coastguard Worker ///
167*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
168*bed243d3SAndroid Build Coastguard Worker ///
169*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> MOVNTSS </c> instruction.
170*bed243d3SAndroid Build Coastguard Worker ///
171*bed243d3SAndroid Build Coastguard Worker /// \param __p
172*bed243d3SAndroid Build Coastguard Worker /// The 32-bit memory location used to store the register value.
173*bed243d3SAndroid Build Coastguard Worker /// \param __a
174*bed243d3SAndroid Build Coastguard Worker /// The 32-bit single-precision floating-point register value to be stored.
175*bed243d3SAndroid Build Coastguard Worker static __inline__ void __DEFAULT_FN_ATTRS
_mm_stream_ss(void * __p,__m128 __a)176*bed243d3SAndroid Build Coastguard Worker _mm_stream_ss(void *__p, __m128 __a)
177*bed243d3SAndroid Build Coastguard Worker {
178*bed243d3SAndroid Build Coastguard Worker __builtin_ia32_movntss((float *)__p, (__v4sf)__a);
179*bed243d3SAndroid Build Coastguard Worker }
180*bed243d3SAndroid Build Coastguard Worker
181*bed243d3SAndroid Build Coastguard Worker #undef __DEFAULT_FN_ATTRS
182*bed243d3SAndroid Build Coastguard Worker
183*bed243d3SAndroid Build Coastguard Worker #endif /* __AMMINTRIN_H */
184