xref: /aosp_15_r20/external/clang/lib/Headers/mmintrin.h (revision 67e74705e28f6214e480b399dd47ea732279e315)
/*===---- mmintrin.h - MMX intrinsics --------------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */

24*67e74705SXin Li #ifndef __MMINTRIN_H
25*67e74705SXin Li #define __MMINTRIN_H
26*67e74705SXin Li 
27*67e74705SXin Li typedef long long __m64 __attribute__((__vector_size__(8)));
28*67e74705SXin Li 
29*67e74705SXin Li typedef long long __v1di __attribute__((__vector_size__(8)));
30*67e74705SXin Li typedef int __v2si __attribute__((__vector_size__(8)));
31*67e74705SXin Li typedef short __v4hi __attribute__((__vector_size__(8)));
32*67e74705SXin Li typedef char __v8qi __attribute__((__vector_size__(8)));
33*67e74705SXin Li 
34*67e74705SXin Li /* Define the default attributes for the functions in this file. */
35*67e74705SXin Li #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mmx")))
36*67e74705SXin Li 
37*67e74705SXin Li /// \brief Clears the MMX state by setting the state of the x87 stack registers
38*67e74705SXin Li ///    to empty.
39*67e74705SXin Li ///
40*67e74705SXin Li /// \headerfile <x86intrin.h>
41*67e74705SXin Li ///
42*67e74705SXin Li /// This intrinsic corresponds to the \c EMMS instruction.
43*67e74705SXin Li ///
44*67e74705SXin Li static __inline__ void __DEFAULT_FN_ATTRS
_mm_empty(void)45*67e74705SXin Li _mm_empty(void)
46*67e74705SXin Li {
47*67e74705SXin Li     __builtin_ia32_emms();
48*67e74705SXin Li }
49*67e74705SXin Li 
50*67e74705SXin Li /// \brief Constructs a 64-bit integer vector, setting the lower 32 bits to the
51*67e74705SXin Li ///    value of the 32-bit integer parameter and setting the upper 32 bits to 0.
52*67e74705SXin Li ///
53*67e74705SXin Li /// \headerfile <x86intrin.h>
54*67e74705SXin Li ///
55*67e74705SXin Li /// This intrinsic corresponds to the \c VMOVD / MOVD instruction.
56*67e74705SXin Li ///
57*67e74705SXin Li /// \param __i
58*67e74705SXin Li ///    A 32-bit integer value.
59*67e74705SXin Li /// \returns A 64-bit integer vector. The lower 32 bits contain the value of the
60*67e74705SXin Li ///    parameter. The upper 32 bits are set to 0.
61*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_cvtsi32_si64(int __i)62*67e74705SXin Li _mm_cvtsi32_si64(int __i)
63*67e74705SXin Li {
64*67e74705SXin Li     return (__m64)__builtin_ia32_vec_init_v2si(__i, 0);
65*67e74705SXin Li }
66*67e74705SXin Li 
67*67e74705SXin Li /// \brief Returns the lower 32 bits of a 64-bit integer vector as a 32-bit
68*67e74705SXin Li ///    signed integer.
69*67e74705SXin Li ///
70*67e74705SXin Li /// \headerfile <x86intrin.h>
71*67e74705SXin Li ///
72*67e74705SXin Li /// This intrinsic corresponds to the \c VMOVD / MOVD instruction.
73*67e74705SXin Li ///
74*67e74705SXin Li /// \param __m
75*67e74705SXin Li ///    A 64-bit integer vector.
76*67e74705SXin Li /// \returns A 32-bit signed integer value containing the lower 32 bits of the
77*67e74705SXin Li ///    parameter.
78*67e74705SXin Li static __inline__ int __DEFAULT_FN_ATTRS
_mm_cvtsi64_si32(__m64 __m)79*67e74705SXin Li _mm_cvtsi64_si32(__m64 __m)
80*67e74705SXin Li {
81*67e74705SXin Li     return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0);
82*67e74705SXin Li }
83*67e74705SXin Li 
84*67e74705SXin Li /// \brief Casts a 64-bit signed integer value into a 64-bit integer vector.
85*67e74705SXin Li ///
86*67e74705SXin Li /// \headerfile <x86intrin.h>
87*67e74705SXin Li ///
88*67e74705SXin Li /// This intrinsic corresponds to the \c VMOVQ / MOVD instruction.
89*67e74705SXin Li ///
90*67e74705SXin Li /// \param __i
91*67e74705SXin Li ///    A 64-bit signed integer.
92*67e74705SXin Li /// \returns A 64-bit integer vector containing the same bitwise pattern as the
93*67e74705SXin Li ///    parameter.
94*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_cvtsi64_m64(long long __i)95*67e74705SXin Li _mm_cvtsi64_m64(long long __i)
96*67e74705SXin Li {
97*67e74705SXin Li     return (__m64)__i;
98*67e74705SXin Li }
99*67e74705SXin Li 
100*67e74705SXin Li /// \brief Casts a 64-bit integer vector into a 64-bit signed integer value.
101*67e74705SXin Li ///
102*67e74705SXin Li /// \headerfile <x86intrin.h>
103*67e74705SXin Li ///
104*67e74705SXin Li /// This intrinsic corresponds to the \c VMOVQ / MOVD instruction.
105*67e74705SXin Li ///
106*67e74705SXin Li /// \param __m
107*67e74705SXin Li ///    A 64-bit integer vector.
108*67e74705SXin Li /// \returns A 64-bit signed integer containing the same bitwise pattern as the
109*67e74705SXin Li ///    parameter.
110*67e74705SXin Li static __inline__ long long __DEFAULT_FN_ATTRS
_mm_cvtm64_si64(__m64 __m)111*67e74705SXin Li _mm_cvtm64_si64(__m64 __m)
112*67e74705SXin Li {
113*67e74705SXin Li     return (long long)__m;
114*67e74705SXin Li }
115*67e74705SXin Li 
116*67e74705SXin Li /// \brief Converts 16-bit signed integers from both 64-bit integer vector
117*67e74705SXin Li ///    parameters of [4 x i16] into 8-bit signed integer values, and constructs
118*67e74705SXin Li ///    a 64-bit integer vector of [8 x i8] as the result. Positive values
119*67e74705SXin Li ///    greater than 0x7F are saturated to 0x7F. Negative values less than 0x80
120*67e74705SXin Li ///    are saturated to 0x80.
121*67e74705SXin Li ///
122*67e74705SXin Li /// \headerfile <x86intrin.h>
123*67e74705SXin Li ///
124*67e74705SXin Li /// This intrinsic corresponds to the \c PACKSSWB instruction.
125*67e74705SXin Li ///
126*67e74705SXin Li /// \param __m1
127*67e74705SXin Li ///    A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
128*67e74705SXin Li ///    16-bit signed integer and is converted to an 8-bit signed integer with
129*67e74705SXin Li ///    saturation. Positive values greater than 0x7F are saturated to 0x7F.
130*67e74705SXin Li ///    Negative values less than 0x80 are saturated to 0x80. The converted
131*67e74705SXin Li ///    [4 x i8] values are written to the lower 32 bits of the result.
132*67e74705SXin Li /// \param __m2
133*67e74705SXin Li ///    A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
134*67e74705SXin Li ///    16-bit signed integer and is converted to an 8-bit signed integer with
135*67e74705SXin Li ///    saturation. Positive values greater than 0x7F are saturated to 0x7F.
136*67e74705SXin Li ///    Negative values less than 0x80 are saturated to 0x80. The converted
137*67e74705SXin Li ///    [4 x i8] values are written to the upper 32 bits of the result.
138*67e74705SXin Li /// \returns A 64-bit integer vector of [8 x i8] containing the converted
139*67e74705SXin Li ///    values.
140*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_packs_pi16(__m64 __m1,__m64 __m2)141*67e74705SXin Li _mm_packs_pi16(__m64 __m1, __m64 __m2)
142*67e74705SXin Li {
143*67e74705SXin Li     return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2);
144*67e74705SXin Li }
145*67e74705SXin Li 
146*67e74705SXin Li /// \brief Converts 32-bit signed integers from both 64-bit integer vector
147*67e74705SXin Li ///    parameters of [2 x i32] into 16-bit signed integer values, and constructs
148*67e74705SXin Li ///    a 64-bit integer vector of [4 x i16] as the result. Positive values
149*67e74705SXin Li ///    greater than 0x7FFF are saturated to 0x7FFF. Negative values less than
150*67e74705SXin Li ///    0x8000 are saturated to 0x8000.
151*67e74705SXin Li ///
152*67e74705SXin Li /// \headerfile <x86intrin.h>
153*67e74705SXin Li ///
154*67e74705SXin Li /// This intrinsic corresponds to the \c PACKSSDW instruction.
155*67e74705SXin Li ///
156*67e74705SXin Li /// \param __m1
157*67e74705SXin Li ///    A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a
158*67e74705SXin Li ///    32-bit signed integer and is converted to a 16-bit signed integer with
159*67e74705SXin Li ///    saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF.
160*67e74705SXin Li ///    Negative values less than 0x8000 are saturated to 0x8000. The converted
161*67e74705SXin Li ///    [2 x i16] values are written to the lower 32 bits of the result.
162*67e74705SXin Li /// \param __m2
163*67e74705SXin Li ///    A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a
164*67e74705SXin Li ///    32-bit signed integer and is converted to a 16-bit signed integer with
165*67e74705SXin Li ///    saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF.
166*67e74705SXin Li ///    Negative values less than 0x8000 are saturated to 0x8000. The converted
167*67e74705SXin Li ///    [2 x i16] values are written to the upper 32 bits of the result.
168*67e74705SXin Li /// \returns A 64-bit integer vector of [4 x i16] containing the converted
169*67e74705SXin Li ///    values.
170*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_packs_pi32(__m64 __m1,__m64 __m2)171*67e74705SXin Li _mm_packs_pi32(__m64 __m1, __m64 __m2)
172*67e74705SXin Li {
173*67e74705SXin Li     return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2);
174*67e74705SXin Li }
175*67e74705SXin Li 
176*67e74705SXin Li /// \brief Converts 16-bit signed integers from both 64-bit integer vector
177*67e74705SXin Li ///    parameters of [4 x i16] into 8-bit unsigned integer values, and
178*67e74705SXin Li ///    constructs a 64-bit integer vector of [8 x i8] as the result. Values
179*67e74705SXin Li ///    greater than 0xFF are saturated to 0xFF. Values less than 0 are saturated
180*67e74705SXin Li ///    to 0.
181*67e74705SXin Li ///
182*67e74705SXin Li /// \headerfile <x86intrin.h>
183*67e74705SXin Li ///
184*67e74705SXin Li /// This intrinsic corresponds to the \c PACKUSWB instruction.
185*67e74705SXin Li ///
186*67e74705SXin Li /// \param __m1
187*67e74705SXin Li ///    A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
188*67e74705SXin Li ///    16-bit signed integer and is converted to an 8-bit unsigned integer with
189*67e74705SXin Li ///    saturation. Values greater than 0xFF are saturated to 0xFF. Values less
190*67e74705SXin Li ///    than 0 are saturated to 0. The converted [4 x i8] values are written to
191*67e74705SXin Li ///    the lower 32 bits of the result.
192*67e74705SXin Li /// \param __m2
193*67e74705SXin Li ///    A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
194*67e74705SXin Li ///    16-bit signed integer and is converted to an 8-bit unsigned integer with
195*67e74705SXin Li ///    saturation. Values greater than 0xFF are saturated to 0xFF. Values less
196*67e74705SXin Li ///    than 0 are saturated to 0. The converted [4 x i8] values are written to
197*67e74705SXin Li ///    the upper 32 bits of the result.
198*67e74705SXin Li /// \returns A 64-bit integer vector of [8 x i8] containing the converted
199*67e74705SXin Li ///    values.
200*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_packs_pu16(__m64 __m1,__m64 __m2)201*67e74705SXin Li _mm_packs_pu16(__m64 __m1, __m64 __m2)
202*67e74705SXin Li {
203*67e74705SXin Li     return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2);
204*67e74705SXin Li }
205*67e74705SXin Li 
206*67e74705SXin Li /// \brief Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8]
207*67e74705SXin Li ///    and interleaves them into a 64-bit integer vector of [8 x i8].
208*67e74705SXin Li ///
209*67e74705SXin Li /// \headerfile <x86intrin.h>
210*67e74705SXin Li ///
211*67e74705SXin Li /// This intrinsic corresponds to the \c PUNPCKHBW instruction.
212*67e74705SXin Li ///
213*67e74705SXin Li /// \param __m1
214*67e74705SXin Li ///    A 64-bit integer vector of [8 x i8].
215*67e74705SXin Li ///    Bits [39:32] are written to bits [7:0] of the result.
216*67e74705SXin Li ///    Bits [47:40] are written to bits [23:16] of the result.
217*67e74705SXin Li ///    Bits [55:48] are written to bits [39:32] of the result.
218*67e74705SXin Li ///    Bits [63:56] are written to bits [55:48] of the result.
219*67e74705SXin Li /// \param __m2
220*67e74705SXin Li ///    A 64-bit integer vector of [8 x i8].
221*67e74705SXin Li ///    Bits [39:32] are written to bits [15:8] of the result.
222*67e74705SXin Li ///    Bits [47:40] are written to bits [31:24] of the result.
223*67e74705SXin Li ///    Bits [55:48] are written to bits [47:40] of the result.
224*67e74705SXin Li ///    Bits [63:56] are written to bits [63:56] of the result.
225*67e74705SXin Li /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
226*67e74705SXin Li ///    values.
227*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_unpackhi_pi8(__m64 __m1,__m64 __m2)228*67e74705SXin Li _mm_unpackhi_pi8(__m64 __m1, __m64 __m2)
229*67e74705SXin Li {
230*67e74705SXin Li     return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2);
231*67e74705SXin Li }
232*67e74705SXin Li 
233*67e74705SXin Li /// \brief Unpacks the upper 32 bits from two 64-bit integer vectors of
234*67e74705SXin Li ///    [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
235*67e74705SXin Li ///
236*67e74705SXin Li /// \headerfile <x86intrin.h>
237*67e74705SXin Li ///
238*67e74705SXin Li /// This intrinsic corresponds to the \c PUNPCKHWD instruction.
239*67e74705SXin Li ///
240*67e74705SXin Li /// \param __m1
241*67e74705SXin Li ///    A 64-bit integer vector of [4 x i16].
242*67e74705SXin Li ///    Bits [47:32] are written to bits [15:0] of the result.
243*67e74705SXin Li ///    Bits [63:48] are written to bits [47:32] of the result.
244*67e74705SXin Li /// \param __m2
245*67e74705SXin Li ///    A 64-bit integer vector of [4 x i16].
246*67e74705SXin Li ///    Bits [47:32] are written to bits [31:16] of the result.
247*67e74705SXin Li ///    Bits [63:48] are written to bits [63:48] of the result.
248*67e74705SXin Li /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
249*67e74705SXin Li ///    values.
250*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_unpackhi_pi16(__m64 __m1,__m64 __m2)251*67e74705SXin Li _mm_unpackhi_pi16(__m64 __m1, __m64 __m2)
252*67e74705SXin Li {
253*67e74705SXin Li     return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2);
254*67e74705SXin Li }
255*67e74705SXin Li 
256*67e74705SXin Li /// \brief Unpacks the upper 32 bits from two 64-bit integer vectors of
257*67e74705SXin Li ///    [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].
258*67e74705SXin Li ///
259*67e74705SXin Li /// \headerfile <x86intrin.h>
260*67e74705SXin Li ///
261*67e74705SXin Li /// This intrinsic corresponds to the \c PUNPCKHDQ instruction.
262*67e74705SXin Li ///
263*67e74705SXin Li /// \param __m1
264*67e74705SXin Li ///    A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to
265*67e74705SXin Li ///    the lower 32 bits of the result.
266*67e74705SXin Li /// \param __m2
267*67e74705SXin Li ///    A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to
268*67e74705SXin Li ///    the upper 32 bits of the result.
269*67e74705SXin Li /// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
270*67e74705SXin Li ///    values.
271*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_unpackhi_pi32(__m64 __m1,__m64 __m2)272*67e74705SXin Li _mm_unpackhi_pi32(__m64 __m1, __m64 __m2)
273*67e74705SXin Li {
274*67e74705SXin Li     return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2);
275*67e74705SXin Li }
276*67e74705SXin Li 
277*67e74705SXin Li /// \brief Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8]
278*67e74705SXin Li ///    and interleaves them into a 64-bit integer vector of [8 x i8].
279*67e74705SXin Li ///
280*67e74705SXin Li /// \headerfile <x86intrin.h>
281*67e74705SXin Li ///
282*67e74705SXin Li /// This intrinsic corresponds to the \c PUNPCKLBW instruction.
283*67e74705SXin Li ///
284*67e74705SXin Li /// \param __m1
285*67e74705SXin Li ///    A 64-bit integer vector of [8 x i8].
286*67e74705SXin Li ///    Bits [7:0] are written to bits [7:0] of the result.
287*67e74705SXin Li ///    Bits [15:8] are written to bits [23:16] of the result.
288*67e74705SXin Li ///    Bits [23:16] are written to bits [39:32] of the result.
289*67e74705SXin Li ///    Bits [31:24] are written to bits [55:48] of the result.
290*67e74705SXin Li /// \param __m2
291*67e74705SXin Li ///    A 64-bit integer vector of [8 x i8].
292*67e74705SXin Li ///    Bits [7:0] are written to bits [15:8] of the result.
293*67e74705SXin Li ///    Bits [15:8] are written to bits [31:24] of the result.
294*67e74705SXin Li ///    Bits [23:16] are written to bits [47:40] of the result.
295*67e74705SXin Li ///    Bits [31:24] are written to bits [63:56] of the result.
296*67e74705SXin Li /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
297*67e74705SXin Li ///    values.
298*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_unpacklo_pi8(__m64 __m1,__m64 __m2)299*67e74705SXin Li _mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
300*67e74705SXin Li {
301*67e74705SXin Li     return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2);
302*67e74705SXin Li }
303*67e74705SXin Li 
304*67e74705SXin Li /// \brief Unpacks the lower 32 bits from two 64-bit integer vectors of
305*67e74705SXin Li ///    [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
306*67e74705SXin Li ///
307*67e74705SXin Li /// \headerfile <x86intrin.h>
308*67e74705SXin Li ///
309*67e74705SXin Li /// This intrinsic corresponds to the \c PUNPCKLWD instruction.
310*67e74705SXin Li ///
311*67e74705SXin Li /// \param __m1
312*67e74705SXin Li ///    A 64-bit integer vector of [4 x i16].
313*67e74705SXin Li ///    Bits [15:0] are written to bits [15:0] of the result.
314*67e74705SXin Li ///    Bits [31:16] are written to bits [47:32] of the result.
315*67e74705SXin Li /// \param __m2
316*67e74705SXin Li ///    A 64-bit integer vector of [4 x i16].
317*67e74705SXin Li ///    Bits [15:0] are written to bits [31:16] of the result.
318*67e74705SXin Li ///    Bits [31:16] are written to bits [63:48] of the result.
319*67e74705SXin Li /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
320*67e74705SXin Li ///    values.
321*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_unpacklo_pi16(__m64 __m1,__m64 __m2)322*67e74705SXin Li _mm_unpacklo_pi16(__m64 __m1, __m64 __m2)
323*67e74705SXin Li {
324*67e74705SXin Li     return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2);
325*67e74705SXin Li }
326*67e74705SXin Li 
327*67e74705SXin Li /// \brief Unpacks the lower 32 bits from two 64-bit integer vectors of
328*67e74705SXin Li ///    [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].
329*67e74705SXin Li ///
330*67e74705SXin Li /// \headerfile <x86intrin.h>
331*67e74705SXin Li ///
332*67e74705SXin Li /// This intrinsic corresponds to the \c PUNPCKLDQ instruction.
333*67e74705SXin Li ///
334*67e74705SXin Li /// \param __m1
335*67e74705SXin Li ///    A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to
336*67e74705SXin Li ///    the lower 32 bits of the result.
337*67e74705SXin Li /// \param __m2
338*67e74705SXin Li ///    A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to
339*67e74705SXin Li ///    the upper 32 bits of the result.
340*67e74705SXin Li /// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
341*67e74705SXin Li ///    values.
342*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_unpacklo_pi32(__m64 __m1,__m64 __m2)343*67e74705SXin Li _mm_unpacklo_pi32(__m64 __m1, __m64 __m2)
344*67e74705SXin Li {
345*67e74705SXin Li     return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2);
346*67e74705SXin Li }
347*67e74705SXin Li 
348*67e74705SXin Li /// \brief Adds each 8-bit integer element of the first 64-bit integer vector
349*67e74705SXin Li ///    of [8 x i8] to the corresponding 8-bit integer element of the second
350*67e74705SXin Li ///    64-bit integer vector of [8 x i8]. The lower 8 bits of the results are
351*67e74705SXin Li ///    packed into a 64-bit integer vector of [8 x i8].
352*67e74705SXin Li ///
353*67e74705SXin Li /// \headerfile <x86intrin.h>
354*67e74705SXin Li ///
355*67e74705SXin Li /// This intrinsic corresponds to the \c PADDB instruction.
356*67e74705SXin Li ///
357*67e74705SXin Li /// \param __m1
358*67e74705SXin Li ///    A 64-bit integer vector of [8 x i8].
359*67e74705SXin Li /// \param __m2
360*67e74705SXin Li ///    A 64-bit integer vector of [8 x i8].
361*67e74705SXin Li /// \returns A 64-bit integer vector of [8 x i8] containing the sums of both
362*67e74705SXin Li ///    parameters.
363*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_add_pi8(__m64 __m1,__m64 __m2)364*67e74705SXin Li _mm_add_pi8(__m64 __m1, __m64 __m2)
365*67e74705SXin Li {
366*67e74705SXin Li     return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2);
367*67e74705SXin Li }
368*67e74705SXin Li 
369*67e74705SXin Li /// \brief Adds each 16-bit integer element of the first 64-bit integer vector
370*67e74705SXin Li ///    of [4 x i16] to the corresponding 16-bit integer element of the second
371*67e74705SXin Li ///    64-bit integer vector of [4 x i16]. The lower 16 bits of the results are
372*67e74705SXin Li ///    packed into a 64-bit integer vector of [4 x i16].
373*67e74705SXin Li ///
374*67e74705SXin Li /// \headerfile <x86intrin.h>
375*67e74705SXin Li ///
376*67e74705SXin Li /// This intrinsic corresponds to the \c PADDW instruction.
377*67e74705SXin Li ///
378*67e74705SXin Li /// \param __m1
379*67e74705SXin Li ///    A 64-bit integer vector of [4 x i16].
380*67e74705SXin Li /// \param __m2
381*67e74705SXin Li ///    A 64-bit integer vector of [4 x i16].
382*67e74705SXin Li /// \returns A 64-bit integer vector of [4 x i16] containing the sums of both
383*67e74705SXin Li ///    parameters.
384*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_add_pi16(__m64 __m1,__m64 __m2)385*67e74705SXin Li _mm_add_pi16(__m64 __m1, __m64 __m2)
386*67e74705SXin Li {
387*67e74705SXin Li     return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2);
388*67e74705SXin Li }
389*67e74705SXin Li 
390*67e74705SXin Li /// \brief Adds each 32-bit integer element of the first 64-bit integer vector
391*67e74705SXin Li ///    of [2 x i32] to the corresponding 32-bit integer element of the second
392*67e74705SXin Li ///    64-bit integer vector of [2 x i32]. The lower 32 bits of the results are
393*67e74705SXin Li ///    packed into a 64-bit integer vector of [2 x i32].
394*67e74705SXin Li ///
395*67e74705SXin Li /// \headerfile <x86intrin.h>
396*67e74705SXin Li ///
397*67e74705SXin Li /// This intrinsic corresponds to the \c PADDD instruction.
398*67e74705SXin Li ///
399*67e74705SXin Li /// \param __m1
400*67e74705SXin Li ///    A 64-bit integer vector of [2 x i32].
401*67e74705SXin Li /// \param __m2
402*67e74705SXin Li ///    A 64-bit integer vector of [2 x i32].
403*67e74705SXin Li /// \returns A 64-bit integer vector of [2 x i32] containing the sums of both
404*67e74705SXin Li ///    parameters.
405*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_add_pi32(__m64 __m1,__m64 __m2)406*67e74705SXin Li _mm_add_pi32(__m64 __m1, __m64 __m2)
407*67e74705SXin Li {
408*67e74705SXin Li     return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2);
409*67e74705SXin Li }
410*67e74705SXin Li 
411*67e74705SXin Li /// \brief Adds each 8-bit signed integer element of the first 64-bit integer
412*67e74705SXin Li ///    vector of [8 x i8] to the corresponding 8-bit signed integer element of
413*67e74705SXin Li ///    the second 64-bit integer vector of [8 x i8]. Positive sums greater than
414*67e74705SXin Li ///    0x7F are saturated to 0x7F. Negative sums less than 0x80 are saturated to
415*67e74705SXin Li ///    0x80. The results are packed into a 64-bit integer vector of [8 x i8].
416*67e74705SXin Li ///
417*67e74705SXin Li /// \headerfile <x86intrin.h>
418*67e74705SXin Li ///
419*67e74705SXin Li /// This intrinsic corresponds to the \c PADDSB instruction.
420*67e74705SXin Li ///
421*67e74705SXin Li /// \param __m1
422*67e74705SXin Li ///    A 64-bit integer vector of [8 x i8].
423*67e74705SXin Li /// \param __m2
424*67e74705SXin Li ///    A 64-bit integer vector of [8 x i8].
425*67e74705SXin Li /// \returns A 64-bit integer vector of [8 x i8] containing the saturated sums
426*67e74705SXin Li ///    of both parameters.
427*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_adds_pi8(__m64 __m1,__m64 __m2)428*67e74705SXin Li _mm_adds_pi8(__m64 __m1, __m64 __m2)
429*67e74705SXin Li {
430*67e74705SXin Li     return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2);
431*67e74705SXin Li }
432*67e74705SXin Li 
433*67e74705SXin Li /// \brief Adds each 16-bit signed integer element of the first 64-bit integer
434*67e74705SXin Li ///    vector of [4 x i16] to the corresponding 16-bit signed integer element of
435*67e74705SXin Li ///    the second 64-bit integer vector of [4 x i16]. Positive sums greater than
436*67e74705SXin Li ///    0x7FFF are saturated to 0x7FFF. Negative sums less than 0x8000 are
437*67e74705SXin Li ///    saturated to 0x8000. The results are packed into a 64-bit integer vector
438*67e74705SXin Li ///    of [4 x i16].
439*67e74705SXin Li ///
440*67e74705SXin Li /// \headerfile <x86intrin.h>
441*67e74705SXin Li ///
442*67e74705SXin Li /// This intrinsic corresponds to the \c PADDSW instruction.
443*67e74705SXin Li ///
444*67e74705SXin Li /// \param __m1
445*67e74705SXin Li ///    A 64-bit integer vector of [4 x i16].
446*67e74705SXin Li /// \param __m2
447*67e74705SXin Li ///    A 64-bit integer vector of [4 x i16].
448*67e74705SXin Li /// \returns A 64-bit integer vector of [4 x i16] containing the saturated sums
449*67e74705SXin Li ///    of both parameters.
450*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_adds_pi16(__m64 __m1,__m64 __m2)451*67e74705SXin Li _mm_adds_pi16(__m64 __m1, __m64 __m2)
452*67e74705SXin Li {
453*67e74705SXin Li     return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2);
454*67e74705SXin Li }
455*67e74705SXin Li 
456*67e74705SXin Li /// \brief Adds each 8-bit unsigned integer element of the first 64-bit integer
457*67e74705SXin Li ///    vector of [8 x i8] to the corresponding 8-bit unsigned integer element of
458*67e74705SXin Li ///    the second 64-bit integer vector of [8 x i8]. Sums greater than 0xFF are
459*67e74705SXin Li ///    saturated to 0xFF. The results are packed into a 64-bit integer vector of
460*67e74705SXin Li ///    [8 x i8].
461*67e74705SXin Li ///
462*67e74705SXin Li /// \headerfile <x86intrin.h>
463*67e74705SXin Li ///
464*67e74705SXin Li /// This intrinsic corresponds to the \c PADDUSB instruction.
465*67e74705SXin Li ///
466*67e74705SXin Li /// \param __m1
467*67e74705SXin Li ///    A 64-bit integer vector of [8 x i8].
468*67e74705SXin Li /// \param __m2
469*67e74705SXin Li ///    A 64-bit integer vector of [8 x i8].
470*67e74705SXin Li /// \returns A 64-bit integer vector of [8 x i8] containing the saturated
471*67e74705SXin Li ///    unsigned sums of both parameters.
472*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_adds_pu8(__m64 __m1,__m64 __m2)473*67e74705SXin Li _mm_adds_pu8(__m64 __m1, __m64 __m2)
474*67e74705SXin Li {
475*67e74705SXin Li     return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2);
476*67e74705SXin Li }
477*67e74705SXin Li 
478*67e74705SXin Li /// \brief Adds each 16-bit unsigned integer element of the first 64-bit integer
479*67e74705SXin Li ///    vector of [4 x i16] to the corresponding 16-bit unsigned integer element
480*67e74705SXin Li ///    of the second 64-bit integer vector of [4 x i16]. Sums greater than
481*67e74705SXin Li ///    0xFFFF are saturated to 0xFFFF. The results are packed into a 64-bit
482*67e74705SXin Li ///    integer vector of [4 x i16].
483*67e74705SXin Li ///
484*67e74705SXin Li /// \headerfile <x86intrin.h>
485*67e74705SXin Li ///
486*67e74705SXin Li /// This intrinsic corresponds to the \c PADDUSW instruction.
487*67e74705SXin Li ///
488*67e74705SXin Li /// \param __m1
489*67e74705SXin Li ///    A 64-bit integer vector of [4 x i16].
490*67e74705SXin Li /// \param __m2
491*67e74705SXin Li ///    A 64-bit integer vector of [4 x i16].
492*67e74705SXin Li /// \returns A 64-bit integer vector of [4 x i16] containing the saturated
493*67e74705SXin Li ///    unsigned sums of both parameters.
494*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_adds_pu16(__m64 __m1,__m64 __m2)495*67e74705SXin Li _mm_adds_pu16(__m64 __m1, __m64 __m2)
496*67e74705SXin Li {
497*67e74705SXin Li     return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2);
498*67e74705SXin Li }
499*67e74705SXin Li 
500*67e74705SXin Li /// \brief Subtracts each 8-bit integer element of the second 64-bit integer
501*67e74705SXin Li ///    vector of [8 x i8] from the corresponding 8-bit integer element of the
502*67e74705SXin Li ///    first 64-bit integer vector of [8 x i8]. The lower 8 bits of the results
503*67e74705SXin Li ///    are packed into a 64-bit integer vector of [8 x i8].
504*67e74705SXin Li ///
505*67e74705SXin Li /// \headerfile <x86intrin.h>
506*67e74705SXin Li ///
507*67e74705SXin Li /// This intrinsic corresponds to the \c PSUBB instruction.
508*67e74705SXin Li ///
509*67e74705SXin Li /// \param __m1
510*67e74705SXin Li ///    A 64-bit integer vector of [8 x i8] containing the minuends.
511*67e74705SXin Li /// \param __m2
512*67e74705SXin Li ///    A 64-bit integer vector of [8 x i8] containing the subtrahends.
513*67e74705SXin Li /// \returns A 64-bit integer vector of [8 x i8] containing the differences of
514*67e74705SXin Li ///    both parameters.
515*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_sub_pi8(__m64 __m1,__m64 __m2)516*67e74705SXin Li _mm_sub_pi8(__m64 __m1, __m64 __m2)
517*67e74705SXin Li {
518*67e74705SXin Li     return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2);
519*67e74705SXin Li }
520*67e74705SXin Li 
521*67e74705SXin Li /// \brief Subtracts each 16-bit integer element of the second 64-bit integer
522*67e74705SXin Li ///    vector of [4 x i16] from the corresponding 16-bit integer element of the
523*67e74705SXin Li ///    first 64-bit integer vector of [4 x i16]. The lower 16 bits of the
524*67e74705SXin Li ///    results are packed into a 64-bit integer vector of [4 x i16].
525*67e74705SXin Li ///
526*67e74705SXin Li /// \headerfile <x86intrin.h>
527*67e74705SXin Li ///
528*67e74705SXin Li /// This intrinsic corresponds to the \c PSUBW instruction.
529*67e74705SXin Li ///
530*67e74705SXin Li /// \param __m1
531*67e74705SXin Li ///    A 64-bit integer vector of [4 x i16] containing the minuends.
532*67e74705SXin Li /// \param __m2
533*67e74705SXin Li ///    A 64-bit integer vector of [4 x i16] containing the subtrahends.
534*67e74705SXin Li /// \returns A 64-bit integer vector of [4 x i16] containing the differences of
535*67e74705SXin Li ///    both parameters.
536*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_sub_pi16(__m64 __m1,__m64 __m2)537*67e74705SXin Li _mm_sub_pi16(__m64 __m1, __m64 __m2)
538*67e74705SXin Li {
539*67e74705SXin Li     return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2);
540*67e74705SXin Li }
541*67e74705SXin Li 
542*67e74705SXin Li /// \brief Subtracts each 32-bit integer element of the second 64-bit integer
543*67e74705SXin Li ///    vector of [2 x i32] from the corresponding 32-bit integer element of the
544*67e74705SXin Li ///    first 64-bit integer vector of [2 x i32]. The lower 32 bits of the
545*67e74705SXin Li ///    results are packed into a 64-bit integer vector of [2 x i32].
546*67e74705SXin Li ///
547*67e74705SXin Li /// \headerfile <x86intrin.h>
548*67e74705SXin Li ///
549*67e74705SXin Li /// This intrinsic corresponds to the \c PSUBD instruction.
550*67e74705SXin Li ///
551*67e74705SXin Li /// \param __m1
552*67e74705SXin Li ///    A 64-bit integer vector of [2 x i32] containing the minuends.
553*67e74705SXin Li /// \param __m2
554*67e74705SXin Li ///    A 64-bit integer vector of [2 x i32] containing the subtrahends.
555*67e74705SXin Li /// \returns A 64-bit integer vector of [2 x i32] containing the differences of
556*67e74705SXin Li ///    both parameters.
557*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_sub_pi32(__m64 __m1,__m64 __m2)558*67e74705SXin Li _mm_sub_pi32(__m64 __m1, __m64 __m2)
559*67e74705SXin Li {
560*67e74705SXin Li     return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2);
561*67e74705SXin Li }
562*67e74705SXin Li 
563*67e74705SXin Li /// \brief Subtracts each 8-bit signed integer element of the second 64-bit
564*67e74705SXin Li ///    integer vector of [8 x i8] from the corresponding 8-bit signed integer
565*67e74705SXin Li ///    element of the first 64-bit integer vector of [8 x i8]. Positive results
566*67e74705SXin Li ///    greater than 0x7F are saturated to 0x7F. Negative results less than 0x80
567*67e74705SXin Li ///    are saturated to 0x80. The results are packed into a 64-bit integer
568*67e74705SXin Li ///    vector of [8 x i8].
569*67e74705SXin Li ///
570*67e74705SXin Li /// \headerfile <x86intrin.h>
571*67e74705SXin Li ///
572*67e74705SXin Li /// This intrinsic corresponds to the \c PSUBSB instruction.
573*67e74705SXin Li ///
574*67e74705SXin Li /// \param __m1
575*67e74705SXin Li ///    A 64-bit integer vector of [8 x i8] containing the minuends.
576*67e74705SXin Li /// \param __m2
577*67e74705SXin Li ///    A 64-bit integer vector of [8 x i8] containing the subtrahends.
578*67e74705SXin Li /// \returns A 64-bit integer vector of [8 x i8] containing the saturated
579*67e74705SXin Li ///    differences of both parameters.
580*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_subs_pi8(__m64 __m1,__m64 __m2)581*67e74705SXin Li _mm_subs_pi8(__m64 __m1, __m64 __m2)
582*67e74705SXin Li {
583*67e74705SXin Li     return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2);
584*67e74705SXin Li }
585*67e74705SXin Li 
586*67e74705SXin Li /// \brief Subtracts each 16-bit signed integer element of the second 64-bit
587*67e74705SXin Li ///    integer vector of [4 x i16] from the corresponding 16-bit signed integer
588*67e74705SXin Li ///    element of the first 64-bit integer vector of [4 x i16]. Positive results
589*67e74705SXin Li ///    greater than 0x7FFF are saturated to 0x7FFF. Negative results less than
590*67e74705SXin Li ///    0x8000 are saturated to 0x8000. The results are packed into a 64-bit
591*67e74705SXin Li ///    integer vector of [4 x i16].
592*67e74705SXin Li ///
593*67e74705SXin Li /// \headerfile <x86intrin.h>
594*67e74705SXin Li ///
595*67e74705SXin Li /// This intrinsic corresponds to the \c PSUBSW instruction.
596*67e74705SXin Li ///
597*67e74705SXin Li /// \param __m1
598*67e74705SXin Li ///    A 64-bit integer vector of [4 x i16] containing the minuends.
599*67e74705SXin Li /// \param __m2
600*67e74705SXin Li ///    A 64-bit integer vector of [4 x i16] containing the subtrahends.
601*67e74705SXin Li /// \returns A 64-bit integer vector of [4 x i16] containing the saturated
602*67e74705SXin Li ///    differences of both parameters.
603*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_subs_pi16(__m64 __m1,__m64 __m2)604*67e74705SXin Li _mm_subs_pi16(__m64 __m1, __m64 __m2)
605*67e74705SXin Li {
606*67e74705SXin Li     return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2);
607*67e74705SXin Li }
608*67e74705SXin Li 
609*67e74705SXin Li /// \brief Subtracts each 8-bit unsigned integer element of the second 64-bit
610*67e74705SXin Li ///    integer vector of [8 x i8] from the corresponding 8-bit unsigned integer
611*67e74705SXin Li ///    element of the first 64-bit integer vector of [8 x i8]. If an element of
612*67e74705SXin Li ///    the first vector is less than the corresponding element of the second
613*67e74705SXin Li ///    vector, the result is saturated to 0. The results are packed into a
614*67e74705SXin Li ///    64-bit integer vector of [8 x i8].
615*67e74705SXin Li ///
616*67e74705SXin Li /// \headerfile <x86intrin.h>
617*67e74705SXin Li ///
618*67e74705SXin Li /// This intrinsic corresponds to the \c PSUBUSB instruction.
619*67e74705SXin Li ///
620*67e74705SXin Li /// \param __m1
621*67e74705SXin Li ///    A 64-bit integer vector of [8 x i8] containing the minuends.
622*67e74705SXin Li /// \param __m2
623*67e74705SXin Li ///    A 64-bit integer vector of [8 x i8] containing the subtrahends.
624*67e74705SXin Li /// \returns A 64-bit integer vector of [8 x i8] containing the saturated
625*67e74705SXin Li ///    differences of both parameters.
626*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_subs_pu8(__m64 __m1,__m64 __m2)627*67e74705SXin Li _mm_subs_pu8(__m64 __m1, __m64 __m2)
628*67e74705SXin Li {
629*67e74705SXin Li     return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2);
630*67e74705SXin Li }
631*67e74705SXin Li 
632*67e74705SXin Li /// \brief Subtracts each 16-bit unsigned integer element of the second 64-bit
633*67e74705SXin Li ///    integer vector of [4 x i16] from the corresponding 16-bit unsigned
634*67e74705SXin Li ///    integer element of the first 64-bit integer vector of [4 x i16]. If an
635*67e74705SXin Li ///    element of the first vector is less than the corresponding element of the
636*67e74705SXin Li ///    second vector, the result is saturated to 0. The results are packed into
637*67e74705SXin Li ///    a 64-bit integer vector of [4 x i16].
638*67e74705SXin Li ///
639*67e74705SXin Li /// \headerfile <x86intrin.h>
640*67e74705SXin Li ///
641*67e74705SXin Li /// This intrinsic corresponds to the \c PSUBUSW instruction.
642*67e74705SXin Li ///
643*67e74705SXin Li /// \param __m1
644*67e74705SXin Li ///    A 64-bit integer vector of [4 x i16] containing the minuends.
645*67e74705SXin Li /// \param __m2
646*67e74705SXin Li ///    A 64-bit integer vector of [4 x i16] containing the subtrahends.
647*67e74705SXin Li /// \returns A 64-bit integer vector of [4 x i16] containing the saturated
648*67e74705SXin Li ///    differences of both parameters.
649*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_subs_pu16(__m64 __m1,__m64 __m2)650*67e74705SXin Li _mm_subs_pu16(__m64 __m1, __m64 __m2)
651*67e74705SXin Li {
652*67e74705SXin Li     return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2);
653*67e74705SXin Li }
654*67e74705SXin Li 
655*67e74705SXin Li /// \brief Multiplies each 16-bit signed integer element of the first 64-bit
656*67e74705SXin Li ///    integer vector of [4 x i16] by the corresponding 16-bit signed integer
657*67e74705SXin Li ///    element of the second 64-bit integer vector of [4 x i16] and get four
658*67e74705SXin Li ///    32-bit products. Adds adjacent pairs of products to get two 32-bit sums.
659*67e74705SXin Li ///    The lower 32 bits of these two sums are packed into a 64-bit integer
660*67e74705SXin Li ///    vector of [2 x i32]. For example, bits [15:0] of both parameters are
661*67e74705SXin Li ///    multiplied, bits [31:16] of both parameters are multiplied, and the sum
662*67e74705SXin Li ///    of both results is written to bits [31:0] of the result.
663*67e74705SXin Li ///
664*67e74705SXin Li /// \headerfile <x86intrin.h>
665*67e74705SXin Li ///
666*67e74705SXin Li /// This intrinsic corresponds to the \c PMADDWD instruction.
667*67e74705SXin Li ///
668*67e74705SXin Li /// \param __m1
669*67e74705SXin Li ///    A 64-bit integer vector of [4 x i16].
670*67e74705SXin Li /// \param __m2
671*67e74705SXin Li ///    A 64-bit integer vector of [4 x i16].
672*67e74705SXin Li /// \returns A 64-bit integer vector of [2 x i32] containing the sums of
673*67e74705SXin Li ///    products of both parameters.
674*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_madd_pi16(__m64 __m1,__m64 __m2)675*67e74705SXin Li _mm_madd_pi16(__m64 __m1, __m64 __m2)
676*67e74705SXin Li {
677*67e74705SXin Li     return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2);
678*67e74705SXin Li }
679*67e74705SXin Li 
680*67e74705SXin Li /// \brief Multiplies each 16-bit signed integer element of the first 64-bit
681*67e74705SXin Li ///    integer vector of [4 x i16] by the corresponding 16-bit signed integer
682*67e74705SXin Li ///    element of the second 64-bit integer vector of [4 x i16]. Packs the upper
683*67e74705SXin Li ///    16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].
684*67e74705SXin Li ///
685*67e74705SXin Li /// \headerfile <x86intrin.h>
686*67e74705SXin Li ///
687*67e74705SXin Li /// This intrinsic corresponds to the \c PMULHW instruction.
688*67e74705SXin Li ///
689*67e74705SXin Li /// \param __m1
690*67e74705SXin Li ///    A 64-bit integer vector of [4 x i16].
691*67e74705SXin Li /// \param __m2
692*67e74705SXin Li ///    A 64-bit integer vector of [4 x i16].
693*67e74705SXin Li /// \returns A 64-bit integer vector of [4 x i16] containing the upper 16 bits
694*67e74705SXin Li ///    of the products of both parameters.
695*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_mulhi_pi16(__m64 __m1,__m64 __m2)696*67e74705SXin Li _mm_mulhi_pi16(__m64 __m1, __m64 __m2)
697*67e74705SXin Li {
698*67e74705SXin Li     return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2);
699*67e74705SXin Li }
700*67e74705SXin Li 
701*67e74705SXin Li /// \brief Multiplies each 16-bit signed integer element of the first 64-bit
702*67e74705SXin Li ///    integer vector of [4 x i16] by the corresponding 16-bit signed integer
703*67e74705SXin Li ///    element of the second 64-bit integer vector of [4 x i16]. Packs the lower
704*67e74705SXin Li ///    16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].
705*67e74705SXin Li ///
706*67e74705SXin Li /// \headerfile <x86intrin.h>
707*67e74705SXin Li ///
708*67e74705SXin Li /// This intrinsic corresponds to the \c PMULLW instruction.
709*67e74705SXin Li ///
710*67e74705SXin Li /// \param __m1
711*67e74705SXin Li ///    A 64-bit integer vector of [4 x i16].
712*67e74705SXin Li /// \param __m2
713*67e74705SXin Li ///    A 64-bit integer vector of [4 x i16].
714*67e74705SXin Li /// \returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits
715*67e74705SXin Li ///    of the products of both parameters.
716*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_mullo_pi16(__m64 __m1,__m64 __m2)717*67e74705SXin Li _mm_mullo_pi16(__m64 __m1, __m64 __m2)
718*67e74705SXin Li {
719*67e74705SXin Li     return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2);
720*67e74705SXin Li }
721*67e74705SXin Li 
722*67e74705SXin Li /// \brief Left-shifts each 16-bit signed integer element of the first
723*67e74705SXin Li ///    parameter, which is a 64-bit integer vector of [4 x i16], by the number
724*67e74705SXin Li ///    of bits specified by the second parameter, which is a 64-bit integer. The
725*67e74705SXin Li ///    lower 16 bits of the results are packed into a 64-bit integer vector of
726*67e74705SXin Li ///    [4 x i16].
727*67e74705SXin Li ///
728*67e74705SXin Li /// \headerfile <x86intrin.h>
729*67e74705SXin Li ///
730*67e74705SXin Li /// This intrinsic corresponds to the \c PSLLW instruction.
731*67e74705SXin Li ///
732*67e74705SXin Li /// \param __m
733*67e74705SXin Li ///    A 64-bit integer vector of [4 x i16].
734*67e74705SXin Li /// \param __count
735*67e74705SXin Li ///    A 64-bit integer vector interpreted as a single 64-bit integer.
736*67e74705SXin Li /// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted
737*67e74705SXin Li ///    values. If __count is greater or equal to 16, the result is set to all 0.
738*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_sll_pi16(__m64 __m,__m64 __count)739*67e74705SXin Li _mm_sll_pi16(__m64 __m, __m64 __count)
740*67e74705SXin Li {
741*67e74705SXin Li     return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count);
742*67e74705SXin Li }
743*67e74705SXin Li 
744*67e74705SXin Li /// \brief Left-shifts each 16-bit signed integer element of a 64-bit integer
745*67e74705SXin Li ///    vector of [4 x i16] by the number of bits specified by a 32-bit integer.
746*67e74705SXin Li ///    The lower 16 bits of the results are packed into a 64-bit integer vector
747*67e74705SXin Li ///    of [4 x i16].
748*67e74705SXin Li ///
749*67e74705SXin Li /// \headerfile <x86intrin.h>
750*67e74705SXin Li ///
751*67e74705SXin Li /// This intrinsic corresponds to the \c PSLLW instruction.
752*67e74705SXin Li ///
753*67e74705SXin Li /// \param __m
754*67e74705SXin Li ///    A 64-bit integer vector of [4 x i16].
755*67e74705SXin Li /// \param __count
756*67e74705SXin Li ///    A 32-bit integer value.
757*67e74705SXin Li /// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted
758*67e74705SXin Li ///    values. If __count is greater or equal to 16, the result is set to all 0.
759*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_slli_pi16(__m64 __m,int __count)760*67e74705SXin Li _mm_slli_pi16(__m64 __m, int __count)
761*67e74705SXin Li {
762*67e74705SXin Li     return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count);
763*67e74705SXin Li }
764*67e74705SXin Li 
765*67e74705SXin Li /// \brief Left-shifts each 32-bit signed integer element of the first
766*67e74705SXin Li ///    parameter, which is a 64-bit integer vector of [2 x i32], by the number
767*67e74705SXin Li ///    of bits specified by the second parameter, which is a 64-bit integer. The
768*67e74705SXin Li ///    lower 32 bits of the results are packed into a 64-bit integer vector of
769*67e74705SXin Li ///    [2 x i32].
770*67e74705SXin Li ///
771*67e74705SXin Li /// \headerfile <x86intrin.h>
772*67e74705SXin Li ///
773*67e74705SXin Li /// This intrinsic corresponds to the \c PSLLD instruction.
774*67e74705SXin Li ///
775*67e74705SXin Li /// \param __m
776*67e74705SXin Li ///    A 64-bit integer vector of [2 x i32].
777*67e74705SXin Li /// \param __count
778*67e74705SXin Li ///    A 64-bit integer vector interpreted as a single 64-bit integer.
779*67e74705SXin Li /// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted
780*67e74705SXin Li ///    values. If __count is greater or equal to 32, the result is set to all 0.
781*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_sll_pi32(__m64 __m,__m64 __count)782*67e74705SXin Li _mm_sll_pi32(__m64 __m, __m64 __count)
783*67e74705SXin Li {
784*67e74705SXin Li     return (__m64)__builtin_ia32_pslld((__v2si)__m, __count);
785*67e74705SXin Li }
786*67e74705SXin Li 
787*67e74705SXin Li /// \brief Left-shifts each 32-bit signed integer element of a 64-bit integer
788*67e74705SXin Li ///    vector of [2 x i32] by the number of bits specified by a 32-bit integer.
789*67e74705SXin Li ///    The lower 32 bits of the results are packed into a 64-bit integer vector
790*67e74705SXin Li ///    of [2 x i32].
791*67e74705SXin Li ///
792*67e74705SXin Li /// \headerfile <x86intrin.h>
793*67e74705SXin Li ///
794*67e74705SXin Li /// This intrinsic corresponds to the \c PSLLD instruction.
795*67e74705SXin Li ///
796*67e74705SXin Li /// \param __m
797*67e74705SXin Li ///    A 64-bit integer vector of [2 x i32].
798*67e74705SXin Li /// \param __count
799*67e74705SXin Li ///    A 32-bit integer value.
800*67e74705SXin Li /// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted
801*67e74705SXin Li ///    values. If __count is greater or equal to 32, the result is set to all 0.
802*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_slli_pi32(__m64 __m,int __count)803*67e74705SXin Li _mm_slli_pi32(__m64 __m, int __count)
804*67e74705SXin Li {
805*67e74705SXin Li     return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count);
806*67e74705SXin Li }
807*67e74705SXin Li 
808*67e74705SXin Li /// \brief Left-shifts the first 64-bit integer parameter by the number of bits
809*67e74705SXin Li ///    specified by the second 64-bit integer parameter. The lower 64 bits of
810*67e74705SXin Li ///    result are returned.
811*67e74705SXin Li ///
812*67e74705SXin Li /// \headerfile <x86intrin.h>
813*67e74705SXin Li ///
814*67e74705SXin Li /// This intrinsic corresponds to the \c PSLLQ instruction.
815*67e74705SXin Li ///
816*67e74705SXin Li /// \param __m
817*67e74705SXin Li ///    A 64-bit integer vector interpreted as a single 64-bit integer.
818*67e74705SXin Li /// \param __count
819*67e74705SXin Li ///    A 64-bit integer vector interpreted as a single 64-bit integer.
820*67e74705SXin Li /// \returns A 64-bit integer vector containing the left-shifted value. If
821*67e74705SXin Li ///     __count is greater or equal to 64, the result is set to 0.
822*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_sll_si64(__m64 __m,__m64 __count)823*67e74705SXin Li _mm_sll_si64(__m64 __m, __m64 __count)
824*67e74705SXin Li {
825*67e74705SXin Li     return (__m64)__builtin_ia32_psllq((__v1di)__m, __count);
826*67e74705SXin Li }
827*67e74705SXin Li 
828*67e74705SXin Li /// \brief Left-shifts the first parameter, which is a 64-bit integer, by the
829*67e74705SXin Li ///    number of bits specified by the second parameter, which is a 32-bit
830*67e74705SXin Li ///    integer. The lower 64 bits of result are returned.
831*67e74705SXin Li ///
832*67e74705SXin Li /// \headerfile <x86intrin.h>
833*67e74705SXin Li ///
834*67e74705SXin Li /// This intrinsic corresponds to the \c PSLLQ instruction.
835*67e74705SXin Li ///
836*67e74705SXin Li /// \param __m
837*67e74705SXin Li ///    A 64-bit integer vector interpreted as a single 64-bit integer.
838*67e74705SXin Li /// \param __count
839*67e74705SXin Li ///    A 32-bit integer value.
840*67e74705SXin Li /// \returns A 64-bit integer vector containing the left-shifted value. If
841*67e74705SXin Li ///     __count is greater or equal to 64, the result is set to 0.
842*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_slli_si64(__m64 __m,int __count)843*67e74705SXin Li _mm_slli_si64(__m64 __m, int __count)
844*67e74705SXin Li {
845*67e74705SXin Li     return (__m64)__builtin_ia32_psllqi((__v1di)__m, __count);
846*67e74705SXin Li }
847*67e74705SXin Li 
848*67e74705SXin Li /// \brief Right-shifts each 16-bit integer element of the first parameter,
849*67e74705SXin Li ///    which is a 64-bit integer vector of [4 x i16], by the number of bits
850*67e74705SXin Li ///    specified by the second parameter, which is a 64-bit integer. High-order
851*67e74705SXin Li ///    bits are filled with the sign bit of the initial value of each 16-bit
852*67e74705SXin Li ///    element. The 16-bit results are packed into a 64-bit integer vector of
853*67e74705SXin Li ///    [4 x i16].
854*67e74705SXin Li ///
855*67e74705SXin Li /// \headerfile <x86intrin.h>
856*67e74705SXin Li ///
857*67e74705SXin Li /// This intrinsic corresponds to the \c PSRAW instruction.
858*67e74705SXin Li ///
859*67e74705SXin Li /// \param __m
860*67e74705SXin Li ///    A 64-bit integer vector of [4 x i16].
861*67e74705SXin Li /// \param __count
862*67e74705SXin Li ///    A 64-bit integer vector interpreted as a single 64-bit integer.
863*67e74705SXin Li /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
864*67e74705SXin Li ///    values.
865*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_sra_pi16(__m64 __m,__m64 __count)866*67e74705SXin Li _mm_sra_pi16(__m64 __m, __m64 __count)
867*67e74705SXin Li {
868*67e74705SXin Li     return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count);
869*67e74705SXin Li }
870*67e74705SXin Li 
871*67e74705SXin Li /// \brief Right-shifts each 16-bit integer element of a 64-bit integer vector
872*67e74705SXin Li ///    of [4 x i16] by the number of bits specified by a 32-bit integer.
873*67e74705SXin Li ///    High-order bits are filled with the sign bit of the initial value of each
874*67e74705SXin Li ///    16-bit element. The 16-bit results are packed into a 64-bit integer
875*67e74705SXin Li ///    vector of [4 x i16].
876*67e74705SXin Li ///
877*67e74705SXin Li /// \headerfile <x86intrin.h>
878*67e74705SXin Li ///
879*67e74705SXin Li /// This intrinsic corresponds to the \c PSRAW instruction.
880*67e74705SXin Li ///
881*67e74705SXin Li /// \param __m
882*67e74705SXin Li ///    A 64-bit integer vector of [4 x i16].
883*67e74705SXin Li /// \param __count
884*67e74705SXin Li ///    A 32-bit integer value.
885*67e74705SXin Li /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
886*67e74705SXin Li ///    values.
887*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_srai_pi16(__m64 __m,int __count)888*67e74705SXin Li _mm_srai_pi16(__m64 __m, int __count)
889*67e74705SXin Li {
890*67e74705SXin Li     return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count);
891*67e74705SXin Li }
892*67e74705SXin Li 
893*67e74705SXin Li /// \brief Right-shifts each 32-bit integer element of the first parameter,
894*67e74705SXin Li ///    which is a 64-bit integer vector of [2 x i32], by the number of bits
895*67e74705SXin Li ///    specified by the second parameter, which is a 64-bit integer. High-order
896*67e74705SXin Li ///    bits are filled with the sign bit of the initial value of each 32-bit
897*67e74705SXin Li ///    element. The 32-bit results are packed into a 64-bit integer vector of
898*67e74705SXin Li ///    [2 x i32].
899*67e74705SXin Li ///
900*67e74705SXin Li /// \headerfile <x86intrin.h>
901*67e74705SXin Li ///
902*67e74705SXin Li /// This intrinsic corresponds to the \c PSRAD instruction.
903*67e74705SXin Li ///
904*67e74705SXin Li /// \param __m
905*67e74705SXin Li ///    A 64-bit integer vector of [2 x i32].
906*67e74705SXin Li /// \param __count
907*67e74705SXin Li ///    A 64-bit integer vector interpreted as a single 64-bit integer.
908*67e74705SXin Li /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
909*67e74705SXin Li ///    values.
910*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_sra_pi32(__m64 __m,__m64 __count)911*67e74705SXin Li _mm_sra_pi32(__m64 __m, __m64 __count)
912*67e74705SXin Li {
913*67e74705SXin Li     return (__m64)__builtin_ia32_psrad((__v2si)__m, __count);
914*67e74705SXin Li }
915*67e74705SXin Li 
916*67e74705SXin Li /// \brief Right-shifts each 32-bit integer element of a 64-bit integer vector
917*67e74705SXin Li ///    of [2 x i32] by the number of bits specified by a 32-bit integer.
918*67e74705SXin Li ///    High-order bits are filled with the sign bit of the initial value of each
919*67e74705SXin Li ///    32-bit element. The 32-bit results are packed into a 64-bit integer
920*67e74705SXin Li ///    vector of [2 x i32].
921*67e74705SXin Li ///
922*67e74705SXin Li /// \headerfile <x86intrin.h>
923*67e74705SXin Li ///
924*67e74705SXin Li /// This intrinsic corresponds to the \c PSRAD instruction.
925*67e74705SXin Li ///
926*67e74705SXin Li /// \param __m
927*67e74705SXin Li ///    A 64-bit integer vector of [2 x i32].
928*67e74705SXin Li /// \param __count
929*67e74705SXin Li ///    A 32-bit integer value.
930*67e74705SXin Li /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
931*67e74705SXin Li ///    values.
932*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_srai_pi32(__m64 __m,int __count)933*67e74705SXin Li _mm_srai_pi32(__m64 __m, int __count)
934*67e74705SXin Li {
935*67e74705SXin Li     return (__m64)__builtin_ia32_psradi((__v2si)__m, __count);
936*67e74705SXin Li }
937*67e74705SXin Li 
938*67e74705SXin Li /// \brief Right-shifts each 16-bit integer element of the first parameter,
939*67e74705SXin Li ///    which is a 64-bit integer vector of [4 x i16], by the number of bits
940*67e74705SXin Li ///    specified by the second parameter, which is a 64-bit integer. High-order
941*67e74705SXin Li ///    bits are cleared. The 16-bit results are packed into a 64-bit integer
942*67e74705SXin Li ///    vector of [4 x i16].
943*67e74705SXin Li ///
944*67e74705SXin Li /// \headerfile <x86intrin.h>
945*67e74705SXin Li ///
946*67e74705SXin Li /// This intrinsic corresponds to the \c PSRLW instruction.
947*67e74705SXin Li ///
948*67e74705SXin Li /// \param __m
949*67e74705SXin Li ///    A 64-bit integer vector of [4 x i16].
950*67e74705SXin Li /// \param __count
951*67e74705SXin Li ///    A 64-bit integer vector interpreted as a single 64-bit integer.
952*67e74705SXin Li /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
953*67e74705SXin Li ///    values.
954*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_srl_pi16(__m64 __m,__m64 __count)955*67e74705SXin Li _mm_srl_pi16(__m64 __m, __m64 __count)
956*67e74705SXin Li {
957*67e74705SXin Li     return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count);
958*67e74705SXin Li }
959*67e74705SXin Li 
960*67e74705SXin Li /// \brief Right-shifts each 16-bit integer element of a 64-bit integer vector
961*67e74705SXin Li ///    of [4 x i16] by the number of bits specified by a 32-bit integer.
962*67e74705SXin Li ///    High-order bits are cleared. The 16-bit results are packed into a 64-bit
963*67e74705SXin Li ///    integer vector of [4 x i16].
964*67e74705SXin Li ///
965*67e74705SXin Li /// \headerfile <x86intrin.h>
966*67e74705SXin Li ///
967*67e74705SXin Li /// This intrinsic corresponds to the \c PSRLW instruction.
968*67e74705SXin Li ///
969*67e74705SXin Li /// \param __m
970*67e74705SXin Li ///    A 64-bit integer vector of [4 x i16].
971*67e74705SXin Li /// \param __count
972*67e74705SXin Li ///    A 32-bit integer value.
973*67e74705SXin Li /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
974*67e74705SXin Li ///    values.
975*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_srli_pi16(__m64 __m,int __count)976*67e74705SXin Li _mm_srli_pi16(__m64 __m, int __count)
977*67e74705SXin Li {
978*67e74705SXin Li     return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count);
979*67e74705SXin Li }
980*67e74705SXin Li 
981*67e74705SXin Li /// \brief Right-shifts each 32-bit integer element of the first parameter,
982*67e74705SXin Li ///    which is a 64-bit integer vector of [2 x i32], by the number of bits
983*67e74705SXin Li ///    specified by the second parameter, which is a 64-bit integer. High-order
984*67e74705SXin Li ///    bits are cleared. The 32-bit results are packed into a 64-bit integer
985*67e74705SXin Li ///    vector of [2 x i32].
986*67e74705SXin Li ///
987*67e74705SXin Li /// \headerfile <x86intrin.h>
988*67e74705SXin Li ///
989*67e74705SXin Li /// This intrinsic corresponds to the \c PSRLD instruction.
990*67e74705SXin Li ///
991*67e74705SXin Li /// \param __m
992*67e74705SXin Li ///    A 64-bit integer vector of [2 x i32].
993*67e74705SXin Li /// \param __count
994*67e74705SXin Li ///    A 64-bit integer vector interpreted as a single 64-bit integer.
995*67e74705SXin Li /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
996*67e74705SXin Li ///    values.
997*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_srl_pi32(__m64 __m,__m64 __count)998*67e74705SXin Li _mm_srl_pi32(__m64 __m, __m64 __count)
999*67e74705SXin Li {
1000*67e74705SXin Li     return (__m64)__builtin_ia32_psrld((__v2si)__m, __count);
1001*67e74705SXin Li }
1002*67e74705SXin Li 
1003*67e74705SXin Li /// \brief Right-shifts each 32-bit integer element of a 64-bit integer vector
1004*67e74705SXin Li ///    of [2 x i32] by the number of bits specified by a 32-bit integer.
1005*67e74705SXin Li ///    High-order bits are cleared. The 32-bit results are packed into a 64-bit
1006*67e74705SXin Li ///    integer vector of [2 x i32].
1007*67e74705SXin Li ///
1008*67e74705SXin Li /// \headerfile <x86intrin.h>
1009*67e74705SXin Li ///
1010*67e74705SXin Li /// This intrinsic corresponds to the \c PSRLD instruction.
1011*67e74705SXin Li ///
1012*67e74705SXin Li /// \param __m
1013*67e74705SXin Li ///    A 64-bit integer vector of [2 x i32].
1014*67e74705SXin Li /// \param __count
1015*67e74705SXin Li ///    A 32-bit integer value.
1016*67e74705SXin Li /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
1017*67e74705SXin Li ///    values.
1018*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_srli_pi32(__m64 __m,int __count)1019*67e74705SXin Li _mm_srli_pi32(__m64 __m, int __count)
1020*67e74705SXin Li {
1021*67e74705SXin Li     return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count);
1022*67e74705SXin Li }
1023*67e74705SXin Li 
1024*67e74705SXin Li /// \brief Right-shifts the first 64-bit integer parameter by the number of bits
1025*67e74705SXin Li ///    specified by the second 64-bit integer parameter. High-order bits are
1026*67e74705SXin Li ///    cleared.
1027*67e74705SXin Li ///
1028*67e74705SXin Li /// \headerfile <x86intrin.h>
1029*67e74705SXin Li ///
1030*67e74705SXin Li /// This intrinsic corresponds to the \c PSRLQ instruction.
1031*67e74705SXin Li ///
1032*67e74705SXin Li /// \param __m
1033*67e74705SXin Li ///    A 64-bit integer vector interpreted as a single 64-bit integer.
1034*67e74705SXin Li /// \param __count
1035*67e74705SXin Li ///    A 64-bit integer vector interpreted as a single 64-bit integer.
1036*67e74705SXin Li /// \returns A 64-bit integer vector containing the right-shifted value.
1037*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_srl_si64(__m64 __m,__m64 __count)1038*67e74705SXin Li _mm_srl_si64(__m64 __m, __m64 __count)
1039*67e74705SXin Li {
1040*67e74705SXin Li     return (__m64)__builtin_ia32_psrlq((__v1di)__m, __count);
1041*67e74705SXin Li }
1042*67e74705SXin Li 
1043*67e74705SXin Li /// \brief Right-shifts the first parameter, which is a 64-bit integer, by the
1044*67e74705SXin Li ///    number of bits specified by the second parameter, which is a 32-bit
1045*67e74705SXin Li ///    integer. High-order bits are cleared.
1046*67e74705SXin Li ///
1047*67e74705SXin Li /// \headerfile <x86intrin.h>
1048*67e74705SXin Li ///
1049*67e74705SXin Li /// This intrinsic corresponds to the \c PSRLQ instruction.
1050*67e74705SXin Li ///
1051*67e74705SXin Li /// \param __m
1052*67e74705SXin Li ///    A 64-bit integer vector interpreted as a single 64-bit integer.
1053*67e74705SXin Li /// \param __count
1054*67e74705SXin Li ///    A 32-bit integer value.
1055*67e74705SXin Li /// \returns A 64-bit integer vector containing the right-shifted value.
1056*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_srli_si64(__m64 __m,int __count)1057*67e74705SXin Li _mm_srli_si64(__m64 __m, int __count)
1058*67e74705SXin Li {
1059*67e74705SXin Li     return (__m64)__builtin_ia32_psrlqi((__v1di)__m, __count);
1060*67e74705SXin Li }
1061*67e74705SXin Li 
1062*67e74705SXin Li /// \brief Performs a bitwise AND of two 64-bit integer vectors.
1063*67e74705SXin Li ///
1064*67e74705SXin Li /// \headerfile <x86intrin.h>
1065*67e74705SXin Li ///
1066*67e74705SXin Li /// This intrinsic corresponds to the \c PAND instruction.
1067*67e74705SXin Li ///
1068*67e74705SXin Li /// \param __m1
1069*67e74705SXin Li ///    A 64-bit integer vector.
1070*67e74705SXin Li /// \param __m2
1071*67e74705SXin Li ///    A 64-bit integer vector.
1072*67e74705SXin Li /// \returns A 64-bit integer vector containing the bitwise AND of both
1073*67e74705SXin Li ///    parameters.
1074*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_and_si64(__m64 __m1,__m64 __m2)1075*67e74705SXin Li _mm_and_si64(__m64 __m1, __m64 __m2)
1076*67e74705SXin Li {
1077*67e74705SXin Li     return __builtin_ia32_pand((__v1di)__m1, (__v1di)__m2);
1078*67e74705SXin Li }
1079*67e74705SXin Li 
1080*67e74705SXin Li /// \brief Performs a bitwise NOT of the first 64-bit integer vector, and then
1081*67e74705SXin Li ///    performs a bitwise AND of the intermediate result and the second 64-bit
1082*67e74705SXin Li ///    integer vector.
1083*67e74705SXin Li ///
1084*67e74705SXin Li /// \headerfile <x86intrin.h>
1085*67e74705SXin Li ///
1086*67e74705SXin Li /// This intrinsic corresponds to the \c PANDN instruction.
1087*67e74705SXin Li ///
1088*67e74705SXin Li /// \param __m1
1089*67e74705SXin Li ///    A 64-bit integer vector. The one's complement of this parameter is used
1090*67e74705SXin Li ///    in the bitwise AND.
1091*67e74705SXin Li /// \param __m2
1092*67e74705SXin Li ///    A 64-bit integer vector.
1093*67e74705SXin Li /// \returns A 64-bit integer vector containing the bitwise AND of the second
1094*67e74705SXin Li ///    parameter and the one's complement of the first parameter.
1095*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_andnot_si64(__m64 __m1,__m64 __m2)1096*67e74705SXin Li _mm_andnot_si64(__m64 __m1, __m64 __m2)
1097*67e74705SXin Li {
1098*67e74705SXin Li     return __builtin_ia32_pandn((__v1di)__m1, (__v1di)__m2);
1099*67e74705SXin Li }
1100*67e74705SXin Li 
1101*67e74705SXin Li /// \brief Performs a bitwise OR of two 64-bit integer vectors.
1102*67e74705SXin Li ///
1103*67e74705SXin Li /// \headerfile <x86intrin.h>
1104*67e74705SXin Li ///
1105*67e74705SXin Li /// This intrinsic corresponds to the \c POR instruction.
1106*67e74705SXin Li ///
1107*67e74705SXin Li /// \param __m1
1108*67e74705SXin Li ///    A 64-bit integer vector.
1109*67e74705SXin Li /// \param __m2
1110*67e74705SXin Li ///    A 64-bit integer vector.
1111*67e74705SXin Li /// \returns A 64-bit integer vector containing the bitwise OR of both
1112*67e74705SXin Li ///    parameters.
1113*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_or_si64(__m64 __m1,__m64 __m2)1114*67e74705SXin Li _mm_or_si64(__m64 __m1, __m64 __m2)
1115*67e74705SXin Li {
1116*67e74705SXin Li     return __builtin_ia32_por((__v1di)__m1, (__v1di)__m2);
1117*67e74705SXin Li }
1118*67e74705SXin Li 
1119*67e74705SXin Li /// \brief Performs a bitwise exclusive OR of two 64-bit integer vectors.
1120*67e74705SXin Li ///
1121*67e74705SXin Li /// \headerfile <x86intrin.h>
1122*67e74705SXin Li ///
1123*67e74705SXin Li /// This intrinsic corresponds to the \c PXOR instruction.
1124*67e74705SXin Li ///
1125*67e74705SXin Li /// \param __m1
1126*67e74705SXin Li ///    A 64-bit integer vector.
1127*67e74705SXin Li /// \param __m2
1128*67e74705SXin Li ///    A 64-bit integer vector.
1129*67e74705SXin Li /// \returns A 64-bit integer vector containing the bitwise exclusive OR of both
1130*67e74705SXin Li ///    parameters.
1131*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_xor_si64(__m64 __m1,__m64 __m2)1132*67e74705SXin Li _mm_xor_si64(__m64 __m1, __m64 __m2)
1133*67e74705SXin Li {
1134*67e74705SXin Li     return __builtin_ia32_pxor((__v1di)__m1, (__v1di)__m2);
1135*67e74705SXin Li }
1136*67e74705SXin Li 
1137*67e74705SXin Li /// \brief Compares the 8-bit integer elements of two 64-bit integer vectors of
1138*67e74705SXin Li ///    [8 x i8] to determine if the element of the first vector is equal to the
1139*67e74705SXin Li ///    corresponding element of the second vector. The comparison yields 0 for
1140*67e74705SXin Li ///    false, 0xFF for true.
1141*67e74705SXin Li ///
1142*67e74705SXin Li /// \headerfile <x86intrin.h>
1143*67e74705SXin Li ///
1144*67e74705SXin Li /// This intrinsic corresponds to the \c PCMPEQB instruction.
1145*67e74705SXin Li ///
1146*67e74705SXin Li /// \param __m1
1147*67e74705SXin Li ///    A 64-bit integer vector of [8 x i8].
1148*67e74705SXin Li /// \param __m2
1149*67e74705SXin Li ///    A 64-bit integer vector of [8 x i8].
1150*67e74705SXin Li /// \returns A 64-bit integer vector of [8 x i8] containing the comparison
1151*67e74705SXin Li ///    results.
1152*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_cmpeq_pi8(__m64 __m1,__m64 __m2)1153*67e74705SXin Li _mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
1154*67e74705SXin Li {
1155*67e74705SXin Li     return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2);
1156*67e74705SXin Li }
1157*67e74705SXin Li 
1158*67e74705SXin Li /// \brief Compares the 16-bit integer elements of two 64-bit integer vectors of
1159*67e74705SXin Li ///    [4 x i16] to determine if the element of the first vector is equal to the
1160*67e74705SXin Li ///    corresponding element of the second vector. The comparison yields 0 for
1161*67e74705SXin Li ///    false, 0xFFFF for true.
1162*67e74705SXin Li ///
1163*67e74705SXin Li /// \headerfile <x86intrin.h>
1164*67e74705SXin Li ///
1165*67e74705SXin Li /// This intrinsic corresponds to the \c PCMPEQW instruction.
1166*67e74705SXin Li ///
1167*67e74705SXin Li /// \param __m1
1168*67e74705SXin Li ///    A 64-bit integer vector of [4 x i16].
1169*67e74705SXin Li /// \param __m2
1170*67e74705SXin Li ///    A 64-bit integer vector of [4 x i16].
1171*67e74705SXin Li /// \returns A 64-bit integer vector of [4 x i16] containing the comparison
1172*67e74705SXin Li ///    results.
1173*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_cmpeq_pi16(__m64 __m1,__m64 __m2)1174*67e74705SXin Li _mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
1175*67e74705SXin Li {
1176*67e74705SXin Li     return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2);
1177*67e74705SXin Li }
1178*67e74705SXin Li 
1179*67e74705SXin Li /// \brief Compares the 32-bit integer elements of two 64-bit integer vectors of
1180*67e74705SXin Li ///    [2 x i32] to determine if the element of the first vector is equal to the
1181*67e74705SXin Li ///    corresponding element of the second vector. The comparison yields 0 for
1182*67e74705SXin Li ///    false, 0xFFFFFFFF for true.
1183*67e74705SXin Li ///
1184*67e74705SXin Li /// \headerfile <x86intrin.h>
1185*67e74705SXin Li ///
1186*67e74705SXin Li /// This intrinsic corresponds to the \c PCMPEQD instruction.
1187*67e74705SXin Li ///
1188*67e74705SXin Li /// \param __m1
1189*67e74705SXin Li ///    A 64-bit integer vector of [2 x i32].
1190*67e74705SXin Li /// \param __m2
1191*67e74705SXin Li ///    A 64-bit integer vector of [2 x i32].
1192*67e74705SXin Li /// \returns A 64-bit integer vector of [2 x i32] containing the comparison
1193*67e74705SXin Li ///    results.
1194*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_cmpeq_pi32(__m64 __m1,__m64 __m2)1195*67e74705SXin Li _mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
1196*67e74705SXin Li {
1197*67e74705SXin Li     return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2);
1198*67e74705SXin Li }
1199*67e74705SXin Li 
1200*67e74705SXin Li /// \brief Compares the 8-bit integer elements of two 64-bit integer vectors of
1201*67e74705SXin Li ///    [8 x i8] to determine if the element of the first vector is greater than
1202*67e74705SXin Li ///    the corresponding element of the second vector. The comparison yields 0
1203*67e74705SXin Li ///    for false, 0xFF for true.
1204*67e74705SXin Li ///
1205*67e74705SXin Li /// \headerfile <x86intrin.h>
1206*67e74705SXin Li ///
1207*67e74705SXin Li /// This intrinsic corresponds to the \c PCMPGTB instruction.
1208*67e74705SXin Li ///
1209*67e74705SXin Li /// \param __m1
1210*67e74705SXin Li ///    A 64-bit integer vector of [8 x i8].
1211*67e74705SXin Li /// \param __m2
1212*67e74705SXin Li ///    A 64-bit integer vector of [8 x i8].
1213*67e74705SXin Li /// \returns A 64-bit integer vector of [8 x i8] containing the comparison
1214*67e74705SXin Li ///    results.
1215*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_cmpgt_pi8(__m64 __m1,__m64 __m2)1216*67e74705SXin Li _mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
1217*67e74705SXin Li {
1218*67e74705SXin Li     return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2);
1219*67e74705SXin Li }
1220*67e74705SXin Li 
1221*67e74705SXin Li /// \brief Compares the 16-bit integer elements of two 64-bit integer vectors of
1222*67e74705SXin Li ///    [4 x i16] to determine if the element of the first vector is greater than
1223*67e74705SXin Li ///    the corresponding element of the second vector. The comparison yields 0
1224*67e74705SXin Li ///    for false, 0xFFFF for true.
1225*67e74705SXin Li ///
1226*67e74705SXin Li /// \headerfile <x86intrin.h>
1227*67e74705SXin Li ///
1228*67e74705SXin Li /// This intrinsic corresponds to the \c PCMPGTW instruction.
1229*67e74705SXin Li ///
1230*67e74705SXin Li /// \param __m1
1231*67e74705SXin Li ///    A 64-bit integer vector of [4 x i16].
1232*67e74705SXin Li /// \param __m2
1233*67e74705SXin Li ///    A 64-bit integer vector of [4 x i16].
1234*67e74705SXin Li /// \returns A 64-bit integer vector of [4 x i16] containing the comparison
1235*67e74705SXin Li ///    results.
1236*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_cmpgt_pi16(__m64 __m1,__m64 __m2)1237*67e74705SXin Li _mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
1238*67e74705SXin Li {
1239*67e74705SXin Li     return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2);
1240*67e74705SXin Li }
1241*67e74705SXin Li 
1242*67e74705SXin Li /// \brief Compares the 32-bit integer elements of two 64-bit integer vectors of
1243*67e74705SXin Li ///    [2 x i32] to determine if the element of the first vector is greater than
1244*67e74705SXin Li ///    the corresponding element of the second vector. The comparison yields 0
1245*67e74705SXin Li ///    for false, 0xFFFFFFFF for true.
1246*67e74705SXin Li ///
1247*67e74705SXin Li /// \headerfile <x86intrin.h>
1248*67e74705SXin Li ///
1249*67e74705SXin Li /// This intrinsic corresponds to the \c PCMPGTD instruction.
1250*67e74705SXin Li ///
1251*67e74705SXin Li /// \param __m1
1252*67e74705SXin Li ///    A 64-bit integer vector of [2 x i32].
1253*67e74705SXin Li /// \param __m2
1254*67e74705SXin Li ///    A 64-bit integer vector of [2 x i32].
1255*67e74705SXin Li /// \returns A 64-bit integer vector of [2 x i32] containing the comparison
1256*67e74705SXin Li ///    results.
1257*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_cmpgt_pi32(__m64 __m1,__m64 __m2)1258*67e74705SXin Li _mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
1259*67e74705SXin Li {
1260*67e74705SXin Li     return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2);
1261*67e74705SXin Li }
1262*67e74705SXin Li 
1263*67e74705SXin Li /// \brief Constructs a 64-bit integer vector initialized to zero.
1264*67e74705SXin Li ///
1265*67e74705SXin Li /// \headerfile <x86intrin.h>
1266*67e74705SXin Li ///
1267*67e74705SXin Li /// This intrinsic corresponds to the the \c VXORPS / XORPS instruction.
1268*67e74705SXin Li ///
1269*67e74705SXin Li /// \returns An initialized 64-bit integer vector with all elements set to zero.
1270*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_setzero_si64(void)1271*67e74705SXin Li _mm_setzero_si64(void)
1272*67e74705SXin Li {
1273*67e74705SXin Li     return (__m64){ 0LL };
1274*67e74705SXin Li }
1275*67e74705SXin Li 
1276*67e74705SXin Li /// \brief Constructs a 64-bit integer vector initialized with the specified
1277*67e74705SXin Li ///    32-bit integer values.
1278*67e74705SXin Li ///
1279*67e74705SXin Li /// \headerfile <x86intrin.h>
1280*67e74705SXin Li ///
1281*67e74705SXin Li /// This intrinsic is a utility function and does not correspond to a specific
1282*67e74705SXin Li ///    instruction.
1283*67e74705SXin Li ///
1284*67e74705SXin Li /// \param __i1
1285*67e74705SXin Li ///    A 32-bit integer value used to initialize the upper 32 bits of the
1286*67e74705SXin Li ///    result.
1287*67e74705SXin Li /// \param __i0
1288*67e74705SXin Li ///    A 32-bit integer value used to initialize the lower 32 bits of the
1289*67e74705SXin Li ///    result.
1290*67e74705SXin Li /// \returns An initialized 64-bit integer vector.
1291*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_set_pi32(int __i1,int __i0)1292*67e74705SXin Li _mm_set_pi32(int __i1, int __i0)
1293*67e74705SXin Li {
1294*67e74705SXin Li     return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1);
1295*67e74705SXin Li }
1296*67e74705SXin Li 
1297*67e74705SXin Li /// \brief Constructs a 64-bit integer vector initialized with the specified
1298*67e74705SXin Li ///    16-bit integer values.
1299*67e74705SXin Li ///
1300*67e74705SXin Li /// \headerfile <x86intrin.h>
1301*67e74705SXin Li ///
1302*67e74705SXin Li /// This intrinsic is a utility function and does not correspond to a specific
1303*67e74705SXin Li ///    instruction.
1304*67e74705SXin Li ///
1305*67e74705SXin Li /// \param __s3
1306*67e74705SXin Li ///    A 16-bit integer value used to initialize bits [63:48] of the result.
1307*67e74705SXin Li /// \param __s2
1308*67e74705SXin Li ///    A 16-bit integer value used to initialize bits [47:32] of the result.
1309*67e74705SXin Li /// \param __s1
1310*67e74705SXin Li ///    A 16-bit integer value used to initialize bits [31:16] of the result.
1311*67e74705SXin Li /// \param __s0
1312*67e74705SXin Li ///    A 16-bit integer value used to initialize bits [15:0] of the result.
1313*67e74705SXin Li /// \returns An initialized 64-bit integer vector.
1314*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_set_pi16(short __s3,short __s2,short __s1,short __s0)1315*67e74705SXin Li _mm_set_pi16(short __s3, short __s2, short __s1, short __s0)
1316*67e74705SXin Li {
1317*67e74705SXin Li     return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3);
1318*67e74705SXin Li }
1319*67e74705SXin Li 
1320*67e74705SXin Li /// \brief Constructs a 64-bit integer vector initialized with the specified
1321*67e74705SXin Li ///    8-bit integer values.
1322*67e74705SXin Li ///
1323*67e74705SXin Li /// \headerfile <x86intrin.h>
1324*67e74705SXin Li ///
1325*67e74705SXin Li /// This intrinsic is a utility function and does not correspond to a specific
1326*67e74705SXin Li ///    instruction.
1327*67e74705SXin Li ///
1328*67e74705SXin Li /// \param __b7
1329*67e74705SXin Li ///    An 8-bit integer value used to initialize bits [63:56] of the result.
1330*67e74705SXin Li /// \param __b6
1331*67e74705SXin Li ///    An 8-bit integer value used to initialize bits [55:48] of the result.
1332*67e74705SXin Li /// \param __b5
1333*67e74705SXin Li ///    An 8-bit integer value used to initialize bits [47:40] of the result.
1334*67e74705SXin Li /// \param __b4
1335*67e74705SXin Li ///    An 8-bit integer value used to initialize bits [39:32] of the result.
1336*67e74705SXin Li /// \param __b3
1337*67e74705SXin Li ///    An 8-bit integer value used to initialize bits [31:24] of the result.
1338*67e74705SXin Li /// \param __b2
1339*67e74705SXin Li ///    An 8-bit integer value used to initialize bits [23:16] of the result.
1340*67e74705SXin Li /// \param __b1
1341*67e74705SXin Li ///    An 8-bit integer value used to initialize bits [15:8] of the result.
1342*67e74705SXin Li /// \param __b0
1343*67e74705SXin Li ///    An 8-bit integer value used to initialize bits [7:0] of the result.
1344*67e74705SXin Li /// \returns An initialized 64-bit integer vector.
1345*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_set_pi8(char __b7,char __b6,char __b5,char __b4,char __b3,char __b2,char __b1,char __b0)1346*67e74705SXin Li _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,
1347*67e74705SXin Li             char __b1, char __b0)
1348*67e74705SXin Li {
1349*67e74705SXin Li     return (__m64)__builtin_ia32_vec_init_v8qi(__b0, __b1, __b2, __b3,
1350*67e74705SXin Li                                                __b4, __b5, __b6, __b7);
1351*67e74705SXin Li }
1352*67e74705SXin Li 
1353*67e74705SXin Li /// \brief Constructs a 64-bit integer vector of [2 x i32], with each of the
1354*67e74705SXin Li ///    32-bit integer vector elements set to the specified 32-bit integer
1355*67e74705SXin Li ///    value.
1356*67e74705SXin Li ///
1357*67e74705SXin Li /// \headerfile <x86intrin.h>
1358*67e74705SXin Li ///
1359*67e74705SXin Li /// This intrinsic corresponds to the \c VPSHUFD / PSHUFD instruction.
1360*67e74705SXin Li ///
1361*67e74705SXin Li /// \param __i
1362*67e74705SXin Li ///    A 32-bit integer value used to initialize each vector element of the
1363*67e74705SXin Li ///    result.
1364*67e74705SXin Li /// \returns An initialized 64-bit integer vector of [2 x i32].
1365*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_set1_pi32(int __i)1366*67e74705SXin Li _mm_set1_pi32(int __i)
1367*67e74705SXin Li {
1368*67e74705SXin Li     return _mm_set_pi32(__i, __i);
1369*67e74705SXin Li }
1370*67e74705SXin Li 
1371*67e74705SXin Li /// \brief Constructs a 64-bit integer vector of [4 x i16], with each of the
1372*67e74705SXin Li ///    16-bit integer vector elements set to the specified 16-bit integer
1373*67e74705SXin Li ///    value.
1374*67e74705SXin Li ///
1375*67e74705SXin Li /// \headerfile <x86intrin.h>
1376*67e74705SXin Li ///
1377*67e74705SXin Li /// This intrinsic corresponds to the \c VPSHUFLW / PSHUFLW instruction.
1378*67e74705SXin Li ///
1379*67e74705SXin Li /// \param __w
1380*67e74705SXin Li ///    A 16-bit integer value used to initialize each vector element of the
1381*67e74705SXin Li ///    result.
1382*67e74705SXin Li /// \returns An initialized 64-bit integer vector of [4 x i16].
1383*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_set1_pi16(short __w)1384*67e74705SXin Li _mm_set1_pi16(short __w)
1385*67e74705SXin Li {
1386*67e74705SXin Li     return _mm_set_pi16(__w, __w, __w, __w);
1387*67e74705SXin Li }
1388*67e74705SXin Li 
1389*67e74705SXin Li /// \brief Constructs a 64-bit integer vector of [8 x i8], with each of the
1390*67e74705SXin Li ///    8-bit integer vector elements set to the specified 8-bit integer value.
1391*67e74705SXin Li ///
1392*67e74705SXin Li /// \headerfile <x86intrin.h>
1393*67e74705SXin Li ///
1394*67e74705SXin Li /// This intrinsic corresponds to the \c VPUNPCKLBW + VPSHUFLW / \c PUNPCKLBW +
1395*67e74705SXin Li ///    PSHUFLW instruction.
1396*67e74705SXin Li ///
1397*67e74705SXin Li /// \param __b
1398*67e74705SXin Li ///    An 8-bit integer value used to initialize each vector element of the
1399*67e74705SXin Li ///    result.
1400*67e74705SXin Li /// \returns An initialized 64-bit integer vector of [8 x i8].
1401*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_set1_pi8(char __b)1402*67e74705SXin Li _mm_set1_pi8(char __b)
1403*67e74705SXin Li {
1404*67e74705SXin Li     return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b);
1405*67e74705SXin Li }
1406*67e74705SXin Li 
1407*67e74705SXin Li /// \brief Constructs a 64-bit integer vector, initialized in reverse order with
1408*67e74705SXin Li ///    the specified 32-bit integer values.
1409*67e74705SXin Li ///
1410*67e74705SXin Li /// \headerfile <x86intrin.h>
1411*67e74705SXin Li ///
1412*67e74705SXin Li /// This intrinsic is a utility function and does not correspond to a specific
1413*67e74705SXin Li ///    instruction.
1414*67e74705SXin Li ///
1415*67e74705SXin Li /// \param __i0
1416*67e74705SXin Li ///    A 32-bit integer value used to initialize the lower 32 bits of the
1417*67e74705SXin Li ///    result.
1418*67e74705SXin Li /// \param __i1
1419*67e74705SXin Li ///    A 32-bit integer value used to initialize the upper 32 bits of the
1420*67e74705SXin Li ///    result.
1421*67e74705SXin Li /// \returns An initialized 64-bit integer vector.
1422*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_setr_pi32(int __i0,int __i1)1423*67e74705SXin Li _mm_setr_pi32(int __i0, int __i1)
1424*67e74705SXin Li {
1425*67e74705SXin Li     return _mm_set_pi32(__i1, __i0);
1426*67e74705SXin Li }
1427*67e74705SXin Li 
1428*67e74705SXin Li /// \brief Constructs a 64-bit integer vector, initialized in reverse order with
1429*67e74705SXin Li ///    the specified 16-bit integer values.
1430*67e74705SXin Li ///
1431*67e74705SXin Li /// \headerfile <x86intrin.h>
1432*67e74705SXin Li ///
1433*67e74705SXin Li /// This intrinsic is a utility function and does not correspond to a specific
1434*67e74705SXin Li ///    instruction.
1435*67e74705SXin Li ///
1436*67e74705SXin Li /// \param __w0
1437*67e74705SXin Li ///    A 16-bit integer value used to initialize bits [15:0] of the result.
1438*67e74705SXin Li /// \param __w1
1439*67e74705SXin Li ///    A 16-bit integer value used to initialize bits [31:16] of the result.
1440*67e74705SXin Li /// \param __w2
1441*67e74705SXin Li ///    A 16-bit integer value used to initialize bits [47:32] of the result.
1442*67e74705SXin Li /// \param __w3
1443*67e74705SXin Li ///    A 16-bit integer value used to initialize bits [63:48] of the result.
1444*67e74705SXin Li /// \returns An initialized 64-bit integer vector.
1445*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_setr_pi16(short __w0,short __w1,short __w2,short __w3)1446*67e74705SXin Li _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3)
1447*67e74705SXin Li {
1448*67e74705SXin Li     return _mm_set_pi16(__w3, __w2, __w1, __w0);
1449*67e74705SXin Li }
1450*67e74705SXin Li 
1451*67e74705SXin Li /// \brief Constructs a 64-bit integer vector, initialized in reverse order with
1452*67e74705SXin Li ///    the specified 8-bit integer values.
1453*67e74705SXin Li ///
1454*67e74705SXin Li /// \headerfile <x86intrin.h>
1455*67e74705SXin Li ///
1456*67e74705SXin Li /// This intrinsic is a utility function and does not correspond to a specific
1457*67e74705SXin Li ///    instruction.
1458*67e74705SXin Li ///
1459*67e74705SXin Li /// \param __b0
1460*67e74705SXin Li ///    An 8-bit integer value used to initialize bits [7:0] of the result.
1461*67e74705SXin Li /// \param __b1
1462*67e74705SXin Li ///    An 8-bit integer value used to initialize bits [15:8] of the result.
1463*67e74705SXin Li /// \param __b2
1464*67e74705SXin Li ///    An 8-bit integer value used to initialize bits [23:16] of the result.
1465*67e74705SXin Li /// \param __b3
1466*67e74705SXin Li ///    An 8-bit integer value used to initialize bits [31:24] of the result.
1467*67e74705SXin Li /// \param __b4
1468*67e74705SXin Li ///    An 8-bit integer value used to initialize bits [39:32] of the result.
1469*67e74705SXin Li /// \param __b5
1470*67e74705SXin Li ///    An 8-bit integer value used to initialize bits [47:40] of the result.
1471*67e74705SXin Li /// \param __b6
1472*67e74705SXin Li ///    An 8-bit integer value used to initialize bits [55:48] of the result.
1473*67e74705SXin Li /// \param __b7
1474*67e74705SXin Li ///    An 8-bit integer value used to initialize bits [63:56] of the result.
1475*67e74705SXin Li /// \returns An initialized 64-bit integer vector.
1476*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_setr_pi8(char __b0,char __b1,char __b2,char __b3,char __b4,char __b5,char __b6,char __b7)1477*67e74705SXin Li _mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5,
1478*67e74705SXin Li              char __b6, char __b7)
1479*67e74705SXin Li {
1480*67e74705SXin Li     return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
1481*67e74705SXin Li }
1482*67e74705SXin Li 
#undef __DEFAULT_FN_ATTRS

/*
 * Aliases for compatibility.
 *
 * Each _m_* name below is an object-like macro that expands to the _mm_*
 * intrinsic named on the same line.
 */

/* _mm_empty and the _mm_cvt* conversions. */
#define _m_empty      _mm_empty
#define _m_from_int   _mm_cvtsi32_si64
#define _m_from_int64 _mm_cvtsi64_m64
#define _m_to_int     _mm_cvtsi64_si32
#define _m_to_int64   _mm_cvtm64_si64

/* _mm_packs_* and _mm_unpack{hi,lo}_*. */
#define _m_packsswb   _mm_packs_pi16
#define _m_packssdw   _mm_packs_pi32
#define _m_packuswb   _mm_packs_pu16
#define _m_punpckhbw  _mm_unpackhi_pi8
#define _m_punpckhwd  _mm_unpackhi_pi16
#define _m_punpckhdq  _mm_unpackhi_pi32
#define _m_punpcklbw  _mm_unpacklo_pi8
#define _m_punpcklwd  _mm_unpacklo_pi16
#define _m_punpckldq  _mm_unpacklo_pi32

/* _mm_add_* and _mm_adds_*. */
#define _m_paddb      _mm_add_pi8
#define _m_paddw      _mm_add_pi16
#define _m_paddd      _mm_add_pi32
#define _m_paddsb     _mm_adds_pi8
#define _m_paddsw     _mm_adds_pi16
#define _m_paddusb    _mm_adds_pu8
#define _m_paddusw    _mm_adds_pu16

/* _mm_sub_* and _mm_subs_*. */
#define _m_psubb      _mm_sub_pi8
#define _m_psubw      _mm_sub_pi16
#define _m_psubd      _mm_sub_pi32
#define _m_psubsb     _mm_subs_pi8
#define _m_psubsw     _mm_subs_pi16
#define _m_psubusb    _mm_subs_pu8
#define _m_psubusw    _mm_subs_pu16

/* _mm_madd_*, _mm_mulhi_* and _mm_mullo_*. */
#define _m_pmaddwd    _mm_madd_pi16
#define _m_pmulhw     _mm_mulhi_pi16
#define _m_pmullw     _mm_mullo_pi16

/* _mm_sll*, _mm_sra* and _mm_srl* shifts. */
#define _m_psllw      _mm_sll_pi16
#define _m_psllwi     _mm_slli_pi16
#define _m_pslld      _mm_sll_pi32
#define _m_pslldi     _mm_slli_pi32
#define _m_psllq      _mm_sll_si64
#define _m_psllqi     _mm_slli_si64
#define _m_psraw      _mm_sra_pi16
#define _m_psrawi     _mm_srai_pi16
#define _m_psrad      _mm_sra_pi32
#define _m_psradi     _mm_srai_pi32
#define _m_psrlw      _mm_srl_pi16
#define _m_psrlwi     _mm_srli_pi16
#define _m_psrld      _mm_srl_pi32
#define _m_psrldi     _mm_srli_pi32
#define _m_psrlq      _mm_srl_si64
#define _m_psrlqi     _mm_srli_si64

/* Bitwise operations on the whole 64-bit vector. */
#define _m_pand       _mm_and_si64
#define _m_pandn      _mm_andnot_si64
#define _m_por        _mm_or_si64
#define _m_pxor       _mm_xor_si64

/* _mm_cmpeq_* and _mm_cmpgt_* element comparisons. */
#define _m_pcmpeqb    _mm_cmpeq_pi8
#define _m_pcmpeqw    _mm_cmpeq_pi16
#define _m_pcmpeqd    _mm_cmpeq_pi32
#define _m_pcmpgtb    _mm_cmpgt_pi8
#define _m_pcmpgtw    _mm_cmpgt_pi16
#define _m_pcmpgtd    _mm_cmpgt_pi32
1543*67e74705SXin Li 
1544*67e74705SXin Li #endif /* __MMINTRIN_H */
1545*67e74705SXin Li 
1546