xref: /aosp_15_r20/external/clang/lib/Headers/tmmintrin.h (revision 67e74705e28f6214e480b399dd47ea732279e315)
/*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
23*67e74705SXin Li 
24*67e74705SXin Li #ifndef __TMMINTRIN_H
25*67e74705SXin Li #define __TMMINTRIN_H
26*67e74705SXin Li 
27*67e74705SXin Li #include <pmmintrin.h>
28*67e74705SXin Li 
/* Attributes applied to every inline function defined in this file: the
 * function is always inlined, carries no debug info, and is compiled with the
 * "ssse3" target feature enabled. */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("ssse3")))
31*67e74705SXin Li 
32*67e74705SXin Li /// \brief Computes the absolute value of each of the packed 8-bit signed
33*67e74705SXin Li ///    integers in the source operand and stores the 8-bit unsigned integer
34*67e74705SXin Li ///    results in the destination.
35*67e74705SXin Li ///
36*67e74705SXin Li /// \headerfile <x86intrin.h>
37*67e74705SXin Li ///
38*67e74705SXin Li /// This intrinsic corresponds to the \c PABSB instruction.
39*67e74705SXin Li ///
40*67e74705SXin Li /// \param __a
41*67e74705SXin Li ///    A 64-bit vector of [8 x i8].
42*67e74705SXin Li /// \returns A 64-bit integer vector containing the absolute values of the
43*67e74705SXin Li ///    elements in the operand.
44*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_abs_pi8(__m64 __a)45*67e74705SXin Li _mm_abs_pi8(__m64 __a)
46*67e74705SXin Li {
47*67e74705SXin Li     return (__m64)__builtin_ia32_pabsb((__v8qi)__a);
48*67e74705SXin Li }
49*67e74705SXin Li 
50*67e74705SXin Li /// \brief Computes the absolute value of each of the packed 8-bit signed
51*67e74705SXin Li ///    integers in the source operand and stores the 8-bit unsigned integer
52*67e74705SXin Li ///    results in the destination.
53*67e74705SXin Li ///
54*67e74705SXin Li /// \headerfile <x86intrin.h>
55*67e74705SXin Li ///
56*67e74705SXin Li /// This intrinsic corresponds to the \c VPABSB instruction.
57*67e74705SXin Li ///
58*67e74705SXin Li /// \param __a
59*67e74705SXin Li ///    A 128-bit vector of [16 x i8].
60*67e74705SXin Li /// \returns A 128-bit integer vector containing the absolute values of the
61*67e74705SXin Li ///    elements in the operand.
62*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_abs_epi8(__m128i __a)63*67e74705SXin Li _mm_abs_epi8(__m128i __a)
64*67e74705SXin Li {
65*67e74705SXin Li     return (__m128i)__builtin_ia32_pabsb128((__v16qi)__a);
66*67e74705SXin Li }
67*67e74705SXin Li 
68*67e74705SXin Li /// \brief Computes the absolute value of each of the packed 16-bit signed
69*67e74705SXin Li ///    integers in the source operand and stores the 16-bit unsigned integer
70*67e74705SXin Li ///    results in the destination.
71*67e74705SXin Li ///
72*67e74705SXin Li /// \headerfile <x86intrin.h>
73*67e74705SXin Li ///
74*67e74705SXin Li /// This intrinsic corresponds to the \c PABSW instruction.
75*67e74705SXin Li ///
76*67e74705SXin Li /// \param __a
77*67e74705SXin Li ///    A 64-bit vector of [4 x i16].
78*67e74705SXin Li /// \returns A 64-bit integer vector containing the absolute values of the
79*67e74705SXin Li ///    elements in the operand.
80*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_abs_pi16(__m64 __a)81*67e74705SXin Li _mm_abs_pi16(__m64 __a)
82*67e74705SXin Li {
83*67e74705SXin Li     return (__m64)__builtin_ia32_pabsw((__v4hi)__a);
84*67e74705SXin Li }
85*67e74705SXin Li 
86*67e74705SXin Li /// \brief Computes the absolute value of each of the packed 16-bit signed
87*67e74705SXin Li ///    integers in the source operand and stores the 16-bit unsigned integer
88*67e74705SXin Li ///    results in the destination.
89*67e74705SXin Li ///
90*67e74705SXin Li /// \headerfile <x86intrin.h>
91*67e74705SXin Li ///
92*67e74705SXin Li /// This intrinsic corresponds to the \c VPABSW instruction.
93*67e74705SXin Li ///
94*67e74705SXin Li /// \param __a
95*67e74705SXin Li ///    A 128-bit vector of [8 x i16].
96*67e74705SXin Li /// \returns A 128-bit integer vector containing the absolute values of the
97*67e74705SXin Li ///    elements in the operand.
98*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_abs_epi16(__m128i __a)99*67e74705SXin Li _mm_abs_epi16(__m128i __a)
100*67e74705SXin Li {
101*67e74705SXin Li     return (__m128i)__builtin_ia32_pabsw128((__v8hi)__a);
102*67e74705SXin Li }
103*67e74705SXin Li 
104*67e74705SXin Li /// \brief Computes the absolute value of each of the packed 32-bit signed
105*67e74705SXin Li ///    integers in the source operand and stores the 32-bit unsigned integer
106*67e74705SXin Li ///    results in the destination.
107*67e74705SXin Li ///
108*67e74705SXin Li /// \headerfile <x86intrin.h>
109*67e74705SXin Li ///
110*67e74705SXin Li /// This intrinsic corresponds to the \c PABSD instruction.
111*67e74705SXin Li ///
112*67e74705SXin Li /// \param __a
113*67e74705SXin Li ///    A 64-bit vector of [2 x i32].
114*67e74705SXin Li /// \returns A 64-bit integer vector containing the absolute values of the
115*67e74705SXin Li ///    elements in the operand.
116*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_abs_pi32(__m64 __a)117*67e74705SXin Li _mm_abs_pi32(__m64 __a)
118*67e74705SXin Li {
119*67e74705SXin Li     return (__m64)__builtin_ia32_pabsd((__v2si)__a);
120*67e74705SXin Li }
121*67e74705SXin Li 
122*67e74705SXin Li /// \brief Computes the absolute value of each of the packed 32-bit signed
123*67e74705SXin Li ///    integers in the source operand and stores the 32-bit unsigned integer
124*67e74705SXin Li ///    results in the destination.
125*67e74705SXin Li ///
126*67e74705SXin Li /// \headerfile <x86intrin.h>
127*67e74705SXin Li ///
128*67e74705SXin Li /// This intrinsic corresponds to the \c VPABSD instruction.
129*67e74705SXin Li ///
130*67e74705SXin Li /// \param __a
131*67e74705SXin Li ///    A 128-bit vector of [4 x i32].
132*67e74705SXin Li /// \returns A 128-bit integer vector containing the absolute values of the
133*67e74705SXin Li ///    elements in the operand.
134*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_abs_epi32(__m128i __a)135*67e74705SXin Li _mm_abs_epi32(__m128i __a)
136*67e74705SXin Li {
137*67e74705SXin Li     return (__m128i)__builtin_ia32_pabsd128((__v4si)__a);
138*67e74705SXin Li }
139*67e74705SXin Li 
/// \brief Concatenates the two 128-bit integer vector operands, and
///    right-shifts the result by the number of bytes specified in the immediate
///    operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_alignr_epi8(__m128i a, __m128i b, const int n);
/// \endcode
///
/// This intrinsic corresponds to the \c PALIGNR instruction.
///
/// \param a
///    A 128-bit vector of [16 x i8] containing one of the source operands.
/// \param b
///    A 128-bit vector of [16 x i8] containing one of the source operands.
/// \param n
///    An immediate operand specifying how many bytes to right-shift the result.
/// \returns A 128-bit integer vector containing the concatenated right-shifted
///    value.
/* Implemented as a macro so that n reaches the builtin as an immediate. */
#define _mm_alignr_epi8(a, b, n) __extension__ ({ \
  (__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \
                                     (__v16qi)(__m128i)(b), (n)); })
163*67e74705SXin Li 
/// \brief Concatenates the two 64-bit integer vector operands, and right-shifts
///    the result by the number of bytes specified in the immediate operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m64 _mm_alignr_pi8(__m64 a, __m64 b, const int n);
/// \endcode
///
/// This intrinsic corresponds to the \c PALIGNR instruction.
///
/// \param a
///    A 64-bit vector of [8 x i8] containing one of the source operands.
/// \param b
///    A 64-bit vector of [8 x i8] containing one of the source operands.
/// \param n
///    An immediate operand specifying how many bytes to right-shift the result.
/// \returns A 64-bit integer vector containing the concatenated right-shifted
///    value.
/* Implemented as a macro so that n reaches the builtin as an immediate. */
#define _mm_alignr_pi8(a, b, n) __extension__ ({ \
  (__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n)); })
185*67e74705SXin Li 
186*67e74705SXin Li /// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
187*67e74705SXin Li ///    128-bit vectors of [8 x i16].
188*67e74705SXin Li ///
189*67e74705SXin Li /// \headerfile <x86intrin.h>
190*67e74705SXin Li ///
191*67e74705SXin Li /// This intrinsic corresponds to the \c VPHADDW instruction.
192*67e74705SXin Li ///
193*67e74705SXin Li /// \param __a
194*67e74705SXin Li ///    A 128-bit vector of [8 x i16] containing one of the source operands. The
195*67e74705SXin Li ///    horizontal sums of the values are stored in the lower bits of the
196*67e74705SXin Li ///    destination.
197*67e74705SXin Li /// \param __b
198*67e74705SXin Li ///    A 128-bit vector of [8 x i16] containing one of the source operands. The
199*67e74705SXin Li ///    horizontal sums of the values are stored in the upper bits of the
200*67e74705SXin Li ///    destination.
201*67e74705SXin Li /// \returns A 128-bit vector of [8 x i16] containing the horizontal sums of
202*67e74705SXin Li ///    both operands.
203*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hadd_epi16(__m128i __a,__m128i __b)204*67e74705SXin Li _mm_hadd_epi16(__m128i __a, __m128i __b)
205*67e74705SXin Li {
206*67e74705SXin Li     return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);
207*67e74705SXin Li }
208*67e74705SXin Li 
209*67e74705SXin Li /// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
210*67e74705SXin Li ///    128-bit vectors of [4 x i32].
211*67e74705SXin Li ///
212*67e74705SXin Li /// \headerfile <x86intrin.h>
213*67e74705SXin Li ///
214*67e74705SXin Li /// This intrinsic corresponds to the \c VPHADDD instruction.
215*67e74705SXin Li ///
216*67e74705SXin Li /// \param __a
217*67e74705SXin Li ///    A 128-bit vector of [4 x i32] containing one of the source operands. The
218*67e74705SXin Li ///    horizontal sums of the values are stored in the lower bits of the
219*67e74705SXin Li ///    destination.
220*67e74705SXin Li /// \param __b
221*67e74705SXin Li ///    A 128-bit vector of [4 x i32] containing one of the source operands. The
222*67e74705SXin Li ///    horizontal sums of the values are stored in the upper bits of the
223*67e74705SXin Li ///    destination.
224*67e74705SXin Li /// \returns A 128-bit vector of [4 x i32] containing the horizontal sums of
225*67e74705SXin Li ///    both operands.
226*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hadd_epi32(__m128i __a,__m128i __b)227*67e74705SXin Li _mm_hadd_epi32(__m128i __a, __m128i __b)
228*67e74705SXin Li {
229*67e74705SXin Li     return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);
230*67e74705SXin Li }
231*67e74705SXin Li 
232*67e74705SXin Li /// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
233*67e74705SXin Li ///    64-bit vectors of [4 x i16].
234*67e74705SXin Li ///
235*67e74705SXin Li /// \headerfile <x86intrin.h>
236*67e74705SXin Li ///
237*67e74705SXin Li /// This intrinsic corresponds to the \c PHADDW instruction.
238*67e74705SXin Li ///
239*67e74705SXin Li /// \param __a
240*67e74705SXin Li ///    A 64-bit vector of [4 x i16] containing one of the source operands. The
241*67e74705SXin Li ///    horizontal sums of the values are stored in the lower bits of the
242*67e74705SXin Li ///    destination.
243*67e74705SXin Li /// \param __b
244*67e74705SXin Li ///    A 64-bit vector of [4 x i16] containing one of the source operands. The
245*67e74705SXin Li ///    horizontal sums of the values are stored in the upper bits of the
246*67e74705SXin Li ///    destination.
247*67e74705SXin Li /// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both
248*67e74705SXin Li ///    operands.
249*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_hadd_pi16(__m64 __a,__m64 __b)250*67e74705SXin Li _mm_hadd_pi16(__m64 __a, __m64 __b)
251*67e74705SXin Li {
252*67e74705SXin Li     return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b);
253*67e74705SXin Li }
254*67e74705SXin Li 
255*67e74705SXin Li /// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
256*67e74705SXin Li ///    64-bit vectors of [2 x i32].
257*67e74705SXin Li ///
258*67e74705SXin Li /// \headerfile <x86intrin.h>
259*67e74705SXin Li ///
260*67e74705SXin Li /// This intrinsic corresponds to the \c PHADDD instruction.
261*67e74705SXin Li ///
262*67e74705SXin Li /// \param __a
263*67e74705SXin Li ///    A 64-bit vector of [2 x i32] containing one of the source operands. The
264*67e74705SXin Li ///    horizontal sums of the values are stored in the lower bits of the
265*67e74705SXin Li ///    destination.
266*67e74705SXin Li /// \param __b
267*67e74705SXin Li ///    A 64-bit vector of [2 x i32] containing one of the source operands. The
268*67e74705SXin Li ///    horizontal sums of the values are stored in the upper bits of the
269*67e74705SXin Li ///    destination.
270*67e74705SXin Li /// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both
271*67e74705SXin Li ///    operands.
272*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_hadd_pi32(__m64 __a,__m64 __b)273*67e74705SXin Li _mm_hadd_pi32(__m64 __a, __m64 __b)
274*67e74705SXin Li {
275*67e74705SXin Li     return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b);
276*67e74705SXin Li }
277*67e74705SXin Li 
278*67e74705SXin Li /// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
279*67e74705SXin Li ///    128-bit vectors of [8 x i16]. Positive sums greater than 7FFFh are
280*67e74705SXin Li ///    saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
281*67e74705SXin Li ///
282*67e74705SXin Li /// \headerfile <x86intrin.h>
283*67e74705SXin Li ///
284*67e74705SXin Li /// This intrinsic corresponds to the \c VPHADDSW instruction.
285*67e74705SXin Li ///
286*67e74705SXin Li /// \param __a
287*67e74705SXin Li ///    A 128-bit vector of [8 x i16] containing one of the source operands. The
288*67e74705SXin Li ///    horizontal sums of the values are stored in the lower bits of the
289*67e74705SXin Li ///    destination.
290*67e74705SXin Li /// \param __b
291*67e74705SXin Li ///    A 128-bit vector of [8 x i16] containing one of the source operands. The
292*67e74705SXin Li ///    horizontal sums of the values are stored in the upper bits of the
293*67e74705SXin Li ///    destination.
294*67e74705SXin Li /// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
295*67e74705SXin Li ///    sums of both operands.
296*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hadds_epi16(__m128i __a,__m128i __b)297*67e74705SXin Li _mm_hadds_epi16(__m128i __a, __m128i __b)
298*67e74705SXin Li {
299*67e74705SXin Li     return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);
300*67e74705SXin Li }
301*67e74705SXin Li 
302*67e74705SXin Li /// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
303*67e74705SXin Li ///    64-bit vectors of [4 x i16]. Positive sums greater than 7FFFh are
304*67e74705SXin Li ///    saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
305*67e74705SXin Li ///
306*67e74705SXin Li /// \headerfile <x86intrin.h>
307*67e74705SXin Li ///
308*67e74705SXin Li /// This intrinsic corresponds to the \c PHADDSW instruction.
309*67e74705SXin Li ///
310*67e74705SXin Li /// \param __a
311*67e74705SXin Li ///    A 64-bit vector of [4 x i16] containing one of the source operands. The
312*67e74705SXin Li ///    horizontal sums of the values are stored in the lower bits of the
313*67e74705SXin Li ///    destination.
314*67e74705SXin Li /// \param __b
315*67e74705SXin Li ///    A 64-bit vector of [4 x i16] containing one of the source operands. The
316*67e74705SXin Li ///    horizontal sums of the values are stored in the upper bits of the
317*67e74705SXin Li ///    destination.
318*67e74705SXin Li /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
319*67e74705SXin Li ///    sums of both operands.
320*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_hadds_pi16(__m64 __a,__m64 __b)321*67e74705SXin Li _mm_hadds_pi16(__m64 __a, __m64 __b)
322*67e74705SXin Li {
323*67e74705SXin Li     return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b);
324*67e74705SXin Li }
325*67e74705SXin Li 
326*67e74705SXin Li /// \brief Horizontally subtracts the adjacent pairs of values contained in 2
327*67e74705SXin Li ///    packed 128-bit vectors of [8 x i16].
328*67e74705SXin Li ///
329*67e74705SXin Li /// \headerfile <x86intrin.h>
330*67e74705SXin Li ///
331*67e74705SXin Li /// This intrinsic corresponds to the \c VPHSUBW instruction.
332*67e74705SXin Li ///
333*67e74705SXin Li /// \param __a
334*67e74705SXin Li ///    A 128-bit vector of [8 x i16] containing one of the source operands. The
335*67e74705SXin Li ///    horizontal differences between the values are stored in the lower bits of
336*67e74705SXin Li ///    the destination.
337*67e74705SXin Li /// \param __b
338*67e74705SXin Li ///    A 128-bit vector of [8 x i16] containing one of the source operands. The
339*67e74705SXin Li ///    horizontal differences between the values are stored in the upper bits of
340*67e74705SXin Li ///    the destination.
341*67e74705SXin Li /// \returns A 128-bit vector of [8 x i16] containing the horizontal differences
342*67e74705SXin Li ///    of both operands.
343*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hsub_epi16(__m128i __a,__m128i __b)344*67e74705SXin Li _mm_hsub_epi16(__m128i __a, __m128i __b)
345*67e74705SXin Li {
346*67e74705SXin Li     return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);
347*67e74705SXin Li }
348*67e74705SXin Li 
349*67e74705SXin Li /// \brief Horizontally subtracts the adjacent pairs of values contained in 2
350*67e74705SXin Li ///    packed 128-bit vectors of [4 x i32].
351*67e74705SXin Li ///
352*67e74705SXin Li /// \headerfile <x86intrin.h>
353*67e74705SXin Li ///
354*67e74705SXin Li /// This intrinsic corresponds to the \c VPHSUBD instruction.
355*67e74705SXin Li ///
356*67e74705SXin Li /// \param __a
357*67e74705SXin Li ///    A 128-bit vector of [4 x i32] containing one of the source operands. The
358*67e74705SXin Li ///    horizontal differences between the values are stored in the lower bits of
359*67e74705SXin Li ///    the destination.
360*67e74705SXin Li /// \param __b
361*67e74705SXin Li ///    A 128-bit vector of [4 x i32] containing one of the source operands. The
362*67e74705SXin Li ///    horizontal differences between the values are stored in the upper bits of
363*67e74705SXin Li ///    the destination.
364*67e74705SXin Li /// \returns A 128-bit vector of [4 x i32] containing the horizontal differences
365*67e74705SXin Li ///    of both operands.
366*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hsub_epi32(__m128i __a,__m128i __b)367*67e74705SXin Li _mm_hsub_epi32(__m128i __a, __m128i __b)
368*67e74705SXin Li {
369*67e74705SXin Li     return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);
370*67e74705SXin Li }
371*67e74705SXin Li 
372*67e74705SXin Li /// \brief Horizontally subtracts the adjacent pairs of values contained in 2
373*67e74705SXin Li ///    packed 64-bit vectors of [4 x i16].
374*67e74705SXin Li ///
375*67e74705SXin Li /// \headerfile <x86intrin.h>
376*67e74705SXin Li ///
377*67e74705SXin Li /// This intrinsic corresponds to the \c PHSUBW instruction.
378*67e74705SXin Li ///
379*67e74705SXin Li /// \param __a
380*67e74705SXin Li ///    A 64-bit vector of [4 x i16] containing one of the source operands. The
381*67e74705SXin Li ///    horizontal differences between the values are stored in the lower bits of
382*67e74705SXin Li ///    the destination.
383*67e74705SXin Li /// \param __b
384*67e74705SXin Li ///    A 64-bit vector of [4 x i16] containing one of the source operands. The
385*67e74705SXin Li ///    horizontal differences between the values are stored in the upper bits of
386*67e74705SXin Li ///    the destination.
387*67e74705SXin Li /// \returns A 64-bit vector of [4 x i16] containing the horizontal differences
388*67e74705SXin Li ///    of both operands.
389*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_hsub_pi16(__m64 __a,__m64 __b)390*67e74705SXin Li _mm_hsub_pi16(__m64 __a, __m64 __b)
391*67e74705SXin Li {
392*67e74705SXin Li     return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b);
393*67e74705SXin Li }
394*67e74705SXin Li 
395*67e74705SXin Li /// \brief Horizontally subtracts the adjacent pairs of values contained in 2
396*67e74705SXin Li ///    packed 64-bit vectors of [2 x i32].
397*67e74705SXin Li ///
398*67e74705SXin Li /// \headerfile <x86intrin.h>
399*67e74705SXin Li ///
400*67e74705SXin Li /// This intrinsic corresponds to the \c PHSUBD instruction.
401*67e74705SXin Li ///
402*67e74705SXin Li /// \param __a
403*67e74705SXin Li ///    A 64-bit vector of [2 x i32] containing one of the source operands. The
404*67e74705SXin Li ///    horizontal differences between the values are stored in the lower bits of
405*67e74705SXin Li ///    the destination.
406*67e74705SXin Li /// \param __b
407*67e74705SXin Li ///    A 64-bit vector of [2 x i32] containing one of the source operands. The
408*67e74705SXin Li ///    horizontal differences between the values are stored in the upper bits of
409*67e74705SXin Li ///    the destination.
410*67e74705SXin Li /// \returns A 64-bit vector of [2 x i32] containing the horizontal differences
411*67e74705SXin Li ///    of both operands.
412*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_hsub_pi32(__m64 __a,__m64 __b)413*67e74705SXin Li _mm_hsub_pi32(__m64 __a, __m64 __b)
414*67e74705SXin Li {
415*67e74705SXin Li     return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b);
416*67e74705SXin Li }
417*67e74705SXin Li 
418*67e74705SXin Li /// \brief Horizontally subtracts the adjacent pairs of values contained in 2
419*67e74705SXin Li ///    packed 128-bit vectors of [8 x i16]. Positive differences greater than
420*67e74705SXin Li ///    7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
421*67e74705SXin Li ///    saturated to 8000h.
422*67e74705SXin Li ///
423*67e74705SXin Li /// \headerfile <x86intrin.h>
424*67e74705SXin Li ///
425*67e74705SXin Li /// This intrinsic corresponds to the \c VPHSUBSW instruction.
426*67e74705SXin Li ///
427*67e74705SXin Li /// \param __a
428*67e74705SXin Li ///    A 128-bit vector of [8 x i16] containing one of the source operands. The
429*67e74705SXin Li ///    horizontal differences between the values are stored in the lower bits of
430*67e74705SXin Li ///    the destination.
431*67e74705SXin Li /// \param __b
432*67e74705SXin Li ///    A 128-bit vector of [8 x i16] containing one of the source operands. The
433*67e74705SXin Li ///    horizontal differences between the values are stored in the upper bits of
434*67e74705SXin Li ///    the destination.
435*67e74705SXin Li /// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
436*67e74705SXin Li ///    differences of both operands.
437*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hsubs_epi16(__m128i __a,__m128i __b)438*67e74705SXin Li _mm_hsubs_epi16(__m128i __a, __m128i __b)
439*67e74705SXin Li {
440*67e74705SXin Li     return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);
441*67e74705SXin Li }
442*67e74705SXin Li 
443*67e74705SXin Li /// \brief Horizontally subtracts the adjacent pairs of values contained in 2
444*67e74705SXin Li ///    packed 64-bit vectors of [4 x i16]. Positive differences greater than
445*67e74705SXin Li ///    7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
446*67e74705SXin Li ///    saturated to 8000h.
447*67e74705SXin Li ///
448*67e74705SXin Li /// \headerfile <x86intrin.h>
449*67e74705SXin Li ///
450*67e74705SXin Li /// This intrinsic corresponds to the \c PHSUBSW instruction.
451*67e74705SXin Li ///
452*67e74705SXin Li /// \param __a
453*67e74705SXin Li ///    A 64-bit vector of [4 x i16] containing one of the source operands. The
454*67e74705SXin Li ///    horizontal differences between the values are stored in the lower bits of
455*67e74705SXin Li ///    the destination.
456*67e74705SXin Li /// \param __b
457*67e74705SXin Li ///    A 64-bit vector of [4 x i16] containing one of the source operands. The
458*67e74705SXin Li ///    horizontal differences between the values are stored in the upper bits of
459*67e74705SXin Li ///    the destination.
460*67e74705SXin Li /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
461*67e74705SXin Li ///    differences of both operands.
462*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_hsubs_pi16(__m64 __a,__m64 __b)463*67e74705SXin Li _mm_hsubs_pi16(__m64 __a, __m64 __b)
464*67e74705SXin Li {
465*67e74705SXin Li     return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b);
466*67e74705SXin Li }
467*67e74705SXin Li 
468*67e74705SXin Li /// \brief Multiplies corresponding pairs of packed 8-bit unsigned integer
469*67e74705SXin Li ///    values contained in the first source operand and packed 8-bit signed
470*67e74705SXin Li ///    integer values contained in the second source operand, adds pairs of
471*67e74705SXin Li ///    contiguous products with signed saturation, and writes the 16-bit sums to
472*67e74705SXin Li ///    the corresponding bits in the destination. For example, bits [7:0] of
473*67e74705SXin Li ///    both operands are multiplied, bits [15:8] of both operands are
474*67e74705SXin Li ///    multiplied, and the sum of both results is written to bits [15:0] of the
475*67e74705SXin Li ///    destination.
476*67e74705SXin Li ///
477*67e74705SXin Li /// \headerfile <x86intrin.h>
478*67e74705SXin Li ///
479*67e74705SXin Li /// This intrinsic corresponds to the \c VPMADDUBSW instruction.
480*67e74705SXin Li ///
481*67e74705SXin Li /// \param __a
482*67e74705SXin Li ///    A 128-bit integer vector containing the first source operand.
483*67e74705SXin Li /// \param __b
484*67e74705SXin Li ///    A 128-bit integer vector containing the second source operand.
485*67e74705SXin Li /// \returns A 128-bit integer vector containing the sums of products of both
486*67e74705SXin Li ///    operands:
487*67e74705SXin Li ///    R0 := (__a0 * __b0) + (__a1 * __b1)
488*67e74705SXin Li ///    R1 := (__a2 * __b2) + (__a3 * __b3)
489*67e74705SXin Li ///    R2 := (__a4 * __b4) + (__a5 * __b5)
490*67e74705SXin Li ///    R3 := (__a6 * __b6) + (__a7 * __b7)
491*67e74705SXin Li ///    R4 := (__a8 * __b8) + (__a9 * __b9)
492*67e74705SXin Li ///    R5 := (__a10 * __b10) + (__a11 * __b11)
493*67e74705SXin Li ///    R6 := (__a12 * __b12) + (__a13 * __b13)
494*67e74705SXin Li ///    R7 := (__a14 * __b14) + (__a15 * __b15)
495*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maddubs_epi16(__m128i __a,__m128i __b)496*67e74705SXin Li _mm_maddubs_epi16(__m128i __a, __m128i __b)
497*67e74705SXin Li {
498*67e74705SXin Li     return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);
499*67e74705SXin Li }
500*67e74705SXin Li 
501*67e74705SXin Li /// \brief Multiplies corresponding pairs of packed 8-bit unsigned integer
502*67e74705SXin Li ///    values contained in the first source operand and packed 8-bit signed
503*67e74705SXin Li ///    integer values contained in the second source operand, adds pairs of
504*67e74705SXin Li ///    contiguous products with signed saturation, and writes the 16-bit sums to
505*67e74705SXin Li ///    the corresponding bits in the destination. For example, bits [7:0] of
506*67e74705SXin Li ///    both operands are multiplied, bits [15:8] of both operands are
507*67e74705SXin Li ///    multiplied, and the sum of both results is written to bits [15:0] of the
508*67e74705SXin Li ///    destination.
509*67e74705SXin Li ///
510*67e74705SXin Li /// \headerfile <x86intrin.h>
511*67e74705SXin Li ///
512*67e74705SXin Li /// This intrinsic corresponds to the \c PMADDUBSW instruction.
513*67e74705SXin Li ///
514*67e74705SXin Li /// \param __a
515*67e74705SXin Li ///    A 64-bit integer vector containing the first source operand.
516*67e74705SXin Li /// \param __b
517*67e74705SXin Li ///    A 64-bit integer vector containing the second source operand.
518*67e74705SXin Li /// \returns A 64-bit integer vector containing the sums of products of both
519*67e74705SXin Li ///    operands:
520*67e74705SXin Li ///    R0 := (__a0 * __b0) + (__a1 * __b1)
521*67e74705SXin Li ///    R1 := (__a2 * __b2) + (__a3 * __b3)
522*67e74705SXin Li ///    R2 := (__a4 * __b4) + (__a5 * __b5)
523*67e74705SXin Li ///    R3 := (__a6 * __b6) + (__a7 * __b7)
524*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_maddubs_pi16(__m64 __a,__m64 __b)525*67e74705SXin Li _mm_maddubs_pi16(__m64 __a, __m64 __b)
526*67e74705SXin Li {
527*67e74705SXin Li     return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b);
528*67e74705SXin Li }
529*67e74705SXin Li 
530*67e74705SXin Li /// \brief Multiplies packed 16-bit signed integer values, truncates the 32-bit
531*67e74705SXin Li ///    products to the 18 most significant bits by right-shifting, rounds the
532*67e74705SXin Li ///    truncated value by adding 1, and writes bits [16:1] to the destination.
533*67e74705SXin Li ///
534*67e74705SXin Li /// \headerfile <x86intrin.h>
535*67e74705SXin Li ///
536*67e74705SXin Li /// This intrinsic corresponds to the \c VPMULHRSW instruction.
537*67e74705SXin Li ///
538*67e74705SXin Li /// \param __a
539*67e74705SXin Li ///    A 128-bit vector of [8 x i16] containing one of the source operands.
540*67e74705SXin Li /// \param __b
541*67e74705SXin Li ///    A 128-bit vector of [8 x i16] containing one of the source operands.
542*67e74705SXin Li /// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled
543*67e74705SXin Li ///    products of both operands.
544*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mulhrs_epi16(__m128i __a,__m128i __b)545*67e74705SXin Li _mm_mulhrs_epi16(__m128i __a, __m128i __b)
546*67e74705SXin Li {
547*67e74705SXin Li     return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);
548*67e74705SXin Li }
549*67e74705SXin Li 
550*67e74705SXin Li /// \brief Multiplies packed 16-bit signed integer values, truncates the 32-bit
551*67e74705SXin Li ///    products to the 18 most significant bits by right-shifting, rounds the
552*67e74705SXin Li ///    truncated value by adding 1, and writes bits [16:1] to the destination.
553*67e74705SXin Li ///
554*67e74705SXin Li /// \headerfile <x86intrin.h>
555*67e74705SXin Li ///
556*67e74705SXin Li /// This intrinsic corresponds to the \c PMULHRSW instruction.
557*67e74705SXin Li ///
558*67e74705SXin Li /// \param __a
559*67e74705SXin Li ///    A 64-bit vector of [4 x i16] containing one of the source operands.
560*67e74705SXin Li /// \param __b
561*67e74705SXin Li ///    A 64-bit vector of [4 x i16] containing one of the source operands.
562*67e74705SXin Li /// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled
563*67e74705SXin Li ///    products of both operands.
564*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_mulhrs_pi16(__m64 __a,__m64 __b)565*67e74705SXin Li _mm_mulhrs_pi16(__m64 __a, __m64 __b)
566*67e74705SXin Li {
567*67e74705SXin Li     return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b);
568*67e74705SXin Li }
569*67e74705SXin Li 
570*67e74705SXin Li /// \brief Copies the 8-bit integers from a 128-bit integer vector to the
571*67e74705SXin Li ///    destination or clears 8-bit values in the destination, as specified by
572*67e74705SXin Li ///    the second source operand.
573*67e74705SXin Li ///
574*67e74705SXin Li /// \headerfile <x86intrin.h>
575*67e74705SXin Li ///
576*67e74705SXin Li /// This intrinsic corresponds to the \c VPSHUFB instruction.
577*67e74705SXin Li ///
578*67e74705SXin Li /// \param __a
579*67e74705SXin Li ///    A 128-bit integer vector containing the values to be copied.
580*67e74705SXin Li /// \param __b
581*67e74705SXin Li ///    A 128-bit integer vector containing control bytes corresponding to
582*67e74705SXin Li ///    positions in the destination:
583*67e74705SXin Li ///    Bit 7:
584*67e74705SXin Li ///    1: Clear the corresponding byte in the destination.
585*67e74705SXin Li ///    0: Copy the selected source byte to the corresponding byte in the
586*67e74705SXin Li ///    destination.
587*67e74705SXin Li ///    Bits [6:4] Reserved.
588*67e74705SXin Li ///    Bits [3:0] select the source byte to be copied.
589*67e74705SXin Li /// \returns A 128-bit integer vector containing the copied or cleared values.
590*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_shuffle_epi8(__m128i __a,__m128i __b)591*67e74705SXin Li _mm_shuffle_epi8(__m128i __a, __m128i __b)
592*67e74705SXin Li {
593*67e74705SXin Li     return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b);
594*67e74705SXin Li }
595*67e74705SXin Li 
596*67e74705SXin Li /// \brief Copies the 8-bit integers from a 64-bit integer vector to the
597*67e74705SXin Li ///    destination or clears 8-bit values in the destination, as specified by
598*67e74705SXin Li ///    the second source operand.
599*67e74705SXin Li ///
600*67e74705SXin Li /// \headerfile <x86intrin.h>
601*67e74705SXin Li ///
602*67e74705SXin Li /// This intrinsic corresponds to the \c PSHUFB instruction.
603*67e74705SXin Li ///
604*67e74705SXin Li /// \param __a
605*67e74705SXin Li ///    A 64-bit integer vector containing the values to be copied.
606*67e74705SXin Li /// \param __b
607*67e74705SXin Li ///    A 64-bit integer vector containing control bytes corresponding to
608*67e74705SXin Li ///    positions in the destination:
609*67e74705SXin Li ///    Bit 7:
610*67e74705SXin Li ///    1: Clear the corresponding byte in the destination.
611*67e74705SXin Li ///    0: Copy the selected source byte to the corresponding byte in the
612*67e74705SXin Li ///    destination.
613*67e74705SXin Li ///    Bits [3:0] select the source byte to be copied.
614*67e74705SXin Li /// \returns A 64-bit integer vector containing the copied or cleared values.
615*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_shuffle_pi8(__m64 __a,__m64 __b)616*67e74705SXin Li _mm_shuffle_pi8(__m64 __a, __m64 __b)
617*67e74705SXin Li {
618*67e74705SXin Li     return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b);
619*67e74705SXin Li }
620*67e74705SXin Li 
621*67e74705SXin Li /// \brief For each 8-bit integer in the first source operand, perform one of
622*67e74705SXin Li ///    the following actions as specified by the second source operand: If the
623*67e74705SXin Li ///    byte in the second source is negative, calculate the two's complement of
624*67e74705SXin Li ///    the corresponding byte in the first source, and write that value to the
625*67e74705SXin Li ///    destination. If the byte in the second source is positive, copy the
626*67e74705SXin Li ///    corresponding byte from the first source to the destination. If the byte
627*67e74705SXin Li ///    in the second source is zero, clear the corresponding byte in the
628*67e74705SXin Li ///    destination.
629*67e74705SXin Li ///
630*67e74705SXin Li /// \headerfile <x86intrin.h>
631*67e74705SXin Li ///
632*67e74705SXin Li /// This intrinsic corresponds to the \c VPSIGNB instruction.
633*67e74705SXin Li ///
634*67e74705SXin Li /// \param __a
635*67e74705SXin Li ///    A 128-bit integer vector containing the values to be copied.
636*67e74705SXin Li /// \param __b
637*67e74705SXin Li ///    A 128-bit integer vector containing control bytes corresponding to
638*67e74705SXin Li ///    positions in the destination.
639*67e74705SXin Li /// \returns A 128-bit integer vector containing the resultant values.
640*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sign_epi8(__m128i __a,__m128i __b)641*67e74705SXin Li _mm_sign_epi8(__m128i __a, __m128i __b)
642*67e74705SXin Li {
643*67e74705SXin Li     return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);
644*67e74705SXin Li }
645*67e74705SXin Li 
646*67e74705SXin Li /// \brief For each 16-bit integer in the first source operand, perform one of
647*67e74705SXin Li ///    the following actions as specified by the second source operand: If the
648*67e74705SXin Li ///    word in the second source is negative, calculate the two's complement of
649*67e74705SXin Li ///    the corresponding word in the first source, and write that value to the
650*67e74705SXin Li ///    destination. If the word in the second source is positive, copy the
651*67e74705SXin Li ///    corresponding word from the first source to the destination. If the word
652*67e74705SXin Li ///    in the second source is zero, clear the corresponding word in the
653*67e74705SXin Li ///    destination.
654*67e74705SXin Li ///
655*67e74705SXin Li /// \headerfile <x86intrin.h>
656*67e74705SXin Li ///
657*67e74705SXin Li /// This intrinsic corresponds to the \c VPSIGNW instruction.
658*67e74705SXin Li ///
659*67e74705SXin Li /// \param __a
660*67e74705SXin Li ///    A 128-bit integer vector containing the values to be copied.
661*67e74705SXin Li /// \param __b
662*67e74705SXin Li ///    A 128-bit integer vector containing control words corresponding to
663*67e74705SXin Li ///    positions in the destination.
664*67e74705SXin Li /// \returns A 128-bit integer vector containing the resultant values.
665*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sign_epi16(__m128i __a,__m128i __b)666*67e74705SXin Li _mm_sign_epi16(__m128i __a, __m128i __b)
667*67e74705SXin Li {
668*67e74705SXin Li     return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);
669*67e74705SXin Li }
670*67e74705SXin Li 
671*67e74705SXin Li /// \brief For each 32-bit integer in the first source operand, perform one of
672*67e74705SXin Li ///    the following actions as specified by the second source operand: If the
673*67e74705SXin Li ///    doubleword in the second source is negative, calculate the two's
674*67e74705SXin Li ///    complement of the corresponding word in the first source, and write that
675*67e74705SXin Li ///    value to the destination. If the doubleword in the second source is
676*67e74705SXin Li ///    positive, copy the corresponding word from the first source to the
677*67e74705SXin Li ///    destination. If the doubleword in the second source is zero, clear the
678*67e74705SXin Li ///    corresponding word in the destination.
679*67e74705SXin Li ///
680*67e74705SXin Li /// \headerfile <x86intrin.h>
681*67e74705SXin Li ///
682*67e74705SXin Li /// This intrinsic corresponds to the \c VPSIGND instruction.
683*67e74705SXin Li ///
684*67e74705SXin Li /// \param __a
685*67e74705SXin Li ///    A 128-bit integer vector containing the values to be copied.
686*67e74705SXin Li /// \param __b
687*67e74705SXin Li ///    A 128-bit integer vector containing control doublewords corresponding to
688*67e74705SXin Li ///    positions in the destination.
689*67e74705SXin Li /// \returns A 128-bit integer vector containing the resultant values.
690*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sign_epi32(__m128i __a,__m128i __b)691*67e74705SXin Li _mm_sign_epi32(__m128i __a, __m128i __b)
692*67e74705SXin Li {
693*67e74705SXin Li     return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);
694*67e74705SXin Li }
695*67e74705SXin Li 
696*67e74705SXin Li /// \brief For each 8-bit integer in the first source operand, perform one of
697*67e74705SXin Li ///    the following actions as specified by the second source operand: If the
698*67e74705SXin Li ///    byte in the second source is negative, calculate the two's complement of
699*67e74705SXin Li ///    the corresponding byte in the first source, and write that value to the
700*67e74705SXin Li ///    destination. If the byte in the second source is positive, copy the
701*67e74705SXin Li ///    corresponding byte from the first source to the destination. If the byte
702*67e74705SXin Li ///    in the second source is zero, clear the corresponding byte in the
703*67e74705SXin Li ///    destination.
704*67e74705SXin Li ///
705*67e74705SXin Li /// \headerfile <x86intrin.h>
706*67e74705SXin Li ///
707*67e74705SXin Li /// This intrinsic corresponds to the \c PSIGNB instruction.
708*67e74705SXin Li ///
709*67e74705SXin Li /// \param __a
710*67e74705SXin Li ///    A 64-bit integer vector containing the values to be copied.
711*67e74705SXin Li /// \param __b
712*67e74705SXin Li ///    A 64-bit integer vector containing control bytes corresponding to
713*67e74705SXin Li ///    positions in the destination.
714*67e74705SXin Li /// \returns A 64-bit integer vector containing the resultant values.
715*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_sign_pi8(__m64 __a,__m64 __b)716*67e74705SXin Li _mm_sign_pi8(__m64 __a, __m64 __b)
717*67e74705SXin Li {
718*67e74705SXin Li     return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b);
719*67e74705SXin Li }
720*67e74705SXin Li 
721*67e74705SXin Li /// \brief For each 16-bit integer in the first source operand, perform one of
722*67e74705SXin Li ///    the following actions as specified by the second source operand: If the
723*67e74705SXin Li ///    word in the second source is negative, calculate the two's complement of
724*67e74705SXin Li ///    the corresponding word in the first source, and write that value to the
725*67e74705SXin Li ///    destination. If the word in the second source is positive, copy the
726*67e74705SXin Li ///    corresponding word from the first source to the destination. If the word
727*67e74705SXin Li ///    in the second source is zero, clear the corresponding word in the
728*67e74705SXin Li ///    destination.
729*67e74705SXin Li ///
730*67e74705SXin Li /// \headerfile <x86intrin.h>
731*67e74705SXin Li ///
732*67e74705SXin Li /// This intrinsic corresponds to the \c PSIGNW instruction.
733*67e74705SXin Li ///
734*67e74705SXin Li /// \param __a
735*67e74705SXin Li ///    A 64-bit integer vector containing the values to be copied.
736*67e74705SXin Li /// \param __b
737*67e74705SXin Li ///    A 64-bit integer vector containing control words corresponding to
738*67e74705SXin Li ///    positions in the destination.
739*67e74705SXin Li /// \returns A 64-bit integer vector containing the resultant values.
740*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_sign_pi16(__m64 __a,__m64 __b)741*67e74705SXin Li _mm_sign_pi16(__m64 __a, __m64 __b)
742*67e74705SXin Li {
743*67e74705SXin Li     return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b);
744*67e74705SXin Li }
745*67e74705SXin Li 
746*67e74705SXin Li /// \brief For each 32-bit integer in the first source operand, perform one of
747*67e74705SXin Li ///    the following actions as specified by the second source operand: If the
748*67e74705SXin Li ///    doubleword in the second source is negative, calculate the two's
749*67e74705SXin Li ///    complement of the corresponding doubleword in the first source, and
750*67e74705SXin Li ///    write that value to the destination. If the doubleword in the second
751*67e74705SXin Li ///    source is positive, copy the corresponding doubleword from the first
752*67e74705SXin Li ///    source to the destination. If the doubleword in the second source is
753*67e74705SXin Li ///    zero, clear the corresponding doubleword in the destination.
754*67e74705SXin Li ///
755*67e74705SXin Li /// \headerfile <x86intrin.h>
756*67e74705SXin Li ///
757*67e74705SXin Li /// This intrinsic corresponds to the \c PSIGND instruction.
758*67e74705SXin Li ///
759*67e74705SXin Li /// \param __a
760*67e74705SXin Li ///    A 64-bit integer vector containing the values to be copied.
761*67e74705SXin Li /// \param __b
762*67e74705SXin Li ///    A 64-bit integer vector containing two control doublewords corresponding
763*67e74705SXin Li ///    to positions in the destination.
764*67e74705SXin Li /// \returns A 64-bit integer vector containing the resultant values.
765*67e74705SXin Li static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_sign_pi32(__m64 __a,__m64 __b)766*67e74705SXin Li _mm_sign_pi32(__m64 __a, __m64 __b)
767*67e74705SXin Li {
768*67e74705SXin Li     return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b);
769*67e74705SXin Li }
770*67e74705SXin Li 
771*67e74705SXin Li #undef __DEFAULT_FN_ATTRS
772*67e74705SXin Li 
773*67e74705SXin Li #endif /* __TMMINTRIN_H */
774