/*===---- smmintrin.h - SSE4 intrinsics ------------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

10*bed243d3SAndroid Build Coastguard Worker #ifndef __SMMINTRIN_H
11*bed243d3SAndroid Build Coastguard Worker #define __SMMINTRIN_H
12*bed243d3SAndroid Build Coastguard Worker
13*bed243d3SAndroid Build Coastguard Worker #if !defined(__i386__) && !defined(__x86_64__)
14*bed243d3SAndroid Build Coastguard Worker #error "This header is only meant to be used on x86 and x64 architecture"
15*bed243d3SAndroid Build Coastguard Worker #endif
16*bed243d3SAndroid Build Coastguard Worker
17*bed243d3SAndroid Build Coastguard Worker #include <tmmintrin.h>
18*bed243d3SAndroid Build Coastguard Worker
/* Define the default attributes for the functions in this file:
 *   __always_inline__          - always inline the function into its caller
 *   __nodebug__                - emit no debug information for it
 *   __target__("sse4.1,...")   - compile it with SSE4.1 enabled and evex512
 *                                disabled, independent of the command line
 *   __min_vector_width__(128)  - declare a minimum vector width of 128 bits
 */
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("sse4.1,no-evex512"), __min_vector_width__(128)))

/* SSE4 Rounding macros.
 *
 * These are values for the rounding-control argument M of the _mm_round_*
 * macros in this header.
 */

/* Bits [1:0]: explicit rounding mode. Bit [2]: use the current MXCSR rounding
 * mode instead of bits [1:0]. */
#define _MM_FROUND_TO_NEAREST_INT 0x00
#define _MM_FROUND_TO_NEG_INF 0x01
#define _MM_FROUND_TO_POS_INF 0x02
#define _MM_FROUND_TO_ZERO 0x03
#define _MM_FROUND_CUR_DIRECTION 0x04

/* Bit [3]: precision-exception control (0 = normal PE exception, 1 = the PE
 * field is not updated). */
#define _MM_FROUND_RAISE_EXC 0x00
#define _MM_FROUND_NO_EXC 0x08

/* Ready-made combinations of an exception setting and a rounding mode. */
#define _MM_FROUND_NINT (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEAREST_INT)
#define _MM_FROUND_FLOOR (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEG_INF)
#define _MM_FROUND_CEIL (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_POS_INF)
#define _MM_FROUND_TRUNC (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_ZERO)
#define _MM_FROUND_RINT (_MM_FROUND_RAISE_EXC | _MM_FROUND_CUR_DIRECTION)
#define _MM_FROUND_NEARBYINT (_MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTION)

/// Rounds up each element of the 128-bit vector of [4 x float] to an
///    integer and returns the rounded values in a 128-bit vector of
///    [4 x float].
///
///    Equivalent to <c> _mm_round_ps(X, _MM_FROUND_CEIL) </c>.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128 _mm_ceil_ps(__m128 X);
/// \endcode
///
/// This intrinsic corresponds to the <c> VROUNDPS / ROUNDPS </c> instruction.
///
/// \param X
///    A 128-bit vector of [4 x float] values to be rounded up.
/// \returns A 128-bit vector of [4 x float] containing the rounded values.
#define _mm_ceil_ps(X) _mm_round_ps((X), _MM_FROUND_CEIL)

/// Rounds up each element of the 128-bit vector of [2 x double] to an
///    integer and returns the rounded values in a 128-bit vector of
///    [2 x double].
///
///    Equivalent to <c> _mm_round_pd(X, _MM_FROUND_CEIL) </c>.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128d _mm_ceil_pd(__m128d X);
/// \endcode
///
/// This intrinsic corresponds to the <c> VROUNDPD / ROUNDPD </c> instruction.
///
/// \param X
///    A 128-bit vector of [2 x double] values to be rounded up.
/// \returns A 128-bit vector of [2 x double] containing the rounded values.
#define _mm_ceil_pd(X) _mm_round_pd((X), _MM_FROUND_CEIL)

/// Copies three upper elements of the first 128-bit vector operand to
///    the corresponding three upper elements of the 128-bit result vector of
///    [4 x float]. Rounds up the lowest element of the second 128-bit vector
///    operand to an integer and copies it to the lowest element of the 128-bit
///    result vector of [4 x float].
///
///    Equivalent to <c> _mm_round_ss(X, Y, _MM_FROUND_CEIL) </c>.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128 _mm_ceil_ss(__m128 X, __m128 Y);
/// \endcode
///
/// This intrinsic corresponds to the <c> VROUNDSS / ROUNDSS </c> instruction.
///
/// \param X
///    A 128-bit vector of [4 x float]. The values stored in bits [127:32] are
///    copied to the corresponding bits of the result.
/// \param Y
///    A 128-bit vector of [4 x float]. The value stored in bits [31:0] is
///    rounded up to the nearest integer and copied to the corresponding bits
///    of the result.
/// \returns A 128-bit vector of [4 x float] containing the copied and rounded
///    values.
#define _mm_ceil_ss(X, Y) _mm_round_ss((X), (Y), _MM_FROUND_CEIL)

/// Copies the upper element of the first 128-bit vector operand to the
///    corresponding upper element of the 128-bit result vector of [2 x double].
///    Rounds up the lower element of the second 128-bit vector operand to an
///    integer and copies it to the lower element of the 128-bit result vector
///    of [2 x double].
///
///    Equivalent to <c> _mm_round_sd(X, Y, _MM_FROUND_CEIL) </c>.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128d _mm_ceil_sd(__m128d X, __m128d Y);
/// \endcode
///
/// This intrinsic corresponds to the <c> VROUNDSD / ROUNDSD </c> instruction.
///
/// \param X
///    A 128-bit vector of [2 x double]. The value stored in bits [127:64] is
///    copied to the corresponding bits of the result.
/// \param Y
///    A 128-bit vector of [2 x double]. The value stored in bits [63:0] is
///    rounded up to the nearest integer and copied to the corresponding bits
///    of the result.
/// \returns A 128-bit vector of [2 x double] containing the copied and rounded
///    values.
#define _mm_ceil_sd(X, Y) _mm_round_sd((X), (Y), _MM_FROUND_CEIL)

/// Rounds down each element of the 128-bit vector of [4 x float] to an
///    integer and returns the rounded values in a 128-bit vector of
///    [4 x float].
///
///    Equivalent to <c> _mm_round_ps(X, _MM_FROUND_FLOOR) </c>.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128 _mm_floor_ps(__m128 X);
/// \endcode
///
/// This intrinsic corresponds to the <c> VROUNDPS / ROUNDPS </c> instruction.
///
/// \param X
///    A 128-bit vector of [4 x float] values to be rounded down.
/// \returns A 128-bit vector of [4 x float] containing the rounded values.
#define _mm_floor_ps(X) _mm_round_ps((X), _MM_FROUND_FLOOR)

/// Rounds down each element of the 128-bit vector of [2 x double] to an
///    integer and returns the rounded values in a 128-bit vector of
///    [2 x double].
///
///    Equivalent to <c> _mm_round_pd(X, _MM_FROUND_FLOOR) </c>.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128d _mm_floor_pd(__m128d X);
/// \endcode
///
/// This intrinsic corresponds to the <c> VROUNDPD / ROUNDPD </c> instruction.
///
/// \param X
///    A 128-bit vector of [2 x double] values to be rounded down.
/// \returns A 128-bit vector of [2 x double] containing the rounded values.
#define _mm_floor_pd(X) _mm_round_pd((X), _MM_FROUND_FLOOR)

/// Copies three upper elements of the first 128-bit vector operand to
///    the corresponding three upper elements of the 128-bit result vector of
///    [4 x float]. Rounds down the lowest element of the second 128-bit vector
///    operand to an integer and copies it to the lowest element of the 128-bit
///    result vector of [4 x float].
///
///    Equivalent to <c> _mm_round_ss(X, Y, _MM_FROUND_FLOOR) </c>.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128 _mm_floor_ss(__m128 X, __m128 Y);
/// \endcode
///
/// This intrinsic corresponds to the <c> VROUNDSS / ROUNDSS </c> instruction.
///
/// \param X
///    A 128-bit vector of [4 x float]. The values stored in bits [127:32] are
///    copied to the corresponding bits of the result.
/// \param Y
///    A 128-bit vector of [4 x float]. The value stored in bits [31:0] is
///    rounded down to the nearest integer and copied to the corresponding bits
///    of the result.
/// \returns A 128-bit vector of [4 x float] containing the copied and rounded
///    values.
#define _mm_floor_ss(X, Y) _mm_round_ss((X), (Y), _MM_FROUND_FLOOR)

/// Copies the upper element of the first 128-bit vector operand to the
///    corresponding upper element of the 128-bit result vector of [2 x double].
///    Rounds down the lower element of the second 128-bit vector operand to an
///    integer and copies it to the lower element of the 128-bit result vector
///    of [2 x double].
///
///    Equivalent to <c> _mm_round_sd(X, Y, _MM_FROUND_FLOOR) </c>.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128d _mm_floor_sd(__m128d X, __m128d Y);
/// \endcode
///
/// This intrinsic corresponds to the <c> VROUNDSD / ROUNDSD </c> instruction.
///
/// \param X
///    A 128-bit vector of [2 x double]. The value stored in bits [127:64] is
///    copied to the corresponding bits of the result.
/// \param Y
///    A 128-bit vector of [2 x double]. The value stored in bits [63:0] is
///    rounded down to the nearest integer and copied to the corresponding bits
///    of the result.
/// \returns A 128-bit vector of [2 x double] containing the copied and rounded
///    values.
#define _mm_floor_sd(X, Y) _mm_round_sd((X), (Y), _MM_FROUND_FLOOR)

/// Rounds each element of the 128-bit vector of [4 x float] to an
///    integer value according to the rounding control specified by the second
///    argument and returns the rounded values in a 128-bit vector of
///    [4 x float].
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128 _mm_round_ps(__m128 X, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VROUNDPS / ROUNDPS </c> instruction.
///
/// \param X
///    A 128-bit vector of [4 x float].
/// \param M
///    An integer value that specifies the rounding operation. The
///    <c> _MM_FROUND_* </c> macros in this header name these values. \n
///    Bits [7:4] are reserved. \n
///    Bit [3] is a precision exception value: \n
///      0: A normal PE exception is used \n
///      1: The PE field is not updated \n
///    Bit [2] is the rounding control source: \n
///      0: Use bits [1:0] of \a M \n
///      1: Use the current MXCSR setting \n
///    Bits [1:0] contain the rounding control definition: \n
///      00: Nearest \n
///      01: Downward (toward negative infinity) \n
///      10: Upward (toward positive infinity) \n
///      11: Truncated
/// \returns A 128-bit vector of [4 x float] containing the rounded values.
#define _mm_round_ps(X, M)                                                     \
  ((__m128)__builtin_ia32_roundps((__v4sf)(__m128)(X), (M)))

/// Copies three upper elements of the first 128-bit vector operand to
///    the corresponding three upper elements of the 128-bit result vector of
///    [4 x float]. Rounds the lowest element of the second 128-bit vector
///    operand to an integer value according to the rounding control specified
///    by the third argument and copies it to the lowest element of the 128-bit
///    result vector of [4 x float].
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128 _mm_round_ss(__m128 X, __m128 Y, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VROUNDSS / ROUNDSS </c> instruction.
///
/// \param X
///    A 128-bit vector of [4 x float]. The values stored in bits [127:32] are
///    copied to the corresponding bits of the result.
/// \param Y
///    A 128-bit vector of [4 x float]. The value stored in bits [31:0] is
///    rounded to an integer value using the specified rounding control and
///    copied to the corresponding bits of the result.
/// \param M
///    An integer value that specifies the rounding operation. The
///    <c> _MM_FROUND_* </c> macros in this header name these values. \n
///    Bits [7:4] are reserved. \n
///    Bit [3] is a precision exception value: \n
///      0: A normal PE exception is used \n
///      1: The PE field is not updated \n
///    Bit [2] is the rounding control source: \n
///      0: Use bits [1:0] of \a M \n
///      1: Use the current MXCSR setting \n
///    Bits [1:0] contain the rounding control definition: \n
///      00: Nearest \n
///      01: Downward (toward negative infinity) \n
///      10: Upward (toward positive infinity) \n
///      11: Truncated
/// \returns A 128-bit vector of [4 x float] containing the copied and rounded
///    values.
#define _mm_round_ss(X, Y, M)                                                  \
  ((__m128)__builtin_ia32_roundss((__v4sf)(__m128)(X), (__v4sf)(__m128)(Y),    \
                                  (M)))

/// Rounds each element of the 128-bit vector of [2 x double] to an
///    integer value according to the rounding control specified by the second
///    argument and returns the rounded values in a 128-bit vector of
///    [2 x double].
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128d _mm_round_pd(__m128d X, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VROUNDPD / ROUNDPD </c> instruction.
///
/// \param X
///    A 128-bit vector of [2 x double].
/// \param M
///    An integer value that specifies the rounding operation. The
///    <c> _MM_FROUND_* </c> macros in this header name these values. \n
///    Bits [7:4] are reserved. \n
///    Bit [3] is a precision exception value: \n
///      0: A normal PE exception is used \n
///      1: The PE field is not updated \n
///    Bit [2] is the rounding control source: \n
///      0: Use bits [1:0] of \a M \n
///      1: Use the current MXCSR setting \n
///    Bits [1:0] contain the rounding control definition: \n
///      00: Nearest \n
///      01: Downward (toward negative infinity) \n
///      10: Upward (toward positive infinity) \n
///      11: Truncated
/// \returns A 128-bit vector of [2 x double] containing the rounded values.
#define _mm_round_pd(X, M)                                                     \
  ((__m128d)__builtin_ia32_roundpd((__v2df)(__m128d)(X), (M)))

/// Copies the upper element of the first 128-bit vector operand to the
///    corresponding upper element of the 128-bit result vector of [2 x double].
///    Rounds the lower element of the second 128-bit vector operand to an
///    integer value according to the rounding control specified by the third
///    argument and copies it to the lower element of the 128-bit result vector
///    of [2 x double].
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128d _mm_round_sd(__m128d X, __m128d Y, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VROUNDSD / ROUNDSD </c> instruction.
///
/// \param X
///    A 128-bit vector of [2 x double]. The value stored in bits [127:64] is
///    copied to the corresponding bits of the result.
/// \param Y
///    A 128-bit vector of [2 x double]. The value stored in bits [63:0] is
///    rounded to an integer value using the specified rounding control and
///    copied to the corresponding bits of the result.
/// \param M
///    An integer value that specifies the rounding operation. The
///    <c> _MM_FROUND_* </c> macros in this header name these values. \n
///    Bits [7:4] are reserved. \n
///    Bit [3] is a precision exception value: \n
///      0: A normal PE exception is used \n
///      1: The PE field is not updated \n
///    Bit [2] is the rounding control source: \n
///      0: Use bits [1:0] of \a M \n
///      1: Use the current MXCSR setting \n
///    Bits [1:0] contain the rounding control definition: \n
///      00: Nearest \n
///      01: Downward (toward negative infinity) \n
///      10: Upward (toward positive infinity) \n
///      11: Truncated
/// \returns A 128-bit vector of [2 x double] containing the copied and rounded
///    values.
#define _mm_round_sd(X, Y, M)                                                  \
  ((__m128d)__builtin_ia32_roundsd((__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), \
                                   (M)))

/* SSE4 Packed Blending Intrinsics. */
/// Returns a 128-bit vector of [2 x double] where the values are
///    selected from either the first or second operand as specified by the
///    third operand, the control mask.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128d _mm_blend_pd(__m128d V1, __m128d V2, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VBLENDPD / BLENDPD </c> instruction.
///
/// \param V1
///    A 128-bit vector of [2 x double].
/// \param V2
///    A 128-bit vector of [2 x double].
/// \param M
///    An immediate integer operand, with mask bits [1:0] specifying how the
///    values are to be copied. The position of the mask bit corresponds to the
///    index of a copied value. When a mask bit is 0, the corresponding 64-bit
///    element in operand \a V1 is copied to the same position in the result.
///    When a mask bit is 1, the corresponding 64-bit element in operand \a V2
///    is copied to the same position in the result.
/// \returns A 128-bit vector of [2 x double] containing the copied values.
#define _mm_blend_pd(V1, V2, M)                                                \
  ((__m128d)__builtin_ia32_blendpd((__v2df)(__m128d)(V1),                      \
                                   (__v2df)(__m128d)(V2), (int)(M)))

/// Builds a 128-bit vector of [4 x float] by taking each element from
///    either the first or the second operand, as directed by the control
///    mask supplied in the third operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128 _mm_blend_ps(__m128 V1, __m128 V2, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VBLENDPS / BLENDPS </c> instruction.
///
/// \param V1
///    A 128-bit vector of [4 x float].
/// \param V2
///    A 128-bit vector of [4 x float].
/// \param M
///    An immediate integer operand whose mask bits [3:0] select the source of
///    each copied value. The position of a mask bit is the index of the value
///    it controls. A mask bit of 0 copies the corresponding 32-bit element of
///    \a V1 to the same position in the result; a mask bit of 1 copies the
///    corresponding 32-bit element of \a V2 to the same position in the
///    result.
/// \returns A 128-bit vector of [4 x float] containing the copied values.
#define _mm_blend_ps(V1, V2, M) \
  ((__m128)__builtin_ia32_blendps( \
      (__v4sf)(__m128)(V1), (__v4sf)(__m128)(V2), (int)(M)))
415*bed243d3SAndroid Build Coastguard Worker
416*bed243d3SAndroid Build Coastguard Worker /// Returns a 128-bit vector of [2 x double] where the values are
417*bed243d3SAndroid Build Coastguard Worker /// selected from either the first or second operand as specified by the
418*bed243d3SAndroid Build Coastguard Worker /// third operand, the control mask.
419*bed243d3SAndroid Build Coastguard Worker ///
420*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
421*bed243d3SAndroid Build Coastguard Worker ///
422*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VBLENDVPD / BLENDVPD </c> instruction.
423*bed243d3SAndroid Build Coastguard Worker ///
424*bed243d3SAndroid Build Coastguard Worker /// \param __V1
425*bed243d3SAndroid Build Coastguard Worker /// A 128-bit vector of [2 x double].
426*bed243d3SAndroid Build Coastguard Worker /// \param __V2
427*bed243d3SAndroid Build Coastguard Worker /// A 128-bit vector of [2 x double].
428*bed243d3SAndroid Build Coastguard Worker /// \param __M
429*bed243d3SAndroid Build Coastguard Worker /// A 128-bit vector operand, with mask bits 127 and 63 specifying how the
430*bed243d3SAndroid Build Coastguard Worker /// values are to be copied. The position of the mask bit corresponds to the
431*bed243d3SAndroid Build Coastguard Worker /// most significant bit of a copied value. When a mask bit is 0, the
432*bed243d3SAndroid Build Coastguard Worker /// corresponding 64-bit element in operand \a __V1 is copied to the same
433*bed243d3SAndroid Build Coastguard Worker /// position in the result. When a mask bit is 1, the corresponding 64-bit
434*bed243d3SAndroid Build Coastguard Worker /// element in operand \a __V2 is copied to the same position in the result.
435*bed243d3SAndroid Build Coastguard Worker /// \returns A 128-bit vector of [2 x double] containing the copied values.
_mm_blendv_pd(__m128d __V1,__m128d __V2,__m128d __M)436*bed243d3SAndroid Build Coastguard Worker static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_blendv_pd(__m128d __V1,
437*bed243d3SAndroid Build Coastguard Worker __m128d __V2,
438*bed243d3SAndroid Build Coastguard Worker __m128d __M) {
439*bed243d3SAndroid Build Coastguard Worker return (__m128d)__builtin_ia32_blendvpd((__v2df)__V1, (__v2df)__V2,
440*bed243d3SAndroid Build Coastguard Worker (__v2df)__M);
441*bed243d3SAndroid Build Coastguard Worker }
442*bed243d3SAndroid Build Coastguard Worker
443*bed243d3SAndroid Build Coastguard Worker /// Returns a 128-bit vector of [4 x float] where the values are
444*bed243d3SAndroid Build Coastguard Worker /// selected from either the first or second operand as specified by the
445*bed243d3SAndroid Build Coastguard Worker /// third operand, the control mask.
446*bed243d3SAndroid Build Coastguard Worker ///
447*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
448*bed243d3SAndroid Build Coastguard Worker ///
449*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VBLENDVPS / BLENDVPS </c> instruction.
450*bed243d3SAndroid Build Coastguard Worker ///
451*bed243d3SAndroid Build Coastguard Worker /// \param __V1
452*bed243d3SAndroid Build Coastguard Worker /// A 128-bit vector of [4 x float].
453*bed243d3SAndroid Build Coastguard Worker /// \param __V2
454*bed243d3SAndroid Build Coastguard Worker /// A 128-bit vector of [4 x float].
455*bed243d3SAndroid Build Coastguard Worker /// \param __M
456*bed243d3SAndroid Build Coastguard Worker /// A 128-bit vector operand, with mask bits 127, 95, 63, and 31 specifying
457*bed243d3SAndroid Build Coastguard Worker /// how the values are to be copied. The position of the mask bit corresponds
458*bed243d3SAndroid Build Coastguard Worker /// to the most significant bit of a copied value. When a mask bit is 0, the
459*bed243d3SAndroid Build Coastguard Worker /// corresponding 32-bit element in operand \a __V1 is copied to the same
460*bed243d3SAndroid Build Coastguard Worker /// position in the result. When a mask bit is 1, the corresponding 32-bit
461*bed243d3SAndroid Build Coastguard Worker /// element in operand \a __V2 is copied to the same position in the result.
462*bed243d3SAndroid Build Coastguard Worker /// \returns A 128-bit vector of [4 x float] containing the copied values.
_mm_blendv_ps(__m128 __V1,__m128 __V2,__m128 __M)463*bed243d3SAndroid Build Coastguard Worker static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_blendv_ps(__m128 __V1,
464*bed243d3SAndroid Build Coastguard Worker __m128 __V2,
465*bed243d3SAndroid Build Coastguard Worker __m128 __M) {
466*bed243d3SAndroid Build Coastguard Worker return (__m128)__builtin_ia32_blendvps((__v4sf)__V1, (__v4sf)__V2,
467*bed243d3SAndroid Build Coastguard Worker (__v4sf)__M);
468*bed243d3SAndroid Build Coastguard Worker }
469*bed243d3SAndroid Build Coastguard Worker
470*bed243d3SAndroid Build Coastguard Worker /// Returns a 128-bit vector of [16 x i8] where the values are selected
471*bed243d3SAndroid Build Coastguard Worker /// from either of the first or second operand as specified by the third
472*bed243d3SAndroid Build Coastguard Worker /// operand, the control mask.
473*bed243d3SAndroid Build Coastguard Worker ///
474*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
475*bed243d3SAndroid Build Coastguard Worker ///
476*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPBLENDVB / PBLENDVB </c> instruction.
477*bed243d3SAndroid Build Coastguard Worker ///
478*bed243d3SAndroid Build Coastguard Worker /// \param __V1
479*bed243d3SAndroid Build Coastguard Worker /// A 128-bit vector of [16 x i8].
480*bed243d3SAndroid Build Coastguard Worker /// \param __V2
481*bed243d3SAndroid Build Coastguard Worker /// A 128-bit vector of [16 x i8].
482*bed243d3SAndroid Build Coastguard Worker /// \param __M
483*bed243d3SAndroid Build Coastguard Worker /// A 128-bit vector operand, with mask bits 127, 119, 111...7 specifying
484*bed243d3SAndroid Build Coastguard Worker /// how the values are to be copied. The position of the mask bit corresponds
485*bed243d3SAndroid Build Coastguard Worker /// to the most significant bit of a copied value. When a mask bit is 0, the
486*bed243d3SAndroid Build Coastguard Worker /// corresponding 8-bit element in operand \a __V1 is copied to the same
487*bed243d3SAndroid Build Coastguard Worker /// position in the result. When a mask bit is 1, the corresponding 8-bit
488*bed243d3SAndroid Build Coastguard Worker /// element in operand \a __V2 is copied to the same position in the result.
489*bed243d3SAndroid Build Coastguard Worker /// \returns A 128-bit vector of [16 x i8] containing the copied values.
_mm_blendv_epi8(__m128i __V1,__m128i __V2,__m128i __M)490*bed243d3SAndroid Build Coastguard Worker static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_blendv_epi8(__m128i __V1,
491*bed243d3SAndroid Build Coastguard Worker __m128i __V2,
492*bed243d3SAndroid Build Coastguard Worker __m128i __M) {
493*bed243d3SAndroid Build Coastguard Worker return (__m128i)__builtin_ia32_pblendvb128((__v16qi)__V1, (__v16qi)__V2,
494*bed243d3SAndroid Build Coastguard Worker (__v16qi)__M);
495*bed243d3SAndroid Build Coastguard Worker }
496*bed243d3SAndroid Build Coastguard Worker
/// Builds a 128-bit vector of [8 x i16] by taking each element from
///    either the first or the second operand, as directed by the control
///    mask supplied in the third operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_blend_epi16(__m128i V1, __m128i V2, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPBLENDW / PBLENDW </c> instruction.
///
/// \param V1
///    A 128-bit vector of [8 x i16].
/// \param V2
///    A 128-bit vector of [8 x i16].
/// \param M
///    An immediate integer operand whose mask bits [7:0] select the source of
///    each copied value. The position of a mask bit is the index of the value
///    it controls. A mask bit of 0 copies the corresponding 16-bit element of
///    \a V1 to the same position in the result; a mask bit of 1 copies the
///    corresponding 16-bit element of \a V2 to the same position in the
///    result.
/// \returns A 128-bit vector of [8 x i16] containing the copied values.
#define _mm_blend_epi16(V1, V2, M) \
  ((__m128i)__builtin_ia32_pblendw128( \
      (__v8hi)(__m128i)(V1), (__v8hi)(__m128i)(V2), (int)(M)))
524*bed243d3SAndroid Build Coastguard Worker
525*bed243d3SAndroid Build Coastguard Worker /* SSE4 Dword Multiply Instructions. */
526*bed243d3SAndroid Build Coastguard Worker /// Multiples corresponding elements of two 128-bit vectors of [4 x i32]
527*bed243d3SAndroid Build Coastguard Worker /// and returns the lower 32 bits of the each product in a 128-bit vector of
528*bed243d3SAndroid Build Coastguard Worker /// [4 x i32].
529*bed243d3SAndroid Build Coastguard Worker ///
530*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
531*bed243d3SAndroid Build Coastguard Worker ///
532*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPMULLD / PMULLD </c> instruction.
533*bed243d3SAndroid Build Coastguard Worker ///
534*bed243d3SAndroid Build Coastguard Worker /// \param __V1
535*bed243d3SAndroid Build Coastguard Worker /// A 128-bit integer vector.
536*bed243d3SAndroid Build Coastguard Worker /// \param __V2
537*bed243d3SAndroid Build Coastguard Worker /// A 128-bit integer vector.
538*bed243d3SAndroid Build Coastguard Worker /// \returns A 128-bit integer vector containing the products of both operands.
_mm_mullo_epi32(__m128i __V1,__m128i __V2)539*bed243d3SAndroid Build Coastguard Worker static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi32(__m128i __V1,
540*bed243d3SAndroid Build Coastguard Worker __m128i __V2) {
541*bed243d3SAndroid Build Coastguard Worker return (__m128i)((__v4su)__V1 * (__v4su)__V2);
542*bed243d3SAndroid Build Coastguard Worker }
543*bed243d3SAndroid Build Coastguard Worker
544*bed243d3SAndroid Build Coastguard Worker /// Multiplies corresponding even-indexed elements of two 128-bit
545*bed243d3SAndroid Build Coastguard Worker /// vectors of [4 x i32] and returns a 128-bit vector of [2 x i64]
546*bed243d3SAndroid Build Coastguard Worker /// containing the products.
547*bed243d3SAndroid Build Coastguard Worker ///
548*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
549*bed243d3SAndroid Build Coastguard Worker ///
550*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPMULDQ / PMULDQ </c> instruction.
551*bed243d3SAndroid Build Coastguard Worker ///
552*bed243d3SAndroid Build Coastguard Worker /// \param __V1
553*bed243d3SAndroid Build Coastguard Worker /// A 128-bit vector of [4 x i32].
554*bed243d3SAndroid Build Coastguard Worker /// \param __V2
555*bed243d3SAndroid Build Coastguard Worker /// A 128-bit vector of [4 x i32].
556*bed243d3SAndroid Build Coastguard Worker /// \returns A 128-bit vector of [2 x i64] containing the products of both
557*bed243d3SAndroid Build Coastguard Worker /// operands.
_mm_mul_epi32(__m128i __V1,__m128i __V2)558*bed243d3SAndroid Build Coastguard Worker static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epi32(__m128i __V1,
559*bed243d3SAndroid Build Coastguard Worker __m128i __V2) {
560*bed243d3SAndroid Build Coastguard Worker return (__m128i)__builtin_ia32_pmuldq128((__v4si)__V1, (__v4si)__V2);
561*bed243d3SAndroid Build Coastguard Worker }
562*bed243d3SAndroid Build Coastguard Worker
563*bed243d3SAndroid Build Coastguard Worker /* SSE4 Floating Point Dot Product Instructions. */
/// Computes the dot product of two 128-bit vectors of [4 x float] and
///    returns it in the elements of a 128-bit result vector of [4 x float].
///
///    The immediate integer operand chooses which input elements take part
///    in the dot product and which result elements receive the final value.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128 _mm_dp_ps(__m128 X, __m128 Y, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VDPPS / DPPS </c> instruction.
///
/// \param X
///    A 128-bit vector of [4 x float].
/// \param Y
///    A 128-bit vector of [4 x float].
/// \param M
///    An immediate integer operand. Mask bits [7:4] choose the input
///    elements: bit [4] corresponds to the lowest element and bit [7] to the
///    highest element of each [4 x float] vector. When a bit is set, the
///    corresponding elements of the two input vectors are used as an input
///    to the dot product; otherwise that input is treated as zero. Mask bits
///    [3:0] choose the result elements that receive a copy of the final dot
///    product: bit [0] corresponds to the lowest element and bit [3] to the
///    highest element of the [4 x float] result. When a bit is set, the dot
///    product is returned in the corresponding element; otherwise that
///    element is set to zero.
/// \returns A 128-bit vector of [4 x float] containing the dot product.
#define _mm_dp_ps(X, Y, M) \
  ((__m128)__builtin_ia32_dpps( \
      (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), (M)))
598*bed243d3SAndroid Build Coastguard Worker
/// Computes the dot product of two 128-bit vectors of [2 x double] and
///    returns it in the elements of a 128-bit result vector of [2 x double].
///
///    The immediate integer operand chooses which input elements take part
///    in the dot product and which result elements receive the final value.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128d _mm_dp_pd(__m128d X, __m128d Y, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VDPPD / DPPD </c> instruction.
///
/// \param X
///    A 128-bit vector of [2 x double].
/// \param Y
///    A 128-bit vector of [2 x double].
/// \param M
///    An immediate integer operand. Mask bits [5:4] choose the input
///    elements: bit [4] corresponds to the lowest element and bit [5] to the
///    highest element of each [2 x double] vector. When a bit is set, the
///    corresponding elements of the two input vectors are used as an input
///    to the dot product; otherwise that input is treated as zero. Mask bits
///    [1:0] choose the result elements that receive a copy of the final dot
///    product: bit [0] corresponds to the lowest element and bit [1] to the
///    highest element of the [2 x double] result. When a bit is set, the dot
///    product is returned in the corresponding element; otherwise that
///    element is set to zero.
/// \returns A 128-bit vector of [2 x double] containing the dot product.
#define _mm_dp_pd(X, Y, M) \
  ((__m128d)__builtin_ia32_dppd( \
      (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), (M)))
633*bed243d3SAndroid Build Coastguard Worker
634*bed243d3SAndroid Build Coastguard Worker /* SSE4 Streaming Load Hint Instruction. */
635*bed243d3SAndroid Build Coastguard Worker /// Loads integer values from a 128-bit aligned memory location to a
636*bed243d3SAndroid Build Coastguard Worker /// 128-bit integer vector.
637*bed243d3SAndroid Build Coastguard Worker ///
638*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
639*bed243d3SAndroid Build Coastguard Worker ///
640*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VMOVNTDQA / MOVNTDQA </c> instruction.
641*bed243d3SAndroid Build Coastguard Worker ///
642*bed243d3SAndroid Build Coastguard Worker /// \param __V
643*bed243d3SAndroid Build Coastguard Worker /// A pointer to a 128-bit aligned memory location that contains the integer
644*bed243d3SAndroid Build Coastguard Worker /// values.
645*bed243d3SAndroid Build Coastguard Worker /// \returns A 128-bit integer vector containing the data stored at the
646*bed243d3SAndroid Build Coastguard Worker /// specified memory location.
647*bed243d3SAndroid Build Coastguard Worker static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_stream_load_si128(const void * __V)648*bed243d3SAndroid Build Coastguard Worker _mm_stream_load_si128(const void *__V) {
649*bed243d3SAndroid Build Coastguard Worker return (__m128i)__builtin_nontemporal_load((const __v2di *)__V);
650*bed243d3SAndroid Build Coastguard Worker }
651*bed243d3SAndroid Build Coastguard Worker
652*bed243d3SAndroid Build Coastguard Worker /* SSE4 Packed Integer Min/Max Instructions. */
653*bed243d3SAndroid Build Coastguard Worker /// Compares the corresponding elements of two 128-bit vectors of
654*bed243d3SAndroid Build Coastguard Worker /// [16 x i8] and returns a 128-bit vector of [16 x i8] containing the lesser
655*bed243d3SAndroid Build Coastguard Worker /// of the two values.
656*bed243d3SAndroid Build Coastguard Worker ///
657*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
658*bed243d3SAndroid Build Coastguard Worker ///
659*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPMINSB / PMINSB </c> instruction.
660*bed243d3SAndroid Build Coastguard Worker ///
661*bed243d3SAndroid Build Coastguard Worker /// \param __V1
662*bed243d3SAndroid Build Coastguard Worker /// A 128-bit vector of [16 x i8].
663*bed243d3SAndroid Build Coastguard Worker /// \param __V2
664*bed243d3SAndroid Build Coastguard Worker /// A 128-bit vector of [16 x i8]
665*bed243d3SAndroid Build Coastguard Worker /// \returns A 128-bit vector of [16 x i8] containing the lesser values.
_mm_min_epi8(__m128i __V1,__m128i __V2)666*bed243d3SAndroid Build Coastguard Worker static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi8(__m128i __V1,
667*bed243d3SAndroid Build Coastguard Worker __m128i __V2) {
668*bed243d3SAndroid Build Coastguard Worker return (__m128i)__builtin_elementwise_min((__v16qs)__V1, (__v16qs)__V2);
669*bed243d3SAndroid Build Coastguard Worker }
670*bed243d3SAndroid Build Coastguard Worker
671*bed243d3SAndroid Build Coastguard Worker /// Compares the corresponding elements of two 128-bit vectors of
672*bed243d3SAndroid Build Coastguard Worker /// [16 x i8] and returns a 128-bit vector of [16 x i8] containing the
673*bed243d3SAndroid Build Coastguard Worker /// greater value of the two.
674*bed243d3SAndroid Build Coastguard Worker ///
675*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
676*bed243d3SAndroid Build Coastguard Worker ///
677*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPMAXSB / PMAXSB </c> instruction.
678*bed243d3SAndroid Build Coastguard Worker ///
679*bed243d3SAndroid Build Coastguard Worker /// \param __V1
680*bed243d3SAndroid Build Coastguard Worker /// A 128-bit vector of [16 x i8].
681*bed243d3SAndroid Build Coastguard Worker /// \param __V2
682*bed243d3SAndroid Build Coastguard Worker /// A 128-bit vector of [16 x i8].
683*bed243d3SAndroid Build Coastguard Worker /// \returns A 128-bit vector of [16 x i8] containing the greater values.
_mm_max_epi8(__m128i __V1,__m128i __V2)684*bed243d3SAndroid Build Coastguard Worker static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi8(__m128i __V1,
685*bed243d3SAndroid Build Coastguard Worker __m128i __V2) {
686*bed243d3SAndroid Build Coastguard Worker return (__m128i)__builtin_elementwise_max((__v16qs)__V1, (__v16qs)__V2);
687*bed243d3SAndroid Build Coastguard Worker }
688*bed243d3SAndroid Build Coastguard Worker
689*bed243d3SAndroid Build Coastguard Worker /// Compares the corresponding elements of two 128-bit vectors of
690*bed243d3SAndroid Build Coastguard Worker /// [8 x u16] and returns a 128-bit vector of [8 x u16] containing the lesser
691*bed243d3SAndroid Build Coastguard Worker /// value of the two.
692*bed243d3SAndroid Build Coastguard Worker ///
693*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
694*bed243d3SAndroid Build Coastguard Worker ///
695*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPMINUW / PMINUW </c> instruction.
696*bed243d3SAndroid Build Coastguard Worker ///
697*bed243d3SAndroid Build Coastguard Worker /// \param __V1
698*bed243d3SAndroid Build Coastguard Worker /// A 128-bit vector of [8 x u16].
699*bed243d3SAndroid Build Coastguard Worker /// \param __V2
700*bed243d3SAndroid Build Coastguard Worker /// A 128-bit vector of [8 x u16].
701*bed243d3SAndroid Build Coastguard Worker /// \returns A 128-bit vector of [8 x u16] containing the lesser values.
_mm_min_epu16(__m128i __V1,__m128i __V2)702*bed243d3SAndroid Build Coastguard Worker static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu16(__m128i __V1,
703*bed243d3SAndroid Build Coastguard Worker __m128i __V2) {
704*bed243d3SAndroid Build Coastguard Worker return (__m128i)__builtin_elementwise_min((__v8hu)__V1, (__v8hu)__V2);
705*bed243d3SAndroid Build Coastguard Worker }
706*bed243d3SAndroid Build Coastguard Worker
707*bed243d3SAndroid Build Coastguard Worker /// Compares the corresponding elements of two 128-bit vectors of
708*bed243d3SAndroid Build Coastguard Worker /// [8 x u16] and returns a 128-bit vector of [8 x u16] containing the
709*bed243d3SAndroid Build Coastguard Worker /// greater value of the two.
710*bed243d3SAndroid Build Coastguard Worker ///
711*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
712*bed243d3SAndroid Build Coastguard Worker ///
713*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPMAXUW / PMAXUW </c> instruction.
714*bed243d3SAndroid Build Coastguard Worker ///
715*bed243d3SAndroid Build Coastguard Worker /// \param __V1
716*bed243d3SAndroid Build Coastguard Worker /// A 128-bit vector of [8 x u16].
717*bed243d3SAndroid Build Coastguard Worker /// \param __V2
718*bed243d3SAndroid Build Coastguard Worker /// A 128-bit vector of [8 x u16].
719*bed243d3SAndroid Build Coastguard Worker /// \returns A 128-bit vector of [8 x u16] containing the greater values.
_mm_max_epu16(__m128i __V1,__m128i __V2)720*bed243d3SAndroid Build Coastguard Worker static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu16(__m128i __V1,
721*bed243d3SAndroid Build Coastguard Worker __m128i __V2) {
722*bed243d3SAndroid Build Coastguard Worker return (__m128i)__builtin_elementwise_max((__v8hu)__V1, (__v8hu)__V2);
723*bed243d3SAndroid Build Coastguard Worker }
724*bed243d3SAndroid Build Coastguard Worker
725*bed243d3SAndroid Build Coastguard Worker /// Compares the corresponding elements of two 128-bit vectors of
726*bed243d3SAndroid Build Coastguard Worker /// [4 x i32] and returns a 128-bit vector of [4 x i32] containing the lesser
727*bed243d3SAndroid Build Coastguard Worker /// value of the two.
728*bed243d3SAndroid Build Coastguard Worker ///
729*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
730*bed243d3SAndroid Build Coastguard Worker ///
731*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPMINSD / PMINSD </c> instruction.
732*bed243d3SAndroid Build Coastguard Worker ///
733*bed243d3SAndroid Build Coastguard Worker /// \param __V1
734*bed243d3SAndroid Build Coastguard Worker /// A 128-bit vector of [4 x i32].
735*bed243d3SAndroid Build Coastguard Worker /// \param __V2
736*bed243d3SAndroid Build Coastguard Worker /// A 128-bit vector of [4 x i32].
737*bed243d3SAndroid Build Coastguard Worker /// \returns A 128-bit vector of [4 x i32] containing the lesser values.
_mm_min_epi32(__m128i __V1,__m128i __V2)738*bed243d3SAndroid Build Coastguard Worker static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi32(__m128i __V1,
739*bed243d3SAndroid Build Coastguard Worker __m128i __V2) {
740*bed243d3SAndroid Build Coastguard Worker return (__m128i)__builtin_elementwise_min((__v4si)__V1, (__v4si)__V2);
741*bed243d3SAndroid Build Coastguard Worker }
742*bed243d3SAndroid Build Coastguard Worker
743*bed243d3SAndroid Build Coastguard Worker /// Compares the corresponding elements of two 128-bit vectors of
744*bed243d3SAndroid Build Coastguard Worker /// [4 x i32] and returns a 128-bit vector of [4 x i32] containing the
745*bed243d3SAndroid Build Coastguard Worker /// greater value of the two.
746*bed243d3SAndroid Build Coastguard Worker ///
747*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
748*bed243d3SAndroid Build Coastguard Worker ///
749*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPMAXSD / PMAXSD </c> instruction.
750*bed243d3SAndroid Build Coastguard Worker ///
751*bed243d3SAndroid Build Coastguard Worker /// \param __V1
752*bed243d3SAndroid Build Coastguard Worker /// A 128-bit vector of [4 x i32].
753*bed243d3SAndroid Build Coastguard Worker /// \param __V2
754*bed243d3SAndroid Build Coastguard Worker /// A 128-bit vector of [4 x i32].
755*bed243d3SAndroid Build Coastguard Worker /// \returns A 128-bit vector of [4 x i32] containing the greater values.
_mm_max_epi32(__m128i __V1,__m128i __V2)756*bed243d3SAndroid Build Coastguard Worker static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi32(__m128i __V1,
757*bed243d3SAndroid Build Coastguard Worker __m128i __V2) {
758*bed243d3SAndroid Build Coastguard Worker return (__m128i)__builtin_elementwise_max((__v4si)__V1, (__v4si)__V2);
759*bed243d3SAndroid Build Coastguard Worker }
760*bed243d3SAndroid Build Coastguard Worker
761*bed243d3SAndroid Build Coastguard Worker /// Compares the corresponding elements of two 128-bit vectors of
762*bed243d3SAndroid Build Coastguard Worker /// [4 x u32] and returns a 128-bit vector of [4 x u32] containing the lesser
763*bed243d3SAndroid Build Coastguard Worker /// value of the two.
764*bed243d3SAndroid Build Coastguard Worker ///
765*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
766*bed243d3SAndroid Build Coastguard Worker ///
767*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPMINUD / PMINUD </c> instruction.
768*bed243d3SAndroid Build Coastguard Worker ///
769*bed243d3SAndroid Build Coastguard Worker /// \param __V1
770*bed243d3SAndroid Build Coastguard Worker /// A 128-bit vector of [4 x u32].
771*bed243d3SAndroid Build Coastguard Worker /// \param __V2
772*bed243d3SAndroid Build Coastguard Worker /// A 128-bit vector of [4 x u32].
773*bed243d3SAndroid Build Coastguard Worker /// \returns A 128-bit vector of [4 x u32] containing the lesser values.
_mm_min_epu32(__m128i __V1,__m128i __V2)774*bed243d3SAndroid Build Coastguard Worker static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu32(__m128i __V1,
775*bed243d3SAndroid Build Coastguard Worker __m128i __V2) {
776*bed243d3SAndroid Build Coastguard Worker return (__m128i)__builtin_elementwise_min((__v4su)__V1, (__v4su)__V2);
777*bed243d3SAndroid Build Coastguard Worker }
778*bed243d3SAndroid Build Coastguard Worker
779*bed243d3SAndroid Build Coastguard Worker /// Compares the corresponding elements of two 128-bit vectors of
780*bed243d3SAndroid Build Coastguard Worker /// [4 x u32] and returns a 128-bit vector of [4 x u32] containing the
781*bed243d3SAndroid Build Coastguard Worker /// greater value of the two.
782*bed243d3SAndroid Build Coastguard Worker ///
783*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
784*bed243d3SAndroid Build Coastguard Worker ///
785*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPMAXUD / PMAXUD </c> instruction.
786*bed243d3SAndroid Build Coastguard Worker ///
787*bed243d3SAndroid Build Coastguard Worker /// \param __V1
788*bed243d3SAndroid Build Coastguard Worker /// A 128-bit vector of [4 x u32].
789*bed243d3SAndroid Build Coastguard Worker /// \param __V2
790*bed243d3SAndroid Build Coastguard Worker /// A 128-bit vector of [4 x u32].
791*bed243d3SAndroid Build Coastguard Worker /// \returns A 128-bit vector of [4 x u32] containing the greater values.
_mm_max_epu32(__m128i __V1,__m128i __V2)792*bed243d3SAndroid Build Coastguard Worker static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu32(__m128i __V1,
793*bed243d3SAndroid Build Coastguard Worker __m128i __V2) {
794*bed243d3SAndroid Build Coastguard Worker return (__m128i)__builtin_elementwise_max((__v4su)__V1, (__v4su)__V2);
795*bed243d3SAndroid Build Coastguard Worker }
796*bed243d3SAndroid Build Coastguard Worker
/* SSE4 Insertion and Extraction from XMM Register Instructions.  */
/// Takes the first argument \a X and inserts an element from the second
///    argument \a Y as selected by the third argument \a N. That result then
///    has elements zeroed out also as selected by the third argument \a N. The
///    resulting 128-bit vector of [4 x float] is then returned.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128 _mm_insert_ps(__m128 X, __m128 Y, const int N);
/// \endcode
///
/// This intrinsic corresponds to the <c> VINSERTPS </c> instruction.
///
/// \param X
///    A 128-bit vector source operand of [4 x float]. With the exception of
///    those bits in the result copied from parameter \a Y and zeroed by bits
///    [3:0] of \a N, all bits from this parameter are copied to the result.
/// \param Y
///    A 128-bit vector source operand of [4 x float]. One single-precision
///    floating-point element from this source, as determined by the immediate
///    parameter, is copied to the result.
/// \param N
///    Specifies which bits from operand \a Y will be copied, which bits in the
///    result they will be copied to, and which bits in the result will be
///    cleared. The following assignments are made: \n
///    Bits [7:6] specify the bits to copy from operand \a Y: \n
///      00: Selects bits [31:0] from operand \a Y. \n
///      01: Selects bits [63:32] from operand \a Y. \n
///      10: Selects bits [95:64] from operand \a Y. \n
///      11: Selects bits [127:96] from operand \a Y. \n
///    Bits [5:4] specify the bits in the result to which the selected bits
///    from operand \a Y are copied: \n
///      00: Copies the selected bits from \a Y to result bits [31:0]. \n
///      01: Copies the selected bits from \a Y to result bits [63:32]. \n
///      10: Copies the selected bits from \a Y to result bits [95:64]. \n
///      11: Copies the selected bits from \a Y to result bits [127:96]. \n
///    Bits[3:0]: If any of these bits are set, the corresponding result
///    element is cleared.
/// \returns A 128-bit vector of [4 x float] containing the copied
///    single-precision floating point elements from the operands.
#define _mm_insert_ps(X, Y, N) __builtin_ia32_insertps128((X), (Y), (N))

/// Extracts a 32-bit integer from a 128-bit vector of [4 x float] and
///    returns it, using the immediate value parameter \a N as a selector.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// int _mm_extract_ps(__m128 X, const int N);
/// \endcode
///
/// This intrinsic corresponds to the <c> VEXTRACTPS / EXTRACTPS </c>
/// instruction.
///
/// \param X
///    A 128-bit vector of [4 x float].
/// \param N
///    An immediate value. Bits [1:0] determine which bits from the argument
///    \a X are extracted and returned: \n
///    00: Bits [31:0] of parameter \a X are returned. \n
///    01: Bits [63:32] of parameter \a X are returned. \n
///    10: Bits [95:64] of parameter \a X are returned. \n
///    11: Bits [127:96] of parameter \a X are returned.
/// \returns A 32-bit integer containing the extracted 32 bits of float data.
#define _mm_extract_ps(X, N)                                                   \
  __builtin_bit_cast(                                                          \
      int, __builtin_ia32_vec_ext_v4sf((__v4sf)(__m128)(X), (int)(N)))

/* Miscellaneous insert and extract macros.  */
/* Extract a single-precision float from X at index N into D.
   D must be an assignable lvalue; the do/while(0) wrapper makes the macro
   behave as a single statement.  */
#define _MM_EXTRACT_FLOAT(D, X, N)                                             \
  do {                                                                         \
    (D) = __builtin_ia32_vec_ext_v4sf((__v4sf)(__m128)(X), (int)(N));          \
  } while (0)

/* Or together 2 sets of indexes (X and Y) with the zeroing bits (Z) to create
   an index suitable for _mm_insert_ps: X is shifted into bits [7:6], Y into
   bits [5:4], and Z is used unshifted as the low bits.  */
#define _MM_MK_INSERTPS_NDX(X, Y, Z) (((X) << 6) | ((Y) << 4) | (Z))

/* Extract a float from X at index N into the first index of the return.
   The 0x0e zeroing mask clears result elements 1..3, so only element 0
   carries the selected value.  */
#define _MM_PICK_OUT_PS(X, N)                                                  \
  _mm_insert_ps(_mm_setzero_ps(), (X), _MM_MK_INSERTPS_NDX((N), 0, 0x0e))

/* Insert int into packed integer array at index.  */
/// Constructs a 128-bit vector of [16 x i8] by first making a copy of
///    the 128-bit integer vector parameter, and then inserting the lower 8 bits
///    of an integer parameter \a I into an offset specified by the immediate
///    value parameter \a N.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_insert_epi8(__m128i X, int I, const int N);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPINSRB / PINSRB </c> instruction.
///
/// \param X
///    A 128-bit integer vector of [16 x i8]. This vector is copied to the
///    result and then one of the sixteen elements in the result vector is
///    replaced by the lower 8 bits of \a I.
/// \param I
///    An integer. The lower 8 bits of this operand are written to the result
///    beginning at the offset specified by \a N.
/// \param N
///    An immediate value. Bits [3:0] select the byte position in the result
///    at which the lower 8 bits of \a I are written. \n
///    0000: Bits [7:0] of the result are used for insertion. \n
///    0001: Bits [15:8] of the result are used for insertion. \n
///    0010: Bits [23:16] of the result are used for insertion. \n
///    0011: Bits [31:24] of the result are used for insertion. \n
///    0100: Bits [39:32] of the result are used for insertion. \n
///    0101: Bits [47:40] of the result are used for insertion. \n
///    0110: Bits [55:48] of the result are used for insertion. \n
///    0111: Bits [63:56] of the result are used for insertion. \n
///    1000: Bits [71:64] of the result are used for insertion. \n
///    1001: Bits [79:72] of the result are used for insertion. \n
///    1010: Bits [87:80] of the result are used for insertion. \n
///    1011: Bits [95:88] of the result are used for insertion. \n
///    1100: Bits [103:96] of the result are used for insertion. \n
///    1101: Bits [111:104] of the result are used for insertion. \n
///    1110: Bits [119:112] of the result are used for insertion. \n
///    1111: Bits [127:120] of the result are used for insertion.
/// \returns A 128-bit integer vector containing the constructed values.
#define _mm_insert_epi8(X, I, N)                                               \
  ((__m128i)__builtin_ia32_vec_set_v16qi((__v16qi)(__m128i)(X), (int)(I),     \
                                         (int)(N)))

/// Constructs a 128-bit vector of [4 x i32] by first making a copy of
///    the 128-bit integer vector parameter, and then inserting the 32-bit
///    integer parameter \a I at the offset specified by the immediate value
///    parameter \a N.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_insert_epi32(__m128i X, int I, const int N);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPINSRD / PINSRD </c> instruction.
///
/// \param X
///    A 128-bit integer vector of [4 x i32]. This vector is copied to the
///    result and then one of the four elements in the result vector is
///    replaced by \a I.
/// \param I
///    A 32-bit integer that is written to the result beginning at the offset
///    specified by \a N.
/// \param N
///    An immediate value. Bits [1:0] select the 32-bit element position in
///    the result at which the integer \a I is written. \n
///    00: Bits [31:0] of the result are used for insertion. \n
///    01: Bits [63:32] of the result are used for insertion. \n
///    10: Bits [95:64] of the result are used for insertion. \n
///    11: Bits [127:96] of the result are used for insertion.
/// \returns A 128-bit integer vector containing the constructed values.
#define _mm_insert_epi32(X, I, N)                                              \
  ((__m128i)__builtin_ia32_vec_set_v4si((__v4si)(__m128i)(X), (int)(I),       \
                                        (int)(N)))

#ifdef __x86_64__
/// Constructs a 128-bit vector of [2 x i64] by first making a copy of
///    the 128-bit integer vector parameter, and then inserting the 64-bit
///    integer parameter \a I, using the immediate value parameter \a N as an
///    insertion location selector.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_insert_epi64(__m128i X, long long I, const int N);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPINSRQ / PINSRQ </c> instruction.
///
/// \param X
///    A 128-bit integer vector of [2 x i64]. This vector is copied to the
///    result and then one of the two elements in the result vector is replaced
///    by \a I.
/// \param I
///    A 64-bit integer that is written to the result beginning at the offset
///    specified by \a N.
/// \param N
///    An immediate value. Bit [0] selects the 64-bit element position in the
///    result at which the integer \a I is written. \n
///    0: Bits [63:0] of the result are used for insertion. \n
///    1: Bits [127:64] of the result are used for insertion.
/// \returns A 128-bit integer vector containing the constructed values.
#define _mm_insert_epi64(X, I, N)                                              \
  ((__m128i)__builtin_ia32_vec_set_v2di((__v2di)(__m128i)(X), (long long)(I), \
                                        (int)(N)))
#endif /* __x86_64__ */

/* Extract int from packed integer array at index.  This returns the element
 * as a zero extended value, so it is unsigned.
 */
/// Extracts an 8-bit element from the 128-bit integer vector of
///    [16 x i8], using the immediate value parameter \a N as a selector.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// int _mm_extract_epi8(__m128i X, const int N);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPEXTRB / PEXTRB </c> instruction.
///
/// \param X
///    A 128-bit integer vector.
/// \param N
///    An immediate value. Bits [3:0] specify which 8-bit vector element from
///    the argument \a X to extract and copy to the result. \n
///    0000: Bits [7:0] of parameter \a X are extracted. \n
///    0001: Bits [15:8] of the parameter \a X are extracted. \n
///    0010: Bits [23:16] of the parameter \a X are extracted. \n
///    0011: Bits [31:24] of the parameter \a X are extracted. \n
///    0100: Bits [39:32] of the parameter \a X are extracted. \n
///    0101: Bits [47:40] of the parameter \a X are extracted. \n
///    0110: Bits [55:48] of the parameter \a X are extracted. \n
///    0111: Bits [63:56] of the parameter \a X are extracted. \n
///    1000: Bits [71:64] of the parameter \a X are extracted. \n
///    1001: Bits [79:72] of the parameter \a X are extracted. \n
///    1010: Bits [87:80] of the parameter \a X are extracted. \n
///    1011: Bits [95:88] of the parameter \a X are extracted. \n
///    1100: Bits [103:96] of the parameter \a X are extracted. \n
///    1101: Bits [111:104] of the parameter \a X are extracted. \n
///    1110: Bits [119:112] of the parameter \a X are extracted. \n
///    1111: Bits [127:120] of the parameter \a X are extracted.
/// \returns  An unsigned integer, whose lower 8 bits are selected from the
///    128-bit integer vector parameter and the remaining bits are assigned
///    zeros.
#define _mm_extract_epi8(X, N)                                                 \
  ((int)(unsigned char)__builtin_ia32_vec_ext_v16qi((__v16qi)(__m128i)(X),    \
                                                    (int)(N)))

/// Extracts a 32-bit element from the 128-bit integer vector of
///    [4 x i32], using the immediate value parameter \a N as a selector.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// int _mm_extract_epi32(__m128i X, const int N);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPEXTRD / PEXTRD </c> instruction.
///
/// \param X
///    A 128-bit integer vector.
/// \param N
///    An immediate value. Bits [1:0] specify which 32-bit vector element from
///    the argument \a X to extract and copy to the result. \n
///    00: Bits [31:0] of the parameter \a X are extracted. \n
///    01: Bits [63:32] of the parameter \a X are extracted. \n
///    10: Bits [95:64] of the parameter \a X are extracted. \n
///    11: Bits [127:96] of the parameter \a X are extracted.
/// \returns  An integer containing the 32-bit element selected from the
///    128-bit integer vector parameter.
#define _mm_extract_epi32(X, N)                                                \
  ((int)__builtin_ia32_vec_ext_v4si((__v4si)(__m128i)(X), (int)(N)))

/// Extracts a 64-bit element from the 128-bit integer vector of
///    [2 x i64], using the immediate value parameter \a N as a selector.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// long long _mm_extract_epi64(__m128i X, const int N);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPEXTRQ / PEXTRQ </c> instruction
/// in 64-bit mode.
///
/// \param X
///    A 128-bit integer vector.
/// \param N
///    An immediate value. Bit [0] specifies which 64-bit vector element from
///    the argument \a X to return. \n
///    0: Bits [63:0] are returned. \n
///    1: Bits [127:64] are returned.
/// \returns  A 64-bit integer.
#define _mm_extract_epi64(X, N)                                                \
  ((long long)__builtin_ia32_vec_ext_v2di((__v2di)(__m128i)(X), (int)(N)))

1080*bed243d3SAndroid Build Coastguard Worker /* SSE4 128-bit Packed Integer Comparisons. */
1081*bed243d3SAndroid Build Coastguard Worker /// Tests whether the specified bits in a 128-bit integer vector are all
1082*bed243d3SAndroid Build Coastguard Worker /// zeros.
1083*bed243d3SAndroid Build Coastguard Worker ///
1084*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
1085*bed243d3SAndroid Build Coastguard Worker ///
1086*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction.
1087*bed243d3SAndroid Build Coastguard Worker ///
1088*bed243d3SAndroid Build Coastguard Worker /// \param __M
1089*bed243d3SAndroid Build Coastguard Worker /// A 128-bit integer vector containing the bits to be tested.
1090*bed243d3SAndroid Build Coastguard Worker /// \param __V
1091*bed243d3SAndroid Build Coastguard Worker /// A 128-bit integer vector selecting which bits to test in operand \a __M.
1092*bed243d3SAndroid Build Coastguard Worker /// \returns TRUE if the specified bits are all zeros; FALSE otherwise.
_mm_testz_si128(__m128i __M,__m128i __V)1093*bed243d3SAndroid Build Coastguard Worker static __inline__ int __DEFAULT_FN_ATTRS _mm_testz_si128(__m128i __M,
1094*bed243d3SAndroid Build Coastguard Worker __m128i __V) {
1095*bed243d3SAndroid Build Coastguard Worker return __builtin_ia32_ptestz128((__v2di)__M, (__v2di)__V);
1096*bed243d3SAndroid Build Coastguard Worker }
1097*bed243d3SAndroid Build Coastguard Worker
1098*bed243d3SAndroid Build Coastguard Worker /// Tests whether the specified bits in a 128-bit integer vector are all
1099*bed243d3SAndroid Build Coastguard Worker /// ones.
1100*bed243d3SAndroid Build Coastguard Worker ///
1101*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
1102*bed243d3SAndroid Build Coastguard Worker ///
1103*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction.
1104*bed243d3SAndroid Build Coastguard Worker ///
1105*bed243d3SAndroid Build Coastguard Worker /// \param __M
1106*bed243d3SAndroid Build Coastguard Worker /// A 128-bit integer vector containing the bits to be tested.
1107*bed243d3SAndroid Build Coastguard Worker /// \param __V
1108*bed243d3SAndroid Build Coastguard Worker /// A 128-bit integer vector selecting which bits to test in operand \a __M.
1109*bed243d3SAndroid Build Coastguard Worker /// \returns TRUE if the specified bits are all ones; FALSE otherwise.
_mm_testc_si128(__m128i __M,__m128i __V)1110*bed243d3SAndroid Build Coastguard Worker static __inline__ int __DEFAULT_FN_ATTRS _mm_testc_si128(__m128i __M,
1111*bed243d3SAndroid Build Coastguard Worker __m128i __V) {
1112*bed243d3SAndroid Build Coastguard Worker return __builtin_ia32_ptestc128((__v2di)__M, (__v2di)__V);
1113*bed243d3SAndroid Build Coastguard Worker }
1114*bed243d3SAndroid Build Coastguard Worker
1115*bed243d3SAndroid Build Coastguard Worker /// Tests whether the specified bits in a 128-bit integer vector are
1116*bed243d3SAndroid Build Coastguard Worker /// neither all zeros nor all ones.
1117*bed243d3SAndroid Build Coastguard Worker ///
1118*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
1119*bed243d3SAndroid Build Coastguard Worker ///
1120*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction.
1121*bed243d3SAndroid Build Coastguard Worker ///
1122*bed243d3SAndroid Build Coastguard Worker /// \param __M
1123*bed243d3SAndroid Build Coastguard Worker /// A 128-bit integer vector containing the bits to be tested.
1124*bed243d3SAndroid Build Coastguard Worker /// \param __V
1125*bed243d3SAndroid Build Coastguard Worker /// A 128-bit integer vector selecting which bits to test in operand \a __M.
1126*bed243d3SAndroid Build Coastguard Worker /// \returns TRUE if the specified bits are neither all zeros nor all ones;
1127*bed243d3SAndroid Build Coastguard Worker /// FALSE otherwise.
_mm_testnzc_si128(__m128i __M,__m128i __V)1128*bed243d3SAndroid Build Coastguard Worker static __inline__ int __DEFAULT_FN_ATTRS _mm_testnzc_si128(__m128i __M,
1129*bed243d3SAndroid Build Coastguard Worker __m128i __V) {
1130*bed243d3SAndroid Build Coastguard Worker return __builtin_ia32_ptestnzc128((__v2di)__M, (__v2di)__V);
1131*bed243d3SAndroid Build Coastguard Worker }
1132*bed243d3SAndroid Build Coastguard Worker
/// Tests whether the specified bits in a 128-bit integer vector are all
///    ones.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// int _mm_test_all_ones(__m128i V);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction.
///
/// \param V
///    A 128-bit integer vector containing the bits to be tested.
/// \returns TRUE if the bits specified in the operand are all set to 1; FALSE
///    otherwise.
#define _mm_test_all_ones(V) _mm_testc_si128((V), _mm_set1_epi32(-1))

/// Tests whether the selected bits of a 128-bit integer vector are a mixture
///    of zeros and ones, that is, neither all zeros nor all ones.
///
///    This macro is an alias that forwards both arguments, unchanged and in
///    the same order, to _mm_testnzc_si128().
///
/// \headerfile <x86intrin.h>
///
/// \code
/// int _mm_test_mix_ones_zeros(__m128i M, __m128i V);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction.
///
/// \param M
///    A 128-bit integer vector containing the bits to be tested.
/// \param V
///    A 128-bit integer vector selecting which bits to test in operand \a M.
/// \returns TRUE if the specified bits are neither all zeros nor all ones;
///    FALSE otherwise.
#define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128((M), (V))
1168*bed243d3SAndroid Build Coastguard Worker
/// Tests whether the selected bits of a 128-bit integer vector are all
///    zeros.
///
///    This macro is an alias that forwards both arguments, unchanged and in
///    the same order, to _mm_testz_si128().
///
/// \headerfile <x86intrin.h>
///
/// \code
/// int _mm_test_all_zeros(__m128i M, __m128i V);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction.
///
/// \param M
///    A 128-bit integer vector containing the bits to be tested.
/// \param V
///    A 128-bit integer vector selecting which bits to test in operand \a M.
/// \returns TRUE if the specified bits are all zeros; FALSE otherwise.
#define _mm_test_all_zeros(M, V) _mm_testz_si128((M), (V))
1186*bed243d3SAndroid Build Coastguard Worker
1187*bed243d3SAndroid Build Coastguard Worker /* SSE4 64-bit Packed Integer Comparisons. */
1188*bed243d3SAndroid Build Coastguard Worker /// Compares each of the corresponding 64-bit values of the 128-bit
1189*bed243d3SAndroid Build Coastguard Worker /// integer vectors for equality.
1190*bed243d3SAndroid Build Coastguard Worker ///
1191*bed243d3SAndroid Build Coastguard Worker /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
1192*bed243d3SAndroid Build Coastguard Worker ///
1193*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
1194*bed243d3SAndroid Build Coastguard Worker ///
1195*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPCMPEQQ / PCMPEQQ </c> instruction.
1196*bed243d3SAndroid Build Coastguard Worker ///
1197*bed243d3SAndroid Build Coastguard Worker /// \param __V1
1198*bed243d3SAndroid Build Coastguard Worker /// A 128-bit integer vector.
1199*bed243d3SAndroid Build Coastguard Worker /// \param __V2
1200*bed243d3SAndroid Build Coastguard Worker /// A 128-bit integer vector.
1201*bed243d3SAndroid Build Coastguard Worker /// \returns A 128-bit integer vector containing the comparison results.
_mm_cmpeq_epi64(__m128i __V1,__m128i __V2)1202*bed243d3SAndroid Build Coastguard Worker static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi64(__m128i __V1,
1203*bed243d3SAndroid Build Coastguard Worker __m128i __V2) {
1204*bed243d3SAndroid Build Coastguard Worker return (__m128i)((__v2di)__V1 == (__v2di)__V2);
1205*bed243d3SAndroid Build Coastguard Worker }
1206*bed243d3SAndroid Build Coastguard Worker
1207*bed243d3SAndroid Build Coastguard Worker /* SSE4 Packed Integer Sign-Extension. */
1208*bed243d3SAndroid Build Coastguard Worker /// Sign-extends each of the lower eight 8-bit integer elements of a
1209*bed243d3SAndroid Build Coastguard Worker /// 128-bit vector of [16 x i8] to 16-bit values and returns them in a
1210*bed243d3SAndroid Build Coastguard Worker /// 128-bit vector of [8 x i16]. The upper eight elements of the input vector
1211*bed243d3SAndroid Build Coastguard Worker /// are unused.
1212*bed243d3SAndroid Build Coastguard Worker ///
1213*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
1214*bed243d3SAndroid Build Coastguard Worker ///
1215*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPMOVSXBW / PMOVSXBW </c> instruction.
1216*bed243d3SAndroid Build Coastguard Worker ///
1217*bed243d3SAndroid Build Coastguard Worker /// \param __V
1218*bed243d3SAndroid Build Coastguard Worker /// A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are
1219*bed243d3SAndroid Build Coastguard Worker /// sign-extended to 16-bit values.
1220*bed243d3SAndroid Build Coastguard Worker /// \returns A 128-bit vector of [8 x i16] containing the sign-extended values.
_mm_cvtepi8_epi16(__m128i __V)1221*bed243d3SAndroid Build Coastguard Worker static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi16(__m128i __V) {
1222*bed243d3SAndroid Build Coastguard Worker /* This function always performs a signed extension, but __v16qi is a char
1223*bed243d3SAndroid Build Coastguard Worker which may be signed or unsigned, so use __v16qs. */
1224*bed243d3SAndroid Build Coastguard Worker return (__m128i) __builtin_convertvector(
1225*bed243d3SAndroid Build Coastguard Worker __builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3, 4, 5, 6,
1226*bed243d3SAndroid Build Coastguard Worker 7),
1227*bed243d3SAndroid Build Coastguard Worker __v8hi);
1228*bed243d3SAndroid Build Coastguard Worker }
1229*bed243d3SAndroid Build Coastguard Worker
1230*bed243d3SAndroid Build Coastguard Worker /// Sign-extends each of the lower four 8-bit integer elements of a
1231*bed243d3SAndroid Build Coastguard Worker /// 128-bit vector of [16 x i8] to 32-bit values and returns them in a
1232*bed243d3SAndroid Build Coastguard Worker /// 128-bit vector of [4 x i32]. The upper twelve elements of the input
1233*bed243d3SAndroid Build Coastguard Worker /// vector are unused.
1234*bed243d3SAndroid Build Coastguard Worker ///
1235*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
1236*bed243d3SAndroid Build Coastguard Worker ///
1237*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPMOVSXBD / PMOVSXBD </c> instruction.
1238*bed243d3SAndroid Build Coastguard Worker ///
1239*bed243d3SAndroid Build Coastguard Worker /// \param __V
1240*bed243d3SAndroid Build Coastguard Worker /// A 128-bit vector of [16 x i8]. The lower four 8-bit elements are
1241*bed243d3SAndroid Build Coastguard Worker /// sign-extended to 32-bit values.
1242*bed243d3SAndroid Build Coastguard Worker /// \returns A 128-bit vector of [4 x i32] containing the sign-extended values.
_mm_cvtepi8_epi32(__m128i __V)1243*bed243d3SAndroid Build Coastguard Worker static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi32(__m128i __V) {
1244*bed243d3SAndroid Build Coastguard Worker /* This function always performs a signed extension, but __v16qi is a char
1245*bed243d3SAndroid Build Coastguard Worker which may be signed or unsigned, so use __v16qs. */
1246*bed243d3SAndroid Build Coastguard Worker return (__m128i) __builtin_convertvector(
1247*bed243d3SAndroid Build Coastguard Worker __builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3), __v4si);
1248*bed243d3SAndroid Build Coastguard Worker }
1249*bed243d3SAndroid Build Coastguard Worker
1250*bed243d3SAndroid Build Coastguard Worker /// Sign-extends each of the lower two 8-bit integer elements of a
1251*bed243d3SAndroid Build Coastguard Worker /// 128-bit integer vector of [16 x i8] to 64-bit values and returns them in
1252*bed243d3SAndroid Build Coastguard Worker /// a 128-bit vector of [2 x i64]. The upper fourteen elements of the input
1253*bed243d3SAndroid Build Coastguard Worker /// vector are unused.
1254*bed243d3SAndroid Build Coastguard Worker ///
1255*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
1256*bed243d3SAndroid Build Coastguard Worker ///
1257*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPMOVSXBQ / PMOVSXBQ </c> instruction.
1258*bed243d3SAndroid Build Coastguard Worker ///
1259*bed243d3SAndroid Build Coastguard Worker /// \param __V
1260*bed243d3SAndroid Build Coastguard Worker /// A 128-bit vector of [16 x i8]. The lower two 8-bit elements are
1261*bed243d3SAndroid Build Coastguard Worker /// sign-extended to 64-bit values.
1262*bed243d3SAndroid Build Coastguard Worker /// \returns A 128-bit vector of [2 x i64] containing the sign-extended values.
_mm_cvtepi8_epi64(__m128i __V)1263*bed243d3SAndroid Build Coastguard Worker static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi64(__m128i __V) {
1264*bed243d3SAndroid Build Coastguard Worker /* This function always performs a signed extension, but __v16qi is a char
1265*bed243d3SAndroid Build Coastguard Worker which may be signed or unsigned, so use __v16qs. */
1266*bed243d3SAndroid Build Coastguard Worker return (__m128i) __builtin_convertvector(
1267*bed243d3SAndroid Build Coastguard Worker __builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1), __v2di);
1268*bed243d3SAndroid Build Coastguard Worker }
1269*bed243d3SAndroid Build Coastguard Worker
1270*bed243d3SAndroid Build Coastguard Worker /// Sign-extends each of the lower four 16-bit integer elements of a
1271*bed243d3SAndroid Build Coastguard Worker /// 128-bit integer vector of [8 x i16] to 32-bit values and returns them in
1272*bed243d3SAndroid Build Coastguard Worker /// a 128-bit vector of [4 x i32]. The upper four elements of the input
1273*bed243d3SAndroid Build Coastguard Worker /// vector are unused.
1274*bed243d3SAndroid Build Coastguard Worker ///
1275*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
1276*bed243d3SAndroid Build Coastguard Worker ///
1277*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPMOVSXWD / PMOVSXWD </c> instruction.
1278*bed243d3SAndroid Build Coastguard Worker ///
1279*bed243d3SAndroid Build Coastguard Worker /// \param __V
1280*bed243d3SAndroid Build Coastguard Worker /// A 128-bit vector of [8 x i16]. The lower four 16-bit elements are
1281*bed243d3SAndroid Build Coastguard Worker /// sign-extended to 32-bit values.
1282*bed243d3SAndroid Build Coastguard Worker /// \returns A 128-bit vector of [4 x i32] containing the sign-extended values.
_mm_cvtepi16_epi32(__m128i __V)1283*bed243d3SAndroid Build Coastguard Worker static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi16_epi32(__m128i __V) {
1284*bed243d3SAndroid Build Coastguard Worker return (__m128i) __builtin_convertvector(
1285*bed243d3SAndroid Build Coastguard Worker __builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1, 2, 3), __v4si);
1286*bed243d3SAndroid Build Coastguard Worker }
1287*bed243d3SAndroid Build Coastguard Worker
1288*bed243d3SAndroid Build Coastguard Worker /// Sign-extends each of the lower two 16-bit integer elements of a
1289*bed243d3SAndroid Build Coastguard Worker /// 128-bit integer vector of [8 x i16] to 64-bit values and returns them in
1290*bed243d3SAndroid Build Coastguard Worker /// a 128-bit vector of [2 x i64]. The upper six elements of the input
1291*bed243d3SAndroid Build Coastguard Worker /// vector are unused.
1292*bed243d3SAndroid Build Coastguard Worker ///
1293*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
1294*bed243d3SAndroid Build Coastguard Worker ///
1295*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPMOVSXWQ / PMOVSXWQ </c> instruction.
1296*bed243d3SAndroid Build Coastguard Worker ///
1297*bed243d3SAndroid Build Coastguard Worker /// \param __V
1298*bed243d3SAndroid Build Coastguard Worker /// A 128-bit vector of [8 x i16]. The lower two 16-bit elements are
1299*bed243d3SAndroid Build Coastguard Worker /// sign-extended to 64-bit values.
1300*bed243d3SAndroid Build Coastguard Worker /// \returns A 128-bit vector of [2 x i64] containing the sign-extended values.
_mm_cvtepi16_epi64(__m128i __V)1301*bed243d3SAndroid Build Coastguard Worker static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi16_epi64(__m128i __V) {
1302*bed243d3SAndroid Build Coastguard Worker return (__m128i) __builtin_convertvector(
1303*bed243d3SAndroid Build Coastguard Worker __builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1), __v2di);
1304*bed243d3SAndroid Build Coastguard Worker }
1305*bed243d3SAndroid Build Coastguard Worker
1306*bed243d3SAndroid Build Coastguard Worker /// Sign-extends each of the lower two 32-bit integer elements of a
1307*bed243d3SAndroid Build Coastguard Worker /// 128-bit integer vector of [4 x i32] to 64-bit values and returns them in
1308*bed243d3SAndroid Build Coastguard Worker /// a 128-bit vector of [2 x i64]. The upper two elements of the input vector
1309*bed243d3SAndroid Build Coastguard Worker /// are unused.
1310*bed243d3SAndroid Build Coastguard Worker ///
1311*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
1312*bed243d3SAndroid Build Coastguard Worker ///
1313*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPMOVSXDQ / PMOVSXDQ </c> instruction.
1314*bed243d3SAndroid Build Coastguard Worker ///
1315*bed243d3SAndroid Build Coastguard Worker /// \param __V
1316*bed243d3SAndroid Build Coastguard Worker /// A 128-bit vector of [4 x i32]. The lower two 32-bit elements are
1317*bed243d3SAndroid Build Coastguard Worker /// sign-extended to 64-bit values.
1318*bed243d3SAndroid Build Coastguard Worker /// \returns A 128-bit vector of [2 x i64] containing the sign-extended values.
_mm_cvtepi32_epi64(__m128i __V)1319*bed243d3SAndroid Build Coastguard Worker static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi32_epi64(__m128i __V) {
1320*bed243d3SAndroid Build Coastguard Worker return (__m128i) __builtin_convertvector(
1321*bed243d3SAndroid Build Coastguard Worker __builtin_shufflevector((__v4si)__V, (__v4si)__V, 0, 1), __v2di);
1322*bed243d3SAndroid Build Coastguard Worker }
1323*bed243d3SAndroid Build Coastguard Worker
1324*bed243d3SAndroid Build Coastguard Worker /* SSE4 Packed Integer Zero-Extension. */
1325*bed243d3SAndroid Build Coastguard Worker /// Zero-extends each of the lower eight 8-bit integer elements of a
1326*bed243d3SAndroid Build Coastguard Worker /// 128-bit vector of [16 x i8] to 16-bit values and returns them in a
1327*bed243d3SAndroid Build Coastguard Worker /// 128-bit vector of [8 x i16]. The upper eight elements of the input vector
1328*bed243d3SAndroid Build Coastguard Worker /// are unused.
1329*bed243d3SAndroid Build Coastguard Worker ///
1330*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
1331*bed243d3SAndroid Build Coastguard Worker ///
1332*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPMOVZXBW / PMOVZXBW </c> instruction.
1333*bed243d3SAndroid Build Coastguard Worker ///
1334*bed243d3SAndroid Build Coastguard Worker /// \param __V
1335*bed243d3SAndroid Build Coastguard Worker /// A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are
1336*bed243d3SAndroid Build Coastguard Worker /// zero-extended to 16-bit values.
1337*bed243d3SAndroid Build Coastguard Worker /// \returns A 128-bit vector of [8 x i16] containing the zero-extended values.
_mm_cvtepu8_epi16(__m128i __V)1338*bed243d3SAndroid Build Coastguard Worker static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi16(__m128i __V) {
1339*bed243d3SAndroid Build Coastguard Worker return (__m128i) __builtin_convertvector(
1340*bed243d3SAndroid Build Coastguard Worker __builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3, 4, 5, 6,
1341*bed243d3SAndroid Build Coastguard Worker 7),
1342*bed243d3SAndroid Build Coastguard Worker __v8hi);
1343*bed243d3SAndroid Build Coastguard Worker }
1344*bed243d3SAndroid Build Coastguard Worker
1345*bed243d3SAndroid Build Coastguard Worker /// Zero-extends each of the lower four 8-bit integer elements of a
1346*bed243d3SAndroid Build Coastguard Worker /// 128-bit vector of [16 x i8] to 32-bit values and returns them in a
1347*bed243d3SAndroid Build Coastguard Worker /// 128-bit vector of [4 x i32]. The upper twelve elements of the input
1348*bed243d3SAndroid Build Coastguard Worker /// vector are unused.
1349*bed243d3SAndroid Build Coastguard Worker ///
1350*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
1351*bed243d3SAndroid Build Coastguard Worker ///
1352*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPMOVZXBD / PMOVZXBD </c> instruction.
1353*bed243d3SAndroid Build Coastguard Worker ///
1354*bed243d3SAndroid Build Coastguard Worker /// \param __V
1355*bed243d3SAndroid Build Coastguard Worker /// A 128-bit vector of [16 x i8]. The lower four 8-bit elements are
1356*bed243d3SAndroid Build Coastguard Worker /// zero-extended to 32-bit values.
1357*bed243d3SAndroid Build Coastguard Worker /// \returns A 128-bit vector of [4 x i32] containing the zero-extended values.
_mm_cvtepu8_epi32(__m128i __V)1358*bed243d3SAndroid Build Coastguard Worker static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi32(__m128i __V) {
1359*bed243d3SAndroid Build Coastguard Worker return (__m128i) __builtin_convertvector(
1360*bed243d3SAndroid Build Coastguard Worker __builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3), __v4si);
1361*bed243d3SAndroid Build Coastguard Worker }
1362*bed243d3SAndroid Build Coastguard Worker
1363*bed243d3SAndroid Build Coastguard Worker /// Zero-extends each of the lower two 8-bit integer elements of a
1364*bed243d3SAndroid Build Coastguard Worker /// 128-bit integer vector of [16 x i8] to 64-bit values and returns them in
1365*bed243d3SAndroid Build Coastguard Worker /// a 128-bit vector of [2 x i64]. The upper fourteen elements of the input
1366*bed243d3SAndroid Build Coastguard Worker /// vector are unused.
1367*bed243d3SAndroid Build Coastguard Worker ///
1368*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
1369*bed243d3SAndroid Build Coastguard Worker ///
1370*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPMOVZXBQ / PMOVZXBQ </c> instruction.
1371*bed243d3SAndroid Build Coastguard Worker ///
1372*bed243d3SAndroid Build Coastguard Worker /// \param __V
1373*bed243d3SAndroid Build Coastguard Worker /// A 128-bit vector of [16 x i8]. The lower two 8-bit elements are
1374*bed243d3SAndroid Build Coastguard Worker /// zero-extended to 64-bit values.
1375*bed243d3SAndroid Build Coastguard Worker /// \returns A 128-bit vector of [2 x i64] containing the zero-extended values.
_mm_cvtepu8_epi64(__m128i __V)1376*bed243d3SAndroid Build Coastguard Worker static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi64(__m128i __V) {
1377*bed243d3SAndroid Build Coastguard Worker return (__m128i) __builtin_convertvector(
1378*bed243d3SAndroid Build Coastguard Worker __builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1), __v2di);
1379*bed243d3SAndroid Build Coastguard Worker }
1380*bed243d3SAndroid Build Coastguard Worker
1381*bed243d3SAndroid Build Coastguard Worker /// Zero-extends each of the lower four 16-bit integer elements of a
1382*bed243d3SAndroid Build Coastguard Worker /// 128-bit integer vector of [8 x i16] to 32-bit values and returns them in
1383*bed243d3SAndroid Build Coastguard Worker /// a 128-bit vector of [4 x i32]. The upper four elements of the input
1384*bed243d3SAndroid Build Coastguard Worker /// vector are unused.
1385*bed243d3SAndroid Build Coastguard Worker ///
1386*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
1387*bed243d3SAndroid Build Coastguard Worker ///
1388*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPMOVZXWD / PMOVZXWD </c> instruction.
1389*bed243d3SAndroid Build Coastguard Worker ///
1390*bed243d3SAndroid Build Coastguard Worker /// \param __V
1391*bed243d3SAndroid Build Coastguard Worker /// A 128-bit vector of [8 x i16]. The lower four 16-bit elements are
1392*bed243d3SAndroid Build Coastguard Worker /// zero-extended to 32-bit values.
1393*bed243d3SAndroid Build Coastguard Worker /// \returns A 128-bit vector of [4 x i32] containing the zero-extended values.
_mm_cvtepu16_epi32(__m128i __V)1394*bed243d3SAndroid Build Coastguard Worker static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu16_epi32(__m128i __V) {
1395*bed243d3SAndroid Build Coastguard Worker return (__m128i) __builtin_convertvector(
1396*bed243d3SAndroid Build Coastguard Worker __builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1, 2, 3), __v4si);
1397*bed243d3SAndroid Build Coastguard Worker }
1398*bed243d3SAndroid Build Coastguard Worker
1399*bed243d3SAndroid Build Coastguard Worker /// Zero-extends each of the lower two 16-bit integer elements of a
1400*bed243d3SAndroid Build Coastguard Worker /// 128-bit integer vector of [8 x i16] to 64-bit values and returns them in
1401*bed243d3SAndroid Build Coastguard Worker /// a 128-bit vector of [2 x i64]. The upper six elements of the input vector
1402*bed243d3SAndroid Build Coastguard Worker /// are unused.
1403*bed243d3SAndroid Build Coastguard Worker ///
1404*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
1405*bed243d3SAndroid Build Coastguard Worker ///
1406*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPMOVZXWQ / PMOVZXWQ </c> instruction.
1407*bed243d3SAndroid Build Coastguard Worker ///
1408*bed243d3SAndroid Build Coastguard Worker /// \param __V
1409*bed243d3SAndroid Build Coastguard Worker /// A 128-bit vector of [8 x i16]. The lower two 16-bit elements are
1410*bed243d3SAndroid Build Coastguard Worker /// zero-extended to 64-bit values.
1411*bed243d3SAndroid Build Coastguard Worker /// \returns A 128-bit vector of [2 x i64] containing the zero-extended values.
_mm_cvtepu16_epi64(__m128i __V)1412*bed243d3SAndroid Build Coastguard Worker static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu16_epi64(__m128i __V) {
1413*bed243d3SAndroid Build Coastguard Worker return (__m128i) __builtin_convertvector(
1414*bed243d3SAndroid Build Coastguard Worker __builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1), __v2di);
1415*bed243d3SAndroid Build Coastguard Worker }
1416*bed243d3SAndroid Build Coastguard Worker
1417*bed243d3SAndroid Build Coastguard Worker /// Zero-extends each of the lower two 32-bit integer elements of a
1418*bed243d3SAndroid Build Coastguard Worker /// 128-bit integer vector of [4 x i32] to 64-bit values and returns them in
1419*bed243d3SAndroid Build Coastguard Worker /// a 128-bit vector of [2 x i64]. The upper two elements of the input vector
1420*bed243d3SAndroid Build Coastguard Worker /// are unused.
1421*bed243d3SAndroid Build Coastguard Worker ///
1422*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
1423*bed243d3SAndroid Build Coastguard Worker ///
1424*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPMOVZXDQ / PMOVZXDQ </c> instruction.
1425*bed243d3SAndroid Build Coastguard Worker ///
1426*bed243d3SAndroid Build Coastguard Worker /// \param __V
1427*bed243d3SAndroid Build Coastguard Worker /// A 128-bit vector of [4 x i32]. The lower two 32-bit elements are
1428*bed243d3SAndroid Build Coastguard Worker /// zero-extended to 64-bit values.
1429*bed243d3SAndroid Build Coastguard Worker /// \returns A 128-bit vector of [2 x i64] containing the zero-extended values.
_mm_cvtepu32_epi64(__m128i __V)1430*bed243d3SAndroid Build Coastguard Worker static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu32_epi64(__m128i __V) {
1431*bed243d3SAndroid Build Coastguard Worker return (__m128i) __builtin_convertvector(
1432*bed243d3SAndroid Build Coastguard Worker __builtin_shufflevector((__v4su)__V, (__v4su)__V, 0, 1), __v2di);
1433*bed243d3SAndroid Build Coastguard Worker }
1434*bed243d3SAndroid Build Coastguard Worker
1435*bed243d3SAndroid Build Coastguard Worker /* SSE4 Pack with Unsigned Saturation. */
1436*bed243d3SAndroid Build Coastguard Worker /// Converts, with saturation, 32-bit signed integers from both 128-bit integer
1437*bed243d3SAndroid Build Coastguard Worker /// vector operands into 16-bit unsigned integers, and returns the packed
1438*bed243d3SAndroid Build Coastguard Worker /// result.
1439*bed243d3SAndroid Build Coastguard Worker ///
1440*bed243d3SAndroid Build Coastguard Worker /// Values greater than 0xFFFF are saturated to 0xFFFF. Values less than
1441*bed243d3SAndroid Build Coastguard Worker /// 0x0000 are saturated to 0x0000.
1442*bed243d3SAndroid Build Coastguard Worker ///
1443*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
1444*bed243d3SAndroid Build Coastguard Worker ///
1445*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPACKUSDW / PACKUSDW </c> instruction.
1446*bed243d3SAndroid Build Coastguard Worker ///
1447*bed243d3SAndroid Build Coastguard Worker /// \param __V1
1448*bed243d3SAndroid Build Coastguard Worker /// A 128-bit vector of [4 x i32]. The converted [4 x i16] values are
1449*bed243d3SAndroid Build Coastguard Worker /// written to the lower 64 bits of the result.
1450*bed243d3SAndroid Build Coastguard Worker /// \param __V2
1451*bed243d3SAndroid Build Coastguard Worker /// A 128-bit vector of [4 x i32]. The converted [4 x i16] values are
1452*bed243d3SAndroid Build Coastguard Worker /// written to the higher 64 bits of the result.
1453*bed243d3SAndroid Build Coastguard Worker /// \returns A 128-bit vector of [8 x i16] containing the converted values.
_mm_packus_epi32(__m128i __V1,__m128i __V2)1454*bed243d3SAndroid Build Coastguard Worker static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi32(__m128i __V1,
1455*bed243d3SAndroid Build Coastguard Worker __m128i __V2) {
1456*bed243d3SAndroid Build Coastguard Worker return (__m128i)__builtin_ia32_packusdw128((__v4si)__V1, (__v4si)__V2);
1457*bed243d3SAndroid Build Coastguard Worker }
1458*bed243d3SAndroid Build Coastguard Worker
1459*bed243d3SAndroid Build Coastguard Worker /* SSE4 Multiple Packed Sums of Absolute Difference. */
/// Computes eight sums of absolute differences between groups of four
///    unsigned 8-bit integers. Each sum compares four consecutive elements of
///    \a X, taken from a window that advances by one element per result,
///    against a fixed group of four elements of \a Y. Bit fields of the
///    immediate operand select the starting offsets within \a X and \a Y.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_mpsadbw_epu8(__m128i X, __m128i Y, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VMPSADBW / MPSADBW </c> instruction.
///
/// \param X
///    A 128-bit vector of [16 x i8].
/// \param Y
///    A 128-bit vector of [16 x i8].
/// \param M
///    An 8-bit immediate operand specifying how the absolute differences are to
///    be calculated, according to the following algorithm:
///    \code
///    // M2 represents bit 2 of the immediate operand
///    // M10 represents bits [1:0] of the immediate operand
///    i = M2 * 4;
///    j = M10 * 4;
///    for (k = 0; k < 8; k = k + 1) {
///      d0 = abs(X[i + k + 0] - Y[j + 0]);
///      d1 = abs(X[i + k + 1] - Y[j + 1]);
///      d2 = abs(X[i + k + 2] - Y[j + 2]);
///      d3 = abs(X[i + k + 3] - Y[j + 3]);
///      r[k] = d0 + d1 + d2 + d3;
///    }
///    \endcode
/// \returns A 128-bit integer vector containing the sums of the sets of
///    absolute differences between both operands.
#define _mm_mpsadbw_epu8(X, Y, M)                                              \
  ((__m128i)__builtin_ia32_mpsadbw128((__v16qi)(__m128i)(X),                   \
                                      (__v16qi)(__m128i)(Y), (M)))
1498*bed243d3SAndroid Build Coastguard Worker
1499*bed243d3SAndroid Build Coastguard Worker /// Finds the minimum unsigned 16-bit element in the input 128-bit
1500*bed243d3SAndroid Build Coastguard Worker /// vector of [8 x u16] and returns it along with its index.
1501*bed243d3SAndroid Build Coastguard Worker ///
1502*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
1503*bed243d3SAndroid Build Coastguard Worker ///
1504*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPHMINPOSUW / PHMINPOSUW </c>
1505*bed243d3SAndroid Build Coastguard Worker /// instruction.
1506*bed243d3SAndroid Build Coastguard Worker ///
1507*bed243d3SAndroid Build Coastguard Worker /// \param __V
1508*bed243d3SAndroid Build Coastguard Worker /// A 128-bit vector of [8 x u16].
1509*bed243d3SAndroid Build Coastguard Worker /// \returns A 128-bit value where bits [15:0] contain the minimum value found
1510*bed243d3SAndroid Build Coastguard Worker /// in parameter \a __V, bits [18:16] contain the index of the minimum value,
1511*bed243d3SAndroid Build Coastguard Worker /// and the remaining bits are set to 0.
_mm_minpos_epu16(__m128i __V)1512*bed243d3SAndroid Build Coastguard Worker static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_minpos_epu16(__m128i __V) {
1513*bed243d3SAndroid Build Coastguard Worker return (__m128i)__builtin_ia32_phminposuw128((__v8hi)__V);
1514*bed243d3SAndroid Build Coastguard Worker }
1515*bed243d3SAndroid Build Coastguard Worker
1516*bed243d3SAndroid Build Coastguard Worker /* Handle the SSE4.2 definitions here; __DEFAULT_FN_ATTRS is redefined below to target "sse4.2". */
1517*bed243d3SAndroid Build Coastguard Worker
1518*bed243d3SAndroid Build Coastguard Worker /* These definitions are normally in nmmintrin.h, but GCC puts them in here,
1519*bed243d3SAndroid Build Coastguard Worker so we do the same. */
1520*bed243d3SAndroid Build Coastguard Worker
1521*bed243d3SAndroid Build Coastguard Worker #undef __DEFAULT_FN_ATTRS
1522*bed243d3SAndroid Build Coastguard Worker #define __DEFAULT_FN_ATTRS \
1523*bed243d3SAndroid Build Coastguard Worker __attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))
1524*bed243d3SAndroid Build Coastguard Worker
1525*bed243d3SAndroid Build Coastguard Worker /* These specify the type of data that we're comparing (bits [1:0] of the immediate operand). */
1526*bed243d3SAndroid Build Coastguard Worker #define _SIDD_UBYTE_OPS 0x00
1527*bed243d3SAndroid Build Coastguard Worker #define _SIDD_UWORD_OPS 0x01
1528*bed243d3SAndroid Build Coastguard Worker #define _SIDD_SBYTE_OPS 0x02
1529*bed243d3SAndroid Build Coastguard Worker #define _SIDD_SWORD_OPS 0x03
1530*bed243d3SAndroid Build Coastguard Worker
1531*bed243d3SAndroid Build Coastguard Worker /* These specify the type of comparison operation (bits [3:2] of the immediate operand). */
1532*bed243d3SAndroid Build Coastguard Worker #define _SIDD_CMP_EQUAL_ANY 0x00
1533*bed243d3SAndroid Build Coastguard Worker #define _SIDD_CMP_RANGES 0x04
1534*bed243d3SAndroid Build Coastguard Worker #define _SIDD_CMP_EQUAL_EACH 0x08
1535*bed243d3SAndroid Build Coastguard Worker #define _SIDD_CMP_EQUAL_ORDERED 0x0c
1536*bed243d3SAndroid Build Coastguard Worker
1537*bed243d3SAndroid Build Coastguard Worker /* These macros specify the polarity of the operation (bits [5:4] of the immediate operand). */
1538*bed243d3SAndroid Build Coastguard Worker #define _SIDD_POSITIVE_POLARITY 0x00
1539*bed243d3SAndroid Build Coastguard Worker #define _SIDD_NEGATIVE_POLARITY 0x10
1540*bed243d3SAndroid Build Coastguard Worker #define _SIDD_MASKED_POSITIVE_POLARITY 0x20
1541*bed243d3SAndroid Build Coastguard Worker #define _SIDD_MASKED_NEGATIVE_POLARITY 0x30
1542*bed243d3SAndroid Build Coastguard Worker
1543*bed243d3SAndroid Build Coastguard Worker /* These macros are used in _mm_cmpXstri() to select which set-bit index is returned (bit [6] of the immediate operand). */
1544*bed243d3SAndroid Build Coastguard Worker #define _SIDD_LEAST_SIGNIFICANT 0x00
1545*bed243d3SAndroid Build Coastguard Worker #define _SIDD_MOST_SIGNIFICANT 0x40
1546*bed243d3SAndroid Build Coastguard Worker
1547*bed243d3SAndroid Build Coastguard Worker /* These macros are used in _mm_cmpXstrm() to select the format of the returned mask (bit [6] of the immediate operand). */
1548*bed243d3SAndroid Build Coastguard Worker #define _SIDD_BIT_MASK 0x00
1549*bed243d3SAndroid Build Coastguard Worker #define _SIDD_UNIT_MASK 0x40
1550*bed243d3SAndroid Build Coastguard Worker
1551*bed243d3SAndroid Build Coastguard Worker /* SSE4.2 Packed Comparison Intrinsics. */
1552*bed243d3SAndroid Build Coastguard Worker /// Uses the immediate operand \a M to perform a comparison of string
1553*bed243d3SAndroid Build Coastguard Worker /// data with implicitly defined lengths that is contained in source operands
1554*bed243d3SAndroid Build Coastguard Worker /// \a A and \a B. Returns a 128-bit integer vector representing the result
1555*bed243d3SAndroid Build Coastguard Worker /// mask of the comparison.
1556*bed243d3SAndroid Build Coastguard Worker ///
1557*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
1558*bed243d3SAndroid Build Coastguard Worker ///
1559*bed243d3SAndroid Build Coastguard Worker /// \code
1560*bed243d3SAndroid Build Coastguard Worker /// __m128i _mm_cmpistrm(__m128i A, __m128i B, const int M);
1561*bed243d3SAndroid Build Coastguard Worker /// \endcode
1562*bed243d3SAndroid Build Coastguard Worker ///
1563*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPCMPISTRM / PCMPISTRM </c>
1564*bed243d3SAndroid Build Coastguard Worker /// instruction.
1565*bed243d3SAndroid Build Coastguard Worker ///
1566*bed243d3SAndroid Build Coastguard Worker /// \param A
1567*bed243d3SAndroid Build Coastguard Worker /// A 128-bit integer vector containing one of the source operands to be
1568*bed243d3SAndroid Build Coastguard Worker /// compared.
1569*bed243d3SAndroid Build Coastguard Worker /// \param B
1570*bed243d3SAndroid Build Coastguard Worker /// A 128-bit integer vector containing one of the source operands to be
1571*bed243d3SAndroid Build Coastguard Worker /// compared.
1572*bed243d3SAndroid Build Coastguard Worker /// \param M
1573*bed243d3SAndroid Build Coastguard Worker /// An 8-bit immediate operand specifying whether the characters are bytes or
1574*bed243d3SAndroid Build Coastguard Worker /// words, the type of comparison to perform, and the format of the return
1575*bed243d3SAndroid Build Coastguard Worker /// value. \n
1576*bed243d3SAndroid Build Coastguard Worker /// Bits [1:0]: Determine source data format. \n
1577*bed243d3SAndroid Build Coastguard Worker /// 00: 16 unsigned bytes \n
1578*bed243d3SAndroid Build Coastguard Worker /// 01: 8 unsigned words \n
1579*bed243d3SAndroid Build Coastguard Worker /// 10: 16 signed bytes \n
1580*bed243d3SAndroid Build Coastguard Worker /// 11: 8 signed words \n
1581*bed243d3SAndroid Build Coastguard Worker /// Bits [3:2]: Determine comparison type and aggregation method. \n
1582*bed243d3SAndroid Build Coastguard Worker /// 00: Subset: Each character in \a B is compared for equality with all
1583*bed243d3SAndroid Build Coastguard Worker /// the characters in \a A. \n
1584*bed243d3SAndroid Build Coastguard Worker /// 01: Ranges: Each character in \a B is compared to \a A. The comparison
1585*bed243d3SAndroid Build Coastguard Worker /// basis is greater than or equal for even-indexed elements in \a A,
1586*bed243d3SAndroid Build Coastguard Worker /// and less than or equal for odd-indexed elements in \a A. \n
1587*bed243d3SAndroid Build Coastguard Worker /// 10: Match: Compare each pair of corresponding characters in \a A and
1588*bed243d3SAndroid Build Coastguard Worker /// \a B for equality. \n
1589*bed243d3SAndroid Build Coastguard Worker /// 11: Substring: Search \a B for substring matches of \a A. \n
1590*bed243d3SAndroid Build Coastguard Worker /// Bits [5:4]: Determine whether to perform a one's complement on the bit
1591*bed243d3SAndroid Build Coastguard Worker /// mask of the comparison results. \n
1592*bed243d3SAndroid Build Coastguard Worker /// 00: No effect. \n
1593*bed243d3SAndroid Build Coastguard Worker /// 01: Negate the bit mask. \n
1594*bed243d3SAndroid Build Coastguard Worker /// 10: No effect. \n
1595*bed243d3SAndroid Build Coastguard Worker /// 11: Negate the bit mask only for bits with an index less than or equal
1596*bed243d3SAndroid Build Coastguard Worker /// to the size of \a A or \a B. \n
1597*bed243d3SAndroid Build Coastguard Worker /// Bit [6]: Determines whether the result is zero-extended or expanded to 16
1598*bed243d3SAndroid Build Coastguard Worker /// bytes. \n
1599*bed243d3SAndroid Build Coastguard Worker /// 0: The result is zero-extended to 16 bytes. \n
1600*bed243d3SAndroid Build Coastguard Worker /// 1: The result is expanded to 16 bytes (this expansion is performed by
1601*bed243d3SAndroid Build Coastguard Worker /// repeating each bit 8 or 16 times).
1602*bed243d3SAndroid Build Coastguard Worker /// \returns A 128-bit integer vector representing the result mask of
1603*bed243d3SAndroid Build Coastguard Worker /// the comparison.
1604*bed243d3SAndroid Build Coastguard Worker #define _mm_cmpistrm(A, B, M) \
1605*bed243d3SAndroid Build Coastguard Worker ((__m128i)__builtin_ia32_pcmpistrm128((__v16qi)(__m128i)(A), \
1606*bed243d3SAndroid Build Coastguard Worker (__v16qi)(__m128i)(B), (int)(M)))
1607*bed243d3SAndroid Build Coastguard Worker
1608*bed243d3SAndroid Build Coastguard Worker /// Uses the immediate operand \a M to perform a comparison of string
1609*bed243d3SAndroid Build Coastguard Worker /// data with implicitly defined lengths that is contained in source operands
1610*bed243d3SAndroid Build Coastguard Worker /// \a A and \a B. Returns an integer representing the result index of the
1611*bed243d3SAndroid Build Coastguard Worker /// comparison.
1612*bed243d3SAndroid Build Coastguard Worker ///
1613*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
1614*bed243d3SAndroid Build Coastguard Worker ///
1615*bed243d3SAndroid Build Coastguard Worker /// \code
1616*bed243d3SAndroid Build Coastguard Worker /// int _mm_cmpistri(__m128i A, __m128i B, const int M);
1617*bed243d3SAndroid Build Coastguard Worker /// \endcode
1618*bed243d3SAndroid Build Coastguard Worker ///
1619*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>
1620*bed243d3SAndroid Build Coastguard Worker /// instruction.
1621*bed243d3SAndroid Build Coastguard Worker ///
1622*bed243d3SAndroid Build Coastguard Worker /// \param A
1623*bed243d3SAndroid Build Coastguard Worker /// A 128-bit integer vector containing one of the source operands to be
1624*bed243d3SAndroid Build Coastguard Worker /// compared.
1625*bed243d3SAndroid Build Coastguard Worker /// \param B
1626*bed243d3SAndroid Build Coastguard Worker /// A 128-bit integer vector containing one of the source operands to be
1627*bed243d3SAndroid Build Coastguard Worker /// compared.
1628*bed243d3SAndroid Build Coastguard Worker /// \param M
1629*bed243d3SAndroid Build Coastguard Worker /// An 8-bit immediate operand specifying whether the characters are bytes or
1630*bed243d3SAndroid Build Coastguard Worker /// words, the type of comparison to perform, and the format of the return
1631*bed243d3SAndroid Build Coastguard Worker /// value. \n
1632*bed243d3SAndroid Build Coastguard Worker /// Bits [1:0]: Determine source data format. \n
1633*bed243d3SAndroid Build Coastguard Worker /// 00: 16 unsigned bytes \n
1634*bed243d3SAndroid Build Coastguard Worker /// 01: 8 unsigned words \n
1635*bed243d3SAndroid Build Coastguard Worker /// 10: 16 signed bytes \n
1636*bed243d3SAndroid Build Coastguard Worker /// 11: 8 signed words \n
1637*bed243d3SAndroid Build Coastguard Worker /// Bits [3:2]: Determine comparison type and aggregation method. \n
1638*bed243d3SAndroid Build Coastguard Worker /// 00: Subset: Each character in \a B is compared for equality with all
1639*bed243d3SAndroid Build Coastguard Worker /// the characters in \a A. \n
1640*bed243d3SAndroid Build Coastguard Worker /// 01: Ranges: Each character in \a B is compared to \a A. The comparison
1641*bed243d3SAndroid Build Coastguard Worker /// basis is greater than or equal for even-indexed elements in \a A,
1642*bed243d3SAndroid Build Coastguard Worker /// and less than or equal for odd-indexed elements in \a A. \n
1643*bed243d3SAndroid Build Coastguard Worker /// 10: Match: Compare each pair of corresponding characters in \a A and
1644*bed243d3SAndroid Build Coastguard Worker /// \a B for equality. \n
1645*bed243d3SAndroid Build Coastguard Worker /// 11: Substring: Search \a B for substring matches of \a A. \n
1646*bed243d3SAndroid Build Coastguard Worker /// Bits [5:4]: Determine whether to perform a one's complement on the bit
1647*bed243d3SAndroid Build Coastguard Worker /// mask of the comparison results. \n
1648*bed243d3SAndroid Build Coastguard Worker /// 00: No effect. \n
1649*bed243d3SAndroid Build Coastguard Worker /// 01: Negate the bit mask. \n
1650*bed243d3SAndroid Build Coastguard Worker /// 10: No effect. \n
1651*bed243d3SAndroid Build Coastguard Worker /// 11: Negate the bit mask only for bits with an index less than or equal
1652*bed243d3SAndroid Build Coastguard Worker /// to the size of \a A or \a B. \n
1653*bed243d3SAndroid Build Coastguard Worker /// Bit [6]: Determines whether the index of the lowest set bit or the
1654*bed243d3SAndroid Build Coastguard Worker /// highest set bit is returned. \n
1655*bed243d3SAndroid Build Coastguard Worker /// 0: The index of the least significant set bit. \n
1656*bed243d3SAndroid Build Coastguard Worker /// 1: The index of the most significant set bit. \n
1657*bed243d3SAndroid Build Coastguard Worker /// \returns An integer representing the result index of the comparison.
1658*bed243d3SAndroid Build Coastguard Worker #define _mm_cmpistri(A, B, M) \
1659*bed243d3SAndroid Build Coastguard Worker ((int)__builtin_ia32_pcmpistri128((__v16qi)(__m128i)(A), \
1660*bed243d3SAndroid Build Coastguard Worker (__v16qi)(__m128i)(B), (int)(M)))
1661*bed243d3SAndroid Build Coastguard Worker
1662*bed243d3SAndroid Build Coastguard Worker /// Uses the immediate operand \a M to perform a comparison of string
1663*bed243d3SAndroid Build Coastguard Worker /// data with explicitly defined lengths that is contained in source operands
1664*bed243d3SAndroid Build Coastguard Worker /// \a A and \a B. Returns a 128-bit integer vector representing the result
1665*bed243d3SAndroid Build Coastguard Worker /// mask of the comparison.
1666*bed243d3SAndroid Build Coastguard Worker ///
1667*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
1668*bed243d3SAndroid Build Coastguard Worker ///
1669*bed243d3SAndroid Build Coastguard Worker /// \code
1670*bed243d3SAndroid Build Coastguard Worker /// __m128i _mm_cmpestrm(__m128i A, int LA, __m128i B, int LB, const int M);
1671*bed243d3SAndroid Build Coastguard Worker /// \endcode
1672*bed243d3SAndroid Build Coastguard Worker ///
1673*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPCMPESTRM / PCMPESTRM </c>
1674*bed243d3SAndroid Build Coastguard Worker /// instruction.
1675*bed243d3SAndroid Build Coastguard Worker ///
1676*bed243d3SAndroid Build Coastguard Worker /// \param A
1677*bed243d3SAndroid Build Coastguard Worker /// A 128-bit integer vector containing one of the source operands to be
1678*bed243d3SAndroid Build Coastguard Worker /// compared.
1679*bed243d3SAndroid Build Coastguard Worker /// \param LA
1680*bed243d3SAndroid Build Coastguard Worker /// An integer that specifies the length of the string in \a A.
1681*bed243d3SAndroid Build Coastguard Worker /// \param B
1682*bed243d3SAndroid Build Coastguard Worker /// A 128-bit integer vector containing one of the source operands to be
1683*bed243d3SAndroid Build Coastguard Worker /// compared.
1684*bed243d3SAndroid Build Coastguard Worker /// \param LB
1685*bed243d3SAndroid Build Coastguard Worker /// An integer that specifies the length of the string in \a B.
1686*bed243d3SAndroid Build Coastguard Worker /// \param M
1687*bed243d3SAndroid Build Coastguard Worker /// An 8-bit immediate operand specifying whether the characters are bytes or
1688*bed243d3SAndroid Build Coastguard Worker /// words, the type of comparison to perform, and the format of the return
1689*bed243d3SAndroid Build Coastguard Worker /// value. \n
1690*bed243d3SAndroid Build Coastguard Worker /// Bits [1:0]: Determine source data format. \n
1691*bed243d3SAndroid Build Coastguard Worker /// 00: 16 unsigned bytes \n
1692*bed243d3SAndroid Build Coastguard Worker /// 01: 8 unsigned words \n
1693*bed243d3SAndroid Build Coastguard Worker /// 10: 16 signed bytes \n
1694*bed243d3SAndroid Build Coastguard Worker /// 11: 8 signed words \n
1695*bed243d3SAndroid Build Coastguard Worker /// Bits [3:2]: Determine comparison type and aggregation method. \n
1696*bed243d3SAndroid Build Coastguard Worker /// 00: Subset: Each character in \a B is compared for equality with all
1697*bed243d3SAndroid Build Coastguard Worker /// the characters in \a A. \n
1698*bed243d3SAndroid Build Coastguard Worker /// 01: Ranges: Each character in \a B is compared to \a A. The comparison
1699*bed243d3SAndroid Build Coastguard Worker /// basis is greater than or equal for even-indexed elements in \a A,
1700*bed243d3SAndroid Build Coastguard Worker /// and less than or equal for odd-indexed elements in \a A. \n
1701*bed243d3SAndroid Build Coastguard Worker /// 10: Match: Compare each pair of corresponding characters in \a A and
1702*bed243d3SAndroid Build Coastguard Worker /// \a B for equality. \n
1703*bed243d3SAndroid Build Coastguard Worker /// 11: Substring: Search \a B for substring matches of \a A. \n
1704*bed243d3SAndroid Build Coastguard Worker /// Bits [5:4]: Determine whether to perform a one's complement on the bit
1705*bed243d3SAndroid Build Coastguard Worker /// mask of the comparison results. \n
1706*bed243d3SAndroid Build Coastguard Worker /// 00: No effect. \n
1707*bed243d3SAndroid Build Coastguard Worker /// 01: Negate the bit mask. \n
1708*bed243d3SAndroid Build Coastguard Worker /// 10: No effect. \n
1709*bed243d3SAndroid Build Coastguard Worker /// 11: Negate the bit mask only for bits with an index less than or equal
1710*bed243d3SAndroid Build Coastguard Worker /// to the size of \a A or \a B. \n
1711*bed243d3SAndroid Build Coastguard Worker /// Bit [6]: Determines whether the result is zero-extended or expanded to 16
1712*bed243d3SAndroid Build Coastguard Worker /// bytes. \n
1713*bed243d3SAndroid Build Coastguard Worker /// 0: The result is zero-extended to 16 bytes. \n
1714*bed243d3SAndroid Build Coastguard Worker /// 1: The result is expanded to 16 bytes (this expansion is performed by
1715*bed243d3SAndroid Build Coastguard Worker /// repeating each bit 8 or 16 times). \n
1716*bed243d3SAndroid Build Coastguard Worker /// \returns A 128-bit integer vector representing the result mask of
1717*bed243d3SAndroid Build Coastguard Worker /// the comparison.
1718*bed243d3SAndroid Build Coastguard Worker #define _mm_cmpestrm(A, LA, B, LB, M) \
1719*bed243d3SAndroid Build Coastguard Worker ((__m128i)__builtin_ia32_pcmpestrm128((__v16qi)(__m128i)(A), (int)(LA), \
1720*bed243d3SAndroid Build Coastguard Worker (__v16qi)(__m128i)(B), (int)(LB), \
1721*bed243d3SAndroid Build Coastguard Worker (int)(M)))
1722*bed243d3SAndroid Build Coastguard Worker
1723*bed243d3SAndroid Build Coastguard Worker /// Uses the immediate operand \a M to perform a comparison of string
1724*bed243d3SAndroid Build Coastguard Worker /// data with explicitly defined lengths that is contained in source operands
1725*bed243d3SAndroid Build Coastguard Worker /// \a A and \a B. Returns an integer representing the result index of the
1726*bed243d3SAndroid Build Coastguard Worker /// comparison.
1727*bed243d3SAndroid Build Coastguard Worker ///
1728*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
1729*bed243d3SAndroid Build Coastguard Worker ///
1730*bed243d3SAndroid Build Coastguard Worker /// \code
1731*bed243d3SAndroid Build Coastguard Worker /// int _mm_cmpestri(__m128i A, int LA, __m128i B, int LB, const int M);
1732*bed243d3SAndroid Build Coastguard Worker /// \endcode
1733*bed243d3SAndroid Build Coastguard Worker ///
1734*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPCMPESTRI / PCMPESTRI </c>
1735*bed243d3SAndroid Build Coastguard Worker /// instruction.
1736*bed243d3SAndroid Build Coastguard Worker ///
1737*bed243d3SAndroid Build Coastguard Worker /// \param A
1738*bed243d3SAndroid Build Coastguard Worker /// A 128-bit integer vector containing one of the source operands to be
1739*bed243d3SAndroid Build Coastguard Worker /// compared.
1740*bed243d3SAndroid Build Coastguard Worker /// \param LA
1741*bed243d3SAndroid Build Coastguard Worker /// An integer that specifies the length of the string in \a A.
1742*bed243d3SAndroid Build Coastguard Worker /// \param B
1743*bed243d3SAndroid Build Coastguard Worker /// A 128-bit integer vector containing one of the source operands to be
1744*bed243d3SAndroid Build Coastguard Worker /// compared.
1745*bed243d3SAndroid Build Coastguard Worker /// \param LB
1746*bed243d3SAndroid Build Coastguard Worker /// An integer that specifies the length of the string in \a B.
1747*bed243d3SAndroid Build Coastguard Worker /// \param M
1748*bed243d3SAndroid Build Coastguard Worker /// An 8-bit immediate operand specifying whether the characters are bytes or
1749*bed243d3SAndroid Build Coastguard Worker /// words, the type of comparison to perform, and the format of the return
1750*bed243d3SAndroid Build Coastguard Worker /// value. \n
1751*bed243d3SAndroid Build Coastguard Worker /// Bits [1:0]: Determine source data format. \n
1752*bed243d3SAndroid Build Coastguard Worker /// 00: 16 unsigned bytes \n
1753*bed243d3SAndroid Build Coastguard Worker /// 01: 8 unsigned words \n
1754*bed243d3SAndroid Build Coastguard Worker /// 10: 16 signed bytes \n
1755*bed243d3SAndroid Build Coastguard Worker /// 11: 8 signed words \n
1756*bed243d3SAndroid Build Coastguard Worker /// Bits [3:2]: Determine comparison type and aggregation method. \n
1757*bed243d3SAndroid Build Coastguard Worker /// 00: Subset: Each character in \a B is compared for equality with all
1758*bed243d3SAndroid Build Coastguard Worker /// the characters in \a A. \n
1759*bed243d3SAndroid Build Coastguard Worker /// 01: Ranges: Each character in \a B is compared to \a A. The comparison
1760*bed243d3SAndroid Build Coastguard Worker /// basis is greater than or equal for even-indexed elements in \a A,
1761*bed243d3SAndroid Build Coastguard Worker /// and less than or equal for odd-indexed elements in \a A. \n
1762*bed243d3SAndroid Build Coastguard Worker /// 10: Match: Compare each pair of corresponding characters in \a A and
1763*bed243d3SAndroid Build Coastguard Worker /// \a B for equality. \n
1764*bed243d3SAndroid Build Coastguard Worker /// 11: Substring: Search \a B for substring matches of \a A. \n
1765*bed243d3SAndroid Build Coastguard Worker /// Bits [5:4]: Determine whether to perform a one's complement on the bit
1766*bed243d3SAndroid Build Coastguard Worker /// mask of the comparison results. \n
1767*bed243d3SAndroid Build Coastguard Worker /// 00: No effect. \n
1768*bed243d3SAndroid Build Coastguard Worker /// 01: Negate the bit mask. \n
1769*bed243d3SAndroid Build Coastguard Worker /// 10: No effect. \n
1770*bed243d3SAndroid Build Coastguard Worker /// 11: Negate the bit mask only for bits with an index less than or equal
1771*bed243d3SAndroid Build Coastguard Worker /// to the size of \a A or \a B. \n
1772*bed243d3SAndroid Build Coastguard Worker /// Bit [6]: Determines whether the index of the lowest set bit or the
1773*bed243d3SAndroid Build Coastguard Worker /// highest set bit is returned. \n
1774*bed243d3SAndroid Build Coastguard Worker /// 0: The index of the least significant set bit. \n
1775*bed243d3SAndroid Build Coastguard Worker /// 1: The index of the most significant set bit. \n
1776*bed243d3SAndroid Build Coastguard Worker /// \returns An integer representing the result index of the comparison.
1777*bed243d3SAndroid Build Coastguard Worker #define _mm_cmpestri(A, LA, B, LB, M) \
1778*bed243d3SAndroid Build Coastguard Worker ((int)__builtin_ia32_pcmpestri128((__v16qi)(__m128i)(A), (int)(LA), \
1779*bed243d3SAndroid Build Coastguard Worker (__v16qi)(__m128i)(B), (int)(LB), \
1780*bed243d3SAndroid Build Coastguard Worker (int)(M)))
1781*bed243d3SAndroid Build Coastguard Worker
1782*bed243d3SAndroid Build Coastguard Worker /* SSE4.2 Packed Comparison Intrinsics and EFlag Reading. */
1783*bed243d3SAndroid Build Coastguard Worker /// Uses the immediate operand \a M to perform a comparison of string
1784*bed243d3SAndroid Build Coastguard Worker /// data with implicitly defined lengths that is contained in source operands
1785*bed243d3SAndroid Build Coastguard Worker /// \a A and \a B. Returns 1 if the bit mask is zero and the length of the
1786*bed243d3SAndroid Build Coastguard Worker /// string in \a B is the maximum; otherwise, returns 0.
1787*bed243d3SAndroid Build Coastguard Worker ///
1788*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
1789*bed243d3SAndroid Build Coastguard Worker ///
1790*bed243d3SAndroid Build Coastguard Worker /// \code
1791*bed243d3SAndroid Build Coastguard Worker /// int _mm_cmpistra(__m128i A, __m128i B, const int M);
1792*bed243d3SAndroid Build Coastguard Worker /// \endcode
1793*bed243d3SAndroid Build Coastguard Worker ///
1794*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>
1795*bed243d3SAndroid Build Coastguard Worker /// instruction.
1796*bed243d3SAndroid Build Coastguard Worker ///
1797*bed243d3SAndroid Build Coastguard Worker /// \param A
1798*bed243d3SAndroid Build Coastguard Worker /// A 128-bit integer vector containing one of the source operands to be
1799*bed243d3SAndroid Build Coastguard Worker /// compared.
1800*bed243d3SAndroid Build Coastguard Worker /// \param B
1801*bed243d3SAndroid Build Coastguard Worker /// A 128-bit integer vector containing one of the source operands to be
1802*bed243d3SAndroid Build Coastguard Worker /// compared.
1803*bed243d3SAndroid Build Coastguard Worker /// \param M
1804*bed243d3SAndroid Build Coastguard Worker /// An 8-bit immediate operand specifying whether the characters are bytes or
1805*bed243d3SAndroid Build Coastguard Worker /// words and the type of comparison to perform. \n
1806*bed243d3SAndroid Build Coastguard Worker /// Bits [1:0]: Determine source data format. \n
1807*bed243d3SAndroid Build Coastguard Worker /// 00: 16 unsigned bytes \n
1808*bed243d3SAndroid Build Coastguard Worker /// 01: 8 unsigned words \n
1809*bed243d3SAndroid Build Coastguard Worker /// 10: 16 signed bytes \n
1810*bed243d3SAndroid Build Coastguard Worker /// 11: 8 signed words \n
1811*bed243d3SAndroid Build Coastguard Worker /// Bits [3:2]: Determine comparison type and aggregation method. \n
1812*bed243d3SAndroid Build Coastguard Worker /// 00: Subset: Each character in \a B is compared for equality with all
1813*bed243d3SAndroid Build Coastguard Worker /// the characters in \a A. \n
1814*bed243d3SAndroid Build Coastguard Worker /// 01: Ranges: Each character in \a B is compared to \a A. The comparison
1815*bed243d3SAndroid Build Coastguard Worker /// basis is greater than or equal for even-indexed elements in \a A,
1816*bed243d3SAndroid Build Coastguard Worker /// and less than or equal for odd-indexed elements in \a A. \n
1817*bed243d3SAndroid Build Coastguard Worker /// 10: Match: Compare each pair of corresponding characters in \a A and
1818*bed243d3SAndroid Build Coastguard Worker /// \a B for equality. \n
1819*bed243d3SAndroid Build Coastguard Worker /// 11: Substring: Search \a B for substring matches of \a A. \n
1820*bed243d3SAndroid Build Coastguard Worker /// Bits [5:4]: Determine whether to perform a one's complement on the bit
1821*bed243d3SAndroid Build Coastguard Worker /// mask of the comparison results. \n
1822*bed243d3SAndroid Build Coastguard Worker /// 00: No effect. \n
1823*bed243d3SAndroid Build Coastguard Worker /// 01: Negate the bit mask. \n
1824*bed243d3SAndroid Build Coastguard Worker /// 10: No effect. \n
1825*bed243d3SAndroid Build Coastguard Worker /// 11: Negate the bit mask only for bits with an index less than or equal
1826*bed243d3SAndroid Build Coastguard Worker /// to the size of \a A or \a B. \n
1827*bed243d3SAndroid Build Coastguard Worker /// \returns 1 if the bit mask is zero and the length of the string in
1828*bed243d3SAndroid Build Coastguard Worker /// \a B is the maximum; otherwise, 0.
1829*bed243d3SAndroid Build Coastguard Worker #define _mm_cmpistra(A, B, M) \
1830*bed243d3SAndroid Build Coastguard Worker ((int)__builtin_ia32_pcmpistria128((__v16qi)(__m128i)(A), \
1831*bed243d3SAndroid Build Coastguard Worker (__v16qi)(__m128i)(B), (int)(M)))
1832*bed243d3SAndroid Build Coastguard Worker
1833*bed243d3SAndroid Build Coastguard Worker /// Uses the immediate operand \a M to perform a comparison of string
1834*bed243d3SAndroid Build Coastguard Worker /// data with implicitly defined lengths that is contained in source operands
1835*bed243d3SAndroid Build Coastguard Worker /// \a A and \a B. Returns 1 if the bit mask is non-zero, otherwise, returns
1836*bed243d3SAndroid Build Coastguard Worker /// 0.
1837*bed243d3SAndroid Build Coastguard Worker ///
1838*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
1839*bed243d3SAndroid Build Coastguard Worker ///
1840*bed243d3SAndroid Build Coastguard Worker /// \code
1841*bed243d3SAndroid Build Coastguard Worker /// int _mm_cmpistrc(__m128i A, __m128i B, const int M);
1842*bed243d3SAndroid Build Coastguard Worker /// \endcode
1843*bed243d3SAndroid Build Coastguard Worker ///
1844*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>
1845*bed243d3SAndroid Build Coastguard Worker /// instruction.
1846*bed243d3SAndroid Build Coastguard Worker ///
1847*bed243d3SAndroid Build Coastguard Worker /// \param A
1848*bed243d3SAndroid Build Coastguard Worker /// A 128-bit integer vector containing one of the source operands to be
1849*bed243d3SAndroid Build Coastguard Worker /// compared.
1850*bed243d3SAndroid Build Coastguard Worker /// \param B
1851*bed243d3SAndroid Build Coastguard Worker /// A 128-bit integer vector containing one of the source operands to be
1852*bed243d3SAndroid Build Coastguard Worker /// compared.
1853*bed243d3SAndroid Build Coastguard Worker /// \param M
///    An 8-bit immediate operand specifying whether the characters are bytes or
///    words and the type of comparison to perform. \n
///    Bits [1:0]: Determine source data format. \n
///      00: 16 unsigned bytes \n
///      01: 8 unsigned words \n
///      10: 16 signed bytes \n
///      11: 8 signed words \n
///    Bits [3:2]: Determine comparison type and aggregation method. \n
///      00: Subset: Each character in \a B is compared for equality with all
///          the characters in \a A. \n
///      01: Ranges: Each character in \a B is compared to \a A. The comparison
///          basis is greater than or equal for even-indexed elements in \a A,
///          and less than or equal for odd-indexed elements in \a A. \n
///      10: Match: Compare each pair of corresponding characters in \a A and
///          \a B for equality. \n
///      11: Substring: Search \a B for substring matches of \a A. \n
///    Bits [5:4]: Determine whether to perform a one's complement on the bit
///               mask of the comparison results. \n
///      00: No effect. \n
///      01: Negate the bit mask. \n
///      10: No effect. \n
///      11: Negate the bit mask only for bits with an index less than or equal
///          to the size of \a A or \a B.
/// \returns Returns 1 if the bit mask is non-zero, otherwise, returns 0.
#define _mm_cmpistrc(A, B, M)                                                  \
  ((int)__builtin_ia32_pcmpistric128((__v16qi)(__m128i)(A),                    \
                                     (__v16qi)(__m128i)(B), (int)(M)))
1881*bed243d3SAndroid Build Coastguard Worker
/// Uses the immediate operand \a M to perform a comparison of string
///    data with implicitly defined lengths that is contained in source operands
///    \a A and \a B. Returns bit 0 of the resulting bit mask.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// int _mm_cmpistro(__m128i A, __m128i B, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>
/// instruction.
///
/// \param A
///    A 128-bit integer vector containing one of the source operands to be
///    compared.
/// \param B
///    A 128-bit integer vector containing one of the source operands to be
///    compared.
/// \param M
///    An 8-bit immediate operand specifying whether the characters are bytes or
///    words and the type of comparison to perform. \n
///    Bits [1:0]: Determine source data format. \n
///      00: 16 unsigned bytes \n
///      01: 8 unsigned words \n
///      10: 16 signed bytes \n
///      11: 8 signed words \n
///    Bits [3:2]: Determine comparison type and aggregation method. \n
///      00: Subset: Each character in \a B is compared for equality with all
///          the characters in \a A. \n
///      01: Ranges: Each character in \a B is compared to \a A. The comparison
///          basis is greater than or equal for even-indexed elements in \a A,
///          and less than or equal for odd-indexed elements in \a A. \n
///      10: Match: Compare each pair of corresponding characters in \a A and
///          \a B for equality. \n
///      11: Substring: Search \a B for substring matches of \a A. \n
///    Bits [5:4]: Determine whether to perform a one's complement on the bit
///               mask of the comparison results. \n
///      00: No effect. \n
///      01: Negate the bit mask. \n
///      10: No effect. \n
///      11: Negate the bit mask only for bits with an index less than or equal
///          to the size of \a A or \a B.
/// \returns Returns bit 0 of the resulting bit mask.
#define _mm_cmpistro(A, B, M)                                                  \
  ((int)__builtin_ia32_pcmpistrio128((__v16qi)(__m128i)(A),                    \
                                     (__v16qi)(__m128i)(B), (int)(M)))
1929*bed243d3SAndroid Build Coastguard Worker
/// Uses the immediate operand \a M to perform a comparison of string
///    data with implicitly defined lengths that is contained in source operands
///    \a A and \a B. Returns 1 if the length of the string in \a A is less than
///    the maximum, otherwise, returns 0.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// int _mm_cmpistrs(__m128i A, __m128i B, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>
/// instruction.
///
/// \param A
///    A 128-bit integer vector containing one of the source operands to be
///    compared.
/// \param B
///    A 128-bit integer vector containing one of the source operands to be
///    compared.
/// \param M
///    An 8-bit immediate operand specifying whether the characters are bytes or
///    words and the type of comparison to perform. \n
///    Bits [1:0]: Determine source data format. \n
///      00: 16 unsigned bytes \n
///      01: 8 unsigned words \n
///      10: 16 signed bytes \n
///      11: 8 signed words \n
///    Bits [3:2]: Determine comparison type and aggregation method. \n
///      00: Subset: Each character in \a B is compared for equality with all
///          the characters in \a A. \n
///      01: Ranges: Each character in \a B is compared to \a A. The comparison
///          basis is greater than or equal for even-indexed elements in \a A,
///          and less than or equal for odd-indexed elements in \a A. \n
///      10: Match: Compare each pair of corresponding characters in \a A and
///          \a B for equality. \n
///      11: Substring: Search \a B for substring matches of \a A. \n
///    Bits [5:4]: Determine whether to perform a one's complement on the bit
///               mask of the comparison results. \n
///      00: No effect. \n
///      01: Negate the bit mask. \n
///      10: No effect. \n
///      11: Negate the bit mask only for bits with an index less than or equal
///          to the size of \a A or \a B.
/// \returns Returns 1 if the length of the string in \a A is less than the
///    maximum, otherwise, returns 0.
#define _mm_cmpistrs(A, B, M)                                                  \
  ((int)__builtin_ia32_pcmpistris128((__v16qi)(__m128i)(A),                    \
                                     (__v16qi)(__m128i)(B), (int)(M)))
1979*bed243d3SAndroid Build Coastguard Worker
/// Uses the immediate operand \a M to perform a comparison of string
///    data with implicitly defined lengths that is contained in source operands
///    \a A and \a B. Returns 1 if the length of the string in \a B is less than
///    the maximum, otherwise, returns 0.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// int _mm_cmpistrz(__m128i A, __m128i B, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>
/// instruction.
///
/// \param A
///    A 128-bit integer vector containing one of the source operands to be
///    compared.
/// \param B
///    A 128-bit integer vector containing one of the source operands to be
///    compared.
/// \param M
///    An 8-bit immediate operand specifying whether the characters are bytes or
///    words and the type of comparison to perform. \n
///    Bits [1:0]: Determine source data format. \n
///      00: 16 unsigned bytes \n
///      01: 8 unsigned words \n
///      10: 16 signed bytes \n
///      11: 8 signed words \n
///    Bits [3:2]: Determine comparison type and aggregation method. \n
///      00: Subset: Each character in \a B is compared for equality with all
///          the characters in \a A. \n
///      01: Ranges: Each character in \a B is compared to \a A. The comparison
///          basis is greater than or equal for even-indexed elements in \a A,
///          and less than or equal for odd-indexed elements in \a A. \n
///      10: Match: Compare each pair of corresponding characters in \a A and
///          \a B for equality. \n
///      11: Substring: Search \a B for substring matches of \a A. \n
///    Bits [5:4]: Determine whether to perform a one's complement on the bit
///               mask of the comparison results. \n
///      00: No effect. \n
///      01: Negate the bit mask. \n
///      10: No effect. \n
///      11: Negate the bit mask only for bits with an index less than or equal
///          to the size of \a A or \a B.
/// \returns Returns 1 if the length of the string in \a B is less than the
///    maximum, otherwise, returns 0.
#define _mm_cmpistrz(A, B, M)                                                  \
  ((int)__builtin_ia32_pcmpistriz128((__v16qi)(__m128i)(A),                    \
                                     (__v16qi)(__m128i)(B), (int)(M)))
2029*bed243d3SAndroid Build Coastguard Worker
/// Uses the immediate operand \a M to perform a comparison of string
///    data with explicitly defined lengths that is contained in source operands
///    \a A and \a B. Returns 1 if the bit mask is zero and the length of the
///    string in \a B is the maximum, otherwise, returns 0.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// int _mm_cmpestra(__m128i A, int LA, __m128i B, int LB, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPCMPESTRI / PCMPESTRI </c>
/// instruction.
///
/// \param A
///    A 128-bit integer vector containing one of the source operands to be
///    compared.
/// \param LA
///    An integer that specifies the length of the string in \a A.
/// \param B
///    A 128-bit integer vector containing one of the source operands to be
///    compared.
/// \param LB
///    An integer that specifies the length of the string in \a B.
/// \param M
///    An 8-bit immediate operand specifying whether the characters are bytes or
///    words and the type of comparison to perform. \n
///    Bits [1:0]: Determine source data format. \n
///      00: 16 unsigned bytes \n
///      01: 8 unsigned words \n
///      10: 16 signed bytes \n
///      11: 8 signed words \n
///    Bits [3:2]: Determine comparison type and aggregation method. \n
///      00: Subset: Each character in \a B is compared for equality with all
///          the characters in \a A. \n
///      01: Ranges: Each character in \a B is compared to \a A. The comparison
///          basis is greater than or equal for even-indexed elements in \a A,
///          and less than or equal for odd-indexed elements in \a A. \n
///      10: Match: Compare each pair of corresponding characters in \a A and
///          \a B for equality. \n
///      11: Substring: Search \a B for substring matches of \a A. \n
///    Bits [5:4]: Determine whether to perform a one's complement on the bit
///               mask of the comparison results. \n
///      00: No effect. \n
///      01: Negate the bit mask. \n
///      10: No effect. \n
///      11: Negate the bit mask only for bits with an index less than or equal
///          to the size of \a A or \a B.
/// \returns Returns 1 if the bit mask is zero and the length of the string in
///    \a B is the maximum, otherwise, returns 0.
#define _mm_cmpestra(A, LA, B, LB, M)                                          \
  ((int)__builtin_ia32_pcmpestria128((__v16qi)(__m128i)(A), (int)(LA),         \
                                     (__v16qi)(__m128i)(B), (int)(LB),         \
                                     (int)(M)))
2084*bed243d3SAndroid Build Coastguard Worker
/// Uses the immediate operand \a M to perform a comparison of string
///    data with explicitly defined lengths that is contained in source operands
///    \a A and \a B. Returns 1 if the resulting mask is non-zero, otherwise,
///    returns 0.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// int _mm_cmpestrc(__m128i A, int LA, __m128i B, int LB, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPCMPESTRI / PCMPESTRI </c>
/// instruction.
///
/// \param A
///    A 128-bit integer vector containing one of the source operands to be
///    compared.
/// \param LA
///    An integer that specifies the length of the string in \a A.
/// \param B
///    A 128-bit integer vector containing one of the source operands to be
///    compared.
/// \param LB
///    An integer that specifies the length of the string in \a B.
/// \param M
///    An 8-bit immediate operand specifying whether the characters are bytes or
///    words and the type of comparison to perform. \n
///    Bits [1:0]: Determine source data format. \n
///      00: 16 unsigned bytes \n
///      01: 8 unsigned words \n
///      10: 16 signed bytes \n
///      11: 8 signed words \n
///    Bits [3:2]: Determine comparison type and aggregation method. \n
///      00: Subset: Each character in \a B is compared for equality with all
///          the characters in \a A. \n
///      01: Ranges: Each character in \a B is compared to \a A. The comparison
///          basis is greater than or equal for even-indexed elements in \a A,
///          and less than or equal for odd-indexed elements in \a A. \n
///      10: Match: Compare each pair of corresponding characters in \a A and
///          \a B for equality. \n
///      11: Substring: Search \a B for substring matches of \a A. \n
///    Bits [5:4]: Determine whether to perform a one's complement on the bit
///               mask of the comparison results. \n
///      00: No effect. \n
///      01: Negate the bit mask. \n
///      10: No effect. \n
///      11: Negate the bit mask only for bits with an index less than or equal
///          to the size of \a A or \a B.
/// \returns Returns 1 if the resulting mask is non-zero, otherwise, returns 0.
#define _mm_cmpestrc(A, LA, B, LB, M)                                          \
  ((int)__builtin_ia32_pcmpestric128((__v16qi)(__m128i)(A), (int)(LA),         \
                                     (__v16qi)(__m128i)(B), (int)(LB),         \
                                     (int)(M)))
2138*bed243d3SAndroid Build Coastguard Worker
/// Uses the immediate operand \a M to perform a comparison of string
///    data with explicitly defined lengths that is contained in source operands
///    \a A and \a B. Returns bit 0 of the resulting bit mask.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// int _mm_cmpestro(__m128i A, int LA, __m128i B, int LB, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPCMPESTRI / PCMPESTRI </c>
/// instruction.
///
/// \param A
///    A 128-bit integer vector containing one of the source operands to be
///    compared.
/// \param LA
///    An integer that specifies the length of the string in \a A.
/// \param B
///    A 128-bit integer vector containing one of the source operands to be
///    compared.
/// \param LB
///    An integer that specifies the length of the string in \a B.
/// \param M
///    An 8-bit immediate operand specifying whether the characters are bytes or
///    words and the type of comparison to perform. \n
///    Bits [1:0]: Determine source data format. \n
///      00: 16 unsigned bytes \n
///      01: 8 unsigned words \n
///      10: 16 signed bytes \n
///      11: 8 signed words \n
///    Bits [3:2]: Determine comparison type and aggregation method. \n
///      00: Subset: Each character in \a B is compared for equality with all
///          the characters in \a A. \n
///      01: Ranges: Each character in \a B is compared to \a A. The comparison
///          basis is greater than or equal for even-indexed elements in \a A,
///          and less than or equal for odd-indexed elements in \a A. \n
///      10: Match: Compare each pair of corresponding characters in \a A and
///          \a B for equality. \n
///      11: Substring: Search \a B for substring matches of \a A. \n
///    Bits [5:4]: Determine whether to perform a one's complement on the bit
///               mask of the comparison results. \n
///      00: No effect. \n
///      01: Negate the bit mask. \n
///      10: No effect. \n
///      11: Negate the bit mask only for bits with an index less than or equal
///          to the size of \a A or \a B.
/// \returns Returns bit 0 of the resulting bit mask.
#define _mm_cmpestro(A, LA, B, LB, M)                                          \
  ((int)__builtin_ia32_pcmpestrio128((__v16qi)(__m128i)(A), (int)(LA),         \
                                     (__v16qi)(__m128i)(B), (int)(LB),         \
                                     (int)(M)))
2191*bed243d3SAndroid Build Coastguard Worker
/// Uses the immediate operand \a M to perform a comparison of string
///    data with explicitly defined lengths that is contained in source operands
///    \a A and \a B. Returns 1 if the length of the string in \a A is less than
///    the maximum, otherwise, returns 0.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// int _mm_cmpestrs(__m128i A, int LA, __m128i B, int LB, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPCMPESTRI / PCMPESTRI </c>
/// instruction.
///
/// \param A
///    A 128-bit integer vector containing one of the source operands to be
///    compared.
/// \param LA
///    An integer that specifies the length of the string in \a A.
/// \param B
///    A 128-bit integer vector containing one of the source operands to be
///    compared.
/// \param LB
///    An integer that specifies the length of the string in \a B.
/// \param M
///    An 8-bit immediate operand specifying whether the characters are bytes or
///    words and the type of comparison to perform. \n
///    Bits [1:0]: Determine source data format. \n
///      00: 16 unsigned bytes \n
///      01: 8 unsigned words \n
///      10: 16 signed bytes \n
///      11: 8 signed words \n
///    Bits [3:2]: Determine comparison type and aggregation method. \n
///      00: Subset: Each character in \a B is compared for equality with all
///          the characters in \a A. \n
///      01: Ranges: Each character in \a B is compared to \a A. The comparison
///          basis is greater than or equal for even-indexed elements in \a A,
///          and less than or equal for odd-indexed elements in \a A. \n
///      10: Match: Compare each pair of corresponding characters in \a A and
///          \a B for equality. \n
///      11: Substring: Search \a B for substring matches of \a A. \n
///    Bits [5:4]: Determine whether to perform a one's complement on the bit
///               mask of the comparison results. \n
///      00: No effect. \n
///      01: Negate the bit mask. \n
///      10: No effect. \n
///      11: Negate the bit mask only for bits with an index less than or equal
///          to the size of \a A or \a B.
/// \returns Returns 1 if the length of the string in \a A is less than the
///    maximum, otherwise, returns 0.
#define _mm_cmpestrs(A, LA, B, LB, M)                                          \
  ((int)__builtin_ia32_pcmpestris128((__v16qi)(__m128i)(A), (int)(LA),         \
                                     (__v16qi)(__m128i)(B), (int)(LB),         \
                                     (int)(M)))
2246*bed243d3SAndroid Build Coastguard Worker
/// Uses the immediate operand \a M to perform a comparison of string
///    data with explicitly defined lengths that is contained in source operands
///    \a A and \a B. Returns 1 if the length of the string in \a B is less than
///    the maximum number of elements the vector can hold (16 bytes or 8 words,
///    as selected by bits [1:0] of \a M), otherwise, returns 0.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// int _mm_cmpestrz(__m128i A, int LA, __m128i B, int LB, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPCMPESTRI / PCMPESTRI </c>
/// instruction.
///
/// \param A
///    A 128-bit integer vector containing one of the source operands to be
///    compared.
/// \param LA
///    An integer that specifies the length of the string in \a A.
/// \param B
///    A 128-bit integer vector containing one of the source operands to be
///    compared.
/// \param LB
///    An integer that specifies the length of the string in \a B.
/// \param M
///    An 8-bit immediate operand specifying whether the characters are bytes or
///    words and the type of comparison to perform. \n
///    Bits [1:0]: Determine source data format. \n
///      00: 16 unsigned bytes \n
///      01: 8 unsigned words \n
///      10: 16 signed bytes \n
///      11: 8 signed words \n
///    Bits [3:2]: Determine comparison type and aggregation method. \n
///      00: Subset: Each character in \a B is compared for equality with all
///          the characters in \a A. \n
///      01: Ranges: Each character in \a B is compared to \a A. The comparison
///          basis is greater than or equal for even-indexed elements in \a A,
///          and less than or equal for odd-indexed elements in \a A. \n
///      10: Match: Compare each pair of corresponding characters in \a A and
///          \a B for equality. \n
///      11: Substring: Search \a B for substring matches of \a A. \n
///    Bits [5:4]: Determine whether to perform a one's complement on the bit
///                mask of the comparison results. \n
///      00: No effect. \n
///      01: Negate the bit mask. \n
///      10: No effect. \n
///      11: Negate the bit mask only for bits with an index less than the
///          length of the string in \a B.
/// \returns Returns 1 if the length of the string in \a B is less than the
///    maximum number of elements (16 bytes or 8 words), otherwise, returns 0.
#define _mm_cmpestrz(A, LA, B, LB, M)                                          \
  ((int)__builtin_ia32_pcmpestriz128(                                          \
      (__v16qi)(__m128i)(A), (int)(LA),                                        \
      (__v16qi)(__m128i)(B), (int)(LB),                                        \
      (int)(M)))
2300*bed243d3SAndroid Build Coastguard Worker
2301*bed243d3SAndroid Build Coastguard Worker /* SSE4.2 Compare Packed Data -- Greater Than. */
2302*bed243d3SAndroid Build Coastguard Worker /// Compares each of the corresponding 64-bit values of the 128-bit
2303*bed243d3SAndroid Build Coastguard Worker /// integer vectors to determine if the values in the first operand are
2304*bed243d3SAndroid Build Coastguard Worker /// greater than those in the second operand.
2305*bed243d3SAndroid Build Coastguard Worker ///
2306*bed243d3SAndroid Build Coastguard Worker /// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
2307*bed243d3SAndroid Build Coastguard Worker ///
2308*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
2309*bed243d3SAndroid Build Coastguard Worker ///
2310*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPCMPGTQ / PCMPGTQ </c> instruction.
2311*bed243d3SAndroid Build Coastguard Worker ///
2312*bed243d3SAndroid Build Coastguard Worker /// \param __V1
2313*bed243d3SAndroid Build Coastguard Worker /// A 128-bit integer vector.
2314*bed243d3SAndroid Build Coastguard Worker /// \param __V2
2315*bed243d3SAndroid Build Coastguard Worker /// A 128-bit integer vector.
2316*bed243d3SAndroid Build Coastguard Worker /// \returns A 128-bit integer vector containing the comparison results.
_mm_cmpgt_epi64(__m128i __V1,__m128i __V2)2317*bed243d3SAndroid Build Coastguard Worker static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi64(__m128i __V1,
2318*bed243d3SAndroid Build Coastguard Worker __m128i __V2) {
2319*bed243d3SAndroid Build Coastguard Worker return (__m128i)((__v2di)__V1 > (__v2di)__V2);
2320*bed243d3SAndroid Build Coastguard Worker }
2321*bed243d3SAndroid Build Coastguard Worker
2322*bed243d3SAndroid Build Coastguard Worker #undef __DEFAULT_FN_ATTRS
2323*bed243d3SAndroid Build Coastguard Worker
2324*bed243d3SAndroid Build Coastguard Worker #include <popcntintrin.h>
2325*bed243d3SAndroid Build Coastguard Worker
2326*bed243d3SAndroid Build Coastguard Worker #include <crc32intrin.h>
2327*bed243d3SAndroid Build Coastguard Worker
2328*bed243d3SAndroid Build Coastguard Worker #endif /* __SMMINTRIN_H */
2329