xref: /aosp_15_r20/prebuilts/sdk/renderscript/clang-include/avxintrin.h (revision 344a7f5ef16c479e7a7f54ee6567a9d112f9e72b)
1*344a7f5eSAndroid Build Coastguard Worker /*===---- avxintrin.h - AVX intrinsics -------------------------------------===
2*344a7f5eSAndroid Build Coastguard Worker  *
3*344a7f5eSAndroid Build Coastguard Worker  * Permission is hereby granted, free of charge, to any person obtaining a copy
4*344a7f5eSAndroid Build Coastguard Worker  * of this software and associated documentation files (the "Software"), to deal
5*344a7f5eSAndroid Build Coastguard Worker  * in the Software without restriction, including without limitation the rights
6*344a7f5eSAndroid Build Coastguard Worker  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7*344a7f5eSAndroid Build Coastguard Worker  * copies of the Software, and to permit persons to whom the Software is
8*344a7f5eSAndroid Build Coastguard Worker  * furnished to do so, subject to the following conditions:
9*344a7f5eSAndroid Build Coastguard Worker  *
10*344a7f5eSAndroid Build Coastguard Worker  * The above copyright notice and this permission notice shall be included in
11*344a7f5eSAndroid Build Coastguard Worker  * all copies or substantial portions of the Software.
12*344a7f5eSAndroid Build Coastguard Worker  *
13*344a7f5eSAndroid Build Coastguard Worker  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14*344a7f5eSAndroid Build Coastguard Worker  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15*344a7f5eSAndroid Build Coastguard Worker  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16*344a7f5eSAndroid Build Coastguard Worker  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17*344a7f5eSAndroid Build Coastguard Worker  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18*344a7f5eSAndroid Build Coastguard Worker  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19*344a7f5eSAndroid Build Coastguard Worker  * THE SOFTWARE.
20*344a7f5eSAndroid Build Coastguard Worker  *
21*344a7f5eSAndroid Build Coastguard Worker  *===-----------------------------------------------------------------------===
22*344a7f5eSAndroid Build Coastguard Worker  */
23*344a7f5eSAndroid Build Coastguard Worker 
24*344a7f5eSAndroid Build Coastguard Worker #ifndef __IMMINTRIN_H
25*344a7f5eSAndroid Build Coastguard Worker #error "Never use <avxintrin.h> directly; include <immintrin.h> instead."
26*344a7f5eSAndroid Build Coastguard Worker #endif
27*344a7f5eSAndroid Build Coastguard Worker 
28*344a7f5eSAndroid Build Coastguard Worker #ifndef __AVXINTRIN_H
29*344a7f5eSAndroid Build Coastguard Worker #define __AVXINTRIN_H
30*344a7f5eSAndroid Build Coastguard Worker 
/* Internal 256-bit (32-byte) vector types, one per element type. The
 * intrinsics in this file cast their arguments to these types before doing
 * element-wise arithmetic or calling a builtin. */
typedef double    __v4df  __attribute__((__vector_size__(32)));
typedef float     __v8sf  __attribute__((__vector_size__(32)));
typedef long long __v4di  __attribute__((__vector_size__(32)));
typedef int       __v8si  __attribute__((__vector_size__(32)));
typedef short     __v16hi __attribute__((__vector_size__(32)));
typedef char      __v32qi __attribute__((__vector_size__(32)));

/* Unsigned counterparts of the integer vector types above. */
typedef unsigned long long __v4du  __attribute__((__vector_size__(32)));
typedef unsigned int       __v8su  __attribute__((__vector_size__(32)));
typedef unsigned short     __v16hu __attribute__((__vector_size__(32)));
typedef unsigned char      __v32qu __attribute__((__vector_size__(32)));

/* Plain char has no fixed signedness, so an explicitly signed byte vector is
 * provided as well. It is an implementation detail and should not appear in
 * the public interface. */
typedef signed char __v32qs __attribute__((__vector_size__(32)));

/* Public 256-bit vector types: packed float, packed double and integer. */
typedef float     __m256  __attribute__((__vector_size__(32)));
typedef double    __m256d __attribute__((__vector_size__(32)));
typedef long long __m256i __attribute__((__vector_size__(32)));

/* Attributes applied to every intrinsic function in this file: always
 * inlined, no debug info, and compiled with the "avx" target feature. */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("avx")))
54*344a7f5eSAndroid Build Coastguard Worker 
55*344a7f5eSAndroid Build Coastguard Worker /* Arithmetic */
56*344a7f5eSAndroid Build Coastguard Worker /// \brief Adds two 256-bit vectors of [4 x double].
57*344a7f5eSAndroid Build Coastguard Worker ///
58*344a7f5eSAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
59*344a7f5eSAndroid Build Coastguard Worker ///
60*344a7f5eSAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VADDPD / ADDPD instruction.
61*344a7f5eSAndroid Build Coastguard Worker ///
62*344a7f5eSAndroid Build Coastguard Worker /// \param __a
63*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing one of the source operands.
64*344a7f5eSAndroid Build Coastguard Worker /// \param __b
65*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing one of the source operands.
66*344a7f5eSAndroid Build Coastguard Worker /// \returns A 256-bit vector of [4 x double] containing the sums of both
67*344a7f5eSAndroid Build Coastguard Worker ///    operands.
68*344a7f5eSAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_add_pd(__m256d __a,__m256d __b)69*344a7f5eSAndroid Build Coastguard Worker _mm256_add_pd(__m256d __a, __m256d __b)
70*344a7f5eSAndroid Build Coastguard Worker {
71*344a7f5eSAndroid Build Coastguard Worker   return (__m256d)((__v4df)__a+(__v4df)__b);
72*344a7f5eSAndroid Build Coastguard Worker }
73*344a7f5eSAndroid Build Coastguard Worker 
74*344a7f5eSAndroid Build Coastguard Worker /// \brief Adds two 256-bit vectors of [8 x float].
75*344a7f5eSAndroid Build Coastguard Worker ///
76*344a7f5eSAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
77*344a7f5eSAndroid Build Coastguard Worker ///
78*344a7f5eSAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VADDPS / ADDPS instruction.
79*344a7f5eSAndroid Build Coastguard Worker ///
80*344a7f5eSAndroid Build Coastguard Worker /// \param __a
81*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing one of the source operands.
82*344a7f5eSAndroid Build Coastguard Worker /// \param __b
83*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing one of the source operands.
84*344a7f5eSAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] containing the sums of both
85*344a7f5eSAndroid Build Coastguard Worker ///    operands.
86*344a7f5eSAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_add_ps(__m256 __a,__m256 __b)87*344a7f5eSAndroid Build Coastguard Worker _mm256_add_ps(__m256 __a, __m256 __b)
88*344a7f5eSAndroid Build Coastguard Worker {
89*344a7f5eSAndroid Build Coastguard Worker   return (__m256)((__v8sf)__a+(__v8sf)__b);
90*344a7f5eSAndroid Build Coastguard Worker }
91*344a7f5eSAndroid Build Coastguard Worker 
92*344a7f5eSAndroid Build Coastguard Worker /// \brief Subtracts two 256-bit vectors of [4 x double].
93*344a7f5eSAndroid Build Coastguard Worker ///
94*344a7f5eSAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
95*344a7f5eSAndroid Build Coastguard Worker ///
96*344a7f5eSAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VSUBPD / SUBPD instruction.
97*344a7f5eSAndroid Build Coastguard Worker ///
98*344a7f5eSAndroid Build Coastguard Worker /// \param __a
99*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing the minuend.
100*344a7f5eSAndroid Build Coastguard Worker /// \param __b
101*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing the subtrahend.
102*344a7f5eSAndroid Build Coastguard Worker /// \returns A 256-bit vector of [4 x double] containing the differences between
103*344a7f5eSAndroid Build Coastguard Worker ///    both operands.
104*344a7f5eSAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_sub_pd(__m256d __a,__m256d __b)105*344a7f5eSAndroid Build Coastguard Worker _mm256_sub_pd(__m256d __a, __m256d __b)
106*344a7f5eSAndroid Build Coastguard Worker {
107*344a7f5eSAndroid Build Coastguard Worker   return (__m256d)((__v4df)__a-(__v4df)__b);
108*344a7f5eSAndroid Build Coastguard Worker }
109*344a7f5eSAndroid Build Coastguard Worker 
110*344a7f5eSAndroid Build Coastguard Worker /// \brief Subtracts two 256-bit vectors of [8 x float].
111*344a7f5eSAndroid Build Coastguard Worker ///
112*344a7f5eSAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
113*344a7f5eSAndroid Build Coastguard Worker ///
114*344a7f5eSAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VSUBPS / SUBPS instruction.
115*344a7f5eSAndroid Build Coastguard Worker ///
116*344a7f5eSAndroid Build Coastguard Worker /// \param __a
117*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing the minuend.
118*344a7f5eSAndroid Build Coastguard Worker /// \param __b
119*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing the subtrahend.
120*344a7f5eSAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] containing the differences between
121*344a7f5eSAndroid Build Coastguard Worker ///    both operands.
122*344a7f5eSAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_sub_ps(__m256 __a,__m256 __b)123*344a7f5eSAndroid Build Coastguard Worker _mm256_sub_ps(__m256 __a, __m256 __b)
124*344a7f5eSAndroid Build Coastguard Worker {
125*344a7f5eSAndroid Build Coastguard Worker   return (__m256)((__v8sf)__a-(__v8sf)__b);
126*344a7f5eSAndroid Build Coastguard Worker }
127*344a7f5eSAndroid Build Coastguard Worker 
128*344a7f5eSAndroid Build Coastguard Worker /// \brief Adds the even-indexed values and subtracts the odd-indexed values of
129*344a7f5eSAndroid Build Coastguard Worker ///    two 256-bit vectors of [4 x double].
130*344a7f5eSAndroid Build Coastguard Worker ///
131*344a7f5eSAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
132*344a7f5eSAndroid Build Coastguard Worker ///
133*344a7f5eSAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VADDSUBPD / ADDSUBPD instruction.
134*344a7f5eSAndroid Build Coastguard Worker ///
135*344a7f5eSAndroid Build Coastguard Worker /// \param __a
136*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing the left source operand.
137*344a7f5eSAndroid Build Coastguard Worker /// \param __b
138*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing the right source operand.
139*344a7f5eSAndroid Build Coastguard Worker /// \returns A 256-bit vector of [4 x double] containing the alternating sums
140*344a7f5eSAndroid Build Coastguard Worker ///    and differences between both operands.
141*344a7f5eSAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_addsub_pd(__m256d __a,__m256d __b)142*344a7f5eSAndroid Build Coastguard Worker _mm256_addsub_pd(__m256d __a, __m256d __b)
143*344a7f5eSAndroid Build Coastguard Worker {
144*344a7f5eSAndroid Build Coastguard Worker   return (__m256d)__builtin_ia32_addsubpd256((__v4df)__a, (__v4df)__b);
145*344a7f5eSAndroid Build Coastguard Worker }
146*344a7f5eSAndroid Build Coastguard Worker 
147*344a7f5eSAndroid Build Coastguard Worker /// \brief Adds the even-indexed values and subtracts the odd-indexed values of
148*344a7f5eSAndroid Build Coastguard Worker ///    two 256-bit vectors of [8 x float].
149*344a7f5eSAndroid Build Coastguard Worker ///
150*344a7f5eSAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
151*344a7f5eSAndroid Build Coastguard Worker ///
152*344a7f5eSAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VADDSUBPS / ADDSUBPS instruction.
153*344a7f5eSAndroid Build Coastguard Worker ///
154*344a7f5eSAndroid Build Coastguard Worker /// \param __a
155*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing the left source operand.
156*344a7f5eSAndroid Build Coastguard Worker /// \param __b
157*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing the right source operand.
158*344a7f5eSAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] containing the alternating sums and
159*344a7f5eSAndroid Build Coastguard Worker ///    differences between both operands.
160*344a7f5eSAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_addsub_ps(__m256 __a,__m256 __b)161*344a7f5eSAndroid Build Coastguard Worker _mm256_addsub_ps(__m256 __a, __m256 __b)
162*344a7f5eSAndroid Build Coastguard Worker {
163*344a7f5eSAndroid Build Coastguard Worker   return (__m256)__builtin_ia32_addsubps256((__v8sf)__a, (__v8sf)__b);
164*344a7f5eSAndroid Build Coastguard Worker }
165*344a7f5eSAndroid Build Coastguard Worker 
166*344a7f5eSAndroid Build Coastguard Worker /// \brief Divides two 256-bit vectors of [4 x double].
167*344a7f5eSAndroid Build Coastguard Worker ///
168*344a7f5eSAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
169*344a7f5eSAndroid Build Coastguard Worker ///
170*344a7f5eSAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VDIVPD / DIVPD instruction.
171*344a7f5eSAndroid Build Coastguard Worker ///
172*344a7f5eSAndroid Build Coastguard Worker /// \param __a
173*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing the dividend.
174*344a7f5eSAndroid Build Coastguard Worker /// \param __b
175*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing the divisor.
176*344a7f5eSAndroid Build Coastguard Worker /// \returns A 256-bit vector of [4 x double] containing the quotients of both
177*344a7f5eSAndroid Build Coastguard Worker ///    operands.
178*344a7f5eSAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_div_pd(__m256d __a,__m256d __b)179*344a7f5eSAndroid Build Coastguard Worker _mm256_div_pd(__m256d __a, __m256d __b)
180*344a7f5eSAndroid Build Coastguard Worker {
181*344a7f5eSAndroid Build Coastguard Worker   return (__m256d)((__v4df)__a/(__v4df)__b);
182*344a7f5eSAndroid Build Coastguard Worker }
183*344a7f5eSAndroid Build Coastguard Worker 
184*344a7f5eSAndroid Build Coastguard Worker /// \brief Divides two 256-bit vectors of [8 x float].
185*344a7f5eSAndroid Build Coastguard Worker ///
186*344a7f5eSAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
187*344a7f5eSAndroid Build Coastguard Worker ///
188*344a7f5eSAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VDIVPS / DIVPS instruction.
189*344a7f5eSAndroid Build Coastguard Worker ///
190*344a7f5eSAndroid Build Coastguard Worker /// \param __a
191*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing the dividend.
192*344a7f5eSAndroid Build Coastguard Worker /// \param __b
193*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing the divisor.
194*344a7f5eSAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] containing the quotients of both
195*344a7f5eSAndroid Build Coastguard Worker ///    operands.
196*344a7f5eSAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_div_ps(__m256 __a,__m256 __b)197*344a7f5eSAndroid Build Coastguard Worker _mm256_div_ps(__m256 __a, __m256 __b)
198*344a7f5eSAndroid Build Coastguard Worker {
199*344a7f5eSAndroid Build Coastguard Worker   return (__m256)((__v8sf)__a/(__v8sf)__b);
200*344a7f5eSAndroid Build Coastguard Worker }
201*344a7f5eSAndroid Build Coastguard Worker 
202*344a7f5eSAndroid Build Coastguard Worker /// \brief Compares two 256-bit vectors of [4 x double] and returns the greater
203*344a7f5eSAndroid Build Coastguard Worker ///    of each pair of values.
204*344a7f5eSAndroid Build Coastguard Worker ///
205*344a7f5eSAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
206*344a7f5eSAndroid Build Coastguard Worker ///
207*344a7f5eSAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VMAXPD / MAXPD instruction.
208*344a7f5eSAndroid Build Coastguard Worker ///
209*344a7f5eSAndroid Build Coastguard Worker /// \param __a
210*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing one of the operands.
211*344a7f5eSAndroid Build Coastguard Worker /// \param __b
212*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing one of the operands.
213*344a7f5eSAndroid Build Coastguard Worker /// \returns A 256-bit vector of [4 x double] containing the maximum values
214*344a7f5eSAndroid Build Coastguard Worker ///    between both operands.
215*344a7f5eSAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_max_pd(__m256d __a,__m256d __b)216*344a7f5eSAndroid Build Coastguard Worker _mm256_max_pd(__m256d __a, __m256d __b)
217*344a7f5eSAndroid Build Coastguard Worker {
218*344a7f5eSAndroid Build Coastguard Worker   return (__m256d)__builtin_ia32_maxpd256((__v4df)__a, (__v4df)__b);
219*344a7f5eSAndroid Build Coastguard Worker }
220*344a7f5eSAndroid Build Coastguard Worker 
221*344a7f5eSAndroid Build Coastguard Worker /// \brief Compares two 256-bit vectors of [8 x float] and returns the greater
222*344a7f5eSAndroid Build Coastguard Worker ///    of each pair of values.
223*344a7f5eSAndroid Build Coastguard Worker ///
224*344a7f5eSAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
225*344a7f5eSAndroid Build Coastguard Worker ///
226*344a7f5eSAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VMAXPS / MAXPS instruction.
227*344a7f5eSAndroid Build Coastguard Worker ///
228*344a7f5eSAndroid Build Coastguard Worker /// \param __a
229*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing one of the operands.
230*344a7f5eSAndroid Build Coastguard Worker /// \param __b
231*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing one of the operands.
232*344a7f5eSAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] containing the maximum values
233*344a7f5eSAndroid Build Coastguard Worker ///    between both operands.
234*344a7f5eSAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_max_ps(__m256 __a,__m256 __b)235*344a7f5eSAndroid Build Coastguard Worker _mm256_max_ps(__m256 __a, __m256 __b)
236*344a7f5eSAndroid Build Coastguard Worker {
237*344a7f5eSAndroid Build Coastguard Worker   return (__m256)__builtin_ia32_maxps256((__v8sf)__a, (__v8sf)__b);
238*344a7f5eSAndroid Build Coastguard Worker }
239*344a7f5eSAndroid Build Coastguard Worker 
240*344a7f5eSAndroid Build Coastguard Worker /// \brief Compares two 256-bit vectors of [4 x double] and returns the lesser
241*344a7f5eSAndroid Build Coastguard Worker ///    of each pair of values.
242*344a7f5eSAndroid Build Coastguard Worker ///
243*344a7f5eSAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
244*344a7f5eSAndroid Build Coastguard Worker ///
245*344a7f5eSAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VMINPD / MINPD instruction.
246*344a7f5eSAndroid Build Coastguard Worker ///
247*344a7f5eSAndroid Build Coastguard Worker /// \param __a
248*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing one of the operands.
249*344a7f5eSAndroid Build Coastguard Worker /// \param __b
250*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing one of the operands.
251*344a7f5eSAndroid Build Coastguard Worker /// \returns A 256-bit vector of [4 x double] containing the minimum values
252*344a7f5eSAndroid Build Coastguard Worker ///    between both operands.
253*344a7f5eSAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_min_pd(__m256d __a,__m256d __b)254*344a7f5eSAndroid Build Coastguard Worker _mm256_min_pd(__m256d __a, __m256d __b)
255*344a7f5eSAndroid Build Coastguard Worker {
256*344a7f5eSAndroid Build Coastguard Worker   return (__m256d)__builtin_ia32_minpd256((__v4df)__a, (__v4df)__b);
257*344a7f5eSAndroid Build Coastguard Worker }
258*344a7f5eSAndroid Build Coastguard Worker 
259*344a7f5eSAndroid Build Coastguard Worker /// \brief Compares two 256-bit vectors of [8 x float] and returns the lesser
260*344a7f5eSAndroid Build Coastguard Worker ///    of each pair of values.
261*344a7f5eSAndroid Build Coastguard Worker ///
262*344a7f5eSAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
263*344a7f5eSAndroid Build Coastguard Worker ///
264*344a7f5eSAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VMINPS / MINPS instruction.
265*344a7f5eSAndroid Build Coastguard Worker ///
266*344a7f5eSAndroid Build Coastguard Worker /// \param __a
267*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing one of the operands.
268*344a7f5eSAndroid Build Coastguard Worker /// \param __b
269*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing one of the operands.
270*344a7f5eSAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] containing the minimum values
271*344a7f5eSAndroid Build Coastguard Worker ///    between both operands.
272*344a7f5eSAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_min_ps(__m256 __a,__m256 __b)273*344a7f5eSAndroid Build Coastguard Worker _mm256_min_ps(__m256 __a, __m256 __b)
274*344a7f5eSAndroid Build Coastguard Worker {
275*344a7f5eSAndroid Build Coastguard Worker   return (__m256)__builtin_ia32_minps256((__v8sf)__a, (__v8sf)__b);
276*344a7f5eSAndroid Build Coastguard Worker }
277*344a7f5eSAndroid Build Coastguard Worker 
278*344a7f5eSAndroid Build Coastguard Worker /// \brief Multiplies two 256-bit vectors of [4 x double].
279*344a7f5eSAndroid Build Coastguard Worker ///
280*344a7f5eSAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
281*344a7f5eSAndroid Build Coastguard Worker ///
282*344a7f5eSAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VMULPD / MULPD instruction.
283*344a7f5eSAndroid Build Coastguard Worker ///
284*344a7f5eSAndroid Build Coastguard Worker /// \param __a
285*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing one of the operands.
286*344a7f5eSAndroid Build Coastguard Worker /// \param __b
287*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing one of the operands.
288*344a7f5eSAndroid Build Coastguard Worker /// \returns A 256-bit vector of [4 x double] containing the products of both
289*344a7f5eSAndroid Build Coastguard Worker ///    operands.
290*344a7f5eSAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_mul_pd(__m256d __a,__m256d __b)291*344a7f5eSAndroid Build Coastguard Worker _mm256_mul_pd(__m256d __a, __m256d __b)
292*344a7f5eSAndroid Build Coastguard Worker {
293*344a7f5eSAndroid Build Coastguard Worker   return (__m256d)((__v4df)__a * (__v4df)__b);
294*344a7f5eSAndroid Build Coastguard Worker }
295*344a7f5eSAndroid Build Coastguard Worker 
296*344a7f5eSAndroid Build Coastguard Worker /// \brief Multiplies two 256-bit vectors of [8 x float].
297*344a7f5eSAndroid Build Coastguard Worker ///
298*344a7f5eSAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
299*344a7f5eSAndroid Build Coastguard Worker ///
300*344a7f5eSAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VMULPS / MULPS instruction.
301*344a7f5eSAndroid Build Coastguard Worker ///
302*344a7f5eSAndroid Build Coastguard Worker /// \param __a
303*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing one of the operands.
304*344a7f5eSAndroid Build Coastguard Worker /// \param __b
305*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing one of the operands.
306*344a7f5eSAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] containing the products of both
307*344a7f5eSAndroid Build Coastguard Worker ///    operands.
308*344a7f5eSAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_mul_ps(__m256 __a,__m256 __b)309*344a7f5eSAndroid Build Coastguard Worker _mm256_mul_ps(__m256 __a, __m256 __b)
310*344a7f5eSAndroid Build Coastguard Worker {
311*344a7f5eSAndroid Build Coastguard Worker   return (__m256)((__v8sf)__a * (__v8sf)__b);
312*344a7f5eSAndroid Build Coastguard Worker }
313*344a7f5eSAndroid Build Coastguard Worker 
314*344a7f5eSAndroid Build Coastguard Worker /// \brief Calculates the square roots of the values in a 256-bit vector of
315*344a7f5eSAndroid Build Coastguard Worker ///    [4 x double].
316*344a7f5eSAndroid Build Coastguard Worker ///
317*344a7f5eSAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
318*344a7f5eSAndroid Build Coastguard Worker ///
319*344a7f5eSAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VSQRTPD / SQRTPD instruction.
320*344a7f5eSAndroid Build Coastguard Worker ///
321*344a7f5eSAndroid Build Coastguard Worker /// \param __a
322*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double].
323*344a7f5eSAndroid Build Coastguard Worker /// \returns A 256-bit vector of [4 x double] containing the square roots of the
324*344a7f5eSAndroid Build Coastguard Worker ///    values in the operand.
325*344a7f5eSAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_sqrt_pd(__m256d __a)326*344a7f5eSAndroid Build Coastguard Worker _mm256_sqrt_pd(__m256d __a)
327*344a7f5eSAndroid Build Coastguard Worker {
328*344a7f5eSAndroid Build Coastguard Worker   return (__m256d)__builtin_ia32_sqrtpd256((__v4df)__a);
329*344a7f5eSAndroid Build Coastguard Worker }
330*344a7f5eSAndroid Build Coastguard Worker 
331*344a7f5eSAndroid Build Coastguard Worker /// \brief Calculates the square roots of the values in a 256-bit vector of
332*344a7f5eSAndroid Build Coastguard Worker ///    [8 x float].
333*344a7f5eSAndroid Build Coastguard Worker ///
334*344a7f5eSAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
335*344a7f5eSAndroid Build Coastguard Worker ///
336*344a7f5eSAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VSQRTPS / SQRTPS instruction.
337*344a7f5eSAndroid Build Coastguard Worker ///
338*344a7f5eSAndroid Build Coastguard Worker /// \param __a
339*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float].
340*344a7f5eSAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] containing the square roots of the
341*344a7f5eSAndroid Build Coastguard Worker ///    values in the operand.
342*344a7f5eSAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_sqrt_ps(__m256 __a)343*344a7f5eSAndroid Build Coastguard Worker _mm256_sqrt_ps(__m256 __a)
344*344a7f5eSAndroid Build Coastguard Worker {
345*344a7f5eSAndroid Build Coastguard Worker   return (__m256)__builtin_ia32_sqrtps256((__v8sf)__a);
346*344a7f5eSAndroid Build Coastguard Worker }
347*344a7f5eSAndroid Build Coastguard Worker 
348*344a7f5eSAndroid Build Coastguard Worker /// \brief Calculates the reciprocal square roots of the values in a 256-bit
349*344a7f5eSAndroid Build Coastguard Worker ///    vector of [8 x float].
350*344a7f5eSAndroid Build Coastguard Worker ///
351*344a7f5eSAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
352*344a7f5eSAndroid Build Coastguard Worker ///
353*344a7f5eSAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VRSQRTPS / RSQRTPS instruction.
354*344a7f5eSAndroid Build Coastguard Worker ///
355*344a7f5eSAndroid Build Coastguard Worker /// \param __a
356*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float].
357*344a7f5eSAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] containing the reciprocal square
358*344a7f5eSAndroid Build Coastguard Worker ///    roots of the values in the operand.
359*344a7f5eSAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_rsqrt_ps(__m256 __a)360*344a7f5eSAndroid Build Coastguard Worker _mm256_rsqrt_ps(__m256 __a)
361*344a7f5eSAndroid Build Coastguard Worker {
362*344a7f5eSAndroid Build Coastguard Worker   return (__m256)__builtin_ia32_rsqrtps256((__v8sf)__a);
363*344a7f5eSAndroid Build Coastguard Worker }
364*344a7f5eSAndroid Build Coastguard Worker 
365*344a7f5eSAndroid Build Coastguard Worker /// \brief Calculates the reciprocals of the values in a 256-bit vector of
366*344a7f5eSAndroid Build Coastguard Worker ///    [8 x float].
367*344a7f5eSAndroid Build Coastguard Worker ///
368*344a7f5eSAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
369*344a7f5eSAndroid Build Coastguard Worker ///
370*344a7f5eSAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VRCPPS / RCPPS instruction.
371*344a7f5eSAndroid Build Coastguard Worker ///
372*344a7f5eSAndroid Build Coastguard Worker /// \param __a
373*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float].
374*344a7f5eSAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] containing the reciprocals of the
375*344a7f5eSAndroid Build Coastguard Worker ///    values in the operand.
376*344a7f5eSAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_rcp_ps(__m256 __a)377*344a7f5eSAndroid Build Coastguard Worker _mm256_rcp_ps(__m256 __a)
378*344a7f5eSAndroid Build Coastguard Worker {
379*344a7f5eSAndroid Build Coastguard Worker   return (__m256)__builtin_ia32_rcpps256((__v8sf)__a);
380*344a7f5eSAndroid Build Coastguard Worker }
381*344a7f5eSAndroid Build Coastguard Worker 
/// \brief Rounds the values in a 256-bit vector of [4 x double] as specified
///    by the byte operand. The source values are rounded to integer values and
///    returned as 64-bit double-precision floating-point values.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m256d _mm256_round_pd(__m256d V, const int M);
/// \endcode
///
/// This intrinsic corresponds to the \c VROUNDPD / ROUNDPD instruction.
///
/// \param V
///    A 256-bit vector of [4 x double].
/// \param M
///    An integer value that specifies the rounding operation.
///    Bits [7:4] are reserved.
///    Bit [3] is a precision exception value:
///    0: A normal PE exception is used.
///    1: The PE field is not updated.
///    Bit [2] is the rounding control source:
///    0: Use bits [1:0] of M.
///    1: Use the current MXCSR setting.
///    Bits [1:0] contain the rounding control definition:
///    00: Nearest.
///    01: Downward (toward negative infinity).
///    10: Upward (toward positive infinity).
///    11: Truncated.
/// \returns A 256-bit vector of [4 x double] containing the rounded values.
///
/// Implemented as a macro so that M reaches the builtin unchanged. The
/// expansion is a single parenthesized expression rather than a GNU statement
/// expression, so it is usable wherever an ordinary expression is accepted
/// (statement expressions are rejected at file scope, for example).
#define _mm256_round_pd(V, M) \
    ((__m256d)__builtin_ia32_roundpd256((__v4df)(__m256d)(V), (M)))
413*344a7f5eSAndroid Build Coastguard Worker 
/// \brief Rounds the values stored in a 256-bit vector of [8 x float] as
///    specified by the byte operand. The source values are rounded to integer
///    values and returned as floating-point values.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m256 _mm256_round_ps(__m256 V, const int M);
/// \endcode
///
/// This intrinsic corresponds to the \c VROUNDPS / ROUNDPS instruction.
///
/// \param V
///    A 256-bit vector of [8 x float].
/// \param M
///    An integer value that specifies the rounding operation.
///    Bits [7:4] are reserved.
///    Bit [3] is a precision exception value:
///    0: A normal PE exception is used.
///    1: The PE field is not updated.
///    Bit [2] is the rounding control source:
///    0: Use bits [1:0] of M.
///    1: Use the current MXCSR setting.
///    Bits [1:0] contain the rounding control definition:
///    00: Nearest.
///    01: Downward (toward negative infinity).
///    10: Upward (toward positive infinity).
///    11: Truncated.
/// \returns A 256-bit vector of [8 x float] containing the rounded values.
#define _mm256_round_ps(V, M) \
  ((__m256)__builtin_ia32_roundps256((__v8sf)(__m256)(V), (M)))

/// \brief Rounds up the values stored in a 256-bit vector of [4 x double]. The
///    source values are rounded up to integer values and returned as 64-bit
///    double-precision floating-point values.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m256d _mm256_ceil_pd(__m256d V);
/// \endcode
///
/// This intrinsic corresponds to the \c VROUNDPD / ROUNDPD instruction.
///
/// \param V
///    A 256-bit vector of [4 x double].
/// \returns A 256-bit vector of [4 x double] containing the rounded up values.
#define _mm256_ceil_pd(V)  _mm256_round_pd((V), _MM_FROUND_CEIL)

/// \brief Rounds down the values stored in a 256-bit vector of [4 x double].
///    The source values are rounded down to integer values and returned as
///    64-bit double-precision floating-point values.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m256d _mm256_floor_pd(__m256d V);
/// \endcode
///
/// This intrinsic corresponds to the \c VROUNDPD / ROUNDPD instruction.
///
/// \param V
///    A 256-bit vector of [4 x double].
/// \returns A 256-bit vector of [4 x double] containing the rounded down
///    values.
#define _mm256_floor_pd(V) _mm256_round_pd((V), _MM_FROUND_FLOOR)

/// \brief Rounds up the values stored in a 256-bit vector of [8 x float]. The
///    source values are rounded up to integer values and returned as
///    floating-point values.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m256 _mm256_ceil_ps(__m256 V);
/// \endcode
///
/// This intrinsic corresponds to the \c VROUNDPS / ROUNDPS instruction.
///
/// \param V
///    A 256-bit vector of [8 x float].
/// \returns A 256-bit vector of [8 x float] containing the rounded up values.
#define _mm256_ceil_ps(V)  _mm256_round_ps((V), _MM_FROUND_CEIL)

/// \brief Rounds down the values stored in a 256-bit vector of [8 x float]. The
///    source values are rounded down to integer values and returned as
///    floating-point values.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m256 _mm256_floor_ps(__m256 V);
/// \endcode
///
/// This intrinsic corresponds to the \c VROUNDPS / ROUNDPS instruction.
///
/// \param V
///    A 256-bit vector of [8 x float].
/// \returns A 256-bit vector of [8 x float] containing the rounded down values.
#define _mm256_floor_ps(V) _mm256_round_ps((V), _MM_FROUND_FLOOR)

/* Logical */
516*344a7f5eSAndroid Build Coastguard Worker /// \brief Performs a bitwise AND of two 256-bit vectors of [4 x double].
517*344a7f5eSAndroid Build Coastguard Worker ///
518*344a7f5eSAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
519*344a7f5eSAndroid Build Coastguard Worker ///
520*344a7f5eSAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VANDPD / ANDPD instruction.
521*344a7f5eSAndroid Build Coastguard Worker ///
522*344a7f5eSAndroid Build Coastguard Worker /// \param __a
523*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing one of the source operands.
524*344a7f5eSAndroid Build Coastguard Worker /// \param __b
525*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing one of the source operands.
526*344a7f5eSAndroid Build Coastguard Worker /// \returns A 256-bit vector of [4 x double] containing the bitwise AND of the
527*344a7f5eSAndroid Build Coastguard Worker ///    values between both operands.
528*344a7f5eSAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_and_pd(__m256d __a,__m256d __b)529*344a7f5eSAndroid Build Coastguard Worker _mm256_and_pd(__m256d __a, __m256d __b)
530*344a7f5eSAndroid Build Coastguard Worker {
531*344a7f5eSAndroid Build Coastguard Worker   return (__m256d)((__v4du)__a & (__v4du)__b);
532*344a7f5eSAndroid Build Coastguard Worker }
533*344a7f5eSAndroid Build Coastguard Worker 
534*344a7f5eSAndroid Build Coastguard Worker /// \brief Performs a bitwise AND of two 256-bit vectors of [8 x float].
535*344a7f5eSAndroid Build Coastguard Worker ///
536*344a7f5eSAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
537*344a7f5eSAndroid Build Coastguard Worker ///
538*344a7f5eSAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VANDPS / ANDPS instruction.
539*344a7f5eSAndroid Build Coastguard Worker ///
540*344a7f5eSAndroid Build Coastguard Worker /// \param __a
541*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing one of the source operands.
542*344a7f5eSAndroid Build Coastguard Worker /// \param __b
543*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing one of the source operands.
544*344a7f5eSAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] containing the bitwise AND of the
545*344a7f5eSAndroid Build Coastguard Worker ///    values between both operands.
546*344a7f5eSAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_and_ps(__m256 __a,__m256 __b)547*344a7f5eSAndroid Build Coastguard Worker _mm256_and_ps(__m256 __a, __m256 __b)
548*344a7f5eSAndroid Build Coastguard Worker {
549*344a7f5eSAndroid Build Coastguard Worker   return (__m256)((__v8su)__a & (__v8su)__b);
550*344a7f5eSAndroid Build Coastguard Worker }
551*344a7f5eSAndroid Build Coastguard Worker 
552*344a7f5eSAndroid Build Coastguard Worker /// \brief Performs a bitwise AND of two 256-bit vectors of [4 x double], using
553*344a7f5eSAndroid Build Coastguard Worker ///    the one's complement of the values contained in the first source operand.
554*344a7f5eSAndroid Build Coastguard Worker ///
555*344a7f5eSAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
556*344a7f5eSAndroid Build Coastguard Worker ///
557*344a7f5eSAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VANDNPD / ANDNPD instruction.
558*344a7f5eSAndroid Build Coastguard Worker ///
559*344a7f5eSAndroid Build Coastguard Worker /// \param __a
560*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing the left source operand. The
561*344a7f5eSAndroid Build Coastguard Worker ///    one's complement of this value is used in the bitwise AND.
562*344a7f5eSAndroid Build Coastguard Worker /// \param __b
563*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing the right source operand.
564*344a7f5eSAndroid Build Coastguard Worker /// \returns A 256-bit vector of [4 x double] containing the bitwise AND of the
565*344a7f5eSAndroid Build Coastguard Worker ///    values of the second operand and the one's complement of the first
566*344a7f5eSAndroid Build Coastguard Worker ///    operand.
567*344a7f5eSAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_andnot_pd(__m256d __a,__m256d __b)568*344a7f5eSAndroid Build Coastguard Worker _mm256_andnot_pd(__m256d __a, __m256d __b)
569*344a7f5eSAndroid Build Coastguard Worker {
570*344a7f5eSAndroid Build Coastguard Worker   return (__m256d)(~(__v4du)__a & (__v4du)__b);
571*344a7f5eSAndroid Build Coastguard Worker }
572*344a7f5eSAndroid Build Coastguard Worker 
573*344a7f5eSAndroid Build Coastguard Worker /// \brief Performs a bitwise AND of two 256-bit vectors of [8 x float], using
574*344a7f5eSAndroid Build Coastguard Worker ///    the one's complement of the values contained in the first source operand.
575*344a7f5eSAndroid Build Coastguard Worker ///
576*344a7f5eSAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
577*344a7f5eSAndroid Build Coastguard Worker ///
578*344a7f5eSAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VANDNPS / ANDNPS instruction.
579*344a7f5eSAndroid Build Coastguard Worker ///
580*344a7f5eSAndroid Build Coastguard Worker /// \param __a
581*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing the left source operand. The
582*344a7f5eSAndroid Build Coastguard Worker ///    one's complement of this value is used in the bitwise AND.
583*344a7f5eSAndroid Build Coastguard Worker /// \param __b
584*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing the right source operand.
585*344a7f5eSAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] containing the bitwise AND of the
586*344a7f5eSAndroid Build Coastguard Worker ///    values of the second operand and the one's complement of the first
587*344a7f5eSAndroid Build Coastguard Worker ///    operand.
588*344a7f5eSAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_andnot_ps(__m256 __a,__m256 __b)589*344a7f5eSAndroid Build Coastguard Worker _mm256_andnot_ps(__m256 __a, __m256 __b)
590*344a7f5eSAndroid Build Coastguard Worker {
591*344a7f5eSAndroid Build Coastguard Worker   return (__m256)(~(__v8su)__a & (__v8su)__b);
592*344a7f5eSAndroid Build Coastguard Worker }
593*344a7f5eSAndroid Build Coastguard Worker 
594*344a7f5eSAndroid Build Coastguard Worker /// \brief Performs a bitwise OR of two 256-bit vectors of [4 x double].
595*344a7f5eSAndroid Build Coastguard Worker ///
596*344a7f5eSAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
597*344a7f5eSAndroid Build Coastguard Worker ///
598*344a7f5eSAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VORPD / ORPD instruction.
599*344a7f5eSAndroid Build Coastguard Worker ///
600*344a7f5eSAndroid Build Coastguard Worker /// \param __a
601*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing one of the source operands.
602*344a7f5eSAndroid Build Coastguard Worker /// \param __b
603*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing one of the source operands.
604*344a7f5eSAndroid Build Coastguard Worker /// \returns A 256-bit vector of [4 x double] containing the bitwise OR of the
605*344a7f5eSAndroid Build Coastguard Worker ///    values between both operands.
606*344a7f5eSAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_or_pd(__m256d __a,__m256d __b)607*344a7f5eSAndroid Build Coastguard Worker _mm256_or_pd(__m256d __a, __m256d __b)
608*344a7f5eSAndroid Build Coastguard Worker {
609*344a7f5eSAndroid Build Coastguard Worker   return (__m256d)((__v4du)__a | (__v4du)__b);
610*344a7f5eSAndroid Build Coastguard Worker }
611*344a7f5eSAndroid Build Coastguard Worker 
612*344a7f5eSAndroid Build Coastguard Worker /// \brief Performs a bitwise OR of two 256-bit vectors of [8 x float].
613*344a7f5eSAndroid Build Coastguard Worker ///
614*344a7f5eSAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
615*344a7f5eSAndroid Build Coastguard Worker ///
616*344a7f5eSAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VORPS / ORPS instruction.
617*344a7f5eSAndroid Build Coastguard Worker ///
618*344a7f5eSAndroid Build Coastguard Worker /// \param __a
619*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing one of the source operands.
620*344a7f5eSAndroid Build Coastguard Worker /// \param __b
621*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing one of the source operands.
622*344a7f5eSAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] containing the bitwise OR of the
623*344a7f5eSAndroid Build Coastguard Worker ///    values between both operands.
624*344a7f5eSAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_or_ps(__m256 __a,__m256 __b)625*344a7f5eSAndroid Build Coastguard Worker _mm256_or_ps(__m256 __a, __m256 __b)
626*344a7f5eSAndroid Build Coastguard Worker {
627*344a7f5eSAndroid Build Coastguard Worker   return (__m256)((__v8su)__a | (__v8su)__b);
628*344a7f5eSAndroid Build Coastguard Worker }
629*344a7f5eSAndroid Build Coastguard Worker 
630*344a7f5eSAndroid Build Coastguard Worker /// \brief Performs a bitwise XOR of two 256-bit vectors of [4 x double].
631*344a7f5eSAndroid Build Coastguard Worker ///
632*344a7f5eSAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
633*344a7f5eSAndroid Build Coastguard Worker ///
634*344a7f5eSAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VXORPD / XORPD instruction.
635*344a7f5eSAndroid Build Coastguard Worker ///
636*344a7f5eSAndroid Build Coastguard Worker /// \param __a
637*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing one of the source operands.
638*344a7f5eSAndroid Build Coastguard Worker /// \param __b
639*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing one of the source operands.
640*344a7f5eSAndroid Build Coastguard Worker /// \returns A 256-bit vector of [4 x double] containing the bitwise XOR of the
641*344a7f5eSAndroid Build Coastguard Worker ///    values between both operands.
642*344a7f5eSAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_xor_pd(__m256d __a,__m256d __b)643*344a7f5eSAndroid Build Coastguard Worker _mm256_xor_pd(__m256d __a, __m256d __b)
644*344a7f5eSAndroid Build Coastguard Worker {
645*344a7f5eSAndroid Build Coastguard Worker   return (__m256d)((__v4du)__a ^ (__v4du)__b);
646*344a7f5eSAndroid Build Coastguard Worker }
647*344a7f5eSAndroid Build Coastguard Worker 
648*344a7f5eSAndroid Build Coastguard Worker /// \brief Performs a bitwise XOR of two 256-bit vectors of [8 x float].
649*344a7f5eSAndroid Build Coastguard Worker ///
650*344a7f5eSAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
651*344a7f5eSAndroid Build Coastguard Worker ///
652*344a7f5eSAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VXORPS / XORPS instruction.
653*344a7f5eSAndroid Build Coastguard Worker ///
654*344a7f5eSAndroid Build Coastguard Worker /// \param __a
655*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing one of the source operands.
656*344a7f5eSAndroid Build Coastguard Worker /// \param __b
657*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing one of the source operands.
658*344a7f5eSAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] containing the bitwise XOR of the
659*344a7f5eSAndroid Build Coastguard Worker ///    values between both operands.
660*344a7f5eSAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_xor_ps(__m256 __a,__m256 __b)661*344a7f5eSAndroid Build Coastguard Worker _mm256_xor_ps(__m256 __a, __m256 __b)
662*344a7f5eSAndroid Build Coastguard Worker {
663*344a7f5eSAndroid Build Coastguard Worker   return (__m256)((__v8su)__a ^ (__v8su)__b);
664*344a7f5eSAndroid Build Coastguard Worker }
665*344a7f5eSAndroid Build Coastguard Worker 
/* Horizontal arithmetic */
667*344a7f5eSAndroid Build Coastguard Worker /// \brief Horizontally adds the adjacent pairs of values contained in two
668*344a7f5eSAndroid Build Coastguard Worker ///    256-bit vectors of [4 x double].
669*344a7f5eSAndroid Build Coastguard Worker ///
670*344a7f5eSAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
671*344a7f5eSAndroid Build Coastguard Worker ///
672*344a7f5eSAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VHADDPD / HADDPD instruction.
673*344a7f5eSAndroid Build Coastguard Worker ///
674*344a7f5eSAndroid Build Coastguard Worker /// \param __a
675*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing one of the source operands.
676*344a7f5eSAndroid Build Coastguard Worker ///    The horizontal sums of the values are returned in the even-indexed
677*344a7f5eSAndroid Build Coastguard Worker ///    elements of a vector of [4 x double].
678*344a7f5eSAndroid Build Coastguard Worker /// \param __b
679*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing one of the source operands.
680*344a7f5eSAndroid Build Coastguard Worker ///    The horizontal sums of the values are returned in the odd-indexed
681*344a7f5eSAndroid Build Coastguard Worker ///    elements of a vector of [4 x double].
682*344a7f5eSAndroid Build Coastguard Worker /// \returns A 256-bit vector of [4 x double] containing the horizontal sums of
683*344a7f5eSAndroid Build Coastguard Worker ///    both operands.
684*344a7f5eSAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_hadd_pd(__m256d __a,__m256d __b)685*344a7f5eSAndroid Build Coastguard Worker _mm256_hadd_pd(__m256d __a, __m256d __b)
686*344a7f5eSAndroid Build Coastguard Worker {
687*344a7f5eSAndroid Build Coastguard Worker   return (__m256d)__builtin_ia32_haddpd256((__v4df)__a, (__v4df)__b);
688*344a7f5eSAndroid Build Coastguard Worker }
689*344a7f5eSAndroid Build Coastguard Worker 
690*344a7f5eSAndroid Build Coastguard Worker /// \brief Horizontally adds the adjacent pairs of values contained in two
691*344a7f5eSAndroid Build Coastguard Worker ///    256-bit vectors of [8 x float].
692*344a7f5eSAndroid Build Coastguard Worker ///
693*344a7f5eSAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
694*344a7f5eSAndroid Build Coastguard Worker ///
695*344a7f5eSAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VHADDPS / HADDPS instruction.
696*344a7f5eSAndroid Build Coastguard Worker ///
697*344a7f5eSAndroid Build Coastguard Worker /// \param __a
698*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing one of the source operands.
699*344a7f5eSAndroid Build Coastguard Worker ///    The horizontal sums of the values are returned in the elements with
700*344a7f5eSAndroid Build Coastguard Worker ///    index 0, 1, 4, 5 of a vector of [8 x float].
701*344a7f5eSAndroid Build Coastguard Worker /// \param __b
702*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing one of the source operands.
703*344a7f5eSAndroid Build Coastguard Worker ///    The horizontal sums of the values are returned in the elements with
704*344a7f5eSAndroid Build Coastguard Worker ///    index 2, 3, 6, 7 of a vector of [8 x float].
705*344a7f5eSAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] containing the horizontal sums of
706*344a7f5eSAndroid Build Coastguard Worker ///    both operands.
707*344a7f5eSAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_hadd_ps(__m256 __a,__m256 __b)708*344a7f5eSAndroid Build Coastguard Worker _mm256_hadd_ps(__m256 __a, __m256 __b)
709*344a7f5eSAndroid Build Coastguard Worker {
710*344a7f5eSAndroid Build Coastguard Worker   return (__m256)__builtin_ia32_haddps256((__v8sf)__a, (__v8sf)__b);
711*344a7f5eSAndroid Build Coastguard Worker }
712*344a7f5eSAndroid Build Coastguard Worker 
713*344a7f5eSAndroid Build Coastguard Worker /// \brief Horizontally subtracts the adjacent pairs of values contained in two
714*344a7f5eSAndroid Build Coastguard Worker ///    256-bit vectors of [4 x double].
715*344a7f5eSAndroid Build Coastguard Worker ///
716*344a7f5eSAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
717*344a7f5eSAndroid Build Coastguard Worker ///
718*344a7f5eSAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VHSUBPD / HSUBPD instruction.
719*344a7f5eSAndroid Build Coastguard Worker ///
720*344a7f5eSAndroid Build Coastguard Worker /// \param __a
721*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing one of the source operands.
722*344a7f5eSAndroid Build Coastguard Worker ///    The horizontal differences between the values are returned in the
723*344a7f5eSAndroid Build Coastguard Worker ///    even-indexed elements of a vector of [4 x double].
724*344a7f5eSAndroid Build Coastguard Worker /// \param __b
725*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing one of the source operands.
726*344a7f5eSAndroid Build Coastguard Worker ///    The horizontal differences between the values are returned in the
727*344a7f5eSAndroid Build Coastguard Worker ///    odd-indexed elements of a vector of [4 x double].
728*344a7f5eSAndroid Build Coastguard Worker /// \returns A 256-bit vector of [4 x double] containing the horizontal
729*344a7f5eSAndroid Build Coastguard Worker ///    differences of both operands.
730*344a7f5eSAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_hsub_pd(__m256d __a,__m256d __b)731*344a7f5eSAndroid Build Coastguard Worker _mm256_hsub_pd(__m256d __a, __m256d __b)
732*344a7f5eSAndroid Build Coastguard Worker {
733*344a7f5eSAndroid Build Coastguard Worker   return (__m256d)__builtin_ia32_hsubpd256((__v4df)__a, (__v4df)__b);
734*344a7f5eSAndroid Build Coastguard Worker }
735*344a7f5eSAndroid Build Coastguard Worker 
736*344a7f5eSAndroid Build Coastguard Worker /// \brief Horizontally subtracts the adjacent pairs of values contained in two
737*344a7f5eSAndroid Build Coastguard Worker ///    256-bit vectors of [8 x float].
738*344a7f5eSAndroid Build Coastguard Worker ///
739*344a7f5eSAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
740*344a7f5eSAndroid Build Coastguard Worker ///
741*344a7f5eSAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VHSUBPS / HSUBPS instruction.
742*344a7f5eSAndroid Build Coastguard Worker ///
743*344a7f5eSAndroid Build Coastguard Worker /// \param __a
744*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing one of the source operands.
745*344a7f5eSAndroid Build Coastguard Worker ///    The horizontal differences between the values are returned in the
746*344a7f5eSAndroid Build Coastguard Worker ///    elements with index 0, 1, 4, 5 of a vector of [8 x float].
747*344a7f5eSAndroid Build Coastguard Worker /// \param __b
748*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing one of the source operands.
749*344a7f5eSAndroid Build Coastguard Worker ///    The horizontal differences between the values are returned in the
750*344a7f5eSAndroid Build Coastguard Worker ///    elements with index 2, 3, 6, 7 of a vector of [8 x float].
751*344a7f5eSAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] containing the horizontal
752*344a7f5eSAndroid Build Coastguard Worker ///    differences of both operands.
753*344a7f5eSAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_hsub_ps(__m256 __a,__m256 __b)754*344a7f5eSAndroid Build Coastguard Worker _mm256_hsub_ps(__m256 __a, __m256 __b)
755*344a7f5eSAndroid Build Coastguard Worker {
756*344a7f5eSAndroid Build Coastguard Worker   return (__m256)__builtin_ia32_hsubps256((__v8sf)__a, (__v8sf)__b);
757*344a7f5eSAndroid Build Coastguard Worker }
758*344a7f5eSAndroid Build Coastguard Worker 
/* Vector permutations */
760*344a7f5eSAndroid Build Coastguard Worker /// \brief Copies the values in a 128-bit vector of [2 x double] as specified
761*344a7f5eSAndroid Build Coastguard Worker ///    by the 128-bit integer vector operand.
762*344a7f5eSAndroid Build Coastguard Worker ///
763*344a7f5eSAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
764*344a7f5eSAndroid Build Coastguard Worker ///
765*344a7f5eSAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VPERMILPD / PERMILPD instruction.
766*344a7f5eSAndroid Build Coastguard Worker ///
767*344a7f5eSAndroid Build Coastguard Worker /// \param __a
768*344a7f5eSAndroid Build Coastguard Worker ///    A 128-bit vector of [2 x double].
769*344a7f5eSAndroid Build Coastguard Worker /// \param __c
770*344a7f5eSAndroid Build Coastguard Worker ///    A 128-bit integer vector operand specifying how the values are to be
771*344a7f5eSAndroid Build Coastguard Worker ///    copied.
772*344a7f5eSAndroid Build Coastguard Worker ///    Bit [1]:
773*344a7f5eSAndroid Build Coastguard Worker ///    0: Bits [63:0] of the source are copied to bits [63:0] of the
774*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
775*344a7f5eSAndroid Build Coastguard Worker ///    1: Bits [127:64] of the source are copied to bits [63:0] of the
776*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
777*344a7f5eSAndroid Build Coastguard Worker ///    Bit [65]:
778*344a7f5eSAndroid Build Coastguard Worker ///    0: Bits [63:0] of the source are copied to bits [127:64] of the
779*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
780*344a7f5eSAndroid Build Coastguard Worker ///    1: Bits [127:64] of the source are copied to bits [127:64] of the
781*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
782*344a7f5eSAndroid Build Coastguard Worker /// \returns A 128-bit vector of [2 x double] containing the copied values.
783*344a7f5eSAndroid Build Coastguard Worker static __inline __m128d __DEFAULT_FN_ATTRS
_mm_permutevar_pd(__m128d __a,__m128i __c)784*344a7f5eSAndroid Build Coastguard Worker _mm_permutevar_pd(__m128d __a, __m128i __c)
785*344a7f5eSAndroid Build Coastguard Worker {
786*344a7f5eSAndroid Build Coastguard Worker   return (__m128d)__builtin_ia32_vpermilvarpd((__v2df)__a, (__v2di)__c);
787*344a7f5eSAndroid Build Coastguard Worker }
788*344a7f5eSAndroid Build Coastguard Worker 
789*344a7f5eSAndroid Build Coastguard Worker /// \brief Copies the values in a 256-bit vector of [4 x double] as
790*344a7f5eSAndroid Build Coastguard Worker ///    specified by the 256-bit integer vector operand.
791*344a7f5eSAndroid Build Coastguard Worker ///
792*344a7f5eSAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
793*344a7f5eSAndroid Build Coastguard Worker ///
794*344a7f5eSAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VPERMILPD / PERMILPD instruction.
795*344a7f5eSAndroid Build Coastguard Worker ///
796*344a7f5eSAndroid Build Coastguard Worker /// \param __a
797*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double].
798*344a7f5eSAndroid Build Coastguard Worker /// \param __c
799*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit integer vector operand specifying how the values are to be
800*344a7f5eSAndroid Build Coastguard Worker ///    copied.
801*344a7f5eSAndroid Build Coastguard Worker ///    Bit [1]:
802*344a7f5eSAndroid Build Coastguard Worker ///    0: Bits [63:0] of the source are copied to bits [63:0] of the
803*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
804*344a7f5eSAndroid Build Coastguard Worker ///    1: Bits [127:64] of the source are copied to bits [63:0] of the
805*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
806*344a7f5eSAndroid Build Coastguard Worker ///    Bit [65]:
807*344a7f5eSAndroid Build Coastguard Worker ///    0: Bits [63:0] of the source are copied to bits [127:64] of the
808*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
809*344a7f5eSAndroid Build Coastguard Worker ///    1: Bits [127:64] of the source are copied to bits [127:64] of the
810*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
811*344a7f5eSAndroid Build Coastguard Worker ///    Bit [129]:
812*344a7f5eSAndroid Build Coastguard Worker ///    0: Bits [191:128] of the source are copied to bits [191:128] of the
813*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
814*344a7f5eSAndroid Build Coastguard Worker ///    1: Bits [255:192] of the source are copied to bits [191:128] of the
815*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
816*344a7f5eSAndroid Build Coastguard Worker ///    Bit [193]:
817*344a7f5eSAndroid Build Coastguard Worker ///    0: Bits [191:128] of the source are copied to bits [255:192] of the
818*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
819*344a7f5eSAndroid Build Coastguard Worker ///    1: Bits [255:192] of the source are copied to bits [255:192] of the
820*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
821*344a7f5eSAndroid Build Coastguard Worker /// \returns A 256-bit vector of [4 x double] containing the copied values.
822*344a7f5eSAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_permutevar_pd(__m256d __a,__m256i __c)823*344a7f5eSAndroid Build Coastguard Worker _mm256_permutevar_pd(__m256d __a, __m256i __c)
824*344a7f5eSAndroid Build Coastguard Worker {
825*344a7f5eSAndroid Build Coastguard Worker   return (__m256d)__builtin_ia32_vpermilvarpd256((__v4df)__a, (__v4di)__c);
826*344a7f5eSAndroid Build Coastguard Worker }
827*344a7f5eSAndroid Build Coastguard Worker 
828*344a7f5eSAndroid Build Coastguard Worker /// \brief Copies the values stored in a 128-bit vector of [4 x float] as
829*344a7f5eSAndroid Build Coastguard Worker ///    specified by the 128-bit integer vector operand.
830*344a7f5eSAndroid Build Coastguard Worker ///
831*344a7f5eSAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
832*344a7f5eSAndroid Build Coastguard Worker ///
833*344a7f5eSAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VPERMILPS / PERMILPS instruction.
834*344a7f5eSAndroid Build Coastguard Worker ///
835*344a7f5eSAndroid Build Coastguard Worker /// \param __a
836*344a7f5eSAndroid Build Coastguard Worker ///    A 128-bit vector of [4 x float].
837*344a7f5eSAndroid Build Coastguard Worker /// \param __c
838*344a7f5eSAndroid Build Coastguard Worker ///    A 128-bit integer vector operand specifying how the values are to be
839*344a7f5eSAndroid Build Coastguard Worker ///    copied.
840*344a7f5eSAndroid Build Coastguard Worker ///    Bits [1:0]:
841*344a7f5eSAndroid Build Coastguard Worker ///    00: Bits [31:0] of the source are copied to bits [31:0] of the
842*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
843*344a7f5eSAndroid Build Coastguard Worker ///    01: Bits [63:32] of the source are copied to bits [31:0] of the
844*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
845*344a7f5eSAndroid Build Coastguard Worker ///    10: Bits [95:64] of the source are copied to bits [31:0] of the
846*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
847*344a7f5eSAndroid Build Coastguard Worker ///    11: Bits [127:96] of the source are copied to bits [31:0] of the
848*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
849*344a7f5eSAndroid Build Coastguard Worker ///    Bits [33:32]:
850*344a7f5eSAndroid Build Coastguard Worker ///    00: Bits [31:0] of the source are copied to bits [63:32] of the
851*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
852*344a7f5eSAndroid Build Coastguard Worker ///    01: Bits [63:32] of the source are copied to bits [63:32] of the
853*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
854*344a7f5eSAndroid Build Coastguard Worker ///    10: Bits [95:64] of the source are copied to bits [63:32] of the
855*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
856*344a7f5eSAndroid Build Coastguard Worker ///    11: Bits [127:96] of the source are copied to bits [63:32] of the
857*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
858*344a7f5eSAndroid Build Coastguard Worker ///    Bits [65:64]:
859*344a7f5eSAndroid Build Coastguard Worker ///    00: Bits [31:0] of the source are copied to bits [95:64] of the
860*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
861*344a7f5eSAndroid Build Coastguard Worker ///    01: Bits [63:32] of the source are copied to bits [95:64] of the
862*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
863*344a7f5eSAndroid Build Coastguard Worker ///    10: Bits [95:64] of the source are copied to bits [95:64] of the
864*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
865*344a7f5eSAndroid Build Coastguard Worker ///    11: Bits [127:96] of the source are copied to bits [95:64] of the
866*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
867*344a7f5eSAndroid Build Coastguard Worker ///    Bits [97:96]:
868*344a7f5eSAndroid Build Coastguard Worker ///    00: Bits [31:0] of the source are copied to bits [127:96] of the
869*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
870*344a7f5eSAndroid Build Coastguard Worker ///    01: Bits [63:32] of the source are copied to bits [127:96] of the
871*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
872*344a7f5eSAndroid Build Coastguard Worker ///    10: Bits [95:64] of the source are copied to bits [127:96] of the
873*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
874*344a7f5eSAndroid Build Coastguard Worker ///    11: Bits [127:96] of the source are copied to bits [127:96] of the
875*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
876*344a7f5eSAndroid Build Coastguard Worker /// \returns A 128-bit vector of [4 x float] containing the copied values.
877*344a7f5eSAndroid Build Coastguard Worker static __inline __m128 __DEFAULT_FN_ATTRS
_mm_permutevar_ps(__m128 __a,__m128i __c)878*344a7f5eSAndroid Build Coastguard Worker _mm_permutevar_ps(__m128 __a, __m128i __c)
879*344a7f5eSAndroid Build Coastguard Worker {
880*344a7f5eSAndroid Build Coastguard Worker   return (__m128)__builtin_ia32_vpermilvarps((__v4sf)__a, (__v4si)__c);
881*344a7f5eSAndroid Build Coastguard Worker }
882*344a7f5eSAndroid Build Coastguard Worker 
883*344a7f5eSAndroid Build Coastguard Worker /// \brief Copies the values stored in a 256-bit vector of [8 x float] as
884*344a7f5eSAndroid Build Coastguard Worker ///    specified by the 256-bit integer vector operand.
885*344a7f5eSAndroid Build Coastguard Worker ///
886*344a7f5eSAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
887*344a7f5eSAndroid Build Coastguard Worker ///
888*344a7f5eSAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VPERMILPS / PERMILPS instruction.
889*344a7f5eSAndroid Build Coastguard Worker ///
890*344a7f5eSAndroid Build Coastguard Worker /// \param __a
891*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float].
892*344a7f5eSAndroid Build Coastguard Worker /// \param __c
893*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit integer vector operand specifying how the values are to be
894*344a7f5eSAndroid Build Coastguard Worker ///    copied.
895*344a7f5eSAndroid Build Coastguard Worker ///    Bits [1:0]:
896*344a7f5eSAndroid Build Coastguard Worker ///    00: Bits [31:0] of the source are copied to bits [31:0] of the
897*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
898*344a7f5eSAndroid Build Coastguard Worker ///    01: Bits [63:32] of the source are copied to bits [31:0] of the
899*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
900*344a7f5eSAndroid Build Coastguard Worker ///    10: Bits [95:64] of the source are copied to bits [31:0] of the
901*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
902*344a7f5eSAndroid Build Coastguard Worker ///    11: Bits [127:96] of the source are copied to bits [31:0] of the
903*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
904*344a7f5eSAndroid Build Coastguard Worker ///    Bits [33:32]:
905*344a7f5eSAndroid Build Coastguard Worker ///    00: Bits [31:0] of the source are copied to bits [63:32] of the
906*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
907*344a7f5eSAndroid Build Coastguard Worker ///    01: Bits [63:32] of the source are copied to bits [63:32] of the
908*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
909*344a7f5eSAndroid Build Coastguard Worker ///    10: Bits [95:64] of the source are copied to bits [63:32] of the
910*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
911*344a7f5eSAndroid Build Coastguard Worker ///    11: Bits [127:96] of the source are copied to bits [63:32] of the
912*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
913*344a7f5eSAndroid Build Coastguard Worker ///    Bits [65:64]:
914*344a7f5eSAndroid Build Coastguard Worker ///    00: Bits [31:0] of the source are copied to bits [95:64] of the
915*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
916*344a7f5eSAndroid Build Coastguard Worker ///    01: Bits [63:32] of the source are copied to bits [95:64] of the
917*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
918*344a7f5eSAndroid Build Coastguard Worker ///    10: Bits [95:64] of the source are copied to bits [95:64] of the
919*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
920*344a7f5eSAndroid Build Coastguard Worker ///    11: Bits [127:96] of the source are copied to bits [95:64] of the
921*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
922*344a7f5eSAndroid Build Coastguard Worker ///    Bits [97:96]:
923*344a7f5eSAndroid Build Coastguard Worker ///    00: Bits [31:0] of the source are copied to bits [127:96] of the
924*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
925*344a7f5eSAndroid Build Coastguard Worker ///    01: Bits [63:32] of the source are copied to bits [127:96] of the
926*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
927*344a7f5eSAndroid Build Coastguard Worker ///    10: Bits [95:64] of the source are copied to bits [127:96] of the
928*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
929*344a7f5eSAndroid Build Coastguard Worker ///    11: Bits [127:96] of the source are copied to bits [127:96] of the
930*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
931*344a7f5eSAndroid Build Coastguard Worker ///    Bits [129:128]:
932*344a7f5eSAndroid Build Coastguard Worker ///    00: Bits [159:128] of the source are copied to bits [159:128] of the
933*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
934*344a7f5eSAndroid Build Coastguard Worker ///    01: Bits [191:160] of the source are copied to bits [159:128] of the
935*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
936*344a7f5eSAndroid Build Coastguard Worker ///    10: Bits [223:192] of the source are copied to bits [159:128] of the
937*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
938*344a7f5eSAndroid Build Coastguard Worker ///    11: Bits [255:224] of the source are copied to bits [159:128] of the
939*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
940*344a7f5eSAndroid Build Coastguard Worker ///    Bits [161:160]:
941*344a7f5eSAndroid Build Coastguard Worker ///    00: Bits [159:128] of the source are copied to bits [191:160] of the
942*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
943*344a7f5eSAndroid Build Coastguard Worker ///    01: Bits [191:160] of the source are copied to bits [191:160] of the
944*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
945*344a7f5eSAndroid Build Coastguard Worker ///    10: Bits [223:192] of the source are copied to bits [191:160] of the
946*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
947*344a7f5eSAndroid Build Coastguard Worker ///    11: Bits [255:224] of the source are copied to bits [191:160] of the
948*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
949*344a7f5eSAndroid Build Coastguard Worker ///    Bits [193:192]:
950*344a7f5eSAndroid Build Coastguard Worker ///    00: Bits [159:128] of the source are copied to bits [223:192] of the
951*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
952*344a7f5eSAndroid Build Coastguard Worker ///    01: Bits [191:160] of the source are copied to bits [223:192] of the
953*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
954*344a7f5eSAndroid Build Coastguard Worker ///    10: Bits [223:192] of the source are copied to bits [223:192] of the
955*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
956*344a7f5eSAndroid Build Coastguard Worker ///    11: Bits [255:224] of the source are copied to bits [223:192] of the
957*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
958*344a7f5eSAndroid Build Coastguard Worker ///    Bits [225:224]:
959*344a7f5eSAndroid Build Coastguard Worker ///    00: Bits [159:128] of the source are copied to bits [255:224] of the
960*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
961*344a7f5eSAndroid Build Coastguard Worker ///    01: Bits [191:160] of the source are copied to bits [255:224] of the
962*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
963*344a7f5eSAndroid Build Coastguard Worker ///    10: Bits [223:192] of the source are copied to bits [255:224] of the
964*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
965*344a7f5eSAndroid Build Coastguard Worker ///    11: Bits [255:224] of the source are copied to bits [255:224] of the
966*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
967*344a7f5eSAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] containing the copied values.
968*344a7f5eSAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_permutevar_ps(__m256 __a,__m256i __c)969*344a7f5eSAndroid Build Coastguard Worker _mm256_permutevar_ps(__m256 __a, __m256i __c)
970*344a7f5eSAndroid Build Coastguard Worker {
971*344a7f5eSAndroid Build Coastguard Worker   return (__m256)__builtin_ia32_vpermilvarps256((__v8sf)__a, (__v8si)__c);
972*344a7f5eSAndroid Build Coastguard Worker }
973*344a7f5eSAndroid Build Coastguard Worker 
/// \brief Copies the values in a 128-bit vector of [2 x double] as
///    specified by the immediate integer operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128d _mm_permute_pd(__m128d A, const int C);
/// \endcode
///
/// This intrinsic corresponds to the \c VPERMILPD instruction.
///
/// \param A
///    A 128-bit vector of [2 x double].
/// \param C
///    An immediate integer operand specifying how the values are to be copied.
///    Only bits [1:0] are examined by the expansion below.
///    Bit [0]:
///    0: Bits [63:0] of the source are copied to bits [63:0] of the
///    returned vector.
///    1: Bits [127:64] of the source are copied to bits [63:0] of the
///    returned vector.
///    Bit [1]:
///    0: Bits [63:0] of the source are copied to bits [127:64] of the
///    returned vector.
///    1: Bits [127:64] of the source are copied to bits [127:64] of the
///    returned vector.
/// \returns A 128-bit vector of [2 x double] containing the copied values.
#define _mm_permute_pd(A, C) __extension__ ({ \
  (__m128d)__builtin_shufflevector((__v2df)(__m128d)(A), \
                                   (__v2df)_mm_undefined_pd(), \
                                   ((C) >> 0) & 0x1, ((C) >> 1) & 0x1); })
1004*344a7f5eSAndroid Build Coastguard Worker 
/// \brief Copies the values in a 256-bit vector of [4 x double] as
///    specified by the immediate integer operand.
///
///    Each 128-bit half of the source is permuted independently: the shuffle
///    indices for the lower half are offset by 0 and those for the upper
///    half by 2, so values are never moved between halves.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m256d _mm256_permute_pd(__m256d A, const int C);
/// \endcode
///
/// This intrinsic corresponds to the \c VPERMILPD instruction.
///
/// \param A
///    A 256-bit vector of [4 x double].
/// \param C
///    An immediate integer operand specifying how the values are to be copied.
///    Only bits [3:0] are examined by the expansion below.
///    Bit [0]:
///    0: Bits [63:0] of the source are copied to bits [63:0] of the
///    returned vector.
///    1: Bits [127:64] of the source are copied to bits [63:0] of the
///    returned vector.
///    Bit [1]:
///    0: Bits [63:0] of the source are copied to bits [127:64] of the
///    returned vector.
///    1: Bits [127:64] of the source are copied to bits [127:64] of the
///    returned vector.
///    Bit [2]:
///    0: Bits [191:128] of the source are copied to bits [191:128] of the
///    returned vector.
///    1: Bits [255:192] of the source are copied to bits [191:128] of the
///    returned vector.
///    Bit [3]:
///    0: Bits [191:128] of the source are copied to bits [255:192] of the
///    returned vector.
///    1: Bits [255:192] of the source are copied to bits [255:192] of the
///    returned vector.
/// \returns A 256-bit vector of [4 x double] containing the copied values.
#define _mm256_permute_pd(A, C) __extension__ ({ \
  (__m256d)__builtin_shufflevector((__v4df)(__m256d)(A), \
                                   (__v4df)_mm256_undefined_pd(), \
                                   0 + (((C) >> 0) & 0x1), \
                                   0 + (((C) >> 1) & 0x1), \
                                   2 + (((C) >> 2) & 0x1), \
                                   2 + (((C) >> 3) & 0x1)); })
1048*344a7f5eSAndroid Build Coastguard Worker 
/// \brief Copies the values in a 128-bit vector of [4 x float] as
///    specified by the immediate integer operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128 _mm_permute_ps(__m128 A, const int C);
/// \endcode
///
/// This intrinsic corresponds to the \c VPERMILPS instruction.
///
/// \param A
///    A 128-bit vector of [4 x float].
/// \param C
///    An immediate integer operand specifying how the values are to be copied.
///    Only bits [7:0] are examined by the expansion below; each 2-bit field
///    selects the source element for one result element.
///    Bits [1:0]:
///    00: Bits [31:0] of the source are copied to bits [31:0] of the
///    returned vector.
///    01: Bits [63:32] of the source are copied to bits [31:0] of the
///    returned vector.
///    10: Bits [95:64] of the source are copied to bits [31:0] of the
///    returned vector.
///    11: Bits [127:96] of the source are copied to bits [31:0] of the
///    returned vector.
///    Bits [3:2]:
///    00: Bits [31:0] of the source are copied to bits [63:32] of the
///    returned vector.
///    01: Bits [63:32] of the source are copied to bits [63:32] of the
///    returned vector.
///    10: Bits [95:64] of the source are copied to bits [63:32] of the
///    returned vector.
///    11: Bits [127:96] of the source are copied to bits [63:32] of the
///    returned vector.
///    Bits [5:4]:
///    00: Bits [31:0] of the source are copied to bits [95:64] of the
///    returned vector.
///    01: Bits [63:32] of the source are copied to bits [95:64] of the
///    returned vector.
///    10: Bits [95:64] of the source are copied to bits [95:64] of the
///    returned vector.
///    11: Bits [127:96] of the source are copied to bits [95:64] of the
///    returned vector.
///    Bits [7:6]:
///    00: Bits [31:0] of the source are copied to bits [127:96] of the
///    returned vector.
///    01: Bits [63:32] of the source are copied to bits [127:96] of the
///    returned vector.
///    10: Bits [95:64] of the source are copied to bits [127:96] of the
///    returned vector.
///    11: Bits [127:96] of the source are copied to bits [127:96] of the
///    returned vector.
/// \returns A 128-bit vector of [4 x float] containing the copied values.
#define _mm_permute_ps(A, C) __extension__ ({ \
  (__m128)__builtin_shufflevector((__v4sf)(__m128)(A), \
                                  (__v4sf)_mm_undefined_ps(), \
                                  ((C) >> 0) & 0x3, ((C) >> 2) & 0x3, \
                                  ((C) >> 4) & 0x3, ((C) >> 6) & 0x3); })
1106*344a7f5eSAndroid Build Coastguard Worker 
1107*344a7f5eSAndroid Build Coastguard Worker /// \brief Copies the values in a 256-bit vector of [8 x float] as
1108*344a7f5eSAndroid Build Coastguard Worker ///    specified by the immediate integer operand.
1109*344a7f5eSAndroid Build Coastguard Worker ///
1110*344a7f5eSAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
1111*344a7f5eSAndroid Build Coastguard Worker ///
1112*344a7f5eSAndroid Build Coastguard Worker /// \code
1113*344a7f5eSAndroid Build Coastguard Worker /// __m256 _mm256_permute_ps(__m256 A, const int C);
1114*344a7f5eSAndroid Build Coastguard Worker /// \endcode
1115*344a7f5eSAndroid Build Coastguard Worker ///
1116*344a7f5eSAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VPERMILPS / PERMILPS instruction.
1117*344a7f5eSAndroid Build Coastguard Worker ///
1118*344a7f5eSAndroid Build Coastguard Worker /// \param A
1119*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float].
1120*344a7f5eSAndroid Build Coastguard Worker /// \param C
1121*344a7f5eSAndroid Build Coastguard Worker ///    An immediate integer operand specifying how the values are to be copied.
1122*344a7f5eSAndroid Build Coastguard Worker ///    Bits [1:0]:
1123*344a7f5eSAndroid Build Coastguard Worker ///    00: Bits [31:0] of the source are copied to bits [31:0] of the
1124*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
1125*344a7f5eSAndroid Build Coastguard Worker ///    01: Bits [63:32] of the source are copied to bits [31:0] of the
1126*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
1127*344a7f5eSAndroid Build Coastguard Worker ///    10: Bits [95:64] of the source are copied to bits [31:0] of the
1128*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
1129*344a7f5eSAndroid Build Coastguard Worker ///    11: Bits [127:96] of the source are copied to bits [31:0] of the
1130*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
1131*344a7f5eSAndroid Build Coastguard Worker ///    Bits [3:2]:
1132*344a7f5eSAndroid Build Coastguard Worker ///    00: Bits [31:0] of the source are copied to bits [63:32] of the
1133*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
1134*344a7f5eSAndroid Build Coastguard Worker ///    01: Bits [63:32] of the source are copied to bits [63:32] of the
1135*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
1136*344a7f5eSAndroid Build Coastguard Worker ///    10: Bits [95:64] of the source are copied to bits [63:32] of the
1137*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
1138*344a7f5eSAndroid Build Coastguard Worker ///    11: Bits [127:96] of the source are copied to bits [63:32] of the
1139*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
1140*344a7f5eSAndroid Build Coastguard Worker ///    Bits [5:4]:
1141*344a7f5eSAndroid Build Coastguard Worker ///    00: Bits [31:0] of the source are copied to bits [95:64] of the
1142*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
1143*344a7f5eSAndroid Build Coastguard Worker ///    01: Bits [63:32] of the source are copied to bits [95:64] of the
1144*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
1145*344a7f5eSAndroid Build Coastguard Worker ///    10: Bits [95:64] of the source are copied to bits [95:64] of the
1146*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
1147*344a7f5eSAndroid Build Coastguard Worker ///    11: Bits [127:96] of the source are copied to bits [95:64] of the
1148*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
1149*344a7f5eSAndroid Build Coastguard Worker ///    Bits [7:6]:
1150*344a7f5eSAndroid Build Coastguard Worker ///    00: Bits [31:0] of the source are copied to bits [127:96] of the
1151*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
1152*344a7f5eSAndroid Build Coastguard Worker ///    01: Bits [63:32] of the source are copied to bits [127:96] of the
1153*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
1154*344a7f5eSAndroid Build Coastguard Worker ///    10: Bits [95:64] of the source are copied to bits [127:96] of the
1155*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
1156*344a7f5eSAndroid Build Coastguard Worker ///    11: Bits [127:96] of the source are copied to bits [127:96] of the
1157*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
1158*344a7f5eSAndroid Build Coastguard Worker ///    Bits [1:0]:
1159*344a7f5eSAndroid Build Coastguard Worker ///    00: Bits [159:128] of the source are copied to bits [159:128] of the
1160*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
1161*344a7f5eSAndroid Build Coastguard Worker ///    01: Bits [191:160] of the source are copied to bits [159:128] of the
1162*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
1163*344a7f5eSAndroid Build Coastguard Worker ///    10: Bits [223:192] of the source are copied to bits [159:128] of the
1164*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
1165*344a7f5eSAndroid Build Coastguard Worker ///    11: Bits [255:224] of the source are copied to bits [159:128] of the
1166*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
1167*344a7f5eSAndroid Build Coastguard Worker ///    Bits [3:2]:
1168*344a7f5eSAndroid Build Coastguard Worker ///    00: Bits [159:128] of the source are copied to bits [191:160] of the
1169*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
1170*344a7f5eSAndroid Build Coastguard Worker ///    01: Bits [191:160] of the source are copied to bits [191:160] of the
1171*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
1172*344a7f5eSAndroid Build Coastguard Worker ///    10: Bits [223:192] of the source are copied to bits [191:160] of the
1173*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
1174*344a7f5eSAndroid Build Coastguard Worker ///    11: Bits [255:224] of the source are copied to bits [191:160] of the
1175*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
1176*344a7f5eSAndroid Build Coastguard Worker ///    Bits [5:4]:
1177*344a7f5eSAndroid Build Coastguard Worker ///    00: Bits [159:128] of the source are copied to bits [223:192] of the
1178*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
1179*344a7f5eSAndroid Build Coastguard Worker ///    01: Bits [191:160] of the source are copied to bits [223:192] of the
1180*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
1181*344a7f5eSAndroid Build Coastguard Worker ///    10: Bits [223:192] of the source are copied to bits [223:192] of the
1182*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
1183*344a7f5eSAndroid Build Coastguard Worker ///    11: Bits [255:224] of the source are copied to bits [223:192] of the
1184*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
1185*344a7f5eSAndroid Build Coastguard Worker ///    Bits [7:6]:
1186*344a7f5eSAndroid Build Coastguard Worker ///    00: Bits [159:128] of the source are copied to bits [255:224] of the
1187*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
1188*344a7f5eSAndroid Build Coastguard Worker ///    01: Bits [191:160] of the source are copied to bits [255:224] of the
1189*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
1190*344a7f5eSAndroid Build Coastguard Worker ///    10: Bits [223:192] of the source are copied to bits [255:224] of the
1191*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
1192*344a7f5eSAndroid Build Coastguard Worker ///    11: Bits [255:224] of the source are copied to bits [255:224] of the
1193*344a7f5eSAndroid Build Coastguard Worker ///    returned vector.
1194*344a7f5eSAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] containing the copied values.
#define _mm256_permute_ps(A, C) __extension__ ({ \
  (__m256)__builtin_shufflevector((__v8sf)(__m256)(A), \
                                  /* second operand is never selected: */ \
                                  /* every index below is in [0, 7]    */ \
                                  (__v8sf)_mm256_undefined_ps(), \
                                  /* lower 128-bit lane: elements 0..3 */ \
                                  0 + (((C) >> 0) & 0x3), \
                                  0 + (((C) >> 2) & 0x3), \
                                  0 + (((C) >> 4) & 0x3), \
                                  0 + (((C) >> 6) & 0x3), \
                                  /* upper 128-bit lane: same 2-bit     */ \
                                  /* selectors, offset to elements 4..7 */ \
                                  4 + (((C) >> 0) & 0x3), \
                                  4 + (((C) >> 2) & 0x3), \
                                  4 + (((C) >> 4) & 0x3), \
                                  4 + (((C) >> 6) & 0x3)); })
1206*344a7f5eSAndroid Build Coastguard Worker 
/// \brief Permutes 128-bit data values stored in two 256-bit vectors of
///    [4 x double], as specified by the immediate integer operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m256d _mm256_permute2f128_pd(__m256d V1, __m256d V2, const int M);
/// \endcode
///
/// This intrinsic corresponds to the \c VPERM2F128 instruction.
///
/// \param V1
///    A 256-bit vector of [4 x double].
/// \param V2
///    A 256-bit vector of [4 x double].
/// \param M
///    An immediate integer operand specifying how the values are to be
///    permuted.
///    Bits [1:0]:
///    00: Bits [127:0] of operand V1 are copied to bits [127:0] of the
///    destination.
///    01: Bits [255:128] of operand V1 are copied to bits [127:0] of the
///    destination.
///    10: Bits [127:0] of operand V2 are copied to bits [127:0] of the
///    destination.
///    11: Bits [255:128] of operand V2 are copied to bits [127:0] of the
///    destination.
///    Bits [5:4]:
///    00: Bits [127:0] of operand V1 are copied to bits [255:128] of the
///    destination.
///    01: Bits [255:128] of operand V1 are copied to bits [255:128] of the
///    destination.
///    10: Bits [127:0] of operand V2 are copied to bits [255:128] of the
///    destination.
///    11: Bits [255:128] of operand V2 are copied to bits [255:128] of the
///    destination.
///    Per the VPERM2F128 instruction definition, bit [3] and bit [7] are
///    zeroing controls: when bit [3] is set, bits [127:0] of the destination
///    are set to zero regardless of bits [1:0]; when bit [7] is set, bits
///    [255:128] of the destination are set to zero regardless of bits [5:4].
/// \returns A 256-bit vector of [4 x double] containing the copied values.
#define _mm256_permute2f128_pd(V1, V2, M) __extension__ ({ \
  (__m256d)__builtin_ia32_vperm2f128_pd256((__v4df)(__m256d)(V1), \
                                           (__v4df)(__m256d)(V2), (M)); })
1247*344a7f5eSAndroid Build Coastguard Worker 
/// \brief Permutes 128-bit data values stored in two 256-bit vectors of
///    [8 x float], as specified by the immediate integer operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m256 _mm256_permute2f128_ps(__m256 V1, __m256 V2, const int M);
/// \endcode
///
/// This intrinsic corresponds to the \c VPERM2F128 instruction.
///
/// \param V1
///    A 256-bit vector of [8 x float].
/// \param V2
///    A 256-bit vector of [8 x float].
/// \param M
///    An immediate integer operand specifying how the values are to be
///    permuted.
///    Bits [1:0]:
///    00: Bits [127:0] of operand V1 are copied to bits [127:0] of the
///    destination.
///    01: Bits [255:128] of operand V1 are copied to bits [127:0] of the
///    destination.
///    10: Bits [127:0] of operand V2 are copied to bits [127:0] of the
///    destination.
///    11: Bits [255:128] of operand V2 are copied to bits [127:0] of the
///    destination.
///    Bits [5:4]:
///    00: Bits [127:0] of operand V1 are copied to bits [255:128] of the
///    destination.
///    01: Bits [255:128] of operand V1 are copied to bits [255:128] of the
///    destination.
///    10: Bits [127:0] of operand V2 are copied to bits [255:128] of the
///    destination.
///    11: Bits [255:128] of operand V2 are copied to bits [255:128] of the
///    destination.
///    Per the VPERM2F128 instruction definition, bit [3] and bit [7] are
///    zeroing controls: when bit [3] is set, bits [127:0] of the destination
///    are set to zero regardless of bits [1:0]; when bit [7] is set, bits
///    [255:128] of the destination are set to zero regardless of bits [5:4].
/// \returns A 256-bit vector of [8 x float] containing the copied values.
#define _mm256_permute2f128_ps(V1, V2, M) __extension__ ({ \
  (__m256)__builtin_ia32_vperm2f128_ps256((__v8sf)(__m256)(V1), \
                                          (__v8sf)(__m256)(V2), (M)); })
1288*344a7f5eSAndroid Build Coastguard Worker 
/// \brief Permutes 128-bit data values stored in two 256-bit integer vectors,
///    as specified by the immediate integer operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m256i _mm256_permute2f128_si256(__m256i V1, __m256i V2, const int M);
/// \endcode
///
/// This intrinsic corresponds to the \c VPERM2F128 instruction.
///
/// \param V1
///    A 256-bit integer vector.
/// \param V2
///    A 256-bit integer vector.
/// \param M
///    An immediate integer operand specifying how the values are to be copied.
///    Bits [1:0]:
///    00: Bits [127:0] of operand V1 are copied to bits [127:0] of the
///    destination.
///    01: Bits [255:128] of operand V1 are copied to bits [127:0] of the
///    destination.
///    10: Bits [127:0] of operand V2 are copied to bits [127:0] of the
///    destination.
///    11: Bits [255:128] of operand V2 are copied to bits [127:0] of the
///    destination.
///    Bits [5:4]:
///    00: Bits [127:0] of operand V1 are copied to bits [255:128] of the
///    destination.
///    01: Bits [255:128] of operand V1 are copied to bits [255:128] of the
///    destination.
///    10: Bits [127:0] of operand V2 are copied to bits [255:128] of the
///    destination.
///    11: Bits [255:128] of operand V2 are copied to bits [255:128] of the
///    destination.
///    Per the VPERM2F128 instruction definition, bit [3] and bit [7] are
///    zeroing controls: when bit [3] is set, bits [127:0] of the destination
///    are set to zero regardless of bits [1:0]; when bit [7] is set, bits
///    [255:128] of the destination are set to zero regardless of bits [5:4].
/// \returns A 256-bit integer vector containing the copied values.
#define _mm256_permute2f128_si256(V1, V2, M) __extension__ ({ \
  (__m256i)__builtin_ia32_vperm2f128_si256((__v8si)(__m256i)(V1), \
                                           (__v8si)(__m256i)(V2), (M)); })
1328*344a7f5eSAndroid Build Coastguard Worker 
1329*344a7f5eSAndroid Build Coastguard Worker /* Vector Blend */
/// \brief Merges 64-bit double-precision data values stored in either of the
///    two 256-bit vectors of [4 x double], as specified by the immediate
///    integer operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m256d _mm256_blend_pd(__m256d V1, __m256d V2, const int M);
/// \endcode
///
/// This intrinsic corresponds to the \c VBLENDPD / BLENDPD instruction.
///
/// \param V1
///    A 256-bit vector of [4 x double].
/// \param V2
///    A 256-bit vector of [4 x double].
/// \param M
///    An immediate integer operand, with mask bits [3:0] specifying how the
///    values are to be copied. The position of the mask bit corresponds to the
///    index of a copied value. When a mask bit is 0, the corresponding 64-bit
///    element in operand V1 is copied to the same position in the destination.
///    When a mask bit is 1, the corresponding 64-bit element in operand V2 is
///    copied to the same position in the destination.
/// \returns A 256-bit vector of [4 x double] containing the copied values.
#define _mm256_blend_pd(V1, V2, M) __extension__ ({ \
  (__m256d)__builtin_shufflevector((__v4df)(__m256d)(V1), \
                                   (__v4df)(__m256d)(V2), \
                                   /* indices 0..3 pick V1[i], */ \
                                   /* indices 4..7 pick V2[i]  */ \
                                   (((M) & 0x01) ? 4 : 0), \
                                   (((M) & 0x02) ? 5 : 1), \
                                   (((M) & 0x04) ? 6 : 2), \
                                   (((M) & 0x08) ? 7 : 3)); })
1361*344a7f5eSAndroid Build Coastguard Worker 
/// \brief Merges 32-bit single-precision data values stored in either of the
///    two 256-bit vectors of [8 x float], as specified by the immediate
///    integer operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m256 _mm256_blend_ps(__m256 V1, __m256 V2, const int M);
/// \endcode
///
/// This intrinsic corresponds to the \c VBLENDPS / BLENDPS instruction.
///
/// \param V1
///    A 256-bit vector of [8 x float].
/// \param V2
///    A 256-bit vector of [8 x float].
/// \param M
///    An immediate integer operand, with mask bits [7:0] specifying how the
///    values are to be copied. The position of the mask bit corresponds to the
///    index of a copied value. When a mask bit is 0, the corresponding 32-bit
///    element in operand V1 is copied to the same position in the destination.
///    When a mask bit is 1, the corresponding 32-bit element in operand V2 is
///    copied to the same position in the destination.
/// \returns A 256-bit vector of [8 x float] containing the copied values.
#define _mm256_blend_ps(V1, V2, M) __extension__ ({ \
  (__m256)__builtin_shufflevector((__v8sf)(__m256)(V1), \
                                  (__v8sf)(__m256)(V2), \
                                  /* indices 0..7 pick V1[i], */ \
                                  /* indices 8..15 pick V2[i] */ \
                                  (((M) & 0x01) ?  8 : 0), \
                                  (((M) & 0x02) ?  9 : 1), \
                                  (((M) & 0x04) ? 10 : 2), \
                                  (((M) & 0x08) ? 11 : 3), \
                                  (((M) & 0x10) ? 12 : 4), \
                                  (((M) & 0x20) ? 13 : 5), \
                                  (((M) & 0x40) ? 14 : 6), \
                                  (((M) & 0x80) ? 15 : 7)); })
1397*344a7f5eSAndroid Build Coastguard Worker 
1398*344a7f5eSAndroid Build Coastguard Worker /// \brief Merges 64-bit double-precision data values stored in either of the
1399*344a7f5eSAndroid Build Coastguard Worker ///    two 256-bit vectors of [4 x double], as specified by the 256-bit vector
1400*344a7f5eSAndroid Build Coastguard Worker ///    operand.
1401*344a7f5eSAndroid Build Coastguard Worker ///
1402*344a7f5eSAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
1403*344a7f5eSAndroid Build Coastguard Worker ///
1404*344a7f5eSAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VBLENDVPD / BLENDVPD instruction.
1405*344a7f5eSAndroid Build Coastguard Worker ///
1406*344a7f5eSAndroid Build Coastguard Worker /// \param __a
1407*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double].
1408*344a7f5eSAndroid Build Coastguard Worker /// \param __b
1409*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double].
1410*344a7f5eSAndroid Build Coastguard Worker /// \param __c
1411*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector operand, with mask bits 255, 191, 127, and 63 specifying
1412*344a7f5eSAndroid Build Coastguard Worker ///    how the values are to be copied. The position of the mask bit corresponds
1413*344a7f5eSAndroid Build Coastguard Worker ///    to the most significant bit of a copied value. When a mask bit is 0, the
1414*344a7f5eSAndroid Build Coastguard Worker ///    corresponding 64-bit element in operand __a is copied to the same
1415*344a7f5eSAndroid Build Coastguard Worker ///    position in the destination. When a mask bit is 1, the corresponding
1416*344a7f5eSAndroid Build Coastguard Worker ///    64-bit element in operand __b is copied to the same position in the
1417*344a7f5eSAndroid Build Coastguard Worker ///    destination.
1418*344a7f5eSAndroid Build Coastguard Worker /// \returns A 256-bit vector of [4 x double] containing the copied values.
1419*344a7f5eSAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_blendv_pd(__m256d __a,__m256d __b,__m256d __c)1420*344a7f5eSAndroid Build Coastguard Worker _mm256_blendv_pd(__m256d __a, __m256d __b, __m256d __c)
1421*344a7f5eSAndroid Build Coastguard Worker {
1422*344a7f5eSAndroid Build Coastguard Worker   return (__m256d)__builtin_ia32_blendvpd256(
1423*344a7f5eSAndroid Build Coastguard Worker     (__v4df)__a, (__v4df)__b, (__v4df)__c);
1424*344a7f5eSAndroid Build Coastguard Worker }
1425*344a7f5eSAndroid Build Coastguard Worker 
1426*344a7f5eSAndroid Build Coastguard Worker /// \brief Merges 32-bit single-precision data values stored in either of the
1427*344a7f5eSAndroid Build Coastguard Worker ///    two 256-bit vectors of [8 x float], as specified by the 256-bit vector
1428*344a7f5eSAndroid Build Coastguard Worker ///    operand.
1429*344a7f5eSAndroid Build Coastguard Worker ///
1430*344a7f5eSAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
1431*344a7f5eSAndroid Build Coastguard Worker ///
1432*344a7f5eSAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VBLENDVPS / BLENDVPS instruction.
1433*344a7f5eSAndroid Build Coastguard Worker ///
1434*344a7f5eSAndroid Build Coastguard Worker /// \param __a
1435*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float].
1436*344a7f5eSAndroid Build Coastguard Worker /// \param __b
1437*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float].
1438*344a7f5eSAndroid Build Coastguard Worker /// \param __c
1439*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector operand, with mask bits 255, 223, 191, 159, 127, 95, 63,
1440*344a7f5eSAndroid Build Coastguard Worker ///    and 31 specifying how the values are to be copied. The position of the
1441*344a7f5eSAndroid Build Coastguard Worker ///    mask bit corresponds to the most significant bit of a copied value. When
1442*344a7f5eSAndroid Build Coastguard Worker ///    a mask bit is 0, the corresponding 32-bit element in operand __a is
1443*344a7f5eSAndroid Build Coastguard Worker ///    copied to the same position in the destination. When a mask bit is 1, the
1444*344a7f5eSAndroid Build Coastguard Worker ///    corresponding 32-bit element in operand __b is copied to the same
1445*344a7f5eSAndroid Build Coastguard Worker ///    position in the destination.
1446*344a7f5eSAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] containing the copied values.
1447*344a7f5eSAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_blendv_ps(__m256 __a,__m256 __b,__m256 __c)1448*344a7f5eSAndroid Build Coastguard Worker _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
1449*344a7f5eSAndroid Build Coastguard Worker {
1450*344a7f5eSAndroid Build Coastguard Worker   return (__m256)__builtin_ia32_blendvps256(
1451*344a7f5eSAndroid Build Coastguard Worker     (__v8sf)__a, (__v8sf)__b, (__v8sf)__c);
1452*344a7f5eSAndroid Build Coastguard Worker }
1453*344a7f5eSAndroid Build Coastguard Worker 
1454*344a7f5eSAndroid Build Coastguard Worker /* Vector Dot Product */
/// \brief Computes two dot products in parallel, using the lower and upper
///    halves of two [8 x float] vectors as input to the two computations, and
///    returning the two dot products in the lower and upper halves of the
///    [8 x float] result. The immediate integer operand controls which
///    input elements will contribute to the dot product, and where the final
///    results are returned. In general, for each dot product, the four
///    corresponding elements of the input vectors are multiplied; the first
///    two and second two products are summed, then the two sums are added to
///    form the final result.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m256 _mm256_dp_ps(__m256 V1, __m256 V2, const int M);
/// \endcode
///
/// This intrinsic corresponds to the \c VDPPS / DPPS instruction.
///
/// \param V1
///    A vector of [8 x float] values, treated as two [4 x float] vectors.
/// \param V2
///    A vector of [8 x float] values, treated as two [4 x float] vectors.
/// \param M
///    An immediate integer argument. Bits [7:4] determine which elements of
///    the input vectors are used, with bit [4] corresponding to the lowest
///    element and bit [7] corresponding to the highest element of each
///    [4 x float] subvector. If a bit is set, the corresponding elements from
///    the two input vectors are used as an input for dot product; otherwise
///    that input is treated as zero. Bits [3:0] determine which elements of
///    the result will receive a copy of the final dot product, with bit [0]
///    corresponding to the lowest element and bit [3] corresponding to the
///    highest element of each [4 x float] subvector. If a bit is set, the dot
///    product is returned in the corresponding element; otherwise that element
///    is set to zero. The bitmask is applied in the same way to each of the
///    two parallel dot product computations.
/// \returns A 256-bit vector of [8 x float] containing the two dot products.
#define _mm256_dp_ps(V1, V2, M) __extension__ ({ \
  (__m256)__builtin_ia32_dpps256((__v8sf)(__m256)(V1), \
                                 (__v8sf)(__m256)(V2), (M)); })
1494*344a7f5eSAndroid Build Coastguard Worker 
1495*344a7f5eSAndroid Build Coastguard Worker /* Vector shuffle */
/// \brief Selects 8 float values from the 256-bit operands of [8 x float], as
///    specified by the immediate value operand. The same 8-bit selector is
///    applied to the lower and to the upper 128-bit half of the operands. The
///    four elements selected from the first 256-bit operand are copied to
///    bits [63:0] and bits [191:128] of the destination, and the four elements
///    selected from the second 256-bit operand are copied to bits [127:64] and
///    bits [255:192] of the destination. For example, if bits [7:0] of the
///    immediate operand contain a value of 0xFF, the 256-bit destination
///    vector would contain the following values (highest element first):
///    b[7], b[7], a[7], a[7], b[3], b[3], a[3], a[3].
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m256 _mm256_shuffle_ps(__m256 a, __m256 b, const int mask);
/// \endcode
///
/// This intrinsic corresponds to the \c VSHUFPS / SHUFPS instruction.
///
/// \param a
///    A 256-bit vector of [8 x float]. The four selected elements in this
///    operand are copied to bits [63:0] and bits [191:128] in the destination,
///    according to the bits specified in the immediate operand.
/// \param b
///    A 256-bit vector of [8 x float]. The four selected elements in this
///    operand are copied to bits [127:64] and bits [255:192] in the
///    destination, according to the bits specified in the immediate operand.
/// \param mask
///    An immediate value containing an 8-bit value specifying which elements to
///    copy from a and b. Bits [3:0] specify the values copied from operand a.
///    Bits [7:4] specify the values copied from operand b.
///    Each 2-bit field of the mask controls one destination element in each
///    128-bit half:
///    Bits [1:0] are used to assign values to bits [31:0] and [159:128] in the
///    destination.
///    Bits [3:2] are used to assign values to bits [63:32] and [191:160] in the
///    destination.
///    Bits [5:4] are used to assign values to bits [95:64] and [223:192] in the
///    destination.
///    Bits [7:6] are used to assign values to bits [127:96] and [255:224] in
///    the destination.
///    The value of each 2-bit field chooses the source element:
///    00: Bits [31:0] and [159:128] are copied from the selected operand.
///    01: Bits [63:32] and [191:160] are copied from the selected operand.
///    10: Bits [95:64] and [223:192] are copied from the selected operand.
///    11: Bits [127:96] and [255:224] are copied from the selected operand.
/// \returns A 256-bit vector of [8 x float] containing the shuffled values.
#define _mm256_shuffle_ps(a, b, mask) __extension__ ({ \
  (__m256)__builtin_shufflevector((__v8sf)(__m256)(a), \
                                  (__v8sf)(__m256)(b), \
      /* Shuffle indices 0-7 name elements of a, 8-15 elements of b. */ \
      /* Lower 128-bit half: two elements of a, then two of b. */ \
                                  0  + (((mask) >> 0) & 0x3), \
                                  0  + (((mask) >> 2) & 0x3), \
                                  8  + (((mask) >> 4) & 0x3), \
                                  8  + (((mask) >> 6) & 0x3), \
      /* Upper 128-bit half: same selectors, offset by 4 elements. */ \
                                  4  + (((mask) >> 0) & 0x3), \
                                  4  + (((mask) >> 2) & 0x3), \
                                  12 + (((mask) >> 4) & 0x3), \
                                  12 + (((mask) >> 6) & 0x3)); })
1554*344a7f5eSAndroid Build Coastguard Worker 
/// \brief Selects four double-precision values from the 256-bit operands of
///    [4 x double], as specified by the immediate value operand. The selected
///    elements from the first 256-bit operand are copied to bits [63:0] and
///    bits [191:128] in the destination, and the selected elements from the
///    second 256-bit operand are copied to bits [127:64] and bits [255:192] in
///    the destination. For example, if bits [3:0] of the immediate operand
///    contain a value of 0xF, the 256-bit destination vector would contain the
///    following values (highest element first): b[3], a[3], b[1], a[1].
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m256d _mm256_shuffle_pd(__m256d a, __m256d b, const int mask);
/// \endcode
///
/// This intrinsic corresponds to the \c VSHUFPD / SHUFPD instruction.
///
/// \param a
///    A 256-bit vector of [4 x double].
/// \param b
///    A 256-bit vector of [4 x double].
/// \param mask
///    An immediate value whose bits [3:0] specify which elements to copy from
///    a and b (one bit per destination element):
///    Bit [0]=0: Bits [63:0] are copied from a to bits [63:0] of the
///    destination.
///    Bit [0]=1: Bits [127:64] are copied from a to bits [63:0] of the
///    destination.
///    Bit [1]=0: Bits [63:0] are copied from b to bits [127:64] of the
///    destination.
///    Bit [1]=1: Bits [127:64] are copied from b to bits [127:64] of the
///    destination.
///    Bit [2]=0: Bits [191:128] are copied from a to bits [191:128] of the
///    destination.
///    Bit [2]=1: Bits [255:192] are copied from a to bits [191:128] of the
///    destination.
///    Bit [3]=0: Bits [191:128] are copied from b to bits [255:192] of the
///    destination.
///    Bit [3]=1: Bits [255:192] are copied from b to bits [255:192] of the
///    destination.
/// \returns A 256-bit vector of [4 x double] containing the shuffled values.
#define _mm256_shuffle_pd(a, b, mask) __extension__ ({ \
  (__m256d)__builtin_shufflevector((__v4df)(__m256d)(a), \
                                   (__v4df)(__m256d)(b), \
      /* Shuffle indices 0-3 name elements of a, 4-7 elements of b. */ \
                                   0 + (((mask) >> 0) & 0x1), \
                                   4 + (((mask) >> 1) & 0x1), \
                                   2 + (((mask) >> 2) & 0x1), \
                                   6 + (((mask) >> 3) & 0x1)); })
1603*344a7f5eSAndroid Build Coastguard Worker 
/* Compare */
/* Predicate codes for the immediate operand of the _mm_cmp_* and _mm256_cmp_*
 * macros below. The codes occupy five bits (0x00 through 0x1f). Each code in
 * 0x10-0x1f names the same predicate as the code 0x10 below it, with the
 * opposite signaling behavior (for example _CMP_EQ_OQ = 0x00 and
 * _CMP_EQ_OS = 0x10). */
#define _CMP_EQ_OQ    0x00 /* Equal (ordered, non-signaling)  */
#define _CMP_LT_OS    0x01 /* Less-than (ordered, signaling)  */
#define _CMP_LE_OS    0x02 /* Less-than-or-equal (ordered, signaling)  */
#define _CMP_UNORD_Q  0x03 /* Unordered (non-signaling)  */
#define _CMP_NEQ_UQ   0x04 /* Not-equal (unordered, non-signaling)  */
#define _CMP_NLT_US   0x05 /* Not-less-than (unordered, signaling)  */
#define _CMP_NLE_US   0x06 /* Not-less-than-or-equal (unordered, signaling)  */
#define _CMP_ORD_Q    0x07 /* Ordered (non-signaling)  */
#define _CMP_EQ_UQ    0x08 /* Equal (unordered, non-signaling)  */
#define _CMP_NGE_US   0x09 /* Not-greater-than-or-equal (unordered, signaling) */
#define _CMP_NGT_US   0x0a /* Not-greater-than (unordered, signaling)  */
#define _CMP_FALSE_OQ 0x0b /* False (ordered, non-signaling)  */
#define _CMP_NEQ_OQ   0x0c /* Not-equal (ordered, non-signaling)  */
#define _CMP_GE_OS    0x0d /* Greater-than-or-equal (ordered, signaling)  */
#define _CMP_GT_OS    0x0e /* Greater-than (ordered, signaling)  */
#define _CMP_TRUE_UQ  0x0f /* True (unordered, non-signaling)  */
#define _CMP_EQ_OS    0x10 /* Equal (ordered, signaling)  */
#define _CMP_LT_OQ    0x11 /* Less-than (ordered, non-signaling)  */
#define _CMP_LE_OQ    0x12 /* Less-than-or-equal (ordered, non-signaling)  */
#define _CMP_UNORD_S  0x13 /* Unordered (signaling)  */
#define _CMP_NEQ_US   0x14 /* Not-equal (unordered, signaling)  */
#define _CMP_NLT_UQ   0x15 /* Not-less-than (unordered, non-signaling)  */
#define _CMP_NLE_UQ   0x16 /* Not-less-than-or-equal (unordered, non-signaling) */
#define _CMP_ORD_S    0x17 /* Ordered (signaling)  */
#define _CMP_EQ_US    0x18 /* Equal (unordered, signaling)  */
#define _CMP_NGE_UQ   0x19 /* Not-greater-than-or-equal (unordered, non-signaling) */
#define _CMP_NGT_UQ   0x1a /* Not-greater-than (unordered, non-signaling)  */
#define _CMP_FALSE_OS 0x1b /* False (ordered, signaling)  */
#define _CMP_NEQ_OS   0x1c /* Not-equal (ordered, signaling)  */
#define _CMP_GE_OQ    0x1d /* Greater-than-or-equal (ordered, non-signaling)  */
#define _CMP_GT_OQ    0x1e /* Greater-than (ordered, non-signaling)  */
#define _CMP_TRUE_US  0x1f /* True (unordered, signaling)  */
1637*344a7f5eSAndroid Build Coastguard Worker 
/// \brief Compares each of the corresponding double-precision values of two
///    128-bit vectors of [2 x double], using the operation specified by the
///    immediate integer operand. Returns a [2 x double] vector consisting of
///    two doubles corresponding to the two comparison results: zero if the
///    comparison is false, and all 1's if the comparison is true.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128d _mm_cmp_pd(__m128d a, __m128d b, const int c);
/// \endcode
///
/// This intrinsic corresponds to the \c VCMPPD / CMPPD instruction.
///
/// \param a
///    A 128-bit vector of [2 x double].
/// \param b
///    A 128-bit vector of [2 x double].
/// \param c
///    An immediate integer operand, with bits [4:0] specifying which comparison
///    operation to use (see the _CMP_* constants defined in this header):
///    0x00 (_CMP_EQ_OQ): Equal (ordered, non-signaling)
///    0x01 (_CMP_LT_OS): Less-than (ordered, signaling)
///    0x02 (_CMP_LE_OS): Less-than-or-equal (ordered, signaling)
///    0x03 (_CMP_UNORD_Q): Unordered (non-signaling)
///    0x04 (_CMP_NEQ_UQ): Not-equal (unordered, non-signaling)
///    0x05 (_CMP_NLT_US): Not-less-than (unordered, signaling)
///    0x06 (_CMP_NLE_US): Not-less-than-or-equal (unordered, signaling)
///    0x07 (_CMP_ORD_Q): Ordered (non-signaling)
///    0x08 (_CMP_EQ_UQ): Equal (unordered, non-signaling)
///    0x09 (_CMP_NGE_US): Not-greater-than-or-equal (unordered, signaling)
///    0x0a (_CMP_NGT_US): Not-greater-than (unordered, signaling)
///    0x0b (_CMP_FALSE_OQ): False (ordered, non-signaling)
///    0x0c (_CMP_NEQ_OQ): Not-equal (ordered, non-signaling)
///    0x0d (_CMP_GE_OS): Greater-than-or-equal (ordered, signaling)
///    0x0e (_CMP_GT_OS): Greater-than (ordered, signaling)
///    0x0f (_CMP_TRUE_UQ): True (unordered, non-signaling)
///    0x10 (_CMP_EQ_OS): Equal (ordered, signaling)
///    0x11 (_CMP_LT_OQ): Less-than (ordered, non-signaling)
///    0x12 (_CMP_LE_OQ): Less-than-or-equal (ordered, non-signaling)
///    0x13 (_CMP_UNORD_S): Unordered (signaling)
///    0x14 (_CMP_NEQ_US): Not-equal (unordered, signaling)
///    0x15 (_CMP_NLT_UQ): Not-less-than (unordered, non-signaling)
///    0x16 (_CMP_NLE_UQ): Not-less-than-or-equal (unordered, non-signaling)
///    0x17 (_CMP_ORD_S): Ordered (signaling)
///    0x18 (_CMP_EQ_US): Equal (unordered, signaling)
///    0x19 (_CMP_NGE_UQ): Not-greater-than-or-equal (unordered, non-signaling)
///    0x1a (_CMP_NGT_UQ): Not-greater-than (unordered, non-signaling)
///    0x1b (_CMP_FALSE_OS): False (ordered, signaling)
///    0x1c (_CMP_NEQ_OS): Not-equal (ordered, signaling)
///    0x1d (_CMP_GE_OQ): Greater-than-or-equal (ordered, non-signaling)
///    0x1e (_CMP_GT_OQ): Greater-than (ordered, non-signaling)
///    0x1f (_CMP_TRUE_US): True (unordered, signaling)
/// \returns A 128-bit vector of [2 x double] containing the comparison results.
#define _mm_cmp_pd(a, b, c) __extension__ ({ \
  (__m128d)__builtin_ia32_cmppd((__v2df)(__m128d)(a), \
                                (__v2df)(__m128d)(b), (c)); })
1673*344a7f5eSAndroid Build Coastguard Worker 
/// \brief Compares each of the corresponding values of two 128-bit vectors of
///    [4 x float], using the operation specified by the immediate integer
///    operand. Returns a [4 x float] vector consisting of four floats
///    corresponding to the four comparison results: zero if the comparison is
///    false, and all 1's if the comparison is true.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128 _mm_cmp_ps(__m128 a, __m128 b, const int c);
/// \endcode
///
/// This intrinsic corresponds to the \c VCMPPS / CMPPS instruction.
///
/// \param a
///    A 128-bit vector of [4 x float].
/// \param b
///    A 128-bit vector of [4 x float].
/// \param c
///    An immediate integer operand, with bits [4:0] specifying which comparison
///    operation to use (see the _CMP_* constants defined in this header):
///    0x00 (_CMP_EQ_OQ): Equal (ordered, non-signaling)
///    0x01 (_CMP_LT_OS): Less-than (ordered, signaling)
///    0x02 (_CMP_LE_OS): Less-than-or-equal (ordered, signaling)
///    0x03 (_CMP_UNORD_Q): Unordered (non-signaling)
///    0x04 (_CMP_NEQ_UQ): Not-equal (unordered, non-signaling)
///    0x05 (_CMP_NLT_US): Not-less-than (unordered, signaling)
///    0x06 (_CMP_NLE_US): Not-less-than-or-equal (unordered, signaling)
///    0x07 (_CMP_ORD_Q): Ordered (non-signaling)
///    0x08 (_CMP_EQ_UQ): Equal (unordered, non-signaling)
///    0x09 (_CMP_NGE_US): Not-greater-than-or-equal (unordered, signaling)
///    0x0a (_CMP_NGT_US): Not-greater-than (unordered, signaling)
///    0x0b (_CMP_FALSE_OQ): False (ordered, non-signaling)
///    0x0c (_CMP_NEQ_OQ): Not-equal (ordered, non-signaling)
///    0x0d (_CMP_GE_OS): Greater-than-or-equal (ordered, signaling)
///    0x0e (_CMP_GT_OS): Greater-than (ordered, signaling)
///    0x0f (_CMP_TRUE_UQ): True (unordered, non-signaling)
///    0x10 (_CMP_EQ_OS): Equal (ordered, signaling)
///    0x11 (_CMP_LT_OQ): Less-than (ordered, non-signaling)
///    0x12 (_CMP_LE_OQ): Less-than-or-equal (ordered, non-signaling)
///    0x13 (_CMP_UNORD_S): Unordered (signaling)
///    0x14 (_CMP_NEQ_US): Not-equal (unordered, signaling)
///    0x15 (_CMP_NLT_UQ): Not-less-than (unordered, non-signaling)
///    0x16 (_CMP_NLE_UQ): Not-less-than-or-equal (unordered, non-signaling)
///    0x17 (_CMP_ORD_S): Ordered (signaling)
///    0x18 (_CMP_EQ_US): Equal (unordered, signaling)
///    0x19 (_CMP_NGE_UQ): Not-greater-than-or-equal (unordered, non-signaling)
///    0x1a (_CMP_NGT_UQ): Not-greater-than (unordered, non-signaling)
///    0x1b (_CMP_FALSE_OS): False (ordered, signaling)
///    0x1c (_CMP_NEQ_OS): Not-equal (ordered, signaling)
///    0x1d (_CMP_GE_OQ): Greater-than-or-equal (ordered, non-signaling)
///    0x1e (_CMP_GT_OQ): Greater-than (ordered, non-signaling)
///    0x1f (_CMP_TRUE_US): True (unordered, signaling)
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
#define _mm_cmp_ps(a, b, c) __extension__ ({ \
  (__m128)__builtin_ia32_cmpps((__v4sf)(__m128)(a), \
                               (__v4sf)(__m128)(b), (c)); })
1709*344a7f5eSAndroid Build Coastguard Worker 
/// \brief Compares each of the corresponding double-precision values of two
///    256-bit vectors of [4 x double], using the operation specified by the
///    immediate integer operand. Returns a [4 x double] vector consisting of
///    four doubles corresponding to the four comparison results: zero if the
///    comparison is false, and all 1's if the comparison is true.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m256d _mm256_cmp_pd(__m256d a, __m256d b, const int c);
/// \endcode
///
/// This intrinsic corresponds to the \c VCMPPD / CMPPD instruction.
///
/// \param a
///    A 256-bit vector of [4 x double].
/// \param b
///    A 256-bit vector of [4 x double].
/// \param c
///    An immediate integer operand, with bits [4:0] specifying which comparison
///    operation to use (see the _CMP_* constants defined in this header):
///    0x00 (_CMP_EQ_OQ): Equal (ordered, non-signaling)
///    0x01 (_CMP_LT_OS): Less-than (ordered, signaling)
///    0x02 (_CMP_LE_OS): Less-than-or-equal (ordered, signaling)
///    0x03 (_CMP_UNORD_Q): Unordered (non-signaling)
///    0x04 (_CMP_NEQ_UQ): Not-equal (unordered, non-signaling)
///    0x05 (_CMP_NLT_US): Not-less-than (unordered, signaling)
///    0x06 (_CMP_NLE_US): Not-less-than-or-equal (unordered, signaling)
///    0x07 (_CMP_ORD_Q): Ordered (non-signaling)
///    0x08 (_CMP_EQ_UQ): Equal (unordered, non-signaling)
///    0x09 (_CMP_NGE_US): Not-greater-than-or-equal (unordered, signaling)
///    0x0a (_CMP_NGT_US): Not-greater-than (unordered, signaling)
///    0x0b (_CMP_FALSE_OQ): False (ordered, non-signaling)
///    0x0c (_CMP_NEQ_OQ): Not-equal (ordered, non-signaling)
///    0x0d (_CMP_GE_OS): Greater-than-or-equal (ordered, signaling)
///    0x0e (_CMP_GT_OS): Greater-than (ordered, signaling)
///    0x0f (_CMP_TRUE_UQ): True (unordered, non-signaling)
///    0x10 (_CMP_EQ_OS): Equal (ordered, signaling)
///    0x11 (_CMP_LT_OQ): Less-than (ordered, non-signaling)
///    0x12 (_CMP_LE_OQ): Less-than-or-equal (ordered, non-signaling)
///    0x13 (_CMP_UNORD_S): Unordered (signaling)
///    0x14 (_CMP_NEQ_US): Not-equal (unordered, signaling)
///    0x15 (_CMP_NLT_UQ): Not-less-than (unordered, non-signaling)
///    0x16 (_CMP_NLE_UQ): Not-less-than-or-equal (unordered, non-signaling)
///    0x17 (_CMP_ORD_S): Ordered (signaling)
///    0x18 (_CMP_EQ_US): Equal (unordered, signaling)
///    0x19 (_CMP_NGE_UQ): Not-greater-than-or-equal (unordered, non-signaling)
///    0x1a (_CMP_NGT_UQ): Not-greater-than (unordered, non-signaling)
///    0x1b (_CMP_FALSE_OS): False (ordered, signaling)
///    0x1c (_CMP_NEQ_OS): Not-equal (ordered, signaling)
///    0x1d (_CMP_GE_OQ): Greater-than-or-equal (ordered, non-signaling)
///    0x1e (_CMP_GT_OQ): Greater-than (ordered, non-signaling)
///    0x1f (_CMP_TRUE_US): True (unordered, signaling)
/// \returns A 256-bit vector of [4 x double] containing the comparison results.
#define _mm256_cmp_pd(a, b, c) __extension__ ({ \
  (__m256d)__builtin_ia32_cmppd256((__v4df)(__m256d)(a), \
                                   (__v4df)(__m256d)(b), (c)); })
1745*344a7f5eSAndroid Build Coastguard Worker 
/// \brief Compares each of the corresponding values of two 256-bit vectors of
///    [8 x float], using the operation specified by the immediate integer
///    operand. Returns a [8 x float] vector consisting of eight floats
///    corresponding to the eight comparison results: zero if the comparison is
///    false, and all 1's if the comparison is true.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m256 _mm256_cmp_ps(__m256 a, __m256 b, const int c);
/// \endcode
///
/// This intrinsic corresponds to the \c VCMPPS / CMPPS instruction.
///
/// \param a
///    A 256-bit vector of [8 x float].
/// \param b
///    A 256-bit vector of [8 x float].
/// \param c
///    An immediate integer operand, with bits [4:0] specifying which comparison
///    operation to use (see the _CMP_* constants defined in this header):
///    0x00 (_CMP_EQ_OQ): Equal (ordered, non-signaling)
///    0x01 (_CMP_LT_OS): Less-than (ordered, signaling)
///    0x02 (_CMP_LE_OS): Less-than-or-equal (ordered, signaling)
///    0x03 (_CMP_UNORD_Q): Unordered (non-signaling)
///    0x04 (_CMP_NEQ_UQ): Not-equal (unordered, non-signaling)
///    0x05 (_CMP_NLT_US): Not-less-than (unordered, signaling)
///    0x06 (_CMP_NLE_US): Not-less-than-or-equal (unordered, signaling)
///    0x07 (_CMP_ORD_Q): Ordered (non-signaling)
///    0x08 (_CMP_EQ_UQ): Equal (unordered, non-signaling)
///    0x09 (_CMP_NGE_US): Not-greater-than-or-equal (unordered, signaling)
///    0x0a (_CMP_NGT_US): Not-greater-than (unordered, signaling)
///    0x0b (_CMP_FALSE_OQ): False (ordered, non-signaling)
///    0x0c (_CMP_NEQ_OQ): Not-equal (ordered, non-signaling)
///    0x0d (_CMP_GE_OS): Greater-than-or-equal (ordered, signaling)
///    0x0e (_CMP_GT_OS): Greater-than (ordered, signaling)
///    0x0f (_CMP_TRUE_UQ): True (unordered, non-signaling)
///    0x10 (_CMP_EQ_OS): Equal (ordered, signaling)
///    0x11 (_CMP_LT_OQ): Less-than (ordered, non-signaling)
///    0x12 (_CMP_LE_OQ): Less-than-or-equal (ordered, non-signaling)
///    0x13 (_CMP_UNORD_S): Unordered (signaling)
///    0x14 (_CMP_NEQ_US): Not-equal (unordered, signaling)
///    0x15 (_CMP_NLT_UQ): Not-less-than (unordered, non-signaling)
///    0x16 (_CMP_NLE_UQ): Not-less-than-or-equal (unordered, non-signaling)
///    0x17 (_CMP_ORD_S): Ordered (signaling)
///    0x18 (_CMP_EQ_US): Equal (unordered, signaling)
///    0x19 (_CMP_NGE_UQ): Not-greater-than-or-equal (unordered, non-signaling)
///    0x1a (_CMP_NGT_UQ): Not-greater-than (unordered, non-signaling)
///    0x1b (_CMP_FALSE_OS): False (ordered, signaling)
///    0x1c (_CMP_NEQ_OS): Not-equal (ordered, signaling)
///    0x1d (_CMP_GE_OQ): Greater-than-or-equal (ordered, non-signaling)
///    0x1e (_CMP_GT_OQ): Greater-than (ordered, non-signaling)
///    0x1f (_CMP_TRUE_US): True (unordered, signaling)
/// \returns A 256-bit vector of [8 x float] containing the comparison results.
#define _mm256_cmp_ps(a, b, c) __extension__ ({ \
  (__m256)__builtin_ia32_cmpps256((__v8sf)(__m256)(a), \
                                  (__v8sf)(__m256)(b), (c)); })
1781*344a7f5eSAndroid Build Coastguard Worker 
/// \brief Compares each of the corresponding scalar double-precision values of
///    two 128-bit vectors of [2 x double], using the operation specified by the
///    immediate integer operand. If the result is true, all 64 bits of the
///    destination vector are set; otherwise they are cleared.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128d _mm_cmp_sd(__m128d a, __m128d b, const int c);
/// \endcode
///
/// This intrinsic corresponds to the \c VCMPSD / CMPSD instruction.
///
/// \param a
///    A 128-bit vector of [2 x double].
/// \param b
///    A 128-bit vector of [2 x double].
/// \param c
///    An immediate integer operand, with bits [4:0] specifying which comparison
///    operation to use (see the _CMP_* constants defined in this header):
///    0x00 (_CMP_EQ_OQ): Equal (ordered, non-signaling)
///    0x01 (_CMP_LT_OS): Less-than (ordered, signaling)
///    0x02 (_CMP_LE_OS): Less-than-or-equal (ordered, signaling)
///    0x03 (_CMP_UNORD_Q): Unordered (non-signaling)
///    0x04 (_CMP_NEQ_UQ): Not-equal (unordered, non-signaling)
///    0x05 (_CMP_NLT_US): Not-less-than (unordered, signaling)
///    0x06 (_CMP_NLE_US): Not-less-than-or-equal (unordered, signaling)
///    0x07 (_CMP_ORD_Q): Ordered (non-signaling)
///    0x08 (_CMP_EQ_UQ): Equal (unordered, non-signaling)
///    0x09 (_CMP_NGE_US): Not-greater-than-or-equal (unordered, signaling)
///    0x0a (_CMP_NGT_US): Not-greater-than (unordered, signaling)
///    0x0b (_CMP_FALSE_OQ): False (ordered, non-signaling)
///    0x0c (_CMP_NEQ_OQ): Not-equal (ordered, non-signaling)
///    0x0d (_CMP_GE_OS): Greater-than-or-equal (ordered, signaling)
///    0x0e (_CMP_GT_OS): Greater-than (ordered, signaling)
///    0x0f (_CMP_TRUE_UQ): True (unordered, non-signaling)
///    0x10 (_CMP_EQ_OS): Equal (ordered, signaling)
///    0x11 (_CMP_LT_OQ): Less-than (ordered, non-signaling)
///    0x12 (_CMP_LE_OQ): Less-than-or-equal (ordered, non-signaling)
///    0x13 (_CMP_UNORD_S): Unordered (signaling)
///    0x14 (_CMP_NEQ_US): Not-equal (unordered, signaling)
///    0x15 (_CMP_NLT_UQ): Not-less-than (unordered, non-signaling)
///    0x16 (_CMP_NLE_UQ): Not-less-than-or-equal (unordered, non-signaling)
///    0x17 (_CMP_ORD_S): Ordered (signaling)
///    0x18 (_CMP_EQ_US): Equal (unordered, signaling)
///    0x19 (_CMP_NGE_UQ): Not-greater-than-or-equal (unordered, non-signaling)
///    0x1a (_CMP_NGT_UQ): Not-greater-than (unordered, non-signaling)
///    0x1b (_CMP_FALSE_OS): False (ordered, signaling)
///    0x1c (_CMP_NEQ_OS): Not-equal (ordered, signaling)
///    0x1d (_CMP_GE_OQ): Greater-than-or-equal (ordered, non-signaling)
///    0x1e (_CMP_GT_OQ): Greater-than (ordered, non-signaling)
///    0x1f (_CMP_TRUE_US): True (unordered, signaling)
/// \returns A 128-bit vector of [2 x double] containing the comparison results.
#define _mm_cmp_sd(a, b, c) __extension__ ({ \
  (__m128d)__builtin_ia32_cmpsd((__v2df)(__m128d)(a), \
                                (__v2df)(__m128d)(b), (c)); })
1816*344a7f5eSAndroid Build Coastguard Worker 
1817*344a7f5eSAndroid Build Coastguard Worker /// \brief Compares each of the corresponding scalar values of two 128-bit
1818*344a7f5eSAndroid Build Coastguard Worker ///    vectors of [4 x float], using the operation specified by the immediate
1819*344a7f5eSAndroid Build Coastguard Worker ///    integer operand. If the result is true, all 32 bits of the destination
///    vector are set; otherwise they are cleared.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128 _mm_cmp_ss(__m128 a, __m128 b, const int c);
/// \endcode
///
/// This intrinsic corresponds to the \c VCMPSS / CMPSS instruction.
///
/// \param a
///    A 128-bit vector of [4 x float].
/// \param b
///    A 128-bit vector of [4 x float].
/// \param c
///    An immediate integer operand, with bits [4:0] specifying which comparison
///    operation to use:
///    00h, 08h, 10h, 18h: Equal
///    01h, 09h, 11h, 19h: Less than
///    02h, 0Ah, 12h, 1Ah: Less than or equal / Greater than or equal (swapped
///                        operands)
///    03h, 0Bh, 13h, 1Bh: Unordered
///    04h, 0Ch, 14h, 1Ch: Not equal
///    05h, 0Dh, 15h, 1Dh: Not less than / Not greater than (swapped operands)
///    06h, 0Eh, 16h, 1Eh: Not less than or equal / Not greater than or equal
///                        (swapped operands)
///    07h, 0Fh, 17h, 1Fh: Ordered
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
#define _mm_cmp_ss(a, b, c) __extension__ ({ \
  (__m128)__builtin_ia32_cmpss((__v4sf)(__m128)(a), \
                               (__v4sf)(__m128)(b), (c)); })
1851*344a7f5eSAndroid Build Coastguard Worker 
1852*344a7f5eSAndroid Build Coastguard Worker /// \brief Takes a [8 x i32] vector and returns the vector element value
1853*344a7f5eSAndroid Build Coastguard Worker ///    indexed by the immediate constant operand.
1854*344a7f5eSAndroid Build Coastguard Worker ///
1855*344a7f5eSAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
1856*344a7f5eSAndroid Build Coastguard Worker ///
1857*344a7f5eSAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VEXTRACTF128+COMPOSITE /
1858*344a7f5eSAndroid Build Coastguard Worker ///   EXTRACTF128+COMPOSITE instruction.
1859*344a7f5eSAndroid Build Coastguard Worker ///
1860*344a7f5eSAndroid Build Coastguard Worker /// \param __a
1861*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x i32].
1862*344a7f5eSAndroid Build Coastguard Worker /// \param __imm
1863*344a7f5eSAndroid Build Coastguard Worker ///    An immediate integer operand with bits [2:0] determining which vector
1864*344a7f5eSAndroid Build Coastguard Worker ///    element is extracted and returned.
1865*344a7f5eSAndroid Build Coastguard Worker /// \returns A 32-bit integer containing the extracted 32 bits of extended
1866*344a7f5eSAndroid Build Coastguard Worker ///    packed data.
1867*344a7f5eSAndroid Build Coastguard Worker static __inline int __DEFAULT_FN_ATTRS
_mm256_extract_epi32(__m256i __a,const int __imm)1868*344a7f5eSAndroid Build Coastguard Worker _mm256_extract_epi32(__m256i __a, const int __imm)
1869*344a7f5eSAndroid Build Coastguard Worker {
1870*344a7f5eSAndroid Build Coastguard Worker   __v8si __b = (__v8si)__a;
1871*344a7f5eSAndroid Build Coastguard Worker   return __b[__imm & 7];
1872*344a7f5eSAndroid Build Coastguard Worker }
1873*344a7f5eSAndroid Build Coastguard Worker 
1874*344a7f5eSAndroid Build Coastguard Worker /// \brief Takes a [16 x i16] vector and returns the vector element value
1875*344a7f5eSAndroid Build Coastguard Worker ///    indexed by the immediate constant operand.
1876*344a7f5eSAndroid Build Coastguard Worker ///
1877*344a7f5eSAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
1878*344a7f5eSAndroid Build Coastguard Worker ///
1879*344a7f5eSAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VEXTRACTF128+COMPOSITE /
1880*344a7f5eSAndroid Build Coastguard Worker ///    EXTRACTF128+COMPOSITE instruction.
1881*344a7f5eSAndroid Build Coastguard Worker ///
1882*344a7f5eSAndroid Build Coastguard Worker /// \param __a
1883*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit integer vector of [16 x i16].
1884*344a7f5eSAndroid Build Coastguard Worker /// \param __imm
1885*344a7f5eSAndroid Build Coastguard Worker ///    An immediate integer operand with bits [3:0] determining which vector
1886*344a7f5eSAndroid Build Coastguard Worker ///    element is extracted and returned.
1887*344a7f5eSAndroid Build Coastguard Worker /// \returns A 32-bit integer containing the extracted 16 bits of zero extended
1888*344a7f5eSAndroid Build Coastguard Worker ///    packed data.
1889*344a7f5eSAndroid Build Coastguard Worker static __inline int __DEFAULT_FN_ATTRS
_mm256_extract_epi16(__m256i __a,const int __imm)1890*344a7f5eSAndroid Build Coastguard Worker _mm256_extract_epi16(__m256i __a, const int __imm)
1891*344a7f5eSAndroid Build Coastguard Worker {
1892*344a7f5eSAndroid Build Coastguard Worker   __v16hi __b = (__v16hi)__a;
1893*344a7f5eSAndroid Build Coastguard Worker   return (unsigned short)__b[__imm & 15];
1894*344a7f5eSAndroid Build Coastguard Worker }
1895*344a7f5eSAndroid Build Coastguard Worker 
1896*344a7f5eSAndroid Build Coastguard Worker /// \brief Takes a [32 x i8] vector and returns the vector element value
1897*344a7f5eSAndroid Build Coastguard Worker ///    indexed by the immediate constant operand.
1898*344a7f5eSAndroid Build Coastguard Worker ///
1899*344a7f5eSAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
1900*344a7f5eSAndroid Build Coastguard Worker ///
1901*344a7f5eSAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VEXTRACTF128+COMPOSITE /
1902*344a7f5eSAndroid Build Coastguard Worker ///    EXTRACTF128+COMPOSITE instruction.
1903*344a7f5eSAndroid Build Coastguard Worker ///
1904*344a7f5eSAndroid Build Coastguard Worker /// \param __a
1905*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit integer vector of [32 x i8].
1906*344a7f5eSAndroid Build Coastguard Worker /// \param __imm
1907*344a7f5eSAndroid Build Coastguard Worker ///    An immediate integer operand with bits [4:0] determining which vector
1908*344a7f5eSAndroid Build Coastguard Worker ///    element is extracted and returned.
1909*344a7f5eSAndroid Build Coastguard Worker /// \returns A 32-bit integer containing the extracted 8 bits of zero extended
1910*344a7f5eSAndroid Build Coastguard Worker ///    packed data.
1911*344a7f5eSAndroid Build Coastguard Worker static __inline int __DEFAULT_FN_ATTRS
_mm256_extract_epi8(__m256i __a,const int __imm)1912*344a7f5eSAndroid Build Coastguard Worker _mm256_extract_epi8(__m256i __a, const int __imm)
1913*344a7f5eSAndroid Build Coastguard Worker {
1914*344a7f5eSAndroid Build Coastguard Worker   __v32qi __b = (__v32qi)__a;
1915*344a7f5eSAndroid Build Coastguard Worker   return (unsigned char)__b[__imm & 31];
1916*344a7f5eSAndroid Build Coastguard Worker }
1917*344a7f5eSAndroid Build Coastguard Worker 
1918*344a7f5eSAndroid Build Coastguard Worker #ifdef __x86_64__
1919*344a7f5eSAndroid Build Coastguard Worker /// \brief Takes a [4 x i64] vector and returns the vector element value
1920*344a7f5eSAndroid Build Coastguard Worker ///    indexed by the immediate constant operand.
1921*344a7f5eSAndroid Build Coastguard Worker ///
1922*344a7f5eSAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
1923*344a7f5eSAndroid Build Coastguard Worker ///
1924*344a7f5eSAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VEXTRACTF128+COMPOSITE /
1925*344a7f5eSAndroid Build Coastguard Worker ///    EXTRACTF128+COMPOSITE instruction.
1926*344a7f5eSAndroid Build Coastguard Worker ///
1927*344a7f5eSAndroid Build Coastguard Worker /// \param __a
1928*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit integer vector of [4 x i64].
1929*344a7f5eSAndroid Build Coastguard Worker /// \param __imm
1930*344a7f5eSAndroid Build Coastguard Worker ///    An immediate integer operand with bits [1:0] determining which vector
1931*344a7f5eSAndroid Build Coastguard Worker ///    element is extracted and returned.
1932*344a7f5eSAndroid Build Coastguard Worker /// \returns A 64-bit integer containing the extracted 64 bits of extended
1933*344a7f5eSAndroid Build Coastguard Worker ///    packed data.
1934*344a7f5eSAndroid Build Coastguard Worker static __inline long long  __DEFAULT_FN_ATTRS
_mm256_extract_epi64(__m256i __a,const int __imm)1935*344a7f5eSAndroid Build Coastguard Worker _mm256_extract_epi64(__m256i __a, const int __imm)
1936*344a7f5eSAndroid Build Coastguard Worker {
1937*344a7f5eSAndroid Build Coastguard Worker   __v4di __b = (__v4di)__a;
1938*344a7f5eSAndroid Build Coastguard Worker   return __b[__imm & 3];
1939*344a7f5eSAndroid Build Coastguard Worker }
1940*344a7f5eSAndroid Build Coastguard Worker #endif
1941*344a7f5eSAndroid Build Coastguard Worker 
1942*344a7f5eSAndroid Build Coastguard Worker /// \brief Takes a [8 x i32] vector and replaces the vector element value
1943*344a7f5eSAndroid Build Coastguard Worker ///    indexed by the immediate constant operand by a new value. Returns the
1944*344a7f5eSAndroid Build Coastguard Worker ///    modified vector.
1945*344a7f5eSAndroid Build Coastguard Worker ///
1946*344a7f5eSAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
1947*344a7f5eSAndroid Build Coastguard Worker ///
1948*344a7f5eSAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VINSERTF128+COMPOSITE /
1949*344a7f5eSAndroid Build Coastguard Worker ///    INSERTF128+COMPOSITE instruction.
1950*344a7f5eSAndroid Build Coastguard Worker ///
1951*344a7f5eSAndroid Build Coastguard Worker /// \param __a
1952*344a7f5eSAndroid Build Coastguard Worker ///    A vector of [8 x i32] to be used by the insert operation.
1953*344a7f5eSAndroid Build Coastguard Worker /// \param __b
1954*344a7f5eSAndroid Build Coastguard Worker ///    An integer value. The replacement value for the insert operation.
1955*344a7f5eSAndroid Build Coastguard Worker /// \param __imm
1956*344a7f5eSAndroid Build Coastguard Worker ///    An immediate integer specifying the index of the vector element to be
1957*344a7f5eSAndroid Build Coastguard Worker ///    replaced.
1958*344a7f5eSAndroid Build Coastguard Worker /// \returns A copy of vector __a, after replacing its element indexed by __imm
1959*344a7f5eSAndroid Build Coastguard Worker ///     with __b.
1960*344a7f5eSAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_insert_epi32(__m256i __a,int __b,int const __imm)1961*344a7f5eSAndroid Build Coastguard Worker _mm256_insert_epi32(__m256i __a, int __b, int const __imm)
1962*344a7f5eSAndroid Build Coastguard Worker {
1963*344a7f5eSAndroid Build Coastguard Worker   __v8si __c = (__v8si)__a;
1964*344a7f5eSAndroid Build Coastguard Worker   __c[__imm & 7] = __b;
1965*344a7f5eSAndroid Build Coastguard Worker   return (__m256i)__c;
1966*344a7f5eSAndroid Build Coastguard Worker }
1967*344a7f5eSAndroid Build Coastguard Worker 
1968*344a7f5eSAndroid Build Coastguard Worker 
1969*344a7f5eSAndroid Build Coastguard Worker /// \brief Takes a [16 x i16] vector and replaces the vector element value
1970*344a7f5eSAndroid Build Coastguard Worker ///    indexed by the immediate constant operand with a new value. Returns the
1971*344a7f5eSAndroid Build Coastguard Worker ///    modified vector.
1972*344a7f5eSAndroid Build Coastguard Worker ///
1973*344a7f5eSAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
1974*344a7f5eSAndroid Build Coastguard Worker ///
1975*344a7f5eSAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VINSERTF128+COMPOSITE /
1976*344a7f5eSAndroid Build Coastguard Worker ///    INSERTF128+COMPOSITE instruction.
1977*344a7f5eSAndroid Build Coastguard Worker ///
1978*344a7f5eSAndroid Build Coastguard Worker /// \param __a
1979*344a7f5eSAndroid Build Coastguard Worker ///    A vector of [16 x i16] to be used by the insert operation.
1980*344a7f5eSAndroid Build Coastguard Worker /// \param __b
1981*344a7f5eSAndroid Build Coastguard Worker ///    An i16 integer value. The replacement value for the insert operation.
1982*344a7f5eSAndroid Build Coastguard Worker /// \param __imm
1983*344a7f5eSAndroid Build Coastguard Worker ///    An immediate integer specifying the index of the vector element to be
1984*344a7f5eSAndroid Build Coastguard Worker ///    replaced.
1985*344a7f5eSAndroid Build Coastguard Worker /// \returns A copy of vector __a, after replacing its element indexed by __imm
1986*344a7f5eSAndroid Build Coastguard Worker ///     with __b.
1987*344a7f5eSAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_insert_epi16(__m256i __a,int __b,int const __imm)1988*344a7f5eSAndroid Build Coastguard Worker _mm256_insert_epi16(__m256i __a, int __b, int const __imm)
1989*344a7f5eSAndroid Build Coastguard Worker {
1990*344a7f5eSAndroid Build Coastguard Worker   __v16hi __c = (__v16hi)__a;
1991*344a7f5eSAndroid Build Coastguard Worker   __c[__imm & 15] = __b;
1992*344a7f5eSAndroid Build Coastguard Worker   return (__m256i)__c;
1993*344a7f5eSAndroid Build Coastguard Worker }
1994*344a7f5eSAndroid Build Coastguard Worker 
1995*344a7f5eSAndroid Build Coastguard Worker /// \brief Takes a [32 x i8] vector and replaces the vector element value
1996*344a7f5eSAndroid Build Coastguard Worker ///    indexed by the immediate constant operand with a new value. Returns the
1997*344a7f5eSAndroid Build Coastguard Worker ///    modified vector.
1998*344a7f5eSAndroid Build Coastguard Worker ///
1999*344a7f5eSAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
2000*344a7f5eSAndroid Build Coastguard Worker ///
2001*344a7f5eSAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VINSERTF128+COMPOSITE /
2002*344a7f5eSAndroid Build Coastguard Worker ///    INSERTF128+COMPOSITE instruction.
2003*344a7f5eSAndroid Build Coastguard Worker ///
2004*344a7f5eSAndroid Build Coastguard Worker /// \param __a
2005*344a7f5eSAndroid Build Coastguard Worker ///    A vector of [32 x i8] to be used by the insert operation.
2006*344a7f5eSAndroid Build Coastguard Worker /// \param __b
2007*344a7f5eSAndroid Build Coastguard Worker ///    An i8 integer value. The replacement value for the insert operation.
2008*344a7f5eSAndroid Build Coastguard Worker /// \param __imm
2009*344a7f5eSAndroid Build Coastguard Worker ///    An immediate integer specifying the index of the vector element to be
2010*344a7f5eSAndroid Build Coastguard Worker ///    replaced.
2011*344a7f5eSAndroid Build Coastguard Worker /// \returns A copy of vector __a, after replacing its element indexed by __imm
2012*344a7f5eSAndroid Build Coastguard Worker ///    with __b.
2013*344a7f5eSAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_insert_epi8(__m256i __a,int __b,int const __imm)2014*344a7f5eSAndroid Build Coastguard Worker _mm256_insert_epi8(__m256i __a, int __b, int const __imm)
2015*344a7f5eSAndroid Build Coastguard Worker {
2016*344a7f5eSAndroid Build Coastguard Worker   __v32qi __c = (__v32qi)__a;
2017*344a7f5eSAndroid Build Coastguard Worker   __c[__imm & 31] = __b;
2018*344a7f5eSAndroid Build Coastguard Worker   return (__m256i)__c;
2019*344a7f5eSAndroid Build Coastguard Worker }
2020*344a7f5eSAndroid Build Coastguard Worker 
2021*344a7f5eSAndroid Build Coastguard Worker #ifdef __x86_64__
2022*344a7f5eSAndroid Build Coastguard Worker /// \brief Takes a [4 x i64] vector and replaces the vector element value
2023*344a7f5eSAndroid Build Coastguard Worker ///    indexed by the immediate constant operand with a new value. Returns the
2024*344a7f5eSAndroid Build Coastguard Worker ///    modified vector.
2025*344a7f5eSAndroid Build Coastguard Worker ///
2026*344a7f5eSAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
2027*344a7f5eSAndroid Build Coastguard Worker ///
2028*344a7f5eSAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VINSERTF128+COMPOSITE /
2029*344a7f5eSAndroid Build Coastguard Worker ///    INSERTF128+COMPOSITE instruction.
2030*344a7f5eSAndroid Build Coastguard Worker ///
2031*344a7f5eSAndroid Build Coastguard Worker /// \param __a
2032*344a7f5eSAndroid Build Coastguard Worker ///    A vector of [4 x i64] to be used by the insert operation.
2033*344a7f5eSAndroid Build Coastguard Worker /// \param __b
2034*344a7f5eSAndroid Build Coastguard Worker ///    A 64-bit integer value. The replacement value for the insert operation.
2035*344a7f5eSAndroid Build Coastguard Worker /// \param __imm
2036*344a7f5eSAndroid Build Coastguard Worker ///    An immediate integer specifying the index of the vector element to be
2037*344a7f5eSAndroid Build Coastguard Worker ///    replaced.
2038*344a7f5eSAndroid Build Coastguard Worker /// \returns A copy of vector __a, after replacing its element indexed by __imm
2039*344a7f5eSAndroid Build Coastguard Worker ///     with __b.
2040*344a7f5eSAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_insert_epi64(__m256i __a,long long __b,int const __imm)2041*344a7f5eSAndroid Build Coastguard Worker _mm256_insert_epi64(__m256i __a, long long __b, int const __imm)
2042*344a7f5eSAndroid Build Coastguard Worker {
2043*344a7f5eSAndroid Build Coastguard Worker   __v4di __c = (__v4di)__a;
2044*344a7f5eSAndroid Build Coastguard Worker   __c[__imm & 3] = __b;
2045*344a7f5eSAndroid Build Coastguard Worker   return (__m256i)__c;
2046*344a7f5eSAndroid Build Coastguard Worker }
2047*344a7f5eSAndroid Build Coastguard Worker #endif
2048*344a7f5eSAndroid Build Coastguard Worker 
2049*344a7f5eSAndroid Build Coastguard Worker /* Conversion */
2050*344a7f5eSAndroid Build Coastguard Worker /// \brief Converts a vector of [4 x i32] into a vector of [4 x double].
2051*344a7f5eSAndroid Build Coastguard Worker ///
2052*344a7f5eSAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
2053*344a7f5eSAndroid Build Coastguard Worker ///
2054*344a7f5eSAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VCVTDQ2PD / CVTDQ2PD instruction.
2055*344a7f5eSAndroid Build Coastguard Worker ///
2056*344a7f5eSAndroid Build Coastguard Worker /// \param __a
2057*344a7f5eSAndroid Build Coastguard Worker ///    A 128-bit integer vector of [4 x i32].
2058*344a7f5eSAndroid Build Coastguard Worker /// \returns A 256-bit vector of [4 x double] containing the converted values.
2059*344a7f5eSAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_cvtepi32_pd(__m128i __a)2060*344a7f5eSAndroid Build Coastguard Worker _mm256_cvtepi32_pd(__m128i __a)
2061*344a7f5eSAndroid Build Coastguard Worker {
2062*344a7f5eSAndroid Build Coastguard Worker   return (__m256d)__builtin_convertvector((__v4si)__a, __v4df);
2063*344a7f5eSAndroid Build Coastguard Worker }
2064*344a7f5eSAndroid Build Coastguard Worker 
2065*344a7f5eSAndroid Build Coastguard Worker /// \brief Converts a vector of [8 x i32] into a vector of [8 x float].
2066*344a7f5eSAndroid Build Coastguard Worker ///
2067*344a7f5eSAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
2068*344a7f5eSAndroid Build Coastguard Worker ///
2069*344a7f5eSAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VCVTDQ2PS / CVTDQ2PS instruction.
2070*344a7f5eSAndroid Build Coastguard Worker ///
2071*344a7f5eSAndroid Build Coastguard Worker /// \param __a
2072*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit integer vector.
2073*344a7f5eSAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] containing the converted values.
2074*344a7f5eSAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_cvtepi32_ps(__m256i __a)2075*344a7f5eSAndroid Build Coastguard Worker _mm256_cvtepi32_ps(__m256i __a)
2076*344a7f5eSAndroid Build Coastguard Worker {
2077*344a7f5eSAndroid Build Coastguard Worker   return (__m256)__builtin_ia32_cvtdq2ps256((__v8si) __a);
2078*344a7f5eSAndroid Build Coastguard Worker }
2079*344a7f5eSAndroid Build Coastguard Worker 
2080*344a7f5eSAndroid Build Coastguard Worker /// \brief Converts a 256-bit vector of [4 x double] into a 128-bit vector of
2081*344a7f5eSAndroid Build Coastguard Worker ///    [4 x float].
2082*344a7f5eSAndroid Build Coastguard Worker ///
2083*344a7f5eSAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
2084*344a7f5eSAndroid Build Coastguard Worker ///
2085*344a7f5eSAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VCVTPD2PS / CVTPD2PS instruction.
2086*344a7f5eSAndroid Build Coastguard Worker ///
2087*344a7f5eSAndroid Build Coastguard Worker /// \param __a
2088*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double].
2089*344a7f5eSAndroid Build Coastguard Worker /// \returns A 128-bit vector of [4 x float] containing the converted values.
2090*344a7f5eSAndroid Build Coastguard Worker static __inline __m128 __DEFAULT_FN_ATTRS
_mm256_cvtpd_ps(__m256d __a)2091*344a7f5eSAndroid Build Coastguard Worker _mm256_cvtpd_ps(__m256d __a)
2092*344a7f5eSAndroid Build Coastguard Worker {
2093*344a7f5eSAndroid Build Coastguard Worker   return (__m128)__builtin_ia32_cvtpd2ps256((__v4df) __a);
2094*344a7f5eSAndroid Build Coastguard Worker }
2095*344a7f5eSAndroid Build Coastguard Worker 
2096*344a7f5eSAndroid Build Coastguard Worker /// \brief Converts a vector of [8 x float] into a vector of [8 x i32].
2097*344a7f5eSAndroid Build Coastguard Worker ///
2098*344a7f5eSAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
2099*344a7f5eSAndroid Build Coastguard Worker ///
2100*344a7f5eSAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VCVTPS2DQ / CVTPS2DQ instruction.
2101*344a7f5eSAndroid Build Coastguard Worker ///
2102*344a7f5eSAndroid Build Coastguard Worker /// \param __a
2103*344a7f5eSAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float].
2104*344a7f5eSAndroid Build Coastguard Worker /// \returns A 256-bit integer vector containing the converted values.
2105*344a7f5eSAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_cvtps_epi32(__m256 __a)2106*344a7f5eSAndroid Build Coastguard Worker _mm256_cvtps_epi32(__m256 __a)
2107*344a7f5eSAndroid Build Coastguard Worker {
2108*344a7f5eSAndroid Build Coastguard Worker   return (__m256i)__builtin_ia32_cvtps2dq256((__v8sf) __a);
2109*344a7f5eSAndroid Build Coastguard Worker }
2110*344a7f5eSAndroid Build Coastguard Worker 
2111*344a7f5eSAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_cvtps_pd(__m128 __a)2112*344a7f5eSAndroid Build Coastguard Worker _mm256_cvtps_pd(__m128 __a)
2113*344a7f5eSAndroid Build Coastguard Worker {
2114*344a7f5eSAndroid Build Coastguard Worker   return (__m256d)__builtin_convertvector((__v4sf)__a, __v4df);
2115*344a7f5eSAndroid Build Coastguard Worker }
2116*344a7f5eSAndroid Build Coastguard Worker 
2117*344a7f5eSAndroid Build Coastguard Worker static __inline __m128i __DEFAULT_FN_ATTRS
_mm256_cvttpd_epi32(__m256d __a)2118*344a7f5eSAndroid Build Coastguard Worker _mm256_cvttpd_epi32(__m256d __a)
2119*344a7f5eSAndroid Build Coastguard Worker {
2120*344a7f5eSAndroid Build Coastguard Worker   return (__m128i)__builtin_convertvector((__v4df) __a, __v4si);
2121*344a7f5eSAndroid Build Coastguard Worker }
2122*344a7f5eSAndroid Build Coastguard Worker 
2123*344a7f5eSAndroid Build Coastguard Worker static __inline __m128i __DEFAULT_FN_ATTRS
_mm256_cvtpd_epi32(__m256d __a)2124*344a7f5eSAndroid Build Coastguard Worker _mm256_cvtpd_epi32(__m256d __a)
2125*344a7f5eSAndroid Build Coastguard Worker {
2126*344a7f5eSAndroid Build Coastguard Worker   return (__m128i)__builtin_ia32_cvtpd2dq256((__v4df) __a);
2127*344a7f5eSAndroid Build Coastguard Worker }
2128*344a7f5eSAndroid Build Coastguard Worker 
2129*344a7f5eSAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_cvttps_epi32(__m256 __a)2130*344a7f5eSAndroid Build Coastguard Worker _mm256_cvttps_epi32(__m256 __a)
2131*344a7f5eSAndroid Build Coastguard Worker {
2132*344a7f5eSAndroid Build Coastguard Worker   return (__m256i)__builtin_convertvector((__v8sf) __a, __v8si);
2133*344a7f5eSAndroid Build Coastguard Worker }
2134*344a7f5eSAndroid Build Coastguard Worker 
2135*344a7f5eSAndroid Build Coastguard Worker static __inline double __DEFAULT_FN_ATTRS
_mm256_cvtsd_f64(__m256d __a)2136*344a7f5eSAndroid Build Coastguard Worker _mm256_cvtsd_f64(__m256d __a)
2137*344a7f5eSAndroid Build Coastguard Worker {
2138*344a7f5eSAndroid Build Coastguard Worker  return __a[0];
2139*344a7f5eSAndroid Build Coastguard Worker }
2140*344a7f5eSAndroid Build Coastguard Worker 
2141*344a7f5eSAndroid Build Coastguard Worker static __inline int __DEFAULT_FN_ATTRS
_mm256_cvtsi256_si32(__m256i __a)2142*344a7f5eSAndroid Build Coastguard Worker _mm256_cvtsi256_si32(__m256i __a)
2143*344a7f5eSAndroid Build Coastguard Worker {
2144*344a7f5eSAndroid Build Coastguard Worker  __v8si __b = (__v8si)__a;
2145*344a7f5eSAndroid Build Coastguard Worker  return __b[0];
2146*344a7f5eSAndroid Build Coastguard Worker }
2147*344a7f5eSAndroid Build Coastguard Worker 
2148*344a7f5eSAndroid Build Coastguard Worker static __inline float __DEFAULT_FN_ATTRS
_mm256_cvtss_f32(__m256 __a)2149*344a7f5eSAndroid Build Coastguard Worker _mm256_cvtss_f32(__m256 __a)
2150*344a7f5eSAndroid Build Coastguard Worker {
2151*344a7f5eSAndroid Build Coastguard Worker  return __a[0];
2152*344a7f5eSAndroid Build Coastguard Worker }
2153*344a7f5eSAndroid Build Coastguard Worker 
2154*344a7f5eSAndroid Build Coastguard Worker /* Vector replicate */
2155*344a7f5eSAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_movehdup_ps(__m256 __a)2156*344a7f5eSAndroid Build Coastguard Worker _mm256_movehdup_ps(__m256 __a)
2157*344a7f5eSAndroid Build Coastguard Worker {
2158*344a7f5eSAndroid Build Coastguard Worker   return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 1, 1, 3, 3, 5, 5, 7, 7);
2159*344a7f5eSAndroid Build Coastguard Worker }
2160*344a7f5eSAndroid Build Coastguard Worker 
2161*344a7f5eSAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_moveldup_ps(__m256 __a)2162*344a7f5eSAndroid Build Coastguard Worker _mm256_moveldup_ps(__m256 __a)
2163*344a7f5eSAndroid Build Coastguard Worker {
2164*344a7f5eSAndroid Build Coastguard Worker   return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 0, 0, 2, 2, 4, 4, 6, 6);
2165*344a7f5eSAndroid Build Coastguard Worker }
2166*344a7f5eSAndroid Build Coastguard Worker 
2167*344a7f5eSAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_movedup_pd(__m256d __a)2168*344a7f5eSAndroid Build Coastguard Worker _mm256_movedup_pd(__m256d __a)
2169*344a7f5eSAndroid Build Coastguard Worker {
2170*344a7f5eSAndroid Build Coastguard Worker   return __builtin_shufflevector((__v4df)__a, (__v4df)__a, 0, 0, 2, 2);
2171*344a7f5eSAndroid Build Coastguard Worker }
2172*344a7f5eSAndroid Build Coastguard Worker 
2173*344a7f5eSAndroid Build Coastguard Worker /* Unpack and Interleave */
2174*344a7f5eSAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_unpackhi_pd(__m256d __a,__m256d __b)2175*344a7f5eSAndroid Build Coastguard Worker _mm256_unpackhi_pd(__m256d __a, __m256d __b)
2176*344a7f5eSAndroid Build Coastguard Worker {
2177*344a7f5eSAndroid Build Coastguard Worker   return __builtin_shufflevector((__v4df)__a, (__v4df)__b, 1, 5, 1+2, 5+2);
2178*344a7f5eSAndroid Build Coastguard Worker }
2179*344a7f5eSAndroid Build Coastguard Worker 
2180*344a7f5eSAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_unpacklo_pd(__m256d __a,__m256d __b)2181*344a7f5eSAndroid Build Coastguard Worker _mm256_unpacklo_pd(__m256d __a, __m256d __b)
2182*344a7f5eSAndroid Build Coastguard Worker {
2183*344a7f5eSAndroid Build Coastguard Worker   return __builtin_shufflevector((__v4df)__a, (__v4df)__b, 0, 4, 0+2, 4+2);
2184*344a7f5eSAndroid Build Coastguard Worker }
2185*344a7f5eSAndroid Build Coastguard Worker 
2186*344a7f5eSAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_unpackhi_ps(__m256 __a,__m256 __b)2187*344a7f5eSAndroid Build Coastguard Worker _mm256_unpackhi_ps(__m256 __a, __m256 __b)
2188*344a7f5eSAndroid Build Coastguard Worker {
2189*344a7f5eSAndroid Build Coastguard Worker   return __builtin_shufflevector((__v8sf)__a, (__v8sf)__b, 2, 10, 2+1, 10+1, 6, 14, 6+1, 14+1);
2190*344a7f5eSAndroid Build Coastguard Worker }
2191*344a7f5eSAndroid Build Coastguard Worker 
2192*344a7f5eSAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_unpacklo_ps(__m256 __a,__m256 __b)2193*344a7f5eSAndroid Build Coastguard Worker _mm256_unpacklo_ps(__m256 __a, __m256 __b)
2194*344a7f5eSAndroid Build Coastguard Worker {
2195*344a7f5eSAndroid Build Coastguard Worker   return __builtin_shufflevector((__v8sf)__a, (__v8sf)__b, 0, 8, 0+1, 8+1, 4, 12, 4+1, 12+1);
2196*344a7f5eSAndroid Build Coastguard Worker }
2197*344a7f5eSAndroid Build Coastguard Worker 
2198*344a7f5eSAndroid Build Coastguard Worker /* Bit Test */
2199*344a7f5eSAndroid Build Coastguard Worker static __inline int __DEFAULT_FN_ATTRS
_mm_testz_pd(__m128d __a,__m128d __b)2200*344a7f5eSAndroid Build Coastguard Worker _mm_testz_pd(__m128d __a, __m128d __b)
2201*344a7f5eSAndroid Build Coastguard Worker {
2202*344a7f5eSAndroid Build Coastguard Worker   return __builtin_ia32_vtestzpd((__v2df)__a, (__v2df)__b);
2203*344a7f5eSAndroid Build Coastguard Worker }
2204*344a7f5eSAndroid Build Coastguard Worker 
2205*344a7f5eSAndroid Build Coastguard Worker static __inline int __DEFAULT_FN_ATTRS
_mm_testc_pd(__m128d __a,__m128d __b)2206*344a7f5eSAndroid Build Coastguard Worker _mm_testc_pd(__m128d __a, __m128d __b)
2207*344a7f5eSAndroid Build Coastguard Worker {
2208*344a7f5eSAndroid Build Coastguard Worker   return __builtin_ia32_vtestcpd((__v2df)__a, (__v2df)__b);
2209*344a7f5eSAndroid Build Coastguard Worker }
2210*344a7f5eSAndroid Build Coastguard Worker 
2211*344a7f5eSAndroid Build Coastguard Worker static __inline int __DEFAULT_FN_ATTRS
_mm_testnzc_pd(__m128d __a,__m128d __b)2212*344a7f5eSAndroid Build Coastguard Worker _mm_testnzc_pd(__m128d __a, __m128d __b)
2213*344a7f5eSAndroid Build Coastguard Worker {
2214*344a7f5eSAndroid Build Coastguard Worker   return __builtin_ia32_vtestnzcpd((__v2df)__a, (__v2df)__b);
2215*344a7f5eSAndroid Build Coastguard Worker }
2216*344a7f5eSAndroid Build Coastguard Worker 
2217*344a7f5eSAndroid Build Coastguard Worker static __inline int __DEFAULT_FN_ATTRS
_mm_testz_ps(__m128 __a,__m128 __b)2218*344a7f5eSAndroid Build Coastguard Worker _mm_testz_ps(__m128 __a, __m128 __b)
2219*344a7f5eSAndroid Build Coastguard Worker {
2220*344a7f5eSAndroid Build Coastguard Worker   return __builtin_ia32_vtestzps((__v4sf)__a, (__v4sf)__b);
2221*344a7f5eSAndroid Build Coastguard Worker }
2222*344a7f5eSAndroid Build Coastguard Worker 
2223*344a7f5eSAndroid Build Coastguard Worker static __inline int __DEFAULT_FN_ATTRS
_mm_testc_ps(__m128 __a,__m128 __b)2224*344a7f5eSAndroid Build Coastguard Worker _mm_testc_ps(__m128 __a, __m128 __b)
2225*344a7f5eSAndroid Build Coastguard Worker {
2226*344a7f5eSAndroid Build Coastguard Worker   return __builtin_ia32_vtestcps((__v4sf)__a, (__v4sf)__b);
2227*344a7f5eSAndroid Build Coastguard Worker }
2228*344a7f5eSAndroid Build Coastguard Worker 
2229*344a7f5eSAndroid Build Coastguard Worker static __inline int __DEFAULT_FN_ATTRS
_mm_testnzc_ps(__m128 __a,__m128 __b)2230*344a7f5eSAndroid Build Coastguard Worker _mm_testnzc_ps(__m128 __a, __m128 __b)
2231*344a7f5eSAndroid Build Coastguard Worker {
2232*344a7f5eSAndroid Build Coastguard Worker   return __builtin_ia32_vtestnzcps((__v4sf)__a, (__v4sf)__b);
2233*344a7f5eSAndroid Build Coastguard Worker }
2234*344a7f5eSAndroid Build Coastguard Worker 
2235*344a7f5eSAndroid Build Coastguard Worker static __inline int __DEFAULT_FN_ATTRS
_mm256_testz_pd(__m256d __a,__m256d __b)2236*344a7f5eSAndroid Build Coastguard Worker _mm256_testz_pd(__m256d __a, __m256d __b)
2237*344a7f5eSAndroid Build Coastguard Worker {
2238*344a7f5eSAndroid Build Coastguard Worker   return __builtin_ia32_vtestzpd256((__v4df)__a, (__v4df)__b);
2239*344a7f5eSAndroid Build Coastguard Worker }
2240*344a7f5eSAndroid Build Coastguard Worker 
2241*344a7f5eSAndroid Build Coastguard Worker static __inline int __DEFAULT_FN_ATTRS
_mm256_testc_pd(__m256d __a,__m256d __b)2242*344a7f5eSAndroid Build Coastguard Worker _mm256_testc_pd(__m256d __a, __m256d __b)
2243*344a7f5eSAndroid Build Coastguard Worker {
2244*344a7f5eSAndroid Build Coastguard Worker   return __builtin_ia32_vtestcpd256((__v4df)__a, (__v4df)__b);
2245*344a7f5eSAndroid Build Coastguard Worker }
2246*344a7f5eSAndroid Build Coastguard Worker 
2247*344a7f5eSAndroid Build Coastguard Worker static __inline int __DEFAULT_FN_ATTRS
_mm256_testnzc_pd(__m256d __a,__m256d __b)2248*344a7f5eSAndroid Build Coastguard Worker _mm256_testnzc_pd(__m256d __a, __m256d __b)
2249*344a7f5eSAndroid Build Coastguard Worker {
2250*344a7f5eSAndroid Build Coastguard Worker   return __builtin_ia32_vtestnzcpd256((__v4df)__a, (__v4df)__b);
2251*344a7f5eSAndroid Build Coastguard Worker }
2252*344a7f5eSAndroid Build Coastguard Worker 
2253*344a7f5eSAndroid Build Coastguard Worker static __inline int __DEFAULT_FN_ATTRS
_mm256_testz_ps(__m256 __a,__m256 __b)2254*344a7f5eSAndroid Build Coastguard Worker _mm256_testz_ps(__m256 __a, __m256 __b)
2255*344a7f5eSAndroid Build Coastguard Worker {
2256*344a7f5eSAndroid Build Coastguard Worker   return __builtin_ia32_vtestzps256((__v8sf)__a, (__v8sf)__b);
2257*344a7f5eSAndroid Build Coastguard Worker }
2258*344a7f5eSAndroid Build Coastguard Worker 
2259*344a7f5eSAndroid Build Coastguard Worker static __inline int __DEFAULT_FN_ATTRS
_mm256_testc_ps(__m256 __a,__m256 __b)2260*344a7f5eSAndroid Build Coastguard Worker _mm256_testc_ps(__m256 __a, __m256 __b)
2261*344a7f5eSAndroid Build Coastguard Worker {
2262*344a7f5eSAndroid Build Coastguard Worker   return __builtin_ia32_vtestcps256((__v8sf)__a, (__v8sf)__b);
2263*344a7f5eSAndroid Build Coastguard Worker }
2264*344a7f5eSAndroid Build Coastguard Worker 
2265*344a7f5eSAndroid Build Coastguard Worker static __inline int __DEFAULT_FN_ATTRS
_mm256_testnzc_ps(__m256 __a,__m256 __b)2266*344a7f5eSAndroid Build Coastguard Worker _mm256_testnzc_ps(__m256 __a, __m256 __b)
2267*344a7f5eSAndroid Build Coastguard Worker {
2268*344a7f5eSAndroid Build Coastguard Worker   return __builtin_ia32_vtestnzcps256((__v8sf)__a, (__v8sf)__b);
2269*344a7f5eSAndroid Build Coastguard Worker }
2270*344a7f5eSAndroid Build Coastguard Worker 
2271*344a7f5eSAndroid Build Coastguard Worker static __inline int __DEFAULT_FN_ATTRS
_mm256_testz_si256(__m256i __a,__m256i __b)2272*344a7f5eSAndroid Build Coastguard Worker _mm256_testz_si256(__m256i __a, __m256i __b)
2273*344a7f5eSAndroid Build Coastguard Worker {
2274*344a7f5eSAndroid Build Coastguard Worker   return __builtin_ia32_ptestz256((__v4di)__a, (__v4di)__b);
2275*344a7f5eSAndroid Build Coastguard Worker }
2276*344a7f5eSAndroid Build Coastguard Worker 
2277*344a7f5eSAndroid Build Coastguard Worker static __inline int __DEFAULT_FN_ATTRS
_mm256_testc_si256(__m256i __a,__m256i __b)2278*344a7f5eSAndroid Build Coastguard Worker _mm256_testc_si256(__m256i __a, __m256i __b)
2279*344a7f5eSAndroid Build Coastguard Worker {
2280*344a7f5eSAndroid Build Coastguard Worker   return __builtin_ia32_ptestc256((__v4di)__a, (__v4di)__b);
2281*344a7f5eSAndroid Build Coastguard Worker }
2282*344a7f5eSAndroid Build Coastguard Worker 
2283*344a7f5eSAndroid Build Coastguard Worker static __inline int __DEFAULT_FN_ATTRS
_mm256_testnzc_si256(__m256i __a,__m256i __b)2284*344a7f5eSAndroid Build Coastguard Worker _mm256_testnzc_si256(__m256i __a, __m256i __b)
2285*344a7f5eSAndroid Build Coastguard Worker {
2286*344a7f5eSAndroid Build Coastguard Worker   return __builtin_ia32_ptestnzc256((__v4di)__a, (__v4di)__b);
2287*344a7f5eSAndroid Build Coastguard Worker }
2288*344a7f5eSAndroid Build Coastguard Worker 
2289*344a7f5eSAndroid Build Coastguard Worker /* Vector extract sign mask */
2290*344a7f5eSAndroid Build Coastguard Worker static __inline int __DEFAULT_FN_ATTRS
_mm256_movemask_pd(__m256d __a)2291*344a7f5eSAndroid Build Coastguard Worker _mm256_movemask_pd(__m256d __a)
2292*344a7f5eSAndroid Build Coastguard Worker {
2293*344a7f5eSAndroid Build Coastguard Worker   return __builtin_ia32_movmskpd256((__v4df)__a);
2294*344a7f5eSAndroid Build Coastguard Worker }
2295*344a7f5eSAndroid Build Coastguard Worker 
2296*344a7f5eSAndroid Build Coastguard Worker static __inline int __DEFAULT_FN_ATTRS
_mm256_movemask_ps(__m256 __a)2297*344a7f5eSAndroid Build Coastguard Worker _mm256_movemask_ps(__m256 __a)
2298*344a7f5eSAndroid Build Coastguard Worker {
2299*344a7f5eSAndroid Build Coastguard Worker   return __builtin_ia32_movmskps256((__v8sf)__a);
2300*344a7f5eSAndroid Build Coastguard Worker }
2301*344a7f5eSAndroid Build Coastguard Worker 
/* Vector zeroing */
2303*344a7f5eSAndroid Build Coastguard Worker static __inline void __DEFAULT_FN_ATTRS
_mm256_zeroall(void)2304*344a7f5eSAndroid Build Coastguard Worker _mm256_zeroall(void)
2305*344a7f5eSAndroid Build Coastguard Worker {
2306*344a7f5eSAndroid Build Coastguard Worker   __builtin_ia32_vzeroall();
2307*344a7f5eSAndroid Build Coastguard Worker }
2308*344a7f5eSAndroid Build Coastguard Worker 
2309*344a7f5eSAndroid Build Coastguard Worker static __inline void __DEFAULT_FN_ATTRS
_mm256_zeroupper(void)2310*344a7f5eSAndroid Build Coastguard Worker _mm256_zeroupper(void)
2311*344a7f5eSAndroid Build Coastguard Worker {
2312*344a7f5eSAndroid Build Coastguard Worker   __builtin_ia32_vzeroupper();
2313*344a7f5eSAndroid Build Coastguard Worker }
2314*344a7f5eSAndroid Build Coastguard Worker 
2315*344a7f5eSAndroid Build Coastguard Worker /* Vector load with broadcast */
2316*344a7f5eSAndroid Build Coastguard Worker static __inline __m128 __DEFAULT_FN_ATTRS
_mm_broadcast_ss(float const * __a)2317*344a7f5eSAndroid Build Coastguard Worker _mm_broadcast_ss(float const *__a)
2318*344a7f5eSAndroid Build Coastguard Worker {
2319*344a7f5eSAndroid Build Coastguard Worker   float __f = *__a;
2320*344a7f5eSAndroid Build Coastguard Worker   return (__m128)(__v4sf){ __f, __f, __f, __f };
2321*344a7f5eSAndroid Build Coastguard Worker }
2322*344a7f5eSAndroid Build Coastguard Worker 
2323*344a7f5eSAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_broadcast_sd(double const * __a)2324*344a7f5eSAndroid Build Coastguard Worker _mm256_broadcast_sd(double const *__a)
2325*344a7f5eSAndroid Build Coastguard Worker {
2326*344a7f5eSAndroid Build Coastguard Worker   double __d = *__a;
2327*344a7f5eSAndroid Build Coastguard Worker   return (__m256d)(__v4df){ __d, __d, __d, __d };
2328*344a7f5eSAndroid Build Coastguard Worker }
2329*344a7f5eSAndroid Build Coastguard Worker 
2330*344a7f5eSAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_broadcast_ss(float const * __a)2331*344a7f5eSAndroid Build Coastguard Worker _mm256_broadcast_ss(float const *__a)
2332*344a7f5eSAndroid Build Coastguard Worker {
2333*344a7f5eSAndroid Build Coastguard Worker   float __f = *__a;
2334*344a7f5eSAndroid Build Coastguard Worker   return (__m256)(__v8sf){ __f, __f, __f, __f, __f, __f, __f, __f };
2335*344a7f5eSAndroid Build Coastguard Worker }
2336*344a7f5eSAndroid Build Coastguard Worker 
2337*344a7f5eSAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_broadcast_pd(__m128d const * __a)2338*344a7f5eSAndroid Build Coastguard Worker _mm256_broadcast_pd(__m128d const *__a)
2339*344a7f5eSAndroid Build Coastguard Worker {
2340*344a7f5eSAndroid Build Coastguard Worker   return (__m256d)__builtin_ia32_vbroadcastf128_pd256((__v2df const *)__a);
2341*344a7f5eSAndroid Build Coastguard Worker }
2342*344a7f5eSAndroid Build Coastguard Worker 
2343*344a7f5eSAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_broadcast_ps(__m128 const * __a)2344*344a7f5eSAndroid Build Coastguard Worker _mm256_broadcast_ps(__m128 const *__a)
2345*344a7f5eSAndroid Build Coastguard Worker {
2346*344a7f5eSAndroid Build Coastguard Worker   return (__m256)__builtin_ia32_vbroadcastf128_ps256((__v4sf const *)__a);
2347*344a7f5eSAndroid Build Coastguard Worker }
2348*344a7f5eSAndroid Build Coastguard Worker 
2349*344a7f5eSAndroid Build Coastguard Worker /* SIMD load ops */
2350*344a7f5eSAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_load_pd(double const * __p)2351*344a7f5eSAndroid Build Coastguard Worker _mm256_load_pd(double const *__p)
2352*344a7f5eSAndroid Build Coastguard Worker {
2353*344a7f5eSAndroid Build Coastguard Worker   return *(__m256d *)__p;
2354*344a7f5eSAndroid Build Coastguard Worker }
2355*344a7f5eSAndroid Build Coastguard Worker 
2356*344a7f5eSAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_load_ps(float const * __p)2357*344a7f5eSAndroid Build Coastguard Worker _mm256_load_ps(float const *__p)
2358*344a7f5eSAndroid Build Coastguard Worker {
2359*344a7f5eSAndroid Build Coastguard Worker   return *(__m256 *)__p;
2360*344a7f5eSAndroid Build Coastguard Worker }
2361*344a7f5eSAndroid Build Coastguard Worker 
2362*344a7f5eSAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_loadu_pd(double const * __p)2363*344a7f5eSAndroid Build Coastguard Worker _mm256_loadu_pd(double const *__p)
2364*344a7f5eSAndroid Build Coastguard Worker {
2365*344a7f5eSAndroid Build Coastguard Worker   struct __loadu_pd {
2366*344a7f5eSAndroid Build Coastguard Worker     __m256d __v;
2367*344a7f5eSAndroid Build Coastguard Worker   } __attribute__((__packed__, __may_alias__));
2368*344a7f5eSAndroid Build Coastguard Worker   return ((struct __loadu_pd*)__p)->__v;
2369*344a7f5eSAndroid Build Coastguard Worker }
2370*344a7f5eSAndroid Build Coastguard Worker 
2371*344a7f5eSAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_loadu_ps(float const * __p)2372*344a7f5eSAndroid Build Coastguard Worker _mm256_loadu_ps(float const *__p)
2373*344a7f5eSAndroid Build Coastguard Worker {
2374*344a7f5eSAndroid Build Coastguard Worker   struct __loadu_ps {
2375*344a7f5eSAndroid Build Coastguard Worker     __m256 __v;
2376*344a7f5eSAndroid Build Coastguard Worker   } __attribute__((__packed__, __may_alias__));
2377*344a7f5eSAndroid Build Coastguard Worker   return ((struct __loadu_ps*)__p)->__v;
2378*344a7f5eSAndroid Build Coastguard Worker }
2379*344a7f5eSAndroid Build Coastguard Worker 
2380*344a7f5eSAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_load_si256(__m256i const * __p)2381*344a7f5eSAndroid Build Coastguard Worker _mm256_load_si256(__m256i const *__p)
2382*344a7f5eSAndroid Build Coastguard Worker {
2383*344a7f5eSAndroid Build Coastguard Worker   return *__p;
2384*344a7f5eSAndroid Build Coastguard Worker }
2385*344a7f5eSAndroid Build Coastguard Worker 
2386*344a7f5eSAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_loadu_si256(__m256i const * __p)2387*344a7f5eSAndroid Build Coastguard Worker _mm256_loadu_si256(__m256i const *__p)
2388*344a7f5eSAndroid Build Coastguard Worker {
2389*344a7f5eSAndroid Build Coastguard Worker   struct __loadu_si256 {
2390*344a7f5eSAndroid Build Coastguard Worker     __m256i __v;
2391*344a7f5eSAndroid Build Coastguard Worker   } __attribute__((__packed__, __may_alias__));
2392*344a7f5eSAndroid Build Coastguard Worker   return ((struct __loadu_si256*)__p)->__v;
2393*344a7f5eSAndroid Build Coastguard Worker }
2394*344a7f5eSAndroid Build Coastguard Worker 
2395*344a7f5eSAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_lddqu_si256(__m256i const * __p)2396*344a7f5eSAndroid Build Coastguard Worker _mm256_lddqu_si256(__m256i const *__p)
2397*344a7f5eSAndroid Build Coastguard Worker {
2398*344a7f5eSAndroid Build Coastguard Worker   return (__m256i)__builtin_ia32_lddqu256((char const *)__p);
2399*344a7f5eSAndroid Build Coastguard Worker }
2400*344a7f5eSAndroid Build Coastguard Worker 
2401*344a7f5eSAndroid Build Coastguard Worker /* SIMD store ops */
2402*344a7f5eSAndroid Build Coastguard Worker static __inline void __DEFAULT_FN_ATTRS
_mm256_store_pd(double * __p,__m256d __a)2403*344a7f5eSAndroid Build Coastguard Worker _mm256_store_pd(double *__p, __m256d __a)
2404*344a7f5eSAndroid Build Coastguard Worker {
2405*344a7f5eSAndroid Build Coastguard Worker   *(__m256d *)__p = __a;
2406*344a7f5eSAndroid Build Coastguard Worker }
2407*344a7f5eSAndroid Build Coastguard Worker 
2408*344a7f5eSAndroid Build Coastguard Worker static __inline void __DEFAULT_FN_ATTRS
_mm256_store_ps(float * __p,__m256 __a)2409*344a7f5eSAndroid Build Coastguard Worker _mm256_store_ps(float *__p, __m256 __a)
2410*344a7f5eSAndroid Build Coastguard Worker {
2411*344a7f5eSAndroid Build Coastguard Worker   *(__m256 *)__p = __a;
2412*344a7f5eSAndroid Build Coastguard Worker }
2413*344a7f5eSAndroid Build Coastguard Worker 
2414*344a7f5eSAndroid Build Coastguard Worker static __inline void __DEFAULT_FN_ATTRS
_mm256_storeu_pd(double * __p,__m256d __a)2415*344a7f5eSAndroid Build Coastguard Worker _mm256_storeu_pd(double *__p, __m256d __a)
2416*344a7f5eSAndroid Build Coastguard Worker {
2417*344a7f5eSAndroid Build Coastguard Worker   struct __storeu_pd {
2418*344a7f5eSAndroid Build Coastguard Worker     __m256d __v;
2419*344a7f5eSAndroid Build Coastguard Worker   } __attribute__((__packed__, __may_alias__));
2420*344a7f5eSAndroid Build Coastguard Worker   ((struct __storeu_pd*)__p)->__v = __a;
2421*344a7f5eSAndroid Build Coastguard Worker }
2422*344a7f5eSAndroid Build Coastguard Worker 
2423*344a7f5eSAndroid Build Coastguard Worker static __inline void __DEFAULT_FN_ATTRS
_mm256_storeu_ps(float * __p,__m256 __a)2424*344a7f5eSAndroid Build Coastguard Worker _mm256_storeu_ps(float *__p, __m256 __a)
2425*344a7f5eSAndroid Build Coastguard Worker {
2426*344a7f5eSAndroid Build Coastguard Worker   struct __storeu_ps {
2427*344a7f5eSAndroid Build Coastguard Worker     __m256 __v;
2428*344a7f5eSAndroid Build Coastguard Worker   } __attribute__((__packed__, __may_alias__));
2429*344a7f5eSAndroid Build Coastguard Worker   ((struct __storeu_ps*)__p)->__v = __a;
2430*344a7f5eSAndroid Build Coastguard Worker }
2431*344a7f5eSAndroid Build Coastguard Worker 
2432*344a7f5eSAndroid Build Coastguard Worker static __inline void __DEFAULT_FN_ATTRS
_mm256_store_si256(__m256i * __p,__m256i __a)2433*344a7f5eSAndroid Build Coastguard Worker _mm256_store_si256(__m256i *__p, __m256i __a)
2434*344a7f5eSAndroid Build Coastguard Worker {
2435*344a7f5eSAndroid Build Coastguard Worker   *__p = __a;
2436*344a7f5eSAndroid Build Coastguard Worker }
2437*344a7f5eSAndroid Build Coastguard Worker 
2438*344a7f5eSAndroid Build Coastguard Worker static __inline void __DEFAULT_FN_ATTRS
_mm256_storeu_si256(__m256i * __p,__m256i __a)2439*344a7f5eSAndroid Build Coastguard Worker _mm256_storeu_si256(__m256i *__p, __m256i __a)
2440*344a7f5eSAndroid Build Coastguard Worker {
2441*344a7f5eSAndroid Build Coastguard Worker   struct __storeu_si256 {
2442*344a7f5eSAndroid Build Coastguard Worker     __m256i __v;
2443*344a7f5eSAndroid Build Coastguard Worker   } __attribute__((__packed__, __may_alias__));
2444*344a7f5eSAndroid Build Coastguard Worker   ((struct __storeu_si256*)__p)->__v = __a;
2445*344a7f5eSAndroid Build Coastguard Worker }
2446*344a7f5eSAndroid Build Coastguard Worker 
2447*344a7f5eSAndroid Build Coastguard Worker /* Conditional load ops */
2448*344a7f5eSAndroid Build Coastguard Worker static __inline __m128d __DEFAULT_FN_ATTRS
_mm_maskload_pd(double const * __p,__m128i __m)2449*344a7f5eSAndroid Build Coastguard Worker _mm_maskload_pd(double const *__p, __m128i __m)
2450*344a7f5eSAndroid Build Coastguard Worker {
2451*344a7f5eSAndroid Build Coastguard Worker   return (__m128d)__builtin_ia32_maskloadpd((const __v2df *)__p, (__v2di)__m);
2452*344a7f5eSAndroid Build Coastguard Worker }
2453*344a7f5eSAndroid Build Coastguard Worker 
2454*344a7f5eSAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_maskload_pd(double const * __p,__m256i __m)2455*344a7f5eSAndroid Build Coastguard Worker _mm256_maskload_pd(double const *__p, __m256i __m)
2456*344a7f5eSAndroid Build Coastguard Worker {
2457*344a7f5eSAndroid Build Coastguard Worker   return (__m256d)__builtin_ia32_maskloadpd256((const __v4df *)__p,
2458*344a7f5eSAndroid Build Coastguard Worker                                                (__v4di)__m);
2459*344a7f5eSAndroid Build Coastguard Worker }
2460*344a7f5eSAndroid Build Coastguard Worker 
2461*344a7f5eSAndroid Build Coastguard Worker static __inline __m128 __DEFAULT_FN_ATTRS
_mm_maskload_ps(float const * __p,__m128i __m)2462*344a7f5eSAndroid Build Coastguard Worker _mm_maskload_ps(float const *__p, __m128i __m)
2463*344a7f5eSAndroid Build Coastguard Worker {
2464*344a7f5eSAndroid Build Coastguard Worker   return (__m128)__builtin_ia32_maskloadps((const __v4sf *)__p, (__v4si)__m);
2465*344a7f5eSAndroid Build Coastguard Worker }
2466*344a7f5eSAndroid Build Coastguard Worker 
2467*344a7f5eSAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_maskload_ps(float const * __p,__m256i __m)2468*344a7f5eSAndroid Build Coastguard Worker _mm256_maskload_ps(float const *__p, __m256i __m)
2469*344a7f5eSAndroid Build Coastguard Worker {
2470*344a7f5eSAndroid Build Coastguard Worker   return (__m256)__builtin_ia32_maskloadps256((const __v8sf *)__p, (__v8si)__m);
2471*344a7f5eSAndroid Build Coastguard Worker }
2472*344a7f5eSAndroid Build Coastguard Worker 
2473*344a7f5eSAndroid Build Coastguard Worker /* Conditional store ops */
2474*344a7f5eSAndroid Build Coastguard Worker static __inline void __DEFAULT_FN_ATTRS
_mm256_maskstore_ps(float * __p,__m256i __m,__m256 __a)2475*344a7f5eSAndroid Build Coastguard Worker _mm256_maskstore_ps(float *__p, __m256i __m, __m256 __a)
2476*344a7f5eSAndroid Build Coastguard Worker {
2477*344a7f5eSAndroid Build Coastguard Worker   __builtin_ia32_maskstoreps256((__v8sf *)__p, (__v8si)__m, (__v8sf)__a);
2478*344a7f5eSAndroid Build Coastguard Worker }
2479*344a7f5eSAndroid Build Coastguard Worker 
2480*344a7f5eSAndroid Build Coastguard Worker static __inline void __DEFAULT_FN_ATTRS
_mm_maskstore_pd(double * __p,__m128i __m,__m128d __a)2481*344a7f5eSAndroid Build Coastguard Worker _mm_maskstore_pd(double *__p, __m128i __m, __m128d __a)
2482*344a7f5eSAndroid Build Coastguard Worker {
2483*344a7f5eSAndroid Build Coastguard Worker   __builtin_ia32_maskstorepd((__v2df *)__p, (__v2di)__m, (__v2df)__a);
2484*344a7f5eSAndroid Build Coastguard Worker }
2485*344a7f5eSAndroid Build Coastguard Worker 
2486*344a7f5eSAndroid Build Coastguard Worker static __inline void __DEFAULT_FN_ATTRS
_mm256_maskstore_pd(double * __p,__m256i __m,__m256d __a)2487*344a7f5eSAndroid Build Coastguard Worker _mm256_maskstore_pd(double *__p, __m256i __m, __m256d __a)
2488*344a7f5eSAndroid Build Coastguard Worker {
2489*344a7f5eSAndroid Build Coastguard Worker   __builtin_ia32_maskstorepd256((__v4df *)__p, (__v4di)__m, (__v4df)__a);
2490*344a7f5eSAndroid Build Coastguard Worker }
2491*344a7f5eSAndroid Build Coastguard Worker 
2492*344a7f5eSAndroid Build Coastguard Worker static __inline void __DEFAULT_FN_ATTRS
_mm_maskstore_ps(float * __p,__m128i __m,__m128 __a)2493*344a7f5eSAndroid Build Coastguard Worker _mm_maskstore_ps(float *__p, __m128i __m, __m128 __a)
2494*344a7f5eSAndroid Build Coastguard Worker {
2495*344a7f5eSAndroid Build Coastguard Worker   __builtin_ia32_maskstoreps((__v4sf *)__p, (__v4si)__m, (__v4sf)__a);
2496*344a7f5eSAndroid Build Coastguard Worker }
2497*344a7f5eSAndroid Build Coastguard Worker 
2498*344a7f5eSAndroid Build Coastguard Worker /* Cacheability support ops */
2499*344a7f5eSAndroid Build Coastguard Worker static __inline void __DEFAULT_FN_ATTRS
_mm256_stream_si256(__m256i * __a,__m256i __b)2500*344a7f5eSAndroid Build Coastguard Worker _mm256_stream_si256(__m256i *__a, __m256i __b)
2501*344a7f5eSAndroid Build Coastguard Worker {
2502*344a7f5eSAndroid Build Coastguard Worker   __builtin_nontemporal_store((__v4di)__b, (__v4di*)__a);
2503*344a7f5eSAndroid Build Coastguard Worker }
2504*344a7f5eSAndroid Build Coastguard Worker 
2505*344a7f5eSAndroid Build Coastguard Worker static __inline void __DEFAULT_FN_ATTRS
_mm256_stream_pd(double * __a,__m256d __b)2506*344a7f5eSAndroid Build Coastguard Worker _mm256_stream_pd(double *__a, __m256d __b)
2507*344a7f5eSAndroid Build Coastguard Worker {
2508*344a7f5eSAndroid Build Coastguard Worker   __builtin_nontemporal_store((__v4df)__b, (__v4df*)__a);
2509*344a7f5eSAndroid Build Coastguard Worker }
2510*344a7f5eSAndroid Build Coastguard Worker 
2511*344a7f5eSAndroid Build Coastguard Worker static __inline void __DEFAULT_FN_ATTRS
_mm256_stream_ps(float * __p,__m256 __a)2512*344a7f5eSAndroid Build Coastguard Worker _mm256_stream_ps(float *__p, __m256 __a)
2513*344a7f5eSAndroid Build Coastguard Worker {
2514*344a7f5eSAndroid Build Coastguard Worker   __builtin_nontemporal_store((__v8sf)__a, (__v8sf*)__p);
2515*344a7f5eSAndroid Build Coastguard Worker }
2516*344a7f5eSAndroid Build Coastguard Worker 
2517*344a7f5eSAndroid Build Coastguard Worker /* Create vectors */
2518*344a7f5eSAndroid Build Coastguard Worker static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_undefined_pd(void)2519*344a7f5eSAndroid Build Coastguard Worker _mm256_undefined_pd(void)
2520*344a7f5eSAndroid Build Coastguard Worker {
2521*344a7f5eSAndroid Build Coastguard Worker   return (__m256d)__builtin_ia32_undef256();
2522*344a7f5eSAndroid Build Coastguard Worker }
2523*344a7f5eSAndroid Build Coastguard Worker 
2524*344a7f5eSAndroid Build Coastguard Worker static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_undefined_ps(void)2525*344a7f5eSAndroid Build Coastguard Worker _mm256_undefined_ps(void)
2526*344a7f5eSAndroid Build Coastguard Worker {
2527*344a7f5eSAndroid Build Coastguard Worker   return (__m256)__builtin_ia32_undef256();
2528*344a7f5eSAndroid Build Coastguard Worker }
2529*344a7f5eSAndroid Build Coastguard Worker 
2530*344a7f5eSAndroid Build Coastguard Worker static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_undefined_si256(void)2531*344a7f5eSAndroid Build Coastguard Worker _mm256_undefined_si256(void)
2532*344a7f5eSAndroid Build Coastguard Worker {
2533*344a7f5eSAndroid Build Coastguard Worker   return (__m256i)__builtin_ia32_undef256();
2534*344a7f5eSAndroid Build Coastguard Worker }
2535*344a7f5eSAndroid Build Coastguard Worker 
2536*344a7f5eSAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_set_pd(double __a,double __b,double __c,double __d)2537*344a7f5eSAndroid Build Coastguard Worker _mm256_set_pd(double __a, double __b, double __c, double __d)
2538*344a7f5eSAndroid Build Coastguard Worker {
2539*344a7f5eSAndroid Build Coastguard Worker   return (__m256d){ __d, __c, __b, __a };
2540*344a7f5eSAndroid Build Coastguard Worker }
2541*344a7f5eSAndroid Build Coastguard Worker 
2542*344a7f5eSAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_set_ps(float __a,float __b,float __c,float __d,float __e,float __f,float __g,float __h)2543*344a7f5eSAndroid Build Coastguard Worker _mm256_set_ps(float __a, float __b, float __c, float __d,
2544*344a7f5eSAndroid Build Coastguard Worker               float __e, float __f, float __g, float __h)
2545*344a7f5eSAndroid Build Coastguard Worker {
2546*344a7f5eSAndroid Build Coastguard Worker   return (__m256){ __h, __g, __f, __e, __d, __c, __b, __a };
2547*344a7f5eSAndroid Build Coastguard Worker }
2548*344a7f5eSAndroid Build Coastguard Worker 
2549*344a7f5eSAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_set_epi32(int __i0,int __i1,int __i2,int __i3,int __i4,int __i5,int __i6,int __i7)2550*344a7f5eSAndroid Build Coastguard Worker _mm256_set_epi32(int __i0, int __i1, int __i2, int __i3,
2551*344a7f5eSAndroid Build Coastguard Worker                  int __i4, int __i5, int __i6, int __i7)
2552*344a7f5eSAndroid Build Coastguard Worker {
2553*344a7f5eSAndroid Build Coastguard Worker   return (__m256i)(__v8si){ __i7, __i6, __i5, __i4, __i3, __i2, __i1, __i0 };
2554*344a7f5eSAndroid Build Coastguard Worker }
2555*344a7f5eSAndroid Build Coastguard Worker 
2556*344a7f5eSAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_set_epi16(short __w15,short __w14,short __w13,short __w12,short __w11,short __w10,short __w09,short __w08,short __w07,short __w06,short __w05,short __w04,short __w03,short __w02,short __w01,short __w00)2557*344a7f5eSAndroid Build Coastguard Worker _mm256_set_epi16(short __w15, short __w14, short __w13, short __w12,
2558*344a7f5eSAndroid Build Coastguard Worker                  short __w11, short __w10, short __w09, short __w08,
2559*344a7f5eSAndroid Build Coastguard Worker                  short __w07, short __w06, short __w05, short __w04,
2560*344a7f5eSAndroid Build Coastguard Worker                  short __w03, short __w02, short __w01, short __w00)
2561*344a7f5eSAndroid Build Coastguard Worker {
2562*344a7f5eSAndroid Build Coastguard Worker   return (__m256i)(__v16hi){ __w00, __w01, __w02, __w03, __w04, __w05, __w06,
2563*344a7f5eSAndroid Build Coastguard Worker     __w07, __w08, __w09, __w10, __w11, __w12, __w13, __w14, __w15 };
2564*344a7f5eSAndroid Build Coastguard Worker }
2565*344a7f5eSAndroid Build Coastguard Worker 
2566*344a7f5eSAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_set_epi8(char __b31,char __b30,char __b29,char __b28,char __b27,char __b26,char __b25,char __b24,char __b23,char __b22,char __b21,char __b20,char __b19,char __b18,char __b17,char __b16,char __b15,char __b14,char __b13,char __b12,char __b11,char __b10,char __b09,char __b08,char __b07,char __b06,char __b05,char __b04,char __b03,char __b02,char __b01,char __b00)2567*344a7f5eSAndroid Build Coastguard Worker _mm256_set_epi8(char __b31, char __b30, char __b29, char __b28,
2568*344a7f5eSAndroid Build Coastguard Worker                 char __b27, char __b26, char __b25, char __b24,
2569*344a7f5eSAndroid Build Coastguard Worker                 char __b23, char __b22, char __b21, char __b20,
2570*344a7f5eSAndroid Build Coastguard Worker                 char __b19, char __b18, char __b17, char __b16,
2571*344a7f5eSAndroid Build Coastguard Worker                 char __b15, char __b14, char __b13, char __b12,
2572*344a7f5eSAndroid Build Coastguard Worker                 char __b11, char __b10, char __b09, char __b08,
2573*344a7f5eSAndroid Build Coastguard Worker                 char __b07, char __b06, char __b05, char __b04,
2574*344a7f5eSAndroid Build Coastguard Worker                 char __b03, char __b02, char __b01, char __b00)
2575*344a7f5eSAndroid Build Coastguard Worker {
2576*344a7f5eSAndroid Build Coastguard Worker   return (__m256i)(__v32qi){
2577*344a7f5eSAndroid Build Coastguard Worker     __b00, __b01, __b02, __b03, __b04, __b05, __b06, __b07,
2578*344a7f5eSAndroid Build Coastguard Worker     __b08, __b09, __b10, __b11, __b12, __b13, __b14, __b15,
2579*344a7f5eSAndroid Build Coastguard Worker     __b16, __b17, __b18, __b19, __b20, __b21, __b22, __b23,
2580*344a7f5eSAndroid Build Coastguard Worker     __b24, __b25, __b26, __b27, __b28, __b29, __b30, __b31
2581*344a7f5eSAndroid Build Coastguard Worker   };
2582*344a7f5eSAndroid Build Coastguard Worker }
2583*344a7f5eSAndroid Build Coastguard Worker 
2584*344a7f5eSAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_set_epi64x(long long __a,long long __b,long long __c,long long __d)2585*344a7f5eSAndroid Build Coastguard Worker _mm256_set_epi64x(long long __a, long long __b, long long __c, long long __d)
2586*344a7f5eSAndroid Build Coastguard Worker {
2587*344a7f5eSAndroid Build Coastguard Worker   return (__m256i)(__v4di){ __d, __c, __b, __a };
2588*344a7f5eSAndroid Build Coastguard Worker }
2589*344a7f5eSAndroid Build Coastguard Worker 
2590*344a7f5eSAndroid Build Coastguard Worker /* Create vectors with elements in reverse order */
2591*344a7f5eSAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_setr_pd(double __a,double __b,double __c,double __d)2592*344a7f5eSAndroid Build Coastguard Worker _mm256_setr_pd(double __a, double __b, double __c, double __d)
2593*344a7f5eSAndroid Build Coastguard Worker {
2594*344a7f5eSAndroid Build Coastguard Worker   return (__m256d){ __a, __b, __c, __d };
2595*344a7f5eSAndroid Build Coastguard Worker }
2596*344a7f5eSAndroid Build Coastguard Worker 
2597*344a7f5eSAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_setr_ps(float __a,float __b,float __c,float __d,float __e,float __f,float __g,float __h)2598*344a7f5eSAndroid Build Coastguard Worker _mm256_setr_ps(float __a, float __b, float __c, float __d,
2599*344a7f5eSAndroid Build Coastguard Worker                float __e, float __f, float __g, float __h)
2600*344a7f5eSAndroid Build Coastguard Worker {
2601*344a7f5eSAndroid Build Coastguard Worker   return (__m256){ __a, __b, __c, __d, __e, __f, __g, __h };
2602*344a7f5eSAndroid Build Coastguard Worker }
2603*344a7f5eSAndroid Build Coastguard Worker 
2604*344a7f5eSAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_setr_epi32(int __i0,int __i1,int __i2,int __i3,int __i4,int __i5,int __i6,int __i7)2605*344a7f5eSAndroid Build Coastguard Worker _mm256_setr_epi32(int __i0, int __i1, int __i2, int __i3,
2606*344a7f5eSAndroid Build Coastguard Worker                   int __i4, int __i5, int __i6, int __i7)
2607*344a7f5eSAndroid Build Coastguard Worker {
2608*344a7f5eSAndroid Build Coastguard Worker   return (__m256i)(__v8si){ __i0, __i1, __i2, __i3, __i4, __i5, __i6, __i7 };
2609*344a7f5eSAndroid Build Coastguard Worker }
2610*344a7f5eSAndroid Build Coastguard Worker 
2611*344a7f5eSAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_setr_epi16(short __w15,short __w14,short __w13,short __w12,short __w11,short __w10,short __w09,short __w08,short __w07,short __w06,short __w05,short __w04,short __w03,short __w02,short __w01,short __w00)2612*344a7f5eSAndroid Build Coastguard Worker _mm256_setr_epi16(short __w15, short __w14, short __w13, short __w12,
2613*344a7f5eSAndroid Build Coastguard Worker        short __w11, short __w10, short __w09, short __w08,
2614*344a7f5eSAndroid Build Coastguard Worker        short __w07, short __w06, short __w05, short __w04,
2615*344a7f5eSAndroid Build Coastguard Worker        short __w03, short __w02, short __w01, short __w00)
2616*344a7f5eSAndroid Build Coastguard Worker {
2617*344a7f5eSAndroid Build Coastguard Worker   return (__m256i)(__v16hi){ __w15, __w14, __w13, __w12, __w11, __w10, __w09,
2618*344a7f5eSAndroid Build Coastguard Worker     __w08, __w07, __w06, __w05, __w04, __w03, __w02, __w01, __w00 };
2619*344a7f5eSAndroid Build Coastguard Worker }
2620*344a7f5eSAndroid Build Coastguard Worker 
2621*344a7f5eSAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_setr_epi8(char __b31,char __b30,char __b29,char __b28,char __b27,char __b26,char __b25,char __b24,char __b23,char __b22,char __b21,char __b20,char __b19,char __b18,char __b17,char __b16,char __b15,char __b14,char __b13,char __b12,char __b11,char __b10,char __b09,char __b08,char __b07,char __b06,char __b05,char __b04,char __b03,char __b02,char __b01,char __b00)2622*344a7f5eSAndroid Build Coastguard Worker _mm256_setr_epi8(char __b31, char __b30, char __b29, char __b28,
2623*344a7f5eSAndroid Build Coastguard Worker                  char __b27, char __b26, char __b25, char __b24,
2624*344a7f5eSAndroid Build Coastguard Worker                  char __b23, char __b22, char __b21, char __b20,
2625*344a7f5eSAndroid Build Coastguard Worker                  char __b19, char __b18, char __b17, char __b16,
2626*344a7f5eSAndroid Build Coastguard Worker                  char __b15, char __b14, char __b13, char __b12,
2627*344a7f5eSAndroid Build Coastguard Worker                  char __b11, char __b10, char __b09, char __b08,
2628*344a7f5eSAndroid Build Coastguard Worker                  char __b07, char __b06, char __b05, char __b04,
2629*344a7f5eSAndroid Build Coastguard Worker                  char __b03, char __b02, char __b01, char __b00)
2630*344a7f5eSAndroid Build Coastguard Worker {
2631*344a7f5eSAndroid Build Coastguard Worker   return (__m256i)(__v32qi){
2632*344a7f5eSAndroid Build Coastguard Worker     __b31, __b30, __b29, __b28, __b27, __b26, __b25, __b24,
2633*344a7f5eSAndroid Build Coastguard Worker     __b23, __b22, __b21, __b20, __b19, __b18, __b17, __b16,
2634*344a7f5eSAndroid Build Coastguard Worker     __b15, __b14, __b13, __b12, __b11, __b10, __b09, __b08,
2635*344a7f5eSAndroid Build Coastguard Worker     __b07, __b06, __b05, __b04, __b03, __b02, __b01, __b00 };
2636*344a7f5eSAndroid Build Coastguard Worker }
2637*344a7f5eSAndroid Build Coastguard Worker 
2638*344a7f5eSAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_setr_epi64x(long long __a,long long __b,long long __c,long long __d)2639*344a7f5eSAndroid Build Coastguard Worker _mm256_setr_epi64x(long long __a, long long __b, long long __c, long long __d)
2640*344a7f5eSAndroid Build Coastguard Worker {
2641*344a7f5eSAndroid Build Coastguard Worker   return (__m256i)(__v4di){ __a, __b, __c, __d };
2642*344a7f5eSAndroid Build Coastguard Worker }
2643*344a7f5eSAndroid Build Coastguard Worker 
/* Create vectors with every element set to the same value */
2645*344a7f5eSAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_set1_pd(double __w)2646*344a7f5eSAndroid Build Coastguard Worker _mm256_set1_pd(double __w)
2647*344a7f5eSAndroid Build Coastguard Worker {
2648*344a7f5eSAndroid Build Coastguard Worker   return (__m256d){ __w, __w, __w, __w };
2649*344a7f5eSAndroid Build Coastguard Worker }
2650*344a7f5eSAndroid Build Coastguard Worker 
2651*344a7f5eSAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_set1_ps(float __w)2652*344a7f5eSAndroid Build Coastguard Worker _mm256_set1_ps(float __w)
2653*344a7f5eSAndroid Build Coastguard Worker {
2654*344a7f5eSAndroid Build Coastguard Worker   return (__m256){ __w, __w, __w, __w, __w, __w, __w, __w };
2655*344a7f5eSAndroid Build Coastguard Worker }
2656*344a7f5eSAndroid Build Coastguard Worker 
2657*344a7f5eSAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_set1_epi32(int __i)2658*344a7f5eSAndroid Build Coastguard Worker _mm256_set1_epi32(int __i)
2659*344a7f5eSAndroid Build Coastguard Worker {
2660*344a7f5eSAndroid Build Coastguard Worker   return (__m256i)(__v8si){ __i, __i, __i, __i, __i, __i, __i, __i };
2661*344a7f5eSAndroid Build Coastguard Worker }
2662*344a7f5eSAndroid Build Coastguard Worker 
2663*344a7f5eSAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_set1_epi16(short __w)2664*344a7f5eSAndroid Build Coastguard Worker _mm256_set1_epi16(short __w)
2665*344a7f5eSAndroid Build Coastguard Worker {
2666*344a7f5eSAndroid Build Coastguard Worker   return (__m256i)(__v16hi){ __w, __w, __w, __w, __w, __w, __w, __w, __w, __w,
2667*344a7f5eSAndroid Build Coastguard Worker     __w, __w, __w, __w, __w, __w };
2668*344a7f5eSAndroid Build Coastguard Worker }
2669*344a7f5eSAndroid Build Coastguard Worker 
2670*344a7f5eSAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_set1_epi8(char __b)2671*344a7f5eSAndroid Build Coastguard Worker _mm256_set1_epi8(char __b)
2672*344a7f5eSAndroid Build Coastguard Worker {
2673*344a7f5eSAndroid Build Coastguard Worker   return (__m256i)(__v32qi){ __b, __b, __b, __b, __b, __b, __b, __b, __b, __b,
2674*344a7f5eSAndroid Build Coastguard Worker     __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b,
2675*344a7f5eSAndroid Build Coastguard Worker     __b, __b, __b, __b, __b, __b, __b };
2676*344a7f5eSAndroid Build Coastguard Worker }
2677*344a7f5eSAndroid Build Coastguard Worker 
2678*344a7f5eSAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_set1_epi64x(long long __q)2679*344a7f5eSAndroid Build Coastguard Worker _mm256_set1_epi64x(long long __q)
2680*344a7f5eSAndroid Build Coastguard Worker {
2681*344a7f5eSAndroid Build Coastguard Worker   return (__m256i)(__v4di){ __q, __q, __q, __q };
2682*344a7f5eSAndroid Build Coastguard Worker }
2683*344a7f5eSAndroid Build Coastguard Worker 
/* Create zeroed vectors */
2685*344a7f5eSAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_setzero_pd(void)2686*344a7f5eSAndroid Build Coastguard Worker _mm256_setzero_pd(void)
2687*344a7f5eSAndroid Build Coastguard Worker {
2688*344a7f5eSAndroid Build Coastguard Worker   return (__m256d){ 0, 0, 0, 0 };
2689*344a7f5eSAndroid Build Coastguard Worker }
2690*344a7f5eSAndroid Build Coastguard Worker 
2691*344a7f5eSAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_setzero_ps(void)2692*344a7f5eSAndroid Build Coastguard Worker _mm256_setzero_ps(void)
2693*344a7f5eSAndroid Build Coastguard Worker {
2694*344a7f5eSAndroid Build Coastguard Worker   return (__m256){ 0, 0, 0, 0, 0, 0, 0, 0 };
2695*344a7f5eSAndroid Build Coastguard Worker }
2696*344a7f5eSAndroid Build Coastguard Worker 
2697*344a7f5eSAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_setzero_si256(void)2698*344a7f5eSAndroid Build Coastguard Worker _mm256_setzero_si256(void)
2699*344a7f5eSAndroid Build Coastguard Worker {
2700*344a7f5eSAndroid Build Coastguard Worker   return (__m256i){ 0LL, 0LL, 0LL, 0LL };
2701*344a7f5eSAndroid Build Coastguard Worker }
2702*344a7f5eSAndroid Build Coastguard Worker 
/* Cast between vector types (bit patterns are reinterpreted, not converted) */
2704*344a7f5eSAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_castpd_ps(__m256d __a)2705*344a7f5eSAndroid Build Coastguard Worker _mm256_castpd_ps(__m256d __a)
2706*344a7f5eSAndroid Build Coastguard Worker {
2707*344a7f5eSAndroid Build Coastguard Worker   return (__m256)__a;
2708*344a7f5eSAndroid Build Coastguard Worker }
2709*344a7f5eSAndroid Build Coastguard Worker 
2710*344a7f5eSAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_castpd_si256(__m256d __a)2711*344a7f5eSAndroid Build Coastguard Worker _mm256_castpd_si256(__m256d __a)
2712*344a7f5eSAndroid Build Coastguard Worker {
2713*344a7f5eSAndroid Build Coastguard Worker   return (__m256i)__a;
2714*344a7f5eSAndroid Build Coastguard Worker }
2715*344a7f5eSAndroid Build Coastguard Worker 
2716*344a7f5eSAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_castps_pd(__m256 __a)2717*344a7f5eSAndroid Build Coastguard Worker _mm256_castps_pd(__m256 __a)
2718*344a7f5eSAndroid Build Coastguard Worker {
2719*344a7f5eSAndroid Build Coastguard Worker   return (__m256d)__a;
2720*344a7f5eSAndroid Build Coastguard Worker }
2721*344a7f5eSAndroid Build Coastguard Worker 
2722*344a7f5eSAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_castps_si256(__m256 __a)2723*344a7f5eSAndroid Build Coastguard Worker _mm256_castps_si256(__m256 __a)
2724*344a7f5eSAndroid Build Coastguard Worker {
2725*344a7f5eSAndroid Build Coastguard Worker   return (__m256i)__a;
2726*344a7f5eSAndroid Build Coastguard Worker }
2727*344a7f5eSAndroid Build Coastguard Worker 
2728*344a7f5eSAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_castsi256_ps(__m256i __a)2729*344a7f5eSAndroid Build Coastguard Worker _mm256_castsi256_ps(__m256i __a)
2730*344a7f5eSAndroid Build Coastguard Worker {
2731*344a7f5eSAndroid Build Coastguard Worker   return (__m256)__a;
2732*344a7f5eSAndroid Build Coastguard Worker }
2733*344a7f5eSAndroid Build Coastguard Worker 
2734*344a7f5eSAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_castsi256_pd(__m256i __a)2735*344a7f5eSAndroid Build Coastguard Worker _mm256_castsi256_pd(__m256i __a)
2736*344a7f5eSAndroid Build Coastguard Worker {
2737*344a7f5eSAndroid Build Coastguard Worker   return (__m256d)__a;
2738*344a7f5eSAndroid Build Coastguard Worker }
2739*344a7f5eSAndroid Build Coastguard Worker 
2740*344a7f5eSAndroid Build Coastguard Worker static __inline __m128d __DEFAULT_FN_ATTRS
_mm256_castpd256_pd128(__m256d __a)2741*344a7f5eSAndroid Build Coastguard Worker _mm256_castpd256_pd128(__m256d __a)
2742*344a7f5eSAndroid Build Coastguard Worker {
2743*344a7f5eSAndroid Build Coastguard Worker   return __builtin_shufflevector((__v4df)__a, (__v4df)__a, 0, 1);
2744*344a7f5eSAndroid Build Coastguard Worker }
2745*344a7f5eSAndroid Build Coastguard Worker 
2746*344a7f5eSAndroid Build Coastguard Worker static __inline __m128 __DEFAULT_FN_ATTRS
_mm256_castps256_ps128(__m256 __a)2747*344a7f5eSAndroid Build Coastguard Worker _mm256_castps256_ps128(__m256 __a)
2748*344a7f5eSAndroid Build Coastguard Worker {
2749*344a7f5eSAndroid Build Coastguard Worker   return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 0, 1, 2, 3);
2750*344a7f5eSAndroid Build Coastguard Worker }
2751*344a7f5eSAndroid Build Coastguard Worker 
2752*344a7f5eSAndroid Build Coastguard Worker static __inline __m128i __DEFAULT_FN_ATTRS
_mm256_castsi256_si128(__m256i __a)2753*344a7f5eSAndroid Build Coastguard Worker _mm256_castsi256_si128(__m256i __a)
2754*344a7f5eSAndroid Build Coastguard Worker {
2755*344a7f5eSAndroid Build Coastguard Worker   return __builtin_shufflevector((__v4di)__a, (__v4di)__a, 0, 1);
2756*344a7f5eSAndroid Build Coastguard Worker }
2757*344a7f5eSAndroid Build Coastguard Worker 
2758*344a7f5eSAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_castpd128_pd256(__m128d __a)2759*344a7f5eSAndroid Build Coastguard Worker _mm256_castpd128_pd256(__m128d __a)
2760*344a7f5eSAndroid Build Coastguard Worker {
2761*344a7f5eSAndroid Build Coastguard Worker   return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 1, -1, -1);
2762*344a7f5eSAndroid Build Coastguard Worker }
2763*344a7f5eSAndroid Build Coastguard Worker 
2764*344a7f5eSAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_castps128_ps256(__m128 __a)2765*344a7f5eSAndroid Build Coastguard Worker _mm256_castps128_ps256(__m128 __a)
2766*344a7f5eSAndroid Build Coastguard Worker {
2767*344a7f5eSAndroid Build Coastguard Worker   return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 1, 2, 3, -1, -1, -1, -1);
2768*344a7f5eSAndroid Build Coastguard Worker }
2769*344a7f5eSAndroid Build Coastguard Worker 
2770*344a7f5eSAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_castsi128_si256(__m128i __a)2771*344a7f5eSAndroid Build Coastguard Worker _mm256_castsi128_si256(__m128i __a)
2772*344a7f5eSAndroid Build Coastguard Worker {
2773*344a7f5eSAndroid Build Coastguard Worker   return __builtin_shufflevector((__v2di)__a, (__v2di)__a, 0, 1, -1, -1);
2774*344a7f5eSAndroid Build Coastguard Worker }
2775*344a7f5eSAndroid Build Coastguard Worker 
/*
   Vector insert.
   These are macros rather than inline functions so that only invocations
   whose immediate M is a constant expression are accepted.
*/
/* Replaces one 128-bit half of the eight-float vector V1 with the four-float
   vector V2. If bit 0 of M is set, V2 becomes the upper half and the lower
   half of V1 is kept; otherwise V2 becomes the lower half and the upper half
   of V1 is kept. Shuffle indices 0-7 select from V1 and 8-11 from V2. */
#define _mm256_insertf128_ps(V1, V2, M) __extension__ ({ \
  (__m256)__builtin_shufflevector( \
    (__v8sf)(__m256)(V1), \
    (__v8sf)_mm256_castps128_ps256((__m128)(V2)), \
    (((M) & 1) ?  0 :  8), (((M) & 1) ?  1 :  9), \
    (((M) & 1) ?  2 : 10), (((M) & 1) ?  3 : 11), \
    (((M) & 1) ?  8 :  4), (((M) & 1) ?  9 :  5), \
    (((M) & 1) ? 10 :  6), (((M) & 1) ? 11 :  7) );})
2793*344a7f5eSAndroid Build Coastguard Worker 
/* Replaces one 128-bit half of the four-double vector V1 with the two-double
   vector V2. If bit 0 of M is set, V2 becomes the upper half and the lower
   half of V1 is kept; otherwise V2 becomes the lower half and the upper half
   of V1 is kept. Shuffle indices 0-3 select from V1 and 4-5 from V2. */
#define _mm256_insertf128_pd(V1, V2, M) __extension__ ({ \
  (__m256d)__builtin_shufflevector( \
    (__v4df)(__m256d)(V1), \
    (__v4df)_mm256_castpd128_pd256((__m128d)(V2)), \
    (((M) & 1) ? 0 : 4), (((M) & 1) ? 1 : 5), \
    (((M) & 1) ? 4 : 2), (((M) & 1) ? 5 : 3) );})
2802*344a7f5eSAndroid Build Coastguard Worker 
/* Replaces one 128-bit half of the 256-bit integer vector V1 with the 128-bit
   integer vector V2. If bit 0 of M is set, V2 becomes the upper half and the
   lower half of V1 is kept; otherwise V2 becomes the lower half and the upper
   half of V1 is kept. Shuffle indices 0-3 select from V1 and 4-5 from V2. */
#define _mm256_insertf128_si256(V1, V2, M) __extension__ ({ \
  (__m256i)__builtin_shufflevector( \
    (__v4di)(__m256i)(V1), \
    (__v4di)_mm256_castsi128_si256((__m128i)(V2)), \
    (((M) & 1) ? 0 : 4), (((M) & 1) ? 1 : 5), \
    (((M) & 1) ? 4 : 2), (((M) & 1) ? 5 : 3) );})
2811*344a7f5eSAndroid Build Coastguard Worker 
/*
   Vector extract.
   These are macros rather than inline functions so that only invocations
   whose immediate M is a constant expression are accepted.
*/
/* Extracts one 128-bit half of the eight-float vector V: elements 4-7 when
   bit 0 of M is set, elements 0-3 otherwise. The second shuffle operand is
   never selected by any index. */
#define _mm256_extractf128_ps(V, M) __extension__ ({ \
  (__m128)__builtin_shufflevector( \
    (__v8sf)(__m256)(V), \
    (__v8sf)(_mm256_undefined_ps()), \
    (((M) & 1) ? 4 : 0), (((M) & 1) ? 5 : 1), \
    (((M) & 1) ? 6 : 2), (((M) & 1) ? 7 : 3) );})
2825*344a7f5eSAndroid Build Coastguard Worker 
/* Extracts one 128-bit half of the four-double vector V: elements 2-3 when
   bit 0 of M is set, elements 0-1 otherwise. The second shuffle operand is
   never selected by any index. */
#define _mm256_extractf128_pd(V, M) __extension__ ({ \
  (__m128d)__builtin_shufflevector( \
    (__v4df)(__m256d)(V), \
    (__v4df)(_mm256_undefined_pd()), \
    (((M) & 1) ? 2 : 0), (((M) & 1) ? 3 : 1) );})
2832*344a7f5eSAndroid Build Coastguard Worker 
/* Extracts one 128-bit half of the 256-bit integer vector V: the upper two
   64-bit elements when bit 0 of M is set, the lower two otherwise. The second
   shuffle operand is never selected by any index. */
#define _mm256_extractf128_si256(V, M) __extension__ ({ \
  (__m128i)__builtin_shufflevector( \
    (__v4di)(__m256i)(V), \
    (__v4di)(_mm256_undefined_si256()), \
    (((M) & 1) ? 2 : 0), (((M) & 1) ? 3 : 1) );})
2839*344a7f5eSAndroid Build Coastguard Worker 
/* SIMD load ops (unaligned): build a 256-bit vector from two 128-bit loads */
2841*344a7f5eSAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_loadu2_m128(float const * __addr_hi,float const * __addr_lo)2842*344a7f5eSAndroid Build Coastguard Worker _mm256_loadu2_m128(float const *__addr_hi, float const *__addr_lo)
2843*344a7f5eSAndroid Build Coastguard Worker {
2844*344a7f5eSAndroid Build Coastguard Worker   __m256 __v256 = _mm256_castps128_ps256(_mm_loadu_ps(__addr_lo));
2845*344a7f5eSAndroid Build Coastguard Worker   return _mm256_insertf128_ps(__v256, _mm_loadu_ps(__addr_hi), 1);
2846*344a7f5eSAndroid Build Coastguard Worker }
2847*344a7f5eSAndroid Build Coastguard Worker 
2848*344a7f5eSAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_loadu2_m128d(double const * __addr_hi,double const * __addr_lo)2849*344a7f5eSAndroid Build Coastguard Worker _mm256_loadu2_m128d(double const *__addr_hi, double const *__addr_lo)
2850*344a7f5eSAndroid Build Coastguard Worker {
2851*344a7f5eSAndroid Build Coastguard Worker   __m256d __v256 = _mm256_castpd128_pd256(_mm_loadu_pd(__addr_lo));
2852*344a7f5eSAndroid Build Coastguard Worker   return _mm256_insertf128_pd(__v256, _mm_loadu_pd(__addr_hi), 1);
2853*344a7f5eSAndroid Build Coastguard Worker }
2854*344a7f5eSAndroid Build Coastguard Worker 
2855*344a7f5eSAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_loadu2_m128i(__m128i const * __addr_hi,__m128i const * __addr_lo)2856*344a7f5eSAndroid Build Coastguard Worker _mm256_loadu2_m128i(__m128i const *__addr_hi, __m128i const *__addr_lo)
2857*344a7f5eSAndroid Build Coastguard Worker {
2858*344a7f5eSAndroid Build Coastguard Worker   __m256i __v256 = _mm256_castsi128_si256(_mm_loadu_si128(__addr_lo));
2859*344a7f5eSAndroid Build Coastguard Worker   return _mm256_insertf128_si256(__v256, _mm_loadu_si128(__addr_hi), 1);
2860*344a7f5eSAndroid Build Coastguard Worker }
2861*344a7f5eSAndroid Build Coastguard Worker 
/* SIMD store ops (unaligned): write a 256-bit vector as two 128-bit stores */
2863*344a7f5eSAndroid Build Coastguard Worker static __inline void __DEFAULT_FN_ATTRS
_mm256_storeu2_m128(float * __addr_hi,float * __addr_lo,__m256 __a)2864*344a7f5eSAndroid Build Coastguard Worker _mm256_storeu2_m128(float *__addr_hi, float *__addr_lo, __m256 __a)
2865*344a7f5eSAndroid Build Coastguard Worker {
2866*344a7f5eSAndroid Build Coastguard Worker   __m128 __v128;
2867*344a7f5eSAndroid Build Coastguard Worker 
2868*344a7f5eSAndroid Build Coastguard Worker   __v128 = _mm256_castps256_ps128(__a);
2869*344a7f5eSAndroid Build Coastguard Worker   _mm_storeu_ps(__addr_lo, __v128);
2870*344a7f5eSAndroid Build Coastguard Worker   __v128 = _mm256_extractf128_ps(__a, 1);
2871*344a7f5eSAndroid Build Coastguard Worker   _mm_storeu_ps(__addr_hi, __v128);
2872*344a7f5eSAndroid Build Coastguard Worker }
2873*344a7f5eSAndroid Build Coastguard Worker 
2874*344a7f5eSAndroid Build Coastguard Worker static __inline void __DEFAULT_FN_ATTRS
_mm256_storeu2_m128d(double * __addr_hi,double * __addr_lo,__m256d __a)2875*344a7f5eSAndroid Build Coastguard Worker _mm256_storeu2_m128d(double *__addr_hi, double *__addr_lo, __m256d __a)
2876*344a7f5eSAndroid Build Coastguard Worker {
2877*344a7f5eSAndroid Build Coastguard Worker   __m128d __v128;
2878*344a7f5eSAndroid Build Coastguard Worker 
2879*344a7f5eSAndroid Build Coastguard Worker   __v128 = _mm256_castpd256_pd128(__a);
2880*344a7f5eSAndroid Build Coastguard Worker   _mm_storeu_pd(__addr_lo, __v128);
2881*344a7f5eSAndroid Build Coastguard Worker   __v128 = _mm256_extractf128_pd(__a, 1);
2882*344a7f5eSAndroid Build Coastguard Worker   _mm_storeu_pd(__addr_hi, __v128);
2883*344a7f5eSAndroid Build Coastguard Worker }
2884*344a7f5eSAndroid Build Coastguard Worker 
2885*344a7f5eSAndroid Build Coastguard Worker static __inline void __DEFAULT_FN_ATTRS
_mm256_storeu2_m128i(__m128i * __addr_hi,__m128i * __addr_lo,__m256i __a)2886*344a7f5eSAndroid Build Coastguard Worker _mm256_storeu2_m128i(__m128i *__addr_hi, __m128i *__addr_lo, __m256i __a)
2887*344a7f5eSAndroid Build Coastguard Worker {
2888*344a7f5eSAndroid Build Coastguard Worker   __m128i __v128;
2889*344a7f5eSAndroid Build Coastguard Worker 
2890*344a7f5eSAndroid Build Coastguard Worker   __v128 = _mm256_castsi256_si128(__a);
2891*344a7f5eSAndroid Build Coastguard Worker   _mm_storeu_si128(__addr_lo, __v128);
2892*344a7f5eSAndroid Build Coastguard Worker   __v128 = _mm256_extractf128_si256(__a, 1);
2893*344a7f5eSAndroid Build Coastguard Worker   _mm_storeu_si128(__addr_hi, __v128);
2894*344a7f5eSAndroid Build Coastguard Worker }
2895*344a7f5eSAndroid Build Coastguard Worker 
2896*344a7f5eSAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_set_m128(__m128 __hi,__m128 __lo)2897*344a7f5eSAndroid Build Coastguard Worker _mm256_set_m128 (__m128 __hi, __m128 __lo) {
2898*344a7f5eSAndroid Build Coastguard Worker   return (__m256) __builtin_shufflevector((__v4sf)__lo, (__v4sf)__hi, 0, 1, 2, 3, 4, 5, 6, 7);
2899*344a7f5eSAndroid Build Coastguard Worker }
2900*344a7f5eSAndroid Build Coastguard Worker 
2901*344a7f5eSAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_set_m128d(__m128d __hi,__m128d __lo)2902*344a7f5eSAndroid Build Coastguard Worker _mm256_set_m128d (__m128d __hi, __m128d __lo) {
2903*344a7f5eSAndroid Build Coastguard Worker   return (__m256d)_mm256_set_m128((__m128)__hi, (__m128)__lo);
2904*344a7f5eSAndroid Build Coastguard Worker }
2905*344a7f5eSAndroid Build Coastguard Worker 
2906*344a7f5eSAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_set_m128i(__m128i __hi,__m128i __lo)2907*344a7f5eSAndroid Build Coastguard Worker _mm256_set_m128i (__m128i __hi, __m128i __lo) {
2908*344a7f5eSAndroid Build Coastguard Worker   return (__m256i)_mm256_set_m128((__m128)__hi, (__m128)__lo);
2909*344a7f5eSAndroid Build Coastguard Worker }
2910*344a7f5eSAndroid Build Coastguard Worker 
2911*344a7f5eSAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_setr_m128(__m128 __lo,__m128 __hi)2912*344a7f5eSAndroid Build Coastguard Worker _mm256_setr_m128 (__m128 __lo, __m128 __hi) {
2913*344a7f5eSAndroid Build Coastguard Worker   return _mm256_set_m128(__hi, __lo);
2914*344a7f5eSAndroid Build Coastguard Worker }
2915*344a7f5eSAndroid Build Coastguard Worker 
2916*344a7f5eSAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_setr_m128d(__m128d __lo,__m128d __hi)2917*344a7f5eSAndroid Build Coastguard Worker _mm256_setr_m128d (__m128d __lo, __m128d __hi) {
2918*344a7f5eSAndroid Build Coastguard Worker   return (__m256d)_mm256_set_m128((__m128)__hi, (__m128)__lo);
2919*344a7f5eSAndroid Build Coastguard Worker }
2920*344a7f5eSAndroid Build Coastguard Worker 
2921*344a7f5eSAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_setr_m128i(__m128i __lo,__m128i __hi)2922*344a7f5eSAndroid Build Coastguard Worker _mm256_setr_m128i (__m128i __lo, __m128i __hi) {
2923*344a7f5eSAndroid Build Coastguard Worker   return (__m256i)_mm256_set_m128((__m128)__hi, (__m128)__lo);
2924*344a7f5eSAndroid Build Coastguard Worker }
2925*344a7f5eSAndroid Build Coastguard Worker 
2926*344a7f5eSAndroid Build Coastguard Worker #undef __DEFAULT_FN_ATTRS
2927*344a7f5eSAndroid Build Coastguard Worker 
2928*344a7f5eSAndroid Build Coastguard Worker #endif /* __AVXINTRIN_H */
2929