/* xref: /aosp_15_r20/prebuilts/clang-tools/linux-x86/clang-headers/avxintrin.h (revision bed243d3d9cd544cfb038bfa7be843dedc6e6bf7) */
/*===---- avxintrin.h - AVX intrinsics -------------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

10*bed243d3SAndroid Build Coastguard Worker #ifndef __IMMINTRIN_H
11*bed243d3SAndroid Build Coastguard Worker #error "Never use <avxintrin.h> directly; include <immintrin.h> instead."
12*bed243d3SAndroid Build Coastguard Worker #endif
13*bed243d3SAndroid Build Coastguard Worker 
14*bed243d3SAndroid Build Coastguard Worker #ifndef __AVXINTRIN_H
15*bed243d3SAndroid Build Coastguard Worker #define __AVXINTRIN_H
16*bed243d3SAndroid Build Coastguard Worker 
/* 32-byte (256-bit) vector types, one per element type. */
typedef double __v4df __attribute__ ((__vector_size__ (32)));
typedef float __v8sf __attribute__ ((__vector_size__ (32)));
typedef long long __v4di __attribute__ ((__vector_size__ (32)));
typedef int __v8si __attribute__ ((__vector_size__ (32)));
typedef short __v16hi __attribute__ ((__vector_size__ (32)));
typedef char __v32qi __attribute__ ((__vector_size__ (32)));

/* Unsigned types */
typedef unsigned long long __v4du __attribute__ ((__vector_size__ (32)));
typedef unsigned int __v8su __attribute__ ((__vector_size__ (32)));
typedef unsigned short __v16hu __attribute__ ((__vector_size__ (32)));
typedef unsigned char __v32qu __attribute__ ((__vector_size__ (32)));

/* We need an explicitly signed variant for char. Note that this shouldn't
 * appear in the interface though. */
typedef signed char __v32qs __attribute__((__vector_size__(32)));

/* 32-byte vector types declared with 32-byte alignment. */
typedef float __m256 __attribute__ ((__vector_size__ (32), __aligned__(32)));
typedef double __m256d __attribute__((__vector_size__(32), __aligned__(32)));
typedef long long __m256i __attribute__((__vector_size__(32), __aligned__(32)));

/* Same vector types declared with alignment 1 (the "_u" variants). */
typedef float __m256_u __attribute__ ((__vector_size__ (32), __aligned__(1)));
typedef double __m256d_u __attribute__((__vector_size__(32), __aligned__(1)));
typedef long long __m256i_u __attribute__((__vector_size__(32), __aligned__(1)));

42*bed243d3SAndroid Build Coastguard Worker #ifdef __SSE2__
43*bed243d3SAndroid Build Coastguard Worker /* Both _Float16 and __bf16 require SSE2 being enabled. */
44*bed243d3SAndroid Build Coastguard Worker typedef _Float16 __v16hf __attribute__((__vector_size__(32), __aligned__(32)));
45*bed243d3SAndroid Build Coastguard Worker typedef _Float16 __m256h __attribute__((__vector_size__(32), __aligned__(32)));
46*bed243d3SAndroid Build Coastguard Worker typedef _Float16 __m256h_u __attribute__((__vector_size__(32), __aligned__(1)));
47*bed243d3SAndroid Build Coastguard Worker 
48*bed243d3SAndroid Build Coastguard Worker typedef __bf16 __v16bf __attribute__((__vector_size__(32), __aligned__(32)));
49*bed243d3SAndroid Build Coastguard Worker typedef __bf16 __m256bh __attribute__((__vector_size__(32), __aligned__(32)));
50*bed243d3SAndroid Build Coastguard Worker #endif
51*bed243d3SAndroid Build Coastguard Worker 
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__, __target__("avx,no-evex512"), \
                 __min_vector_width__(256)))
#define __DEFAULT_FN_ATTRS128                                                  \
  __attribute__((__always_inline__, __nodebug__, __target__("avx,no-evex512"), \
                 __min_vector_width__(128)))

60*bed243d3SAndroid Build Coastguard Worker /* Arithmetic */
61*bed243d3SAndroid Build Coastguard Worker /// Adds two 256-bit vectors of [4 x double].
62*bed243d3SAndroid Build Coastguard Worker ///
63*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
64*bed243d3SAndroid Build Coastguard Worker ///
65*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VADDPD </c> instruction.
66*bed243d3SAndroid Build Coastguard Worker ///
67*bed243d3SAndroid Build Coastguard Worker /// \param __a
68*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing one of the source operands.
69*bed243d3SAndroid Build Coastguard Worker /// \param __b
70*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing one of the source operands.
71*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [4 x double] containing the sums of both
72*bed243d3SAndroid Build Coastguard Worker ///    operands.
73*bed243d3SAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_add_pd(__m256d __a,__m256d __b)74*bed243d3SAndroid Build Coastguard Worker _mm256_add_pd(__m256d __a, __m256d __b)
75*bed243d3SAndroid Build Coastguard Worker {
76*bed243d3SAndroid Build Coastguard Worker   return (__m256d)((__v4df)__a+(__v4df)__b);
77*bed243d3SAndroid Build Coastguard Worker }
78*bed243d3SAndroid Build Coastguard Worker 
79*bed243d3SAndroid Build Coastguard Worker /// Adds two 256-bit vectors of [8 x float].
80*bed243d3SAndroid Build Coastguard Worker ///
81*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
82*bed243d3SAndroid Build Coastguard Worker ///
83*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VADDPS </c> instruction.
84*bed243d3SAndroid Build Coastguard Worker ///
85*bed243d3SAndroid Build Coastguard Worker /// \param __a
86*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing one of the source operands.
87*bed243d3SAndroid Build Coastguard Worker /// \param __b
88*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing one of the source operands.
89*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] containing the sums of both
90*bed243d3SAndroid Build Coastguard Worker ///    operands.
91*bed243d3SAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_add_ps(__m256 __a,__m256 __b)92*bed243d3SAndroid Build Coastguard Worker _mm256_add_ps(__m256 __a, __m256 __b)
93*bed243d3SAndroid Build Coastguard Worker {
94*bed243d3SAndroid Build Coastguard Worker   return (__m256)((__v8sf)__a+(__v8sf)__b);
95*bed243d3SAndroid Build Coastguard Worker }
96*bed243d3SAndroid Build Coastguard Worker 
97*bed243d3SAndroid Build Coastguard Worker /// Subtracts two 256-bit vectors of [4 x double].
98*bed243d3SAndroid Build Coastguard Worker ///
99*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
100*bed243d3SAndroid Build Coastguard Worker ///
101*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VSUBPD </c> instruction.
102*bed243d3SAndroid Build Coastguard Worker ///
103*bed243d3SAndroid Build Coastguard Worker /// \param __a
104*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing the minuend.
105*bed243d3SAndroid Build Coastguard Worker /// \param __b
106*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing the subtrahend.
107*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [4 x double] containing the differences between
108*bed243d3SAndroid Build Coastguard Worker ///    both operands.
109*bed243d3SAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_sub_pd(__m256d __a,__m256d __b)110*bed243d3SAndroid Build Coastguard Worker _mm256_sub_pd(__m256d __a, __m256d __b)
111*bed243d3SAndroid Build Coastguard Worker {
112*bed243d3SAndroid Build Coastguard Worker   return (__m256d)((__v4df)__a-(__v4df)__b);
113*bed243d3SAndroid Build Coastguard Worker }
114*bed243d3SAndroid Build Coastguard Worker 
115*bed243d3SAndroid Build Coastguard Worker /// Subtracts two 256-bit vectors of [8 x float].
116*bed243d3SAndroid Build Coastguard Worker ///
117*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
118*bed243d3SAndroid Build Coastguard Worker ///
119*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VSUBPS </c> instruction.
120*bed243d3SAndroid Build Coastguard Worker ///
121*bed243d3SAndroid Build Coastguard Worker /// \param __a
122*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing the minuend.
123*bed243d3SAndroid Build Coastguard Worker /// \param __b
124*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing the subtrahend.
125*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] containing the differences between
126*bed243d3SAndroid Build Coastguard Worker ///    both operands.
127*bed243d3SAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_sub_ps(__m256 __a,__m256 __b)128*bed243d3SAndroid Build Coastguard Worker _mm256_sub_ps(__m256 __a, __m256 __b)
129*bed243d3SAndroid Build Coastguard Worker {
130*bed243d3SAndroid Build Coastguard Worker   return (__m256)((__v8sf)__a-(__v8sf)__b);
131*bed243d3SAndroid Build Coastguard Worker }
132*bed243d3SAndroid Build Coastguard Worker 
133*bed243d3SAndroid Build Coastguard Worker /// Adds the even-indexed values and subtracts the odd-indexed values of
134*bed243d3SAndroid Build Coastguard Worker ///    two 256-bit vectors of [4 x double].
135*bed243d3SAndroid Build Coastguard Worker ///
136*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
137*bed243d3SAndroid Build Coastguard Worker ///
138*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VADDSUBPD </c> instruction.
139*bed243d3SAndroid Build Coastguard Worker ///
140*bed243d3SAndroid Build Coastguard Worker /// \param __a
141*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing the left source operand.
142*bed243d3SAndroid Build Coastguard Worker /// \param __b
143*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing the right source operand.
144*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [4 x double] containing the alternating sums
145*bed243d3SAndroid Build Coastguard Worker ///    and differences between both operands.
146*bed243d3SAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_addsub_pd(__m256d __a,__m256d __b)147*bed243d3SAndroid Build Coastguard Worker _mm256_addsub_pd(__m256d __a, __m256d __b)
148*bed243d3SAndroid Build Coastguard Worker {
149*bed243d3SAndroid Build Coastguard Worker   return (__m256d)__builtin_ia32_addsubpd256((__v4df)__a, (__v4df)__b);
150*bed243d3SAndroid Build Coastguard Worker }
151*bed243d3SAndroid Build Coastguard Worker 
152*bed243d3SAndroid Build Coastguard Worker /// Adds the even-indexed values and subtracts the odd-indexed values of
153*bed243d3SAndroid Build Coastguard Worker ///    two 256-bit vectors of [8 x float].
154*bed243d3SAndroid Build Coastguard Worker ///
155*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
156*bed243d3SAndroid Build Coastguard Worker ///
157*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VADDSUBPS </c> instruction.
158*bed243d3SAndroid Build Coastguard Worker ///
159*bed243d3SAndroid Build Coastguard Worker /// \param __a
160*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing the left source operand.
161*bed243d3SAndroid Build Coastguard Worker /// \param __b
162*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing the right source operand.
163*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] containing the alternating sums and
164*bed243d3SAndroid Build Coastguard Worker ///    differences between both operands.
165*bed243d3SAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_addsub_ps(__m256 __a,__m256 __b)166*bed243d3SAndroid Build Coastguard Worker _mm256_addsub_ps(__m256 __a, __m256 __b)
167*bed243d3SAndroid Build Coastguard Worker {
168*bed243d3SAndroid Build Coastguard Worker   return (__m256)__builtin_ia32_addsubps256((__v8sf)__a, (__v8sf)__b);
169*bed243d3SAndroid Build Coastguard Worker }
170*bed243d3SAndroid Build Coastguard Worker 
171*bed243d3SAndroid Build Coastguard Worker /// Divides two 256-bit vectors of [4 x double].
172*bed243d3SAndroid Build Coastguard Worker ///
173*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
174*bed243d3SAndroid Build Coastguard Worker ///
175*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VDIVPD </c> instruction.
176*bed243d3SAndroid Build Coastguard Worker ///
177*bed243d3SAndroid Build Coastguard Worker /// \param __a
178*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing the dividend.
179*bed243d3SAndroid Build Coastguard Worker /// \param __b
180*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing the divisor.
181*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [4 x double] containing the quotients of both
182*bed243d3SAndroid Build Coastguard Worker ///    operands.
183*bed243d3SAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_div_pd(__m256d __a,__m256d __b)184*bed243d3SAndroid Build Coastguard Worker _mm256_div_pd(__m256d __a, __m256d __b)
185*bed243d3SAndroid Build Coastguard Worker {
186*bed243d3SAndroid Build Coastguard Worker   return (__m256d)((__v4df)__a/(__v4df)__b);
187*bed243d3SAndroid Build Coastguard Worker }
188*bed243d3SAndroid Build Coastguard Worker 
189*bed243d3SAndroid Build Coastguard Worker /// Divides two 256-bit vectors of [8 x float].
190*bed243d3SAndroid Build Coastguard Worker ///
191*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
192*bed243d3SAndroid Build Coastguard Worker ///
193*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VDIVPS </c> instruction.
194*bed243d3SAndroid Build Coastguard Worker ///
195*bed243d3SAndroid Build Coastguard Worker /// \param __a
196*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing the dividend.
197*bed243d3SAndroid Build Coastguard Worker /// \param __b
198*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing the divisor.
199*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] containing the quotients of both
200*bed243d3SAndroid Build Coastguard Worker ///    operands.
201*bed243d3SAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_div_ps(__m256 __a,__m256 __b)202*bed243d3SAndroid Build Coastguard Worker _mm256_div_ps(__m256 __a, __m256 __b)
203*bed243d3SAndroid Build Coastguard Worker {
204*bed243d3SAndroid Build Coastguard Worker   return (__m256)((__v8sf)__a/(__v8sf)__b);
205*bed243d3SAndroid Build Coastguard Worker }
206*bed243d3SAndroid Build Coastguard Worker 
207*bed243d3SAndroid Build Coastguard Worker /// Compares two 256-bit vectors of [4 x double] and returns the greater
208*bed243d3SAndroid Build Coastguard Worker ///    of each pair of values.
209*bed243d3SAndroid Build Coastguard Worker ///
210*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
211*bed243d3SAndroid Build Coastguard Worker ///
212*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VMAXPD </c> instruction.
213*bed243d3SAndroid Build Coastguard Worker ///
214*bed243d3SAndroid Build Coastguard Worker /// \param __a
215*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing one of the operands.
216*bed243d3SAndroid Build Coastguard Worker /// \param __b
217*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing one of the operands.
218*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [4 x double] containing the maximum values
219*bed243d3SAndroid Build Coastguard Worker ///    between both operands.
220*bed243d3SAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_max_pd(__m256d __a,__m256d __b)221*bed243d3SAndroid Build Coastguard Worker _mm256_max_pd(__m256d __a, __m256d __b)
222*bed243d3SAndroid Build Coastguard Worker {
223*bed243d3SAndroid Build Coastguard Worker   return (__m256d)__builtin_ia32_maxpd256((__v4df)__a, (__v4df)__b);
224*bed243d3SAndroid Build Coastguard Worker }
225*bed243d3SAndroid Build Coastguard Worker 
226*bed243d3SAndroid Build Coastguard Worker /// Compares two 256-bit vectors of [8 x float] and returns the greater
227*bed243d3SAndroid Build Coastguard Worker ///    of each pair of values.
228*bed243d3SAndroid Build Coastguard Worker ///
229*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
230*bed243d3SAndroid Build Coastguard Worker ///
231*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VMAXPS </c> instruction.
232*bed243d3SAndroid Build Coastguard Worker ///
233*bed243d3SAndroid Build Coastguard Worker /// \param __a
234*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing one of the operands.
235*bed243d3SAndroid Build Coastguard Worker /// \param __b
236*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing one of the operands.
237*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] containing the maximum values
238*bed243d3SAndroid Build Coastguard Worker ///    between both operands.
239*bed243d3SAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_max_ps(__m256 __a,__m256 __b)240*bed243d3SAndroid Build Coastguard Worker _mm256_max_ps(__m256 __a, __m256 __b)
241*bed243d3SAndroid Build Coastguard Worker {
242*bed243d3SAndroid Build Coastguard Worker   return (__m256)__builtin_ia32_maxps256((__v8sf)__a, (__v8sf)__b);
243*bed243d3SAndroid Build Coastguard Worker }
244*bed243d3SAndroid Build Coastguard Worker 
245*bed243d3SAndroid Build Coastguard Worker /// Compares two 256-bit vectors of [4 x double] and returns the lesser
246*bed243d3SAndroid Build Coastguard Worker ///    of each pair of values.
247*bed243d3SAndroid Build Coastguard Worker ///
248*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
249*bed243d3SAndroid Build Coastguard Worker ///
250*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VMINPD </c> instruction.
251*bed243d3SAndroid Build Coastguard Worker ///
252*bed243d3SAndroid Build Coastguard Worker /// \param __a
253*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing one of the operands.
254*bed243d3SAndroid Build Coastguard Worker /// \param __b
255*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing one of the operands.
256*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [4 x double] containing the minimum values
257*bed243d3SAndroid Build Coastguard Worker ///    between both operands.
258*bed243d3SAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_min_pd(__m256d __a,__m256d __b)259*bed243d3SAndroid Build Coastguard Worker _mm256_min_pd(__m256d __a, __m256d __b)
260*bed243d3SAndroid Build Coastguard Worker {
261*bed243d3SAndroid Build Coastguard Worker   return (__m256d)__builtin_ia32_minpd256((__v4df)__a, (__v4df)__b);
262*bed243d3SAndroid Build Coastguard Worker }
263*bed243d3SAndroid Build Coastguard Worker 
264*bed243d3SAndroid Build Coastguard Worker /// Compares two 256-bit vectors of [8 x float] and returns the lesser
265*bed243d3SAndroid Build Coastguard Worker ///    of each pair of values.
266*bed243d3SAndroid Build Coastguard Worker ///
267*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
268*bed243d3SAndroid Build Coastguard Worker ///
269*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VMINPS </c> instruction.
270*bed243d3SAndroid Build Coastguard Worker ///
271*bed243d3SAndroid Build Coastguard Worker /// \param __a
272*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing one of the operands.
273*bed243d3SAndroid Build Coastguard Worker /// \param __b
274*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing one of the operands.
275*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] containing the minimum values
276*bed243d3SAndroid Build Coastguard Worker ///    between both operands.
277*bed243d3SAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_min_ps(__m256 __a,__m256 __b)278*bed243d3SAndroid Build Coastguard Worker _mm256_min_ps(__m256 __a, __m256 __b)
279*bed243d3SAndroid Build Coastguard Worker {
280*bed243d3SAndroid Build Coastguard Worker   return (__m256)__builtin_ia32_minps256((__v8sf)__a, (__v8sf)__b);
281*bed243d3SAndroid Build Coastguard Worker }
282*bed243d3SAndroid Build Coastguard Worker 
283*bed243d3SAndroid Build Coastguard Worker /// Multiplies two 256-bit vectors of [4 x double].
284*bed243d3SAndroid Build Coastguard Worker ///
285*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
286*bed243d3SAndroid Build Coastguard Worker ///
287*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VMULPD </c> instruction.
288*bed243d3SAndroid Build Coastguard Worker ///
289*bed243d3SAndroid Build Coastguard Worker /// \param __a
290*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing one of the operands.
291*bed243d3SAndroid Build Coastguard Worker /// \param __b
292*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing one of the operands.
293*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [4 x double] containing the products of both
294*bed243d3SAndroid Build Coastguard Worker ///    operands.
295*bed243d3SAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_mul_pd(__m256d __a,__m256d __b)296*bed243d3SAndroid Build Coastguard Worker _mm256_mul_pd(__m256d __a, __m256d __b)
297*bed243d3SAndroid Build Coastguard Worker {
298*bed243d3SAndroid Build Coastguard Worker   return (__m256d)((__v4df)__a * (__v4df)__b);
299*bed243d3SAndroid Build Coastguard Worker }
300*bed243d3SAndroid Build Coastguard Worker 
301*bed243d3SAndroid Build Coastguard Worker /// Multiplies two 256-bit vectors of [8 x float].
302*bed243d3SAndroid Build Coastguard Worker ///
303*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
304*bed243d3SAndroid Build Coastguard Worker ///
305*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VMULPS </c> instruction.
306*bed243d3SAndroid Build Coastguard Worker ///
307*bed243d3SAndroid Build Coastguard Worker /// \param __a
308*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing one of the operands.
309*bed243d3SAndroid Build Coastguard Worker /// \param __b
310*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing one of the operands.
311*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] containing the products of both
312*bed243d3SAndroid Build Coastguard Worker ///    operands.
313*bed243d3SAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_mul_ps(__m256 __a,__m256 __b)314*bed243d3SAndroid Build Coastguard Worker _mm256_mul_ps(__m256 __a, __m256 __b)
315*bed243d3SAndroid Build Coastguard Worker {
316*bed243d3SAndroid Build Coastguard Worker   return (__m256)((__v8sf)__a * (__v8sf)__b);
317*bed243d3SAndroid Build Coastguard Worker }
318*bed243d3SAndroid Build Coastguard Worker 
319*bed243d3SAndroid Build Coastguard Worker /// Calculates the square roots of the values in a 256-bit vector of
320*bed243d3SAndroid Build Coastguard Worker ///    [4 x double].
321*bed243d3SAndroid Build Coastguard Worker ///
322*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
323*bed243d3SAndroid Build Coastguard Worker ///
324*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VSQRTPD </c> instruction.
325*bed243d3SAndroid Build Coastguard Worker ///
326*bed243d3SAndroid Build Coastguard Worker /// \param __a
327*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double].
328*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [4 x double] containing the square roots of the
329*bed243d3SAndroid Build Coastguard Worker ///    values in the operand.
330*bed243d3SAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_sqrt_pd(__m256d __a)331*bed243d3SAndroid Build Coastguard Worker _mm256_sqrt_pd(__m256d __a)
332*bed243d3SAndroid Build Coastguard Worker {
333*bed243d3SAndroid Build Coastguard Worker   return (__m256d)__builtin_ia32_sqrtpd256((__v4df)__a);
334*bed243d3SAndroid Build Coastguard Worker }
335*bed243d3SAndroid Build Coastguard Worker 
336*bed243d3SAndroid Build Coastguard Worker /// Calculates the square roots of the values in a 256-bit vector of
337*bed243d3SAndroid Build Coastguard Worker ///    [8 x float].
338*bed243d3SAndroid Build Coastguard Worker ///
339*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
340*bed243d3SAndroid Build Coastguard Worker ///
341*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VSQRTPS </c> instruction.
342*bed243d3SAndroid Build Coastguard Worker ///
343*bed243d3SAndroid Build Coastguard Worker /// \param __a
344*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float].
345*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] containing the square roots of the
346*bed243d3SAndroid Build Coastguard Worker ///    values in the operand.
347*bed243d3SAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_sqrt_ps(__m256 __a)348*bed243d3SAndroid Build Coastguard Worker _mm256_sqrt_ps(__m256 __a)
349*bed243d3SAndroid Build Coastguard Worker {
350*bed243d3SAndroid Build Coastguard Worker   return (__m256)__builtin_ia32_sqrtps256((__v8sf)__a);
351*bed243d3SAndroid Build Coastguard Worker }
352*bed243d3SAndroid Build Coastguard Worker 
353*bed243d3SAndroid Build Coastguard Worker /// Calculates the reciprocal square roots of the values in a 256-bit
354*bed243d3SAndroid Build Coastguard Worker ///    vector of [8 x float].
355*bed243d3SAndroid Build Coastguard Worker ///
356*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
357*bed243d3SAndroid Build Coastguard Worker ///
358*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VRSQRTPS </c> instruction.
359*bed243d3SAndroid Build Coastguard Worker ///
360*bed243d3SAndroid Build Coastguard Worker /// \param __a
361*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float].
362*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] containing the reciprocal square
363*bed243d3SAndroid Build Coastguard Worker ///    roots of the values in the operand.
364*bed243d3SAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_rsqrt_ps(__m256 __a)365*bed243d3SAndroid Build Coastguard Worker _mm256_rsqrt_ps(__m256 __a)
366*bed243d3SAndroid Build Coastguard Worker {
367*bed243d3SAndroid Build Coastguard Worker   return (__m256)__builtin_ia32_rsqrtps256((__v8sf)__a);
368*bed243d3SAndroid Build Coastguard Worker }
369*bed243d3SAndroid Build Coastguard Worker 
370*bed243d3SAndroid Build Coastguard Worker /// Calculates the reciprocals of the values in a 256-bit vector of
371*bed243d3SAndroid Build Coastguard Worker ///    [8 x float].
372*bed243d3SAndroid Build Coastguard Worker ///
373*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
374*bed243d3SAndroid Build Coastguard Worker ///
375*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VRCPPS </c> instruction.
376*bed243d3SAndroid Build Coastguard Worker ///
377*bed243d3SAndroid Build Coastguard Worker /// \param __a
378*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float].
379*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] containing the reciprocals of the
380*bed243d3SAndroid Build Coastguard Worker ///    values in the operand.
381*bed243d3SAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_rcp_ps(__m256 __a)382*bed243d3SAndroid Build Coastguard Worker _mm256_rcp_ps(__m256 __a)
383*bed243d3SAndroid Build Coastguard Worker {
384*bed243d3SAndroid Build Coastguard Worker   return (__m256)__builtin_ia32_rcpps256((__v8sf)__a);
385*bed243d3SAndroid Build Coastguard Worker }
386*bed243d3SAndroid Build Coastguard Worker 
/// Rounds the values in a 256-bit vector of [4 x double] as specified
///    by the byte operand. The source values are rounded to integer values and
///    returned as 64-bit double-precision floating-point values.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m256d _mm256_round_pd(__m256d V, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VROUNDPD </c> instruction.
///
/// \param V
///    A 256-bit vector of [4 x double].
/// \param M
///    An integer value that specifies the rounding operation. \n
///    Bits [7:4] are reserved. \n
///    Bit [3] is a precision exception value: \n
///      0: A normal PE exception is used. \n
///      1: The PE field is not updated. \n
///    Bit [2] is the rounding control source: \n
///      0: Use bits [1:0] of \a M. \n
///      1: Use the current MXCSR setting. \n
///    Bits [1:0] contain the rounding control definition: \n
///      00: Nearest. \n
///      01: Downward (toward negative infinity). \n
///      10: Upward (toward positive infinity). \n
///      11: Truncated.
/// \returns A 256-bit vector of [4 x double] containing the rounded values.
#define _mm256_round_pd(V, M) \
  ((__m256d)__builtin_ia32_roundpd256((__v4df)(__m256d)(V), (M)))

/// Rounds the values stored in a 256-bit vector of [8 x float] as
///    specified by the byte operand. The source values are rounded to integer
///    values and returned as floating-point values.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m256 _mm256_round_ps(__m256 V, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VROUNDPS </c> instruction.
///
/// \param V
///    A 256-bit vector of [8 x float].
/// \param M
///    An integer value that specifies the rounding operation. It is passed
///    straight through to the builtin, so it must be usable as the
///    instruction's immediate operand. \n
///    Bits [7:4] are reserved. \n
///    Bit [3] is a precision exception value: \n
///      0: A normal PE exception is used. \n
///      1: The PE field is not updated. \n
///    Bit [2] is the rounding control source: \n
///      0: Use bits [1:0] of \a M. \n
///      1: Use the current MXCSR setting. \n
///    Bits [1:0] contain the rounding control definition: \n
///      00: Nearest. \n
///      01: Downward (toward negative infinity). \n
///      10: Upward (toward positive infinity). \n
///      11: Truncated.
/// \returns A 256-bit vector of [8 x float] containing the rounded values.
#define _mm256_round_ps(V, M) \
  ((__m256)__builtin_ia32_roundps256((__v8sf)(__m256)(V), (M)))

/// Rounds up the values stored in a 256-bit vector of [4 x double]. The
///    source values are rounded up to integer values and returned as 64-bit
///    double-precision floating-point values.
///
///    This is shorthand for _mm256_round_pd() with the rounding operand fixed
///    to <c> _MM_FROUND_CEIL </c>.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m256d _mm256_ceil_pd(__m256d V);
/// \endcode
///
/// This intrinsic corresponds to the <c> VROUNDPD </c> instruction.
///
/// \param V
///    A 256-bit vector of [4 x double].
/// \returns A 256-bit vector of [4 x double] containing the rounded up values.
#define _mm256_ceil_pd(V)  _mm256_round_pd((V), _MM_FROUND_CEIL)

/// Rounds down the values stored in a 256-bit vector of [4 x double].
///    The source values are rounded down to integer values and returned as
///    64-bit double-precision floating-point values.
///
///    This is shorthand for _mm256_round_pd() with the rounding operand fixed
///    to <c> _MM_FROUND_FLOOR </c>.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m256d _mm256_floor_pd(__m256d V);
/// \endcode
///
/// This intrinsic corresponds to the <c> VROUNDPD </c> instruction.
///
/// \param V
///    A 256-bit vector of [4 x double].
/// \returns A 256-bit vector of [4 x double] containing the rounded down
///    values.
#define _mm256_floor_pd(V) _mm256_round_pd((V), _MM_FROUND_FLOOR)

/// Rounds up the values stored in a 256-bit vector of [8 x float]. The
///    source values are rounded up to integer values and returned as
///    floating-point values.
///
///    This is shorthand for _mm256_round_ps() with the rounding operand fixed
///    to <c> _MM_FROUND_CEIL </c>.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m256 _mm256_ceil_ps(__m256 V);
/// \endcode
///
/// This intrinsic corresponds to the <c> VROUNDPS </c> instruction.
///
/// \param V
///    A 256-bit vector of [8 x float].
/// \returns A 256-bit vector of [8 x float] containing the rounded up values.
#define _mm256_ceil_ps(V)  _mm256_round_ps((V), _MM_FROUND_CEIL)

/// Rounds down the values stored in a 256-bit vector of [8 x float]. The
///    source values are rounded down to integer values and returned as
///    floating-point values.
///
///    This is shorthand for _mm256_round_ps() with the rounding operand fixed
///    to <c> _MM_FROUND_FLOOR </c>.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m256 _mm256_floor_ps(__m256 V);
/// \endcode
///
/// This intrinsic corresponds to the <c> VROUNDPS </c> instruction.
///
/// \param V
///    A 256-bit vector of [8 x float].
/// \returns A 256-bit vector of [8 x float] containing the rounded down values.
#define _mm256_floor_ps(V) _mm256_round_ps((V), _MM_FROUND_FLOOR)

520*bed243d3SAndroid Build Coastguard Worker /* Logical */
521*bed243d3SAndroid Build Coastguard Worker /// Performs a bitwise AND of two 256-bit vectors of [4 x double].
522*bed243d3SAndroid Build Coastguard Worker ///
523*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
524*bed243d3SAndroid Build Coastguard Worker ///
525*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VANDPD </c> instruction.
526*bed243d3SAndroid Build Coastguard Worker ///
527*bed243d3SAndroid Build Coastguard Worker /// \param __a
528*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing one of the source operands.
529*bed243d3SAndroid Build Coastguard Worker /// \param __b
530*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing one of the source operands.
531*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [4 x double] containing the bitwise AND of the
532*bed243d3SAndroid Build Coastguard Worker ///    values between both operands.
533*bed243d3SAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_and_pd(__m256d __a,__m256d __b)534*bed243d3SAndroid Build Coastguard Worker _mm256_and_pd(__m256d __a, __m256d __b)
535*bed243d3SAndroid Build Coastguard Worker {
536*bed243d3SAndroid Build Coastguard Worker   return (__m256d)((__v4du)__a & (__v4du)__b);
537*bed243d3SAndroid Build Coastguard Worker }
538*bed243d3SAndroid Build Coastguard Worker 
539*bed243d3SAndroid Build Coastguard Worker /// Performs a bitwise AND of two 256-bit vectors of [8 x float].
540*bed243d3SAndroid Build Coastguard Worker ///
541*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
542*bed243d3SAndroid Build Coastguard Worker ///
543*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VANDPS </c> instruction.
544*bed243d3SAndroid Build Coastguard Worker ///
545*bed243d3SAndroid Build Coastguard Worker /// \param __a
546*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing one of the source operands.
547*bed243d3SAndroid Build Coastguard Worker /// \param __b
548*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing one of the source operands.
549*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] containing the bitwise AND of the
550*bed243d3SAndroid Build Coastguard Worker ///    values between both operands.
551*bed243d3SAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_and_ps(__m256 __a,__m256 __b)552*bed243d3SAndroid Build Coastguard Worker _mm256_and_ps(__m256 __a, __m256 __b)
553*bed243d3SAndroid Build Coastguard Worker {
554*bed243d3SAndroid Build Coastguard Worker   return (__m256)((__v8su)__a & (__v8su)__b);
555*bed243d3SAndroid Build Coastguard Worker }
556*bed243d3SAndroid Build Coastguard Worker 
557*bed243d3SAndroid Build Coastguard Worker /// Performs a bitwise AND of two 256-bit vectors of [4 x double], using
558*bed243d3SAndroid Build Coastguard Worker ///    the one's complement of the values contained in the first source operand.
559*bed243d3SAndroid Build Coastguard Worker ///
560*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
561*bed243d3SAndroid Build Coastguard Worker ///
562*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VANDNPD </c> instruction.
563*bed243d3SAndroid Build Coastguard Worker ///
564*bed243d3SAndroid Build Coastguard Worker /// \param __a
565*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing the left source operand. The
566*bed243d3SAndroid Build Coastguard Worker ///    one's complement of this value is used in the bitwise AND.
567*bed243d3SAndroid Build Coastguard Worker /// \param __b
568*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing the right source operand.
569*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [4 x double] containing the bitwise AND of the
570*bed243d3SAndroid Build Coastguard Worker ///    values of the second operand and the one's complement of the first
571*bed243d3SAndroid Build Coastguard Worker ///    operand.
572*bed243d3SAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_andnot_pd(__m256d __a,__m256d __b)573*bed243d3SAndroid Build Coastguard Worker _mm256_andnot_pd(__m256d __a, __m256d __b)
574*bed243d3SAndroid Build Coastguard Worker {
575*bed243d3SAndroid Build Coastguard Worker   return (__m256d)(~(__v4du)__a & (__v4du)__b);
576*bed243d3SAndroid Build Coastguard Worker }
577*bed243d3SAndroid Build Coastguard Worker 
578*bed243d3SAndroid Build Coastguard Worker /// Performs a bitwise AND of two 256-bit vectors of [8 x float], using
579*bed243d3SAndroid Build Coastguard Worker ///    the one's complement of the values contained in the first source operand.
580*bed243d3SAndroid Build Coastguard Worker ///
581*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
582*bed243d3SAndroid Build Coastguard Worker ///
583*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VANDNPS </c> instruction.
584*bed243d3SAndroid Build Coastguard Worker ///
585*bed243d3SAndroid Build Coastguard Worker /// \param __a
586*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing the left source operand. The
587*bed243d3SAndroid Build Coastguard Worker ///    one's complement of this value is used in the bitwise AND.
588*bed243d3SAndroid Build Coastguard Worker /// \param __b
589*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing the right source operand.
590*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] containing the bitwise AND of the
591*bed243d3SAndroid Build Coastguard Worker ///    values of the second operand and the one's complement of the first
592*bed243d3SAndroid Build Coastguard Worker ///    operand.
593*bed243d3SAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_andnot_ps(__m256 __a,__m256 __b)594*bed243d3SAndroid Build Coastguard Worker _mm256_andnot_ps(__m256 __a, __m256 __b)
595*bed243d3SAndroid Build Coastguard Worker {
596*bed243d3SAndroid Build Coastguard Worker   return (__m256)(~(__v8su)__a & (__v8su)__b);
597*bed243d3SAndroid Build Coastguard Worker }
598*bed243d3SAndroid Build Coastguard Worker 
599*bed243d3SAndroid Build Coastguard Worker /// Performs a bitwise OR of two 256-bit vectors of [4 x double].
600*bed243d3SAndroid Build Coastguard Worker ///
601*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
602*bed243d3SAndroid Build Coastguard Worker ///
603*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VORPD </c> instruction.
604*bed243d3SAndroid Build Coastguard Worker ///
605*bed243d3SAndroid Build Coastguard Worker /// \param __a
606*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing one of the source operands.
607*bed243d3SAndroid Build Coastguard Worker /// \param __b
608*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing one of the source operands.
609*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [4 x double] containing the bitwise OR of the
610*bed243d3SAndroid Build Coastguard Worker ///    values between both operands.
611*bed243d3SAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_or_pd(__m256d __a,__m256d __b)612*bed243d3SAndroid Build Coastguard Worker _mm256_or_pd(__m256d __a, __m256d __b)
613*bed243d3SAndroid Build Coastguard Worker {
614*bed243d3SAndroid Build Coastguard Worker   return (__m256d)((__v4du)__a | (__v4du)__b);
615*bed243d3SAndroid Build Coastguard Worker }
616*bed243d3SAndroid Build Coastguard Worker 
617*bed243d3SAndroid Build Coastguard Worker /// Performs a bitwise OR of two 256-bit vectors of [8 x float].
618*bed243d3SAndroid Build Coastguard Worker ///
619*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
620*bed243d3SAndroid Build Coastguard Worker ///
621*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VORPS </c> instruction.
622*bed243d3SAndroid Build Coastguard Worker ///
623*bed243d3SAndroid Build Coastguard Worker /// \param __a
624*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing one of the source operands.
625*bed243d3SAndroid Build Coastguard Worker /// \param __b
626*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing one of the source operands.
627*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] containing the bitwise OR of the
628*bed243d3SAndroid Build Coastguard Worker ///    values between both operands.
629*bed243d3SAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_or_ps(__m256 __a,__m256 __b)630*bed243d3SAndroid Build Coastguard Worker _mm256_or_ps(__m256 __a, __m256 __b)
631*bed243d3SAndroid Build Coastguard Worker {
632*bed243d3SAndroid Build Coastguard Worker   return (__m256)((__v8su)__a | (__v8su)__b);
633*bed243d3SAndroid Build Coastguard Worker }
634*bed243d3SAndroid Build Coastguard Worker 
635*bed243d3SAndroid Build Coastguard Worker /// Performs a bitwise XOR of two 256-bit vectors of [4 x double].
636*bed243d3SAndroid Build Coastguard Worker ///
637*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
638*bed243d3SAndroid Build Coastguard Worker ///
639*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VXORPD </c> instruction.
640*bed243d3SAndroid Build Coastguard Worker ///
641*bed243d3SAndroid Build Coastguard Worker /// \param __a
642*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing one of the source operands.
643*bed243d3SAndroid Build Coastguard Worker /// \param __b
644*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing one of the source operands.
645*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [4 x double] containing the bitwise XOR of the
646*bed243d3SAndroid Build Coastguard Worker ///    values between both operands.
647*bed243d3SAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_xor_pd(__m256d __a,__m256d __b)648*bed243d3SAndroid Build Coastguard Worker _mm256_xor_pd(__m256d __a, __m256d __b)
649*bed243d3SAndroid Build Coastguard Worker {
650*bed243d3SAndroid Build Coastguard Worker   return (__m256d)((__v4du)__a ^ (__v4du)__b);
651*bed243d3SAndroid Build Coastguard Worker }
652*bed243d3SAndroid Build Coastguard Worker 
653*bed243d3SAndroid Build Coastguard Worker /// Performs a bitwise XOR of two 256-bit vectors of [8 x float].
654*bed243d3SAndroid Build Coastguard Worker ///
655*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
656*bed243d3SAndroid Build Coastguard Worker ///
657*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VXORPS </c> instruction.
658*bed243d3SAndroid Build Coastguard Worker ///
659*bed243d3SAndroid Build Coastguard Worker /// \param __a
660*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing one of the source operands.
661*bed243d3SAndroid Build Coastguard Worker /// \param __b
662*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing one of the source operands.
663*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] containing the bitwise XOR of the
664*bed243d3SAndroid Build Coastguard Worker ///    values between both operands.
665*bed243d3SAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_xor_ps(__m256 __a,__m256 __b)666*bed243d3SAndroid Build Coastguard Worker _mm256_xor_ps(__m256 __a, __m256 __b)
667*bed243d3SAndroid Build Coastguard Worker {
668*bed243d3SAndroid Build Coastguard Worker   return (__m256)((__v8su)__a ^ (__v8su)__b);
669*bed243d3SAndroid Build Coastguard Worker }
670*bed243d3SAndroid Build Coastguard Worker 
671*bed243d3SAndroid Build Coastguard Worker /* Horizontal arithmetic */
672*bed243d3SAndroid Build Coastguard Worker /// Horizontally adds the adjacent pairs of values contained in two
673*bed243d3SAndroid Build Coastguard Worker ///    256-bit vectors of [4 x double].
674*bed243d3SAndroid Build Coastguard Worker ///
675*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
676*bed243d3SAndroid Build Coastguard Worker ///
677*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VHADDPD </c> instruction.
678*bed243d3SAndroid Build Coastguard Worker ///
679*bed243d3SAndroid Build Coastguard Worker /// \param __a
680*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing one of the source operands.
681*bed243d3SAndroid Build Coastguard Worker ///    The horizontal sums of the values are returned in the even-indexed
682*bed243d3SAndroid Build Coastguard Worker ///    elements of a vector of [4 x double].
683*bed243d3SAndroid Build Coastguard Worker /// \param __b
684*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing one of the source operands.
685*bed243d3SAndroid Build Coastguard Worker ///    The horizontal sums of the values are returned in the odd-indexed
686*bed243d3SAndroid Build Coastguard Worker ///    elements of a vector of [4 x double].
687*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [4 x double] containing the horizontal sums of
688*bed243d3SAndroid Build Coastguard Worker ///    both operands.
689*bed243d3SAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_hadd_pd(__m256d __a,__m256d __b)690*bed243d3SAndroid Build Coastguard Worker _mm256_hadd_pd(__m256d __a, __m256d __b)
691*bed243d3SAndroid Build Coastguard Worker {
692*bed243d3SAndroid Build Coastguard Worker   return (__m256d)__builtin_ia32_haddpd256((__v4df)__a, (__v4df)__b);
693*bed243d3SAndroid Build Coastguard Worker }
694*bed243d3SAndroid Build Coastguard Worker 
695*bed243d3SAndroid Build Coastguard Worker /// Horizontally adds the adjacent pairs of values contained in two
696*bed243d3SAndroid Build Coastguard Worker ///    256-bit vectors of [8 x float].
697*bed243d3SAndroid Build Coastguard Worker ///
698*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
699*bed243d3SAndroid Build Coastguard Worker ///
700*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VHADDPS </c> instruction.
701*bed243d3SAndroid Build Coastguard Worker ///
702*bed243d3SAndroid Build Coastguard Worker /// \param __a
703*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing one of the source operands.
704*bed243d3SAndroid Build Coastguard Worker ///    The horizontal sums of the values are returned in the elements with
705*bed243d3SAndroid Build Coastguard Worker ///    index 0, 1, 4, 5 of a vector of [8 x float].
706*bed243d3SAndroid Build Coastguard Worker /// \param __b
707*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing one of the source operands.
708*bed243d3SAndroid Build Coastguard Worker ///    The horizontal sums of the values are returned in the elements with
709*bed243d3SAndroid Build Coastguard Worker ///    index 2, 3, 6, 7 of a vector of [8 x float].
710*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] containing the horizontal sums of
711*bed243d3SAndroid Build Coastguard Worker ///    both operands.
712*bed243d3SAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_hadd_ps(__m256 __a,__m256 __b)713*bed243d3SAndroid Build Coastguard Worker _mm256_hadd_ps(__m256 __a, __m256 __b)
714*bed243d3SAndroid Build Coastguard Worker {
715*bed243d3SAndroid Build Coastguard Worker   return (__m256)__builtin_ia32_haddps256((__v8sf)__a, (__v8sf)__b);
716*bed243d3SAndroid Build Coastguard Worker }
717*bed243d3SAndroid Build Coastguard Worker 
718*bed243d3SAndroid Build Coastguard Worker /// Horizontally subtracts the adjacent pairs of values contained in two
719*bed243d3SAndroid Build Coastguard Worker ///    256-bit vectors of [4 x double].
720*bed243d3SAndroid Build Coastguard Worker ///
721*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
722*bed243d3SAndroid Build Coastguard Worker ///
723*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VHSUBPD </c> instruction.
724*bed243d3SAndroid Build Coastguard Worker ///
725*bed243d3SAndroid Build Coastguard Worker /// \param __a
726*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing one of the source operands.
727*bed243d3SAndroid Build Coastguard Worker ///    The horizontal differences between the values are returned in the
728*bed243d3SAndroid Build Coastguard Worker ///    even-indexed elements of a vector of [4 x double].
729*bed243d3SAndroid Build Coastguard Worker /// \param __b
730*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing one of the source operands.
731*bed243d3SAndroid Build Coastguard Worker ///    The horizontal differences between the values are returned in the
732*bed243d3SAndroid Build Coastguard Worker ///    odd-indexed elements of a vector of [4 x double].
733*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [4 x double] containing the horizontal
734*bed243d3SAndroid Build Coastguard Worker ///    differences of both operands.
735*bed243d3SAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_hsub_pd(__m256d __a,__m256d __b)736*bed243d3SAndroid Build Coastguard Worker _mm256_hsub_pd(__m256d __a, __m256d __b)
737*bed243d3SAndroid Build Coastguard Worker {
738*bed243d3SAndroid Build Coastguard Worker   return (__m256d)__builtin_ia32_hsubpd256((__v4df)__a, (__v4df)__b);
739*bed243d3SAndroid Build Coastguard Worker }
740*bed243d3SAndroid Build Coastguard Worker 
741*bed243d3SAndroid Build Coastguard Worker /// Horizontally subtracts the adjacent pairs of values contained in two
742*bed243d3SAndroid Build Coastguard Worker ///    256-bit vectors of [8 x float].
743*bed243d3SAndroid Build Coastguard Worker ///
744*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
745*bed243d3SAndroid Build Coastguard Worker ///
746*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VHSUBPS </c> instruction.
747*bed243d3SAndroid Build Coastguard Worker ///
748*bed243d3SAndroid Build Coastguard Worker /// \param __a
749*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing one of the source operands.
750*bed243d3SAndroid Build Coastguard Worker ///    The horizontal differences between the values are returned in the
751*bed243d3SAndroid Build Coastguard Worker ///    elements with index 0, 1, 4, 5 of a vector of [8 x float].
752*bed243d3SAndroid Build Coastguard Worker /// \param __b
753*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing one of the source operands.
754*bed243d3SAndroid Build Coastguard Worker ///    The horizontal differences between the values are returned in the
755*bed243d3SAndroid Build Coastguard Worker ///    elements with index 2, 3, 6, 7 of a vector of [8 x float].
756*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] containing the horizontal
757*bed243d3SAndroid Build Coastguard Worker ///    differences of both operands.
758*bed243d3SAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_hsub_ps(__m256 __a,__m256 __b)759*bed243d3SAndroid Build Coastguard Worker _mm256_hsub_ps(__m256 __a, __m256 __b)
760*bed243d3SAndroid Build Coastguard Worker {
761*bed243d3SAndroid Build Coastguard Worker   return (__m256)__builtin_ia32_hsubps256((__v8sf)__a, (__v8sf)__b);
762*bed243d3SAndroid Build Coastguard Worker }
763*bed243d3SAndroid Build Coastguard Worker 
764*bed243d3SAndroid Build Coastguard Worker /* Vector permutations */
765*bed243d3SAndroid Build Coastguard Worker /// Copies the values in a 128-bit vector of [2 x double] as specified
766*bed243d3SAndroid Build Coastguard Worker ///    by the 128-bit integer vector operand.
767*bed243d3SAndroid Build Coastguard Worker ///
768*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
769*bed243d3SAndroid Build Coastguard Worker ///
770*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPERMILPD </c> instruction.
771*bed243d3SAndroid Build Coastguard Worker ///
772*bed243d3SAndroid Build Coastguard Worker /// \param __a
773*bed243d3SAndroid Build Coastguard Worker ///    A 128-bit vector of [2 x double].
774*bed243d3SAndroid Build Coastguard Worker /// \param __c
775*bed243d3SAndroid Build Coastguard Worker ///    A 128-bit integer vector operand specifying how the values are to be
776*bed243d3SAndroid Build Coastguard Worker ///    copied. \n
777*bed243d3SAndroid Build Coastguard Worker ///    Bit [1]: \n
778*bed243d3SAndroid Build Coastguard Worker ///      0: Bits [63:0] of the source are copied to bits [63:0] of the returned
779*bed243d3SAndroid Build Coastguard Worker ///         vector. \n
780*bed243d3SAndroid Build Coastguard Worker ///      1: Bits [127:64] of the source are copied to bits [63:0] of the
781*bed243d3SAndroid Build Coastguard Worker ///         returned vector. \n
782*bed243d3SAndroid Build Coastguard Worker ///    Bit [65]: \n
783*bed243d3SAndroid Build Coastguard Worker ///      0: Bits [63:0] of the source are copied to bits [127:64] of the
784*bed243d3SAndroid Build Coastguard Worker ///         returned vector. \n
785*bed243d3SAndroid Build Coastguard Worker ///      1: Bits [127:64] of the source are copied to bits [127:64] of the
786*bed243d3SAndroid Build Coastguard Worker ///         returned vector.
787*bed243d3SAndroid Build Coastguard Worker /// \returns A 128-bit vector of [2 x double] containing the copied values.
788*bed243d3SAndroid Build Coastguard Worker static __inline __m128d __DEFAULT_FN_ATTRS128
_mm_permutevar_pd(__m128d __a,__m128i __c)789*bed243d3SAndroid Build Coastguard Worker _mm_permutevar_pd(__m128d __a, __m128i __c)
790*bed243d3SAndroid Build Coastguard Worker {
791*bed243d3SAndroid Build Coastguard Worker   return (__m128d)__builtin_ia32_vpermilvarpd((__v2df)__a, (__v2di)__c);
792*bed243d3SAndroid Build Coastguard Worker }
793*bed243d3SAndroid Build Coastguard Worker 
794*bed243d3SAndroid Build Coastguard Worker /// Copies the values in a 256-bit vector of [4 x double] as specified
795*bed243d3SAndroid Build Coastguard Worker ///    by the 256-bit integer vector operand.
796*bed243d3SAndroid Build Coastguard Worker ///
797*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
798*bed243d3SAndroid Build Coastguard Worker ///
799*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPERMILPD </c> instruction.
800*bed243d3SAndroid Build Coastguard Worker ///
801*bed243d3SAndroid Build Coastguard Worker /// \param __a
802*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double].
803*bed243d3SAndroid Build Coastguard Worker /// \param __c
804*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit integer vector operand specifying how the values are to be
805*bed243d3SAndroid Build Coastguard Worker ///    copied. \n
806*bed243d3SAndroid Build Coastguard Worker ///    Bit [1]: \n
807*bed243d3SAndroid Build Coastguard Worker ///      0: Bits [63:0] of the source are copied to bits [63:0] of the returned
808*bed243d3SAndroid Build Coastguard Worker ///         vector. \n
809*bed243d3SAndroid Build Coastguard Worker ///      1: Bits [127:64] of the source are copied to bits [63:0] of the
810*bed243d3SAndroid Build Coastguard Worker ///         returned vector. \n
811*bed243d3SAndroid Build Coastguard Worker ///    Bit [65]: \n
812*bed243d3SAndroid Build Coastguard Worker ///      0: Bits [63:0] of the source are copied to bits [127:64] of the
813*bed243d3SAndroid Build Coastguard Worker ///         returned vector. \n
814*bed243d3SAndroid Build Coastguard Worker ///      1: Bits [127:64] of the source are copied to bits [127:64] of the
815*bed243d3SAndroid Build Coastguard Worker ///         returned vector. \n
816*bed243d3SAndroid Build Coastguard Worker ///    Bit [129]: \n
817*bed243d3SAndroid Build Coastguard Worker ///      0: Bits [191:128] of the source are copied to bits [191:128] of the
818*bed243d3SAndroid Build Coastguard Worker ///         returned vector. \n
819*bed243d3SAndroid Build Coastguard Worker ///      1: Bits [255:192] of the source are copied to bits [191:128] of the
820*bed243d3SAndroid Build Coastguard Worker ///         returned vector. \n
821*bed243d3SAndroid Build Coastguard Worker ///    Bit [193]: \n
822*bed243d3SAndroid Build Coastguard Worker ///      0: Bits [191:128] of the source are copied to bits [255:192] of the
823*bed243d3SAndroid Build Coastguard Worker ///         returned vector. \n
824*bed243d3SAndroid Build Coastguard Worker ///      1: Bits [255:192] of the source are copied to bits [255:192] of the
825*bed243d3SAndroid Build Coastguard Worker ///    returned vector.
826*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [4 x double] containing the copied values.
827*bed243d3SAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_permutevar_pd(__m256d __a,__m256i __c)828*bed243d3SAndroid Build Coastguard Worker _mm256_permutevar_pd(__m256d __a, __m256i __c)
829*bed243d3SAndroid Build Coastguard Worker {
830*bed243d3SAndroid Build Coastguard Worker   return (__m256d)__builtin_ia32_vpermilvarpd256((__v4df)__a, (__v4di)__c);
831*bed243d3SAndroid Build Coastguard Worker }
832*bed243d3SAndroid Build Coastguard Worker 
833*bed243d3SAndroid Build Coastguard Worker /// Copies the values stored in a 128-bit vector of [4 x float] as
834*bed243d3SAndroid Build Coastguard Worker ///    specified by the 128-bit integer vector operand.
835*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
836*bed243d3SAndroid Build Coastguard Worker ///
837*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPERMILPS </c> instruction.
838*bed243d3SAndroid Build Coastguard Worker ///
839*bed243d3SAndroid Build Coastguard Worker /// \param __a
840*bed243d3SAndroid Build Coastguard Worker ///    A 128-bit vector of [4 x float].
841*bed243d3SAndroid Build Coastguard Worker /// \param __c
842*bed243d3SAndroid Build Coastguard Worker ///    A 128-bit integer vector operand specifying how the values are to be
843*bed243d3SAndroid Build Coastguard Worker ///    copied. \n
844*bed243d3SAndroid Build Coastguard Worker ///    Bits [1:0]: \n
845*bed243d3SAndroid Build Coastguard Worker ///      00: Bits [31:0] of the source are copied to bits [31:0] of the
846*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
847*bed243d3SAndroid Build Coastguard Worker ///      01: Bits [63:32] of the source are copied to bits [31:0] of the
848*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
849*bed243d3SAndroid Build Coastguard Worker ///      10: Bits [95:64] of the source are copied to bits [31:0] of the
850*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
851*bed243d3SAndroid Build Coastguard Worker ///      11: Bits [127:96] of the source are copied to bits [31:0] of the
852*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
853*bed243d3SAndroid Build Coastguard Worker ///    Bits [33:32]: \n
854*bed243d3SAndroid Build Coastguard Worker ///      00: Bits [31:0] of the source are copied to bits [63:32] of the
855*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
856*bed243d3SAndroid Build Coastguard Worker ///      01: Bits [63:32] of the source are copied to bits [63:32] of the
857*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
858*bed243d3SAndroid Build Coastguard Worker ///      10: Bits [95:64] of the source are copied to bits [63:32] of the
859*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
860*bed243d3SAndroid Build Coastguard Worker ///      11: Bits [127:96] of the source are copied to bits [63:32] of the
861*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
862*bed243d3SAndroid Build Coastguard Worker ///    Bits [65:64]: \n
863*bed243d3SAndroid Build Coastguard Worker ///      00: Bits [31:0] of the source are copied to bits [95:64] of the
864*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
865*bed243d3SAndroid Build Coastguard Worker ///      01: Bits [63:32] of the source are copied to bits [95:64] of the
866*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
867*bed243d3SAndroid Build Coastguard Worker ///      10: Bits [95:64] of the source are copied to bits [95:64] of the
868*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
869*bed243d3SAndroid Build Coastguard Worker ///      11: Bits [127:96] of the source are copied to bits [95:64] of the
870*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
871*bed243d3SAndroid Build Coastguard Worker ///    Bits [97:96]: \n
872*bed243d3SAndroid Build Coastguard Worker ///      00: Bits [31:0] of the source are copied to bits [127:96] of the
873*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
874*bed243d3SAndroid Build Coastguard Worker ///      01: Bits [63:32] of the source are copied to bits [127:96] of the
875*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
876*bed243d3SAndroid Build Coastguard Worker ///      10: Bits [95:64] of the source are copied to bits [127:96] of the
877*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
878*bed243d3SAndroid Build Coastguard Worker ///      11: Bits [127:96] of the source are copied to bits [127:96] of the
879*bed243d3SAndroid Build Coastguard Worker ///          returned vector.
880*bed243d3SAndroid Build Coastguard Worker /// \returns A 128-bit vector of [4 x float] containing the copied values.
881*bed243d3SAndroid Build Coastguard Worker static __inline __m128 __DEFAULT_FN_ATTRS128
_mm_permutevar_ps(__m128 __a,__m128i __c)882*bed243d3SAndroid Build Coastguard Worker _mm_permutevar_ps(__m128 __a, __m128i __c)
883*bed243d3SAndroid Build Coastguard Worker {
884*bed243d3SAndroid Build Coastguard Worker   return (__m128)__builtin_ia32_vpermilvarps((__v4sf)__a, (__v4si)__c);
885*bed243d3SAndroid Build Coastguard Worker }
886*bed243d3SAndroid Build Coastguard Worker 
887*bed243d3SAndroid Build Coastguard Worker /// Copies the values stored in a 256-bit vector of [8 x float] as
888*bed243d3SAndroid Build Coastguard Worker ///    specified by the 256-bit integer vector operand.
889*bed243d3SAndroid Build Coastguard Worker ///
890*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
891*bed243d3SAndroid Build Coastguard Worker ///
892*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPERMILPS </c> instruction.
893*bed243d3SAndroid Build Coastguard Worker ///
894*bed243d3SAndroid Build Coastguard Worker /// \param __a
895*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float].
896*bed243d3SAndroid Build Coastguard Worker /// \param __c
897*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit integer vector operand specifying how the values are to be
898*bed243d3SAndroid Build Coastguard Worker ///    copied. \n
899*bed243d3SAndroid Build Coastguard Worker ///    Bits [1:0]: \n
900*bed243d3SAndroid Build Coastguard Worker ///      00: Bits [31:0] of the source are copied to bits [31:0] of the
901*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
902*bed243d3SAndroid Build Coastguard Worker ///      01: Bits [63:32] of the source are copied to bits [31:0] of the
903*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
904*bed243d3SAndroid Build Coastguard Worker ///      10: Bits [95:64] of the source are copied to bits [31:0] of the
905*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
906*bed243d3SAndroid Build Coastguard Worker ///      11: Bits [127:96] of the source are copied to bits [31:0] of the
907*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
908*bed243d3SAndroid Build Coastguard Worker ///    Bits [33:32]: \n
909*bed243d3SAndroid Build Coastguard Worker ///      00: Bits [31:0] of the source are copied to bits [63:32] of the
910*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
911*bed243d3SAndroid Build Coastguard Worker ///      01: Bits [63:32] of the source are copied to bits [63:32] of the
912*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
913*bed243d3SAndroid Build Coastguard Worker ///      10: Bits [95:64] of the source are copied to bits [63:32] of the
914*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
915*bed243d3SAndroid Build Coastguard Worker ///      11: Bits [127:96] of the source are copied to bits [63:32] of the
916*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
917*bed243d3SAndroid Build Coastguard Worker ///    Bits [65:64]: \n
918*bed243d3SAndroid Build Coastguard Worker ///      00: Bits [31:0] of the source are copied to bits [95:64] of the
919*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
920*bed243d3SAndroid Build Coastguard Worker ///      01: Bits [63:32] of the source are copied to bits [95:64] of the
921*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
922*bed243d3SAndroid Build Coastguard Worker ///      10: Bits [95:64] of the source are copied to bits [95:64] of the
923*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
924*bed243d3SAndroid Build Coastguard Worker ///      11: Bits [127:96] of the source are copied to bits [95:64] of the
925*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
926*bed243d3SAndroid Build Coastguard Worker ///    Bits [97:96]: \n
927*bed243d3SAndroid Build Coastguard Worker ///      00: Bits [31:0] of the source are copied to bits [127:96] of the
928*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
929*bed243d3SAndroid Build Coastguard Worker ///      01: Bits [63:32] of the source are copied to bits [127:96] of the
930*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
931*bed243d3SAndroid Build Coastguard Worker ///      10: Bits [95:64] of the source are copied to bits [127:96] of the
932*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
933*bed243d3SAndroid Build Coastguard Worker ///      11: Bits [127:96] of the source are copied to bits [127:96] of the
934*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
935*bed243d3SAndroid Build Coastguard Worker ///    Bits [129:128]: \n
936*bed243d3SAndroid Build Coastguard Worker ///      00: Bits [159:128] of the source are copied to bits [159:128] of the
937*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
938*bed243d3SAndroid Build Coastguard Worker ///      01: Bits [191:160] of the source are copied to bits [159:128] of the
939*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
940*bed243d3SAndroid Build Coastguard Worker ///      10: Bits [223:192] of the source are copied to bits [159:128] of the
941*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
942*bed243d3SAndroid Build Coastguard Worker ///      11: Bits [255:224] of the source are copied to bits [159:128] of the
943*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
944*bed243d3SAndroid Build Coastguard Worker ///    Bits [161:160]: \n
945*bed243d3SAndroid Build Coastguard Worker ///      00: Bits [159:128] of the source are copied to bits [191:160] of the
946*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
947*bed243d3SAndroid Build Coastguard Worker ///      01: Bits [191:160] of the source are copied to bits [191:160] of the
948*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
949*bed243d3SAndroid Build Coastguard Worker ///      10: Bits [223:192] of the source are copied to bits [191:160] of the
950*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
951*bed243d3SAndroid Build Coastguard Worker ///      11: Bits [255:224] of the source are copied to bits [191:160] of the
952*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
953*bed243d3SAndroid Build Coastguard Worker ///    Bits [193:192]: \n
954*bed243d3SAndroid Build Coastguard Worker ///      00: Bits [159:128] of the source are copied to bits [223:192] of the
955*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
956*bed243d3SAndroid Build Coastguard Worker ///      01: Bits [191:160] of the source are copied to bits [223:192] of the
957*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
958*bed243d3SAndroid Build Coastguard Worker ///      10: Bits [223:192] of the source are copied to bits [223:192] of the
959*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
960*bed243d3SAndroid Build Coastguard Worker ///      11: Bits [255:224] of the source are copied to bits [223:192] of the
961*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
962*bed243d3SAndroid Build Coastguard Worker ///    Bits [225:224]: \n
963*bed243d3SAndroid Build Coastguard Worker ///      00: Bits [159:128] of the source are copied to bits [255:224] of the
964*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
965*bed243d3SAndroid Build Coastguard Worker ///      01: Bits [191:160] of the source are copied to bits [255:224] of the
966*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
967*bed243d3SAndroid Build Coastguard Worker ///      10: Bits [223:192] of the source are copied to bits [255:224] of the
968*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
969*bed243d3SAndroid Build Coastguard Worker ///      11: Bits [255:224] of the source are copied to bits [255:224] of the
970*bed243d3SAndroid Build Coastguard Worker ///          returned vector.
971*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] containing the copied values.
972*bed243d3SAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_permutevar_ps(__m256 __a,__m256i __c)973*bed243d3SAndroid Build Coastguard Worker _mm256_permutevar_ps(__m256 __a, __m256i __c)
974*bed243d3SAndroid Build Coastguard Worker {
975*bed243d3SAndroid Build Coastguard Worker   return (__m256)__builtin_ia32_vpermilvarps256((__v8sf)__a, (__v8si)__c);
976*bed243d3SAndroid Build Coastguard Worker }
977*bed243d3SAndroid Build Coastguard Worker 
/// Copies the values in a 128-bit vector of [2 x double] as specified
///    by the immediate integer operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128d _mm_permute_pd(__m128d A, const int C);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPERMILPD </c> instruction.
///
/// \param A
///    A 128-bit vector of [2 x double].
/// \param C
///    An immediate integer operand specifying how the values are to be
///    copied. \n
///    Bit [0]: \n
///      0: Bits [63:0] of the source are copied to bits [63:0] of the returned
///         vector. \n
///      1: Bits [127:64] of the source are copied to bits [63:0] of the
///         returned vector. \n
///    Bit [1]: \n
///      0: Bits [63:0] of the source are copied to bits [127:64] of the
///         returned vector. \n
///      1: Bits [127:64] of the source are copied to bits [127:64] of the
///         returned vector.
/// \returns A 128-bit vector of [2 x double] containing the copied values.
#define _mm_permute_pd(A, C) \
  ((__m128d)__builtin_ia32_vpermilpd((__v2df)(__m128d)(A), (int)(C)))
1007*bed243d3SAndroid Build Coastguard Worker 
/// Copies the values in a 256-bit vector of [4 x double] as specified by
///    the immediate integer operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m256d _mm256_permute_pd(__m256d A, const int C);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPERMILPD </c> instruction.
///
/// \param A
///    A 256-bit vector of [4 x double].
/// \param C
///    An immediate integer operand specifying how the values are to be
///    copied. \n
///    Bit [0]: \n
///      0: Bits [63:0] of the source are copied to bits [63:0] of the returned
///         vector. \n
///      1: Bits [127:64] of the source are copied to bits [63:0] of the
///         returned vector. \n
///    Bit [1]: \n
///      0: Bits [63:0] of the source are copied to bits [127:64] of the
///         returned vector. \n
///      1: Bits [127:64] of the source are copied to bits [127:64] of the
///         returned vector. \n
///    Bit [2]: \n
///      0: Bits [191:128] of the source are copied to bits [191:128] of the
///         returned vector. \n
///      1: Bits [255:192] of the source are copied to bits [191:128] of the
///         returned vector. \n
///    Bit [3]: \n
///      0: Bits [191:128] of the source are copied to bits [255:192] of the
///         returned vector. \n
///      1: Bits [255:192] of the source are copied to bits [255:192] of the
///         returned vector.
/// \returns A 256-bit vector of [4 x double] containing the copied values.
#define _mm256_permute_pd(A, C) \
  ((__m256d)__builtin_ia32_vpermilpd256((__v4df)(__m256d)(A), (int)(C)))
1047*bed243d3SAndroid Build Coastguard Worker 
/// Copies the values in a 128-bit vector of [4 x float] as specified by
///    the immediate integer operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128 _mm_permute_ps(__m128 A, const int C);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPERMILPS </c> instruction.
///
/// \param A
///    A 128-bit vector of [4 x float].
/// \param C
///    An immediate integer operand specifying how the values are to be
///    copied. \n
///    Bits [1:0]: \n
///      00: Bits [31:0] of the source are copied to bits [31:0] of the
///          returned vector. \n
///      01: Bits [63:32] of the source are copied to bits [31:0] of the
///          returned vector. \n
///      10: Bits [95:64] of the source are copied to bits [31:0] of the
///          returned vector. \n
///      11: Bits [127:96] of the source are copied to bits [31:0] of the
///          returned vector. \n
///    Bits [3:2]: \n
///      00: Bits [31:0] of the source are copied to bits [63:32] of the
///          returned vector. \n
///      01: Bits [63:32] of the source are copied to bits [63:32] of the
///          returned vector. \n
///      10: Bits [95:64] of the source are copied to bits [63:32] of the
///          returned vector. \n
///      11: Bits [127:96] of the source are copied to bits [63:32] of the
///          returned vector. \n
///    Bits [5:4]: \n
///      00: Bits [31:0] of the source are copied to bits [95:64] of the
///          returned vector. \n
///      01: Bits [63:32] of the source are copied to bits [95:64] of the
///          returned vector. \n
///      10: Bits [95:64] of the source are copied to bits [95:64] of the
///          returned vector. \n
///      11: Bits [127:96] of the source are copied to bits [95:64] of the
///          returned vector. \n
///    Bits [7:6]: \n
///      00: Bits [31:0] of the source are copied to bits [127:96] of the
///          returned vector. \n
///      01: Bits [63:32] of the source are copied to bits [127:96] of the
///          returned vector. \n
///      10: Bits [95:64] of the source are copied to bits [127:96] of the
///          returned vector. \n
///      11: Bits [127:96] of the source are copied to bits [127:96] of the
///          returned vector.
/// \returns A 128-bit vector of [4 x float] containing the copied values.
#define _mm_permute_ps(A, C) \
  ((__m128)__builtin_ia32_vpermilps((__v4sf)(__m128)(A), (int)(C)))
1103*bed243d3SAndroid Build Coastguard Worker 
1104*bed243d3SAndroid Build Coastguard Worker /// Copies the values in a 256-bit vector of [8 x float] as specified by
1105*bed243d3SAndroid Build Coastguard Worker ///    the immediate integer operand.
1106*bed243d3SAndroid Build Coastguard Worker ///
1107*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
1108*bed243d3SAndroid Build Coastguard Worker ///
1109*bed243d3SAndroid Build Coastguard Worker /// \code
1110*bed243d3SAndroid Build Coastguard Worker /// __m256 _mm256_permute_ps(__m256 A, const int C);
1111*bed243d3SAndroid Build Coastguard Worker /// \endcode
1112*bed243d3SAndroid Build Coastguard Worker ///
1113*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPERMILPS </c> instruction.
1114*bed243d3SAndroid Build Coastguard Worker ///
1115*bed243d3SAndroid Build Coastguard Worker /// \param A
1116*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float].
1117*bed243d3SAndroid Build Coastguard Worker /// \param C
1118*bed243d3SAndroid Build Coastguard Worker ///    An immediate integer operand specifying how the values are to be
1119*bed243d3SAndroid Build Coastguard Worker ///    copied. \n
1120*bed243d3SAndroid Build Coastguard Worker ///    Bits [1:0]: \n
1121*bed243d3SAndroid Build Coastguard Worker ///      00: Bits [31:0] of the source are copied to bits [31:0] of the
1122*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
1123*bed243d3SAndroid Build Coastguard Worker ///      01: Bits [63:32] of the source are copied to bits [31:0] of the
1124*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
1125*bed243d3SAndroid Build Coastguard Worker ///      10: Bits [95:64] of the source are copied to bits [31:0] of the
1126*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
1127*bed243d3SAndroid Build Coastguard Worker ///      11: Bits [127:96] of the source are copied to bits [31:0] of the
1128*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
1129*bed243d3SAndroid Build Coastguard Worker ///    Bits [3:2]: \n
1130*bed243d3SAndroid Build Coastguard Worker ///      00: Bits [31:0] of the source are copied to bits [63:32] of the
1131*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
1132*bed243d3SAndroid Build Coastguard Worker ///      01: Bits [63:32] of the source are copied to bits [63:32] of the
1133*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
1134*bed243d3SAndroid Build Coastguard Worker ///      10: Bits [95:64] of the source are copied to bits [63:32] of the
1135*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
1136*bed243d3SAndroid Build Coastguard Worker ///      11: Bits [127:96] of the source are copied to bits [63:32] of the
1137*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
1138*bed243d3SAndroid Build Coastguard Worker ///    Bits [5:4]: \n
1139*bed243d3SAndroid Build Coastguard Worker ///      00: Bits [31:0] of the source are copied to bits [95:64] of the
1140*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
1141*bed243d3SAndroid Build Coastguard Worker ///      01: Bits [63:32] of the source are copied to bits [95:64] of the
1142*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
1143*bed243d3SAndroid Build Coastguard Worker ///      10: Bits [95:64] of the source are copied to bits [95:64] of the
1144*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
1145*bed243d3SAndroid Build Coastguard Worker ///      11: Bits [127:96] of the source are copied to bits [95:64] of the
1146*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
1147*bed243d3SAndroid Build Coastguard Worker ///    Bits [7:6]: \n
1148*bed243d3SAndroid Build Coastguard Worker ///      00: Bits [31:0] of the source are copied to bits [127:96] of the
1149*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
1150*bed243d3SAndroid Build Coastguard Worker ///      01: Bits [63:32] of the source are copied to bits [127:96] of the
1151*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
1152*bed243d3SAndroid Build Coastguard Worker ///      10: Bits [95:64] of the source are copied to bits [127:96] of the
1153*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
1154*bed243d3SAndroid Build Coastguard Worker ///      11: Bits [127:96] of the source are copied to bits [127:96] of the
1155*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
1156*bed243d3SAndroid Build Coastguard Worker ///    Bits [1:0]: \n
1157*bed243d3SAndroid Build Coastguard Worker ///      00: Bits [159:128] of the source are copied to bits [159:128] of the
1158*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
1159*bed243d3SAndroid Build Coastguard Worker ///      01: Bits [191:160] of the source are copied to bits [159:128] of the
1160*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
1161*bed243d3SAndroid Build Coastguard Worker ///      10: Bits [223:192] of the source are copied to bits [159:128] of the
1162*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
1163*bed243d3SAndroid Build Coastguard Worker ///      11: Bits [255:224] of the source are copied to bits [159:128] of the
1164*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
1165*bed243d3SAndroid Build Coastguard Worker ///    Bits [3:2]: \n
1166*bed243d3SAndroid Build Coastguard Worker ///      00: Bits [159:128] of the source are copied to bits [191:160] of the
1167*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
1168*bed243d3SAndroid Build Coastguard Worker ///      01: Bits [191:160] of the source are copied to bits [191:160] of the
1169*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
1170*bed243d3SAndroid Build Coastguard Worker ///      10: Bits [223:192] of the source are copied to bits [191:160] of the
1171*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
1172*bed243d3SAndroid Build Coastguard Worker ///      11: Bits [255:224] of the source are copied to bits [191:160] of the
1173*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
1174*bed243d3SAndroid Build Coastguard Worker ///    Bits [5:4]: \n
1175*bed243d3SAndroid Build Coastguard Worker ///      00: Bits [159:128] of the source are copied to bits [223:192] of the
1176*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
1177*bed243d3SAndroid Build Coastguard Worker ///      01: Bits [191:160] of the source are copied to bits [223:192] of the
1178*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
1179*bed243d3SAndroid Build Coastguard Worker ///      10: Bits [223:192] of the source are copied to bits [223:192] of the
1180*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
1181*bed243d3SAndroid Build Coastguard Worker ///      11: Bits [255:224] of the source are copied to bits [223:192] of the
1182*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
1183*bed243d3SAndroid Build Coastguard Worker ///    Bits [7:6]: \n
1184*bed243d3SAndroid Build Coastguard Worker ///      00: Bits [159:128] of the source are copied to bits [255:224] of the
1185*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
1186*bed243d3SAndroid Build Coastguard Worker ///      01: Bits [191:160] of the source are copied to bits [255:224] of the
1187*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
1188*bed243d3SAndroid Build Coastguard Worker ///      10: Bits [223:192] of the source are copied to bits [255:224] of the
1189*bed243d3SAndroid Build Coastguard Worker ///          returned vector. \n
1190*bed243d3SAndroid Build Coastguard Worker ///      11: Bits [255:224] of the source are copied to bits [255:224] of the
1191*bed243d3SAndroid Build Coastguard Worker ///          returned vector.
1192*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] containing the copied values.
#define _mm256_permute_ps(A, C) \
  ((__m256)__builtin_ia32_vpermilps256((__v8sf)(__m256)(A), (int)(C)))
1195*bed243d3SAndroid Build Coastguard Worker 
/// Permutes 128-bit data values stored in two 256-bit vectors of
///    [4 x double], as specified by the immediate integer operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m256d _mm256_permute2f128_pd(__m256d V1, __m256d V2, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPERM2F128 </c> instruction.
///
/// \param V1
///    A 256-bit vector of [4 x double].
/// \param V2
///    A 256-bit vector of [4 x double].
/// \param M
///    An immediate integer operand specifying how the values are to be
///    permuted. \n
///    Bits [1:0]: \n
///      00: Bits [127:0] of operand \a V1 are copied to bits [127:0] of the
///          destination. \n
///      01: Bits [255:128] of operand \a V1 are copied to bits [127:0] of the
///          destination. \n
///      10: Bits [127:0] of operand \a V2 are copied to bits [127:0] of the
///          destination. \n
///      11: Bits [255:128] of operand \a V2 are copied to bits [127:0] of the
///          destination. \n
///    Bits [5:4]: \n
///      00: Bits [127:0] of operand \a V1 are copied to bits [255:128] of the
///          destination. \n
///      01: Bits [255:128] of operand \a V1 are copied to bits [255:128] of the
///          destination. \n
///      10: Bits [127:0] of operand \a V2 are copied to bits [255:128] of the
///          destination. \n
///      11: Bits [255:128] of operand \a V2 are copied to bits [255:128] of the
///          destination.
/// \returns A 256-bit vector of [4 x double] containing the copied values.
#define _mm256_permute2f128_pd(V1, V2, M) \
  ((__m256d)__builtin_ia32_vperm2f128_pd256((__v4df)(__m256d)(V1), \
                                            (__v4df)(__m256d)(V2), (int)(M)))
1236*bed243d3SAndroid Build Coastguard Worker 
/// Permutes 128-bit data values stored in two 256-bit vectors of
///    [8 x float], as specified by the immediate integer operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m256 _mm256_permute2f128_ps(__m256 V1, __m256 V2, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPERM2F128 </c> instruction.
///
/// \param V1
///    A 256-bit vector of [8 x float].
/// \param V2
///    A 256-bit vector of [8 x float].
/// \param M
///    An immediate integer operand specifying how the values are to be
///    permuted. \n
///    Bits [1:0]: \n
///      00: Bits [127:0] of operand \a V1 are copied to bits [127:0] of the
///          destination. \n
///      01: Bits [255:128] of operand \a V1 are copied to bits [127:0] of the
///          destination. \n
///      10: Bits [127:0] of operand \a V2 are copied to bits [127:0] of the
///          destination. \n
///      11: Bits [255:128] of operand \a V2 are copied to bits [127:0] of the
///          destination. \n
///    Bits [5:4]: \n
///      00: Bits [127:0] of operand \a V1 are copied to bits [255:128] of the
///          destination. \n
///      01: Bits [255:128] of operand \a V1 are copied to bits [255:128] of the
///          destination. \n
///      10: Bits [127:0] of operand \a V2 are copied to bits [255:128] of the
///          destination. \n
///      11: Bits [255:128] of operand \a V2 are copied to bits [255:128] of the
///          destination.
/// \returns A 256-bit vector of [8 x float] containing the copied values.
#define _mm256_permute2f128_ps(V1, V2, M) \
  ((__m256)__builtin_ia32_vperm2f128_ps256((__v8sf)(__m256)(V1), \
                                           (__v8sf)(__m256)(V2), (int)(M)))
1277*bed243d3SAndroid Build Coastguard Worker 
/// Permutes 128-bit data values stored in two 256-bit integer vectors,
///    as specified by the immediate integer operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m256i _mm256_permute2f128_si256(__m256i V1, __m256i V2, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPERM2F128 </c> instruction.
///
/// \param V1
///    A 256-bit integer vector.
/// \param V2
///    A 256-bit integer vector.
/// \param M
///    An immediate integer operand specifying how the values are to be
///    copied. \n
///    Bits [1:0]: \n
///      00: Bits [127:0] of operand \a V1 are copied to bits [127:0] of the
///          destination. \n
///      01: Bits [255:128] of operand \a V1 are copied to bits [127:0] of the
///          destination. \n
///      10: Bits [127:0] of operand \a V2 are copied to bits [127:0] of the
///          destination. \n
///      11: Bits [255:128] of operand \a V2 are copied to bits [127:0] of the
///          destination. \n
///    Bits [5:4]: \n
///      00: Bits [127:0] of operand \a V1 are copied to bits [255:128] of the
///          destination. \n
///      01: Bits [255:128] of operand \a V1 are copied to bits [255:128] of the
///          destination. \n
///      10: Bits [127:0] of operand \a V2 are copied to bits [255:128] of the
///          destination. \n
///      11: Bits [255:128] of operand \a V2 are copied to bits [255:128] of the
///          destination.
/// \returns A 256-bit integer vector containing the copied values.
#define _mm256_permute2f128_si256(V1, V2, M) \
  ((__m256i)__builtin_ia32_vperm2f128_si256((__v8si)(__m256i)(V1), \
                                            (__v8si)(__m256i)(V2), (int)(M)))
1317*bed243d3SAndroid Build Coastguard Worker 
/* Vector Blend */
/// Merges 64-bit double-precision data values stored in either of the
///    two 256-bit vectors of [4 x double], as specified by the immediate
///    integer operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m256d _mm256_blend_pd(__m256d V1, __m256d V2, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VBLENDPD </c> instruction.
///
/// \param V1
///    A 256-bit vector of [4 x double].
/// \param V2
///    A 256-bit vector of [4 x double].
/// \param M
///    An immediate integer operand, with mask bits [3:0] specifying how the
///    values are to be copied. The position of the mask bit corresponds to the
///    index of a copied value. When a mask bit is 0, the corresponding 64-bit
///    element in operand \a V1 is copied to the same position in the
///    destination. When a mask bit is 1, the corresponding 64-bit element in
///    operand \a V2 is copied to the same position in the destination.
/// \returns A 256-bit vector of [4 x double] containing the copied values.
#define _mm256_blend_pd(V1, V2, M) \
  ((__m256d)__builtin_ia32_blendpd256((__v4df)(__m256d)(V1), \
                                      (__v4df)(__m256d)(V2), (int)(M)))
1346*bed243d3SAndroid Build Coastguard Worker 
/// Merges 32-bit single-precision data values stored in either of the
///    two 256-bit vectors of [8 x float], as specified by the immediate
///    integer operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m256 _mm256_blend_ps(__m256 V1, __m256 V2, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VBLENDPS </c> instruction.
///
/// \param V1
///    A 256-bit vector of [8 x float].
/// \param V2
///    A 256-bit vector of [8 x float].
/// \param M
///    An immediate integer operand, with mask bits [7:0] specifying how the
///    values are to be copied. The position of the mask bit corresponds to the
///    index of a copied value. When a mask bit is 0, the corresponding 32-bit
///    element in operand \a V1 is copied to the same position in the
///    destination. When a mask bit is 1, the corresponding 32-bit element in
///    operand \a V2 is copied to the same position in the destination.
/// \returns A 256-bit vector of [8 x float] containing the copied values.
#define _mm256_blend_ps(V1, V2, M) \
  ((__m256)__builtin_ia32_blendps256((__v8sf)(__m256)(V1), \
                                     (__v8sf)(__m256)(V2), (int)(M)))
1374*bed243d3SAndroid Build Coastguard Worker 
1375*bed243d3SAndroid Build Coastguard Worker /// Merges 64-bit double-precision data values stored in either of the
1376*bed243d3SAndroid Build Coastguard Worker ///    two 256-bit vectors of [4 x double], as specified by the 256-bit vector
1377*bed243d3SAndroid Build Coastguard Worker ///    operand.
1378*bed243d3SAndroid Build Coastguard Worker ///
1379*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
1380*bed243d3SAndroid Build Coastguard Worker ///
1381*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VBLENDVPD </c> instruction.
1382*bed243d3SAndroid Build Coastguard Worker ///
1383*bed243d3SAndroid Build Coastguard Worker /// \param __a
1384*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double].
1385*bed243d3SAndroid Build Coastguard Worker /// \param __b
1386*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double].
1387*bed243d3SAndroid Build Coastguard Worker /// \param __c
1388*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector operand, with mask bits 255, 191, 127, and 63 specifying
1389*bed243d3SAndroid Build Coastguard Worker ///    how the values are to be copied. The position of the mask bit corresponds
1390*bed243d3SAndroid Build Coastguard Worker ///    to the most significant bit of a copied value. When a mask bit is 0, the
1391*bed243d3SAndroid Build Coastguard Worker ///    corresponding 64-bit element in operand \a __a is copied to the same
1392*bed243d3SAndroid Build Coastguard Worker ///    position in the destination. When a mask bit is 1, the corresponding
1393*bed243d3SAndroid Build Coastguard Worker ///    64-bit element in operand \a __b is copied to the same position in the
1394*bed243d3SAndroid Build Coastguard Worker ///    destination.
1395*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [4 x double] containing the copied values.
1396*bed243d3SAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_blendv_pd(__m256d __a,__m256d __b,__m256d __c)1397*bed243d3SAndroid Build Coastguard Worker _mm256_blendv_pd(__m256d __a, __m256d __b, __m256d __c)
1398*bed243d3SAndroid Build Coastguard Worker {
1399*bed243d3SAndroid Build Coastguard Worker   return (__m256d)__builtin_ia32_blendvpd256(
1400*bed243d3SAndroid Build Coastguard Worker     (__v4df)__a, (__v4df)__b, (__v4df)__c);
1401*bed243d3SAndroid Build Coastguard Worker }
1402*bed243d3SAndroid Build Coastguard Worker 
1403*bed243d3SAndroid Build Coastguard Worker /// Merges 32-bit single-precision data values stored in either of the
1404*bed243d3SAndroid Build Coastguard Worker ///    two 256-bit vectors of [8 x float], as specified by the 256-bit vector
1405*bed243d3SAndroid Build Coastguard Worker ///    operand.
1406*bed243d3SAndroid Build Coastguard Worker ///
1407*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
1408*bed243d3SAndroid Build Coastguard Worker ///
1409*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VBLENDVPS </c> instruction.
1410*bed243d3SAndroid Build Coastguard Worker ///
1411*bed243d3SAndroid Build Coastguard Worker /// \param __a
1412*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float].
1413*bed243d3SAndroid Build Coastguard Worker /// \param __b
1414*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float].
1415*bed243d3SAndroid Build Coastguard Worker /// \param __c
1416*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector operand, with mask bits 255, 223, 191, 159, 127, 95, 63,
1417*bed243d3SAndroid Build Coastguard Worker ///    and 31 specifying how the values are to be copied. The position of the
1418*bed243d3SAndroid Build Coastguard Worker ///    mask bit corresponds to the most significant bit of a copied value. When
1419*bed243d3SAndroid Build Coastguard Worker ///    a mask bit is 0, the corresponding 32-bit element in operand \a __a is
1420*bed243d3SAndroid Build Coastguard Worker ///    copied to the same position in the destination. When a mask bit is 1, the
1421*bed243d3SAndroid Build Coastguard Worker ///    corresponding 32-bit element in operand \a __b is copied to the same
1422*bed243d3SAndroid Build Coastguard Worker ///    position in the destination.
1423*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] containing the copied values.
1424*bed243d3SAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_blendv_ps(__m256 __a,__m256 __b,__m256 __c)1425*bed243d3SAndroid Build Coastguard Worker _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
1426*bed243d3SAndroid Build Coastguard Worker {
1427*bed243d3SAndroid Build Coastguard Worker   return (__m256)__builtin_ia32_blendvps256(
1428*bed243d3SAndroid Build Coastguard Worker     (__v8sf)__a, (__v8sf)__b, (__v8sf)__c);
1429*bed243d3SAndroid Build Coastguard Worker }
1430*bed243d3SAndroid Build Coastguard Worker 
/* Vector Dot Product */
/// Computes two dot products in parallel, using the lower and upper
///    halves of two [8 x float] vectors as input to the two computations, and
///    returning the two dot products in the lower and upper halves of the
///    [8 x float] result.
///
///    The immediate integer operand controls which input elements will
///    contribute to the dot product, and where the final results are returned.
///    In general, for each dot product, the four corresponding elements of the
///    input vectors are multiplied; the first two and second two products are
///    summed, then the two sums are added to form the final result.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m256 _mm256_dp_ps(__m256 V1, __m256 V2, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VDPPS </c> instruction.
///
/// \param V1
///    A vector of [8 x float] values, treated as two [4 x float] vectors.
/// \param V2
///    A vector of [8 x float] values, treated as two [4 x float] vectors.
/// \param M
///    An immediate integer argument. Bits [7:4] determine which elements of
///    the input vectors are used, with bit [4] corresponding to the lowest
///    element and bit [7] corresponding to the highest element of each [4 x
///    float] subvector. If a bit is set, the corresponding elements from the
///    two input vectors are used as an input for dot product; otherwise that
///    input is treated as zero. Bits [3:0] determine which elements of the
///    result will receive a copy of the final dot product, with bit [0]
///    corresponding to the lowest element and bit [3] corresponding to the
///    highest element of each [4 x float] subvector. If a bit is set, the dot
///    product is returned in the corresponding element; otherwise that element
///    is set to zero. The bitmask is applied in the same way to each of the
///    two parallel dot product computations.
/// \returns A 256-bit vector of [8 x float] containing the two dot products.
#define _mm256_dp_ps(V1, V2, M) \
  ((__m256)__builtin_ia32_dpps256((__v8sf)(__m256)(V1), \
                                  (__v8sf)(__m256)(V2), (M)))
1472*bed243d3SAndroid Build Coastguard Worker 
1473*bed243d3SAndroid Build Coastguard Worker /* Vector shuffle */
1474*bed243d3SAndroid Build Coastguard Worker /// Selects 8 float values from the 256-bit operands of [8 x float], as
1475*bed243d3SAndroid Build Coastguard Worker ///    specified by the immediate value operand.
1476*bed243d3SAndroid Build Coastguard Worker ///
///    The four selected elements in each operand are copied to the destination
///    according to the bits specified in the immediate operand. The selected
///    elements from the first 256-bit operand are copied to bits [63:0] and
///    bits [191:128] of the destination, and the selected elements from the
///    second 256-bit operand are copied to bits [127:64] and bits [255:192] of
///    the destination. For example, if bits [7:0] of the immediate operand
///    contain a value of 0xFF, the 256-bit destination vector would contain the
///    following values: b[7], b[7], a[7], a[7], b[3], b[3], a[3], a[3].
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m256 _mm256_shuffle_ps(__m256 a, __m256 b, const int mask);
/// \endcode
///
/// This intrinsic corresponds to the <c> VSHUFPS </c> instruction.
///
/// \param a
///    A 256-bit vector of [8 x float]. The four selected elements in this
///    operand are copied to bits [63:0] and bits [191:128] in the destination,
///    according to the bits specified in the immediate operand.
/// \param b
///    A 256-bit vector of [8 x float]. The four selected elements in this
///    operand are copied to bits [127:64] and bits [255:192] in the
///    destination, according to the bits specified in the immediate operand.
/// \param mask
///    An immediate value containing an 8-bit value specifying which elements to
///    copy from \a a and \a b. \n
///    Bits [3:0] specify the values copied from operand \a a. \n
///    Bits [7:4] specify the values copied from operand \a b. \n
///    The destinations within the 256-bit destination are assigned values as
///    follows, according to the bit value assignments described below: \n
///    Bits [1:0] are used to assign values to bits [31:0] and [159:128] in the
///    destination. \n
///    Bits [3:2] are used to assign values to bits [63:32] and [191:160] in the
///    destination. \n
///    Bits [5:4] are used to assign values to bits [95:64] and [223:192] in the
///    destination. \n
///    Bits [7:6] are used to assign values to bits [127:96] and [255:224] in
///    the destination. \n
///    Bit value assignments: \n
///    00: Bits [31:0] and [159:128] are copied from the selected operand. \n
///    01: Bits [63:32] and [191:160] are copied from the selected operand. \n
///    10: Bits [95:64] and [223:192] are copied from the selected operand. \n
///    11: Bits [127:96] and [255:224] are copied from the selected operand. \n
///    Note: To generate a mask, you can use the \c _MM_SHUFFLE macro.
///    <c>_MM_SHUFFLE(b6, b4, b2, b0)</c> can create an 8-bit mask of the form
///    <c>[b6, b4, b2, b0]</c>.
/// \returns A 256-bit vector of [8 x float] containing the shuffled values.
/// \note This intrinsic is implemented as a macro: \a a and \a b are cast to
///    the [8 x float] vector type passed to the builtin, and \a mask is cast
///    to \c int.
#define _mm256_shuffle_ps(a, b, mask) \
  ((__m256)__builtin_ia32_shufps256((__v8sf)(__m256)(a), \
                                    (__v8sf)(__m256)(b), (int)(mask)))
1529*bed243d3SAndroid Build Coastguard Worker 
/// Selects four double-precision values from the 256-bit operands of
///    [4 x double], as specified by the immediate value operand.
///
///    The selected elements from the first 256-bit operand are copied to bits
///    [63:0] and bits [191:128] in the destination, and the selected elements
///    from the second 256-bit operand are copied to bits [127:64] and bits
///    [255:192] in the destination. For example, if bits [3:0] of the immediate
///    operand contain a value of 0xF, the 256-bit destination vector would
///    contain the following values: b[3], a[3], b[1], a[1].
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m256d _mm256_shuffle_pd(__m256d a, __m256d b, const int mask);
/// \endcode
///
/// This intrinsic corresponds to the <c> VSHUFPD </c> instruction.
///
/// \param a
///    A 256-bit vector of [4 x double].
/// \param b
///    A 256-bit vector of [4 x double].
/// \param mask
///    An immediate value whose bits [3:0] specify which elements to copy from
///    \a a and \a b: \n
///    Bit [0]=0: Bits [63:0] are copied from \a a to bits [63:0] of the
///    destination. \n
///    Bit [0]=1: Bits [127:64] are copied from \a a to bits [63:0] of the
///    destination. \n
///    Bit [1]=0: Bits [63:0] are copied from \a b to bits [127:64] of the
///    destination. \n
///    Bit [1]=1: Bits [127:64] are copied from \a b to bits [127:64] of the
///    destination. \n
///    Bit [2]=0: Bits [191:128] are copied from \a a to bits [191:128] of the
///    destination. \n
///    Bit [2]=1: Bits [255:192] are copied from \a a to bits [191:128] of the
///    destination. \n
///    Bit [3]=0: Bits [191:128] are copied from \a b to bits [255:192] of the
///    destination. \n
///    Bit [3]=1: Bits [255:192] are copied from \a b to bits [255:192] of the
///    destination.
/// \returns A 256-bit vector of [4 x double] containing the shuffled values.
#define _mm256_shuffle_pd(a, b, mask) \
  ((__m256d)__builtin_ia32_shufpd256((__v4df)(__m256d)(a), \
                                     (__v4df)(__m256d)(b), (int)(mask)))
1575*bed243d3SAndroid Build Coastguard Worker 
/* Comparison predicates 0x08 through 0x1f, for use as the immediate operand of
 * the compare intrinsics (for example _mm256_cmp_pd and _mm256_cmp_ps).
 * Predicates 0x00 through 0x07 are not defined in this section. */
#define _CMP_EQ_UQ    0x08 /* Equal (unordered, non-signaling) */
#define _CMP_NGE_US   0x09 /* Not-greater-than-or-equal (unordered, signaling) */
#define _CMP_NGT_US   0x0a /* Not-greater-than (unordered, signaling) */
#define _CMP_FALSE_OQ 0x0b /* False (ordered, non-signaling) */
#define _CMP_NEQ_OQ   0x0c /* Not-equal (ordered, non-signaling) */
#define _CMP_GE_OS    0x0d /* Greater-than-or-equal (ordered, signaling) */
#define _CMP_GT_OS    0x0e /* Greater-than (ordered, signaling) */
#define _CMP_TRUE_UQ  0x0f /* True (unordered, non-signaling) */
#define _CMP_EQ_OS    0x10 /* Equal (ordered, signaling) */
#define _CMP_LT_OQ    0x11 /* Less-than (ordered, non-signaling) */
#define _CMP_LE_OQ    0x12 /* Less-than-or-equal (ordered, non-signaling) */
#define _CMP_UNORD_S  0x13 /* Unordered (signaling) */
#define _CMP_NEQ_US   0x14 /* Not-equal (unordered, signaling) */
#define _CMP_NLT_UQ   0x15 /* Not-less-than (unordered, non-signaling) */
#define _CMP_NLE_UQ   0x16 /* Not-less-than-or-equal (unordered, non-signaling) */
#define _CMP_ORD_S    0x17 /* Ordered (signaling) */
#define _CMP_EQ_US    0x18 /* Equal (unordered, signaling) */
#define _CMP_NGE_UQ   0x19 /* Not-greater-than-or-equal (unordered, non-signaling) */
#define _CMP_NGT_UQ   0x1a /* Not-greater-than (unordered, non-signaling) */
#define _CMP_FALSE_OS 0x1b /* False (ordered, signaling) */
#define _CMP_NEQ_OS   0x1c /* Not-equal (ordered, signaling) */
#define _CMP_GE_OQ    0x1d /* Greater-than-or-equal (ordered, non-signaling) */
#define _CMP_GT_OQ    0x1e /* Greater-than (ordered, non-signaling) */
#define _CMP_TRUE_US  0x1f /* True (unordered, signaling) */
1601*bed243d3SAndroid Build Coastguard Worker 
/* The intrinsic documented below is defined in emmintrin.h and can also be
 * used for AVX. */
/// Compares each of the corresponding double-precision values of two
///    128-bit vectors of [2 x double], using the operation specified by the
///    immediate integer operand.
///
///    Returns a [2 x double] vector consisting of two doubles corresponding to
///    the two comparison results: zero if the comparison is false, and all 1's
///    if the comparison is true.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128d _mm_cmp_pd(__m128d a, __m128d b, const int c);
/// \endcode
///
/// This intrinsic corresponds to the <c> VCMPPD </c> instruction.
///
/// \param a
///    A 128-bit vector of [2 x double].
/// \param b
///    A 128-bit vector of [2 x double].
/// \param c
///    An immediate integer operand, with bits [4:0] specifying which comparison
///    operation to use: \n
///    0x00: Equal (ordered, non-signaling) \n
///    0x01: Less-than (ordered, signaling) \n
///    0x02: Less-than-or-equal (ordered, signaling) \n
///    0x03: Unordered (non-signaling) \n
///    0x04: Not-equal (unordered, non-signaling) \n
///    0x05: Not-less-than (unordered, signaling) \n
///    0x06: Not-less-than-or-equal (unordered, signaling) \n
///    0x07: Ordered (non-signaling) \n
///    0x08: Equal (unordered, non-signaling) \n
///    0x09: Not-greater-than-or-equal (unordered, signaling) \n
///    0x0A: Not-greater-than (unordered, signaling) \n
///    0x0B: False (ordered, non-signaling) \n
///    0x0C: Not-equal (ordered, non-signaling) \n
///    0x0D: Greater-than-or-equal (ordered, signaling) \n
///    0x0E: Greater-than (ordered, signaling) \n
///    0x0F: True (unordered, non-signaling) \n
///    0x10: Equal (ordered, signaling) \n
///    0x11: Less-than (ordered, non-signaling) \n
///    0x12: Less-than-or-equal (ordered, non-signaling) \n
///    0x13: Unordered (signaling) \n
///    0x14: Not-equal (unordered, signaling) \n
///    0x15: Not-less-than (unordered, non-signaling) \n
///    0x16: Not-less-than-or-equal (unordered, non-signaling) \n
///    0x17: Ordered (signaling) \n
///    0x18: Equal (unordered, signaling) \n
///    0x19: Not-greater-than-or-equal (unordered, non-signaling) \n
///    0x1A: Not-greater-than (unordered, non-signaling) \n
///    0x1B: False (ordered, signaling) \n
///    0x1C: Not-equal (ordered, signaling) \n
///    0x1D: Greater-than-or-equal (ordered, non-signaling) \n
///    0x1E: Greater-than (ordered, non-signaling) \n
///    0x1F: True (unordered, signaling)
/// \returns A 128-bit vector of [2 x double] containing the comparison results.
/// \fn __m128d _mm_cmp_pd(__m128d a, __m128d b, const int c)
1660*bed243d3SAndroid Build Coastguard Worker 
/* The intrinsic documented below is defined in xmmintrin.h and can also be
 * used for AVX. */
/// Compares each of the corresponding values of two 128-bit vectors of
///    [4 x float], using the operation specified by the immediate integer
///    operand.
///
///    Returns a [4 x float] vector consisting of four floats corresponding to
///    the four comparison results: zero if the comparison is false, and all 1's
///    if the comparison is true.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128 _mm_cmp_ps(__m128 a, __m128 b, const int c);
/// \endcode
///
/// This intrinsic corresponds to the <c> VCMPPS </c> instruction.
///
/// \param a
///    A 128-bit vector of [4 x float].
/// \param b
///    A 128-bit vector of [4 x float].
/// \param c
///    An immediate integer operand, with bits [4:0] specifying which comparison
///    operation to use: \n
///    0x00: Equal (ordered, non-signaling) \n
///    0x01: Less-than (ordered, signaling) \n
///    0x02: Less-than-or-equal (ordered, signaling) \n
///    0x03: Unordered (non-signaling) \n
///    0x04: Not-equal (unordered, non-signaling) \n
///    0x05: Not-less-than (unordered, signaling) \n
///    0x06: Not-less-than-or-equal (unordered, signaling) \n
///    0x07: Ordered (non-signaling) \n
///    0x08: Equal (unordered, non-signaling) \n
///    0x09: Not-greater-than-or-equal (unordered, signaling) \n
///    0x0A: Not-greater-than (unordered, signaling) \n
///    0x0B: False (ordered, non-signaling) \n
///    0x0C: Not-equal (ordered, non-signaling) \n
///    0x0D: Greater-than-or-equal (ordered, signaling) \n
///    0x0E: Greater-than (ordered, signaling) \n
///    0x0F: True (unordered, non-signaling) \n
///    0x10: Equal (ordered, signaling) \n
///    0x11: Less-than (ordered, non-signaling) \n
///    0x12: Less-than-or-equal (ordered, non-signaling) \n
///    0x13: Unordered (signaling) \n
///    0x14: Not-equal (unordered, signaling) \n
///    0x15: Not-less-than (unordered, non-signaling) \n
///    0x16: Not-less-than-or-equal (unordered, non-signaling) \n
///    0x17: Ordered (signaling) \n
///    0x18: Equal (unordered, signaling) \n
///    0x19: Not-greater-than-or-equal (unordered, non-signaling) \n
///    0x1A: Not-greater-than (unordered, non-signaling) \n
///    0x1B: False (ordered, signaling) \n
///    0x1C: Not-equal (ordered, signaling) \n
///    0x1D: Greater-than-or-equal (ordered, non-signaling) \n
///    0x1E: Greater-than (ordered, non-signaling) \n
///    0x1F: True (unordered, signaling)
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
/// \fn __m128 _mm_cmp_ps(__m128 a, __m128 b, const int c)
1719*bed243d3SAndroid Build Coastguard Worker 
/// Compares each of the corresponding double-precision values of two
///    256-bit vectors of [4 x double], using the operation specified by the
///    immediate integer operand.
///
///    Returns a [4 x double] vector consisting of four doubles corresponding to
///    the four comparison results: zero if the comparison is false, and all 1's
///    if the comparison is true.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m256d _mm256_cmp_pd(__m256d a, __m256d b, const int c);
/// \endcode
///
/// This intrinsic corresponds to the <c> VCMPPD </c> instruction.
///
/// \param a
///    A 256-bit vector of [4 x double].
/// \param b
///    A 256-bit vector of [4 x double].
/// \param c
///    An immediate integer operand, with bits [4:0] specifying which comparison
///    operation to use (values 0x08 through 0x1F are also available as the
///    \c _CMP_* constants defined earlier in this header): \n
///    0x00: Equal (ordered, non-signaling) \n
///    0x01: Less-than (ordered, signaling) \n
///    0x02: Less-than-or-equal (ordered, signaling) \n
///    0x03: Unordered (non-signaling) \n
///    0x04: Not-equal (unordered, non-signaling) \n
///    0x05: Not-less-than (unordered, signaling) \n
///    0x06: Not-less-than-or-equal (unordered, signaling) \n
///    0x07: Ordered (non-signaling) \n
///    0x08: Equal (unordered, non-signaling) \n
///    0x09: Not-greater-than-or-equal (unordered, signaling) \n
///    0x0A: Not-greater-than (unordered, signaling) \n
///    0x0B: False (ordered, non-signaling) \n
///    0x0C: Not-equal (ordered, non-signaling) \n
///    0x0D: Greater-than-or-equal (ordered, signaling) \n
///    0x0E: Greater-than (ordered, signaling) \n
///    0x0F: True (unordered, non-signaling) \n
///    0x10: Equal (ordered, signaling) \n
///    0x11: Less-than (ordered, non-signaling) \n
///    0x12: Less-than-or-equal (ordered, non-signaling) \n
///    0x13: Unordered (signaling) \n
///    0x14: Not-equal (unordered, signaling) \n
///    0x15: Not-less-than (unordered, non-signaling) \n
///    0x16: Not-less-than-or-equal (unordered, non-signaling) \n
///    0x17: Ordered (signaling) \n
///    0x18: Equal (unordered, signaling) \n
///    0x19: Not-greater-than-or-equal (unordered, non-signaling) \n
///    0x1A: Not-greater-than (unordered, non-signaling) \n
///    0x1B: False (ordered, signaling) \n
///    0x1C: Not-equal (ordered, signaling) \n
///    0x1D: Greater-than-or-equal (ordered, non-signaling) \n
///    0x1E: Greater-than (ordered, non-signaling) \n
///    0x1F: True (unordered, signaling)
/// \returns A 256-bit vector of [4 x double] containing the comparison results.
#define _mm256_cmp_pd(a, b, c) \
  ((__m256d)__builtin_ia32_cmppd256((__v4df)(__m256d)(a), \
                                    (__v4df)(__m256d)(b), (c)))
1779*bed243d3SAndroid Build Coastguard Worker 
/// Compares each of the corresponding values of two 256-bit vectors of
///    [8 x float], using the operation specified by the immediate integer
///    operand.
///
///    Returns a [8 x float] vector consisting of eight floats corresponding to
///    the eight comparison results: zero if the comparison is false, and all
///    1's if the comparison is true.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m256 _mm256_cmp_ps(__m256 a, __m256 b, const int c);
/// \endcode
///
/// This intrinsic corresponds to the <c> VCMPPS </c> instruction.
///
/// \param a
///    A 256-bit vector of [8 x float].
/// \param b
///    A 256-bit vector of [8 x float].
/// \param c
///    An immediate integer operand, with bits [4:0] specifying which comparison
///    operation to use (values 0x08 through 0x1F are also available as the
///    \c _CMP_* constants defined earlier in this header): \n
///    0x00: Equal (ordered, non-signaling) \n
///    0x01: Less-than (ordered, signaling) \n
///    0x02: Less-than-or-equal (ordered, signaling) \n
///    0x03: Unordered (non-signaling) \n
///    0x04: Not-equal (unordered, non-signaling) \n
///    0x05: Not-less-than (unordered, signaling) \n
///    0x06: Not-less-than-or-equal (unordered, signaling) \n
///    0x07: Ordered (non-signaling) \n
///    0x08: Equal (unordered, non-signaling) \n
///    0x09: Not-greater-than-or-equal (unordered, signaling) \n
///    0x0A: Not-greater-than (unordered, signaling) \n
///    0x0B: False (ordered, non-signaling) \n
///    0x0C: Not-equal (ordered, non-signaling) \n
///    0x0D: Greater-than-or-equal (ordered, signaling) \n
///    0x0E: Greater-than (ordered, signaling) \n
///    0x0F: True (unordered, non-signaling) \n
///    0x10: Equal (ordered, signaling) \n
///    0x11: Less-than (ordered, non-signaling) \n
///    0x12: Less-than-or-equal (ordered, non-signaling) \n
///    0x13: Unordered (signaling) \n
///    0x14: Not-equal (unordered, signaling) \n
///    0x15: Not-less-than (unordered, non-signaling) \n
///    0x16: Not-less-than-or-equal (unordered, non-signaling) \n
///    0x17: Ordered (signaling) \n
///    0x18: Equal (unordered, signaling) \n
///    0x19: Not-greater-than-or-equal (unordered, non-signaling) \n
///    0x1A: Not-greater-than (unordered, non-signaling) \n
///    0x1B: False (ordered, signaling) \n
///    0x1C: Not-equal (ordered, signaling) \n
///    0x1D: Greater-than-or-equal (ordered, non-signaling) \n
///    0x1E: Greater-than (ordered, non-signaling) \n
///    0x1F: True (unordered, signaling)
/// \returns A 256-bit vector of [8 x float] containing the comparison results.
#define _mm256_cmp_ps(a, b, c) \
  ((__m256)__builtin_ia32_cmpps256((__v8sf)(__m256)(a), \
                                   (__v8sf)(__m256)(b), (c)))
1839*bed243d3SAndroid Build Coastguard Worker 
/* The intrinsic below is defined in emmintrin.h and can also be used with AVX. */
1841*bed243d3SAndroid Build Coastguard Worker /// Compares each of the corresponding scalar double-precision values of
1842*bed243d3SAndroid Build Coastguard Worker ///    two 128-bit vectors of [2 x double], using the operation specified by the
1843*bed243d3SAndroid Build Coastguard Worker ///    immediate integer operand.
1844*bed243d3SAndroid Build Coastguard Worker ///
1845*bed243d3SAndroid Build Coastguard Worker ///    If the result is true, all 64 bits of the destination vector are set;
1846*bed243d3SAndroid Build Coastguard Worker ///    otherwise they are cleared.
1847*bed243d3SAndroid Build Coastguard Worker ///
1848*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
1849*bed243d3SAndroid Build Coastguard Worker ///
1850*bed243d3SAndroid Build Coastguard Worker /// \code
1851*bed243d3SAndroid Build Coastguard Worker /// __m128d _mm_cmp_sd(__m128d a, __m128d b, const int c);
1852*bed243d3SAndroid Build Coastguard Worker /// \endcode
1853*bed243d3SAndroid Build Coastguard Worker ///
1854*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VCMPSD </c> instruction.
1855*bed243d3SAndroid Build Coastguard Worker ///
1856*bed243d3SAndroid Build Coastguard Worker /// \param a
1857*bed243d3SAndroid Build Coastguard Worker ///    A 128-bit vector of [2 x double].
1858*bed243d3SAndroid Build Coastguard Worker /// \param b
1859*bed243d3SAndroid Build Coastguard Worker ///    A 128-bit vector of [2 x double].
1860*bed243d3SAndroid Build Coastguard Worker /// \param c
1861*bed243d3SAndroid Build Coastguard Worker ///    An immediate integer operand, with bits [4:0] specifying which comparison
1862*bed243d3SAndroid Build Coastguard Worker ///    operation to use: \n
1863*bed243d3SAndroid Build Coastguard Worker ///    0x00: Equal (ordered, non-signaling) \n
1864*bed243d3SAndroid Build Coastguard Worker ///    0x01: Less-than (ordered, signaling) \n
1865*bed243d3SAndroid Build Coastguard Worker ///    0x02: Less-than-or-equal (ordered, signaling) \n
1866*bed243d3SAndroid Build Coastguard Worker ///    0x03: Unordered (non-signaling) \n
1867*bed243d3SAndroid Build Coastguard Worker ///    0x04: Not-equal (unordered, non-signaling) \n
1868*bed243d3SAndroid Build Coastguard Worker ///    0x05: Not-less-than (unordered, signaling) \n
1869*bed243d3SAndroid Build Coastguard Worker ///    0x06: Not-less-than-or-equal (unordered, signaling) \n
1870*bed243d3SAndroid Build Coastguard Worker ///    0x07: Ordered (non-signaling) \n
1871*bed243d3SAndroid Build Coastguard Worker ///    0x08: Equal (unordered, non-signaling) \n
1872*bed243d3SAndroid Build Coastguard Worker ///    0x09: Not-greater-than-or-equal (unordered, signaling) \n
1873*bed243d3SAndroid Build Coastguard Worker ///    0x0A: Not-greater-than (unordered, signaling) \n
1874*bed243d3SAndroid Build Coastguard Worker ///    0x0B: False (ordered, non-signaling) \n
1875*bed243d3SAndroid Build Coastguard Worker ///    0x0C: Not-equal (ordered, non-signaling) \n
1876*bed243d3SAndroid Build Coastguard Worker ///    0x0D: Greater-than-or-equal (ordered, signaling) \n
1877*bed243d3SAndroid Build Coastguard Worker ///    0x0E: Greater-than (ordered, signaling) \n
1878*bed243d3SAndroid Build Coastguard Worker ///    0x0F: True (unordered, non-signaling) \n
1879*bed243d3SAndroid Build Coastguard Worker ///    0x10: Equal (ordered, signaling) \n
1880*bed243d3SAndroid Build Coastguard Worker ///    0x11: Less-than (ordered, non-signaling) \n
1881*bed243d3SAndroid Build Coastguard Worker ///    0x12: Less-than-or-equal (ordered, non-signaling) \n
1882*bed243d3SAndroid Build Coastguard Worker ///    0x13: Unordered (signaling) \n
1883*bed243d3SAndroid Build Coastguard Worker ///    0x14: Not-equal (unordered, signaling) \n
1884*bed243d3SAndroid Build Coastguard Worker ///    0x15: Not-less-than (unordered, non-signaling) \n
1885*bed243d3SAndroid Build Coastguard Worker ///    0x16: Not-less-than-or-equal (unordered, non-signaling) \n
1886*bed243d3SAndroid Build Coastguard Worker ///    0x17: Ordered (signaling) \n
1887*bed243d3SAndroid Build Coastguard Worker ///    0x18: Equal (unordered, signaling) \n
1888*bed243d3SAndroid Build Coastguard Worker ///    0x19: Not-greater-than-or-equal (unordered, non-signaling) \n
1889*bed243d3SAndroid Build Coastguard Worker ///    0x1A: Not-greater-than (unordered, non-signaling) \n
1890*bed243d3SAndroid Build Coastguard Worker ///    0x1B: False (ordered, signaling) \n
1891*bed243d3SAndroid Build Coastguard Worker ///    0x1C: Not-equal (ordered, signaling) \n
1892*bed243d3SAndroid Build Coastguard Worker ///    0x1D: Greater-than-or-equal (ordered, non-signaling) \n
1893*bed243d3SAndroid Build Coastguard Worker ///    0x1E: Greater-than (ordered, non-signaling) \n
1894*bed243d3SAndroid Build Coastguard Worker ///    0x1F: True (unordered, signaling)
1895*bed243d3SAndroid Build Coastguard Worker /// \returns A 128-bit vector of [2 x double] containing the comparison results.
1896*bed243d3SAndroid Build Coastguard Worker /// \fn __m128d _mm_cmp_sd(__m128d a, __m128d b, const int c)
1897*bed243d3SAndroid Build Coastguard Worker 
/* The intrinsic below is defined in xmmintrin.h and can also be used with AVX. */
1899*bed243d3SAndroid Build Coastguard Worker /// Compares each of the corresponding scalar values of two 128-bit
1900*bed243d3SAndroid Build Coastguard Worker ///    vectors of [4 x float], using the operation specified by the immediate
1901*bed243d3SAndroid Build Coastguard Worker ///    integer operand.
1902*bed243d3SAndroid Build Coastguard Worker ///
1903*bed243d3SAndroid Build Coastguard Worker ///    If the result is true, all 32 bits of the destination vector are set;
1904*bed243d3SAndroid Build Coastguard Worker ///    otherwise they are cleared.
1905*bed243d3SAndroid Build Coastguard Worker ///
1906*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
1907*bed243d3SAndroid Build Coastguard Worker ///
1908*bed243d3SAndroid Build Coastguard Worker /// \code
1909*bed243d3SAndroid Build Coastguard Worker /// __m128 _mm_cmp_ss(__m128 a, __m128 b, const int c);
1910*bed243d3SAndroid Build Coastguard Worker /// \endcode
1911*bed243d3SAndroid Build Coastguard Worker ///
1912*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VCMPSS </c> instruction.
1913*bed243d3SAndroid Build Coastguard Worker ///
1914*bed243d3SAndroid Build Coastguard Worker /// \param a
1915*bed243d3SAndroid Build Coastguard Worker ///    A 128-bit vector of [4 x float].
1916*bed243d3SAndroid Build Coastguard Worker /// \param b
1917*bed243d3SAndroid Build Coastguard Worker ///    A 128-bit vector of [4 x float].
1918*bed243d3SAndroid Build Coastguard Worker /// \param c
1919*bed243d3SAndroid Build Coastguard Worker ///    An immediate integer operand, with bits [4:0] specifying which comparison
1920*bed243d3SAndroid Build Coastguard Worker ///    operation to use: \n
1921*bed243d3SAndroid Build Coastguard Worker ///    0x00: Equal (ordered, non-signaling) \n
1922*bed243d3SAndroid Build Coastguard Worker ///    0x01: Less-than (ordered, signaling) \n
1923*bed243d3SAndroid Build Coastguard Worker ///    0x02: Less-than-or-equal (ordered, signaling) \n
1924*bed243d3SAndroid Build Coastguard Worker ///    0x03: Unordered (non-signaling) \n
1925*bed243d3SAndroid Build Coastguard Worker ///    0x04: Not-equal (unordered, non-signaling) \n
1926*bed243d3SAndroid Build Coastguard Worker ///    0x05: Not-less-than (unordered, signaling) \n
1927*bed243d3SAndroid Build Coastguard Worker ///    0x06: Not-less-than-or-equal (unordered, signaling) \n
1928*bed243d3SAndroid Build Coastguard Worker ///    0x07: Ordered (non-signaling) \n
1929*bed243d3SAndroid Build Coastguard Worker ///    0x08: Equal (unordered, non-signaling) \n
1930*bed243d3SAndroid Build Coastguard Worker ///    0x09: Not-greater-than-or-equal (unordered, signaling) \n
1931*bed243d3SAndroid Build Coastguard Worker ///    0x0A: Not-greater-than (unordered, signaling) \n
1932*bed243d3SAndroid Build Coastguard Worker ///    0x0B: False (ordered, non-signaling) \n
1933*bed243d3SAndroid Build Coastguard Worker ///    0x0C: Not-equal (ordered, non-signaling) \n
1934*bed243d3SAndroid Build Coastguard Worker ///    0x0D: Greater-than-or-equal (ordered, signaling) \n
1935*bed243d3SAndroid Build Coastguard Worker ///    0x0E: Greater-than (ordered, signaling) \n
1936*bed243d3SAndroid Build Coastguard Worker ///    0x0F: True (unordered, non-signaling) \n
1937*bed243d3SAndroid Build Coastguard Worker ///    0x10: Equal (ordered, signaling) \n
1938*bed243d3SAndroid Build Coastguard Worker ///    0x11: Less-than (ordered, non-signaling) \n
1939*bed243d3SAndroid Build Coastguard Worker ///    0x12: Less-than-or-equal (ordered, non-signaling) \n
1940*bed243d3SAndroid Build Coastguard Worker ///    0x13: Unordered (signaling) \n
1941*bed243d3SAndroid Build Coastguard Worker ///    0x14: Not-equal (unordered, signaling) \n
1942*bed243d3SAndroid Build Coastguard Worker ///    0x15: Not-less-than (unordered, non-signaling) \n
1943*bed243d3SAndroid Build Coastguard Worker ///    0x16: Not-less-than-or-equal (unordered, non-signaling) \n
1944*bed243d3SAndroid Build Coastguard Worker ///    0x17: Ordered (signaling) \n
1945*bed243d3SAndroid Build Coastguard Worker ///    0x18: Equal (unordered, signaling) \n
1946*bed243d3SAndroid Build Coastguard Worker ///    0x19: Not-greater-than-or-equal (unordered, non-signaling) \n
1947*bed243d3SAndroid Build Coastguard Worker ///    0x1A: Not-greater-than (unordered, non-signaling) \n
1948*bed243d3SAndroid Build Coastguard Worker ///    0x1B: False (ordered, signaling) \n
1949*bed243d3SAndroid Build Coastguard Worker ///    0x1C: Not-equal (ordered, signaling) \n
1950*bed243d3SAndroid Build Coastguard Worker ///    0x1D: Greater-than-or-equal (ordered, non-signaling) \n
1951*bed243d3SAndroid Build Coastguard Worker ///    0x1E: Greater-than (ordered, non-signaling) \n
1952*bed243d3SAndroid Build Coastguard Worker ///    0x1F: True (unordered, signaling)
1953*bed243d3SAndroid Build Coastguard Worker /// \returns A 128-bit vector of [4 x float] containing the comparison results.
1954*bed243d3SAndroid Build Coastguard Worker /// \fn __m128 _mm_cmp_ss(__m128 a, __m128 b, const int c)
1955*bed243d3SAndroid Build Coastguard Worker 
/// Takes a [8 x i32] vector and returns the vector element value
///    indexed by the immediate constant operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// int _mm256_extract_epi32(__m256i X, const int N);
/// \endcode
///
/// This intrinsic corresponds to the <c> VEXTRACTF128+COMPOSITE </c>
///   instruction.
///
/// \param X
///    A 256-bit integer vector of [8 x i32].
/// \param N
///    An immediate integer operand with bits [2:0] determining which vector
///    element is extracted and returned.
/// \returns A 32-bit integer containing the extracted 32 bits of extended
///    packed data.
#define _mm256_extract_epi32(X, N) \
  ((int)__builtin_ia32_vec_ext_v8si((__v8si)(__m256i)(X), (int)(N)))
1977*bed243d3SAndroid Build Coastguard Worker 
/// Takes a [16 x i16] vector and returns the vector element value
///    indexed by the immediate constant operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// int _mm256_extract_epi16(__m256i X, const int N);
/// \endcode
///
/// This intrinsic corresponds to the <c> VEXTRACTF128+COMPOSITE </c>
///   instruction.
///
/// \param X
///    A 256-bit integer vector of [16 x i16].
/// \param N
///    An immediate integer operand with bits [3:0] determining which vector
///    element is extracted and returned.
/// \returns A 32-bit integer containing the extracted 16 bits of zero extended
///    packed data. The element is converted through <c>unsigned short</c>
///    before being widened to <c>int</c>, so the upper 16 bits of the result
///    are zero.
#define _mm256_extract_epi16(X, N) \
  ((int)(unsigned short)__builtin_ia32_vec_ext_v16hi((__v16hi)(__m256i)(X), \
                                                     (int)(N)))
2000*bed243d3SAndroid Build Coastguard Worker 
/// Takes a [32 x i8] vector and returns the vector element value
///    indexed by the immediate constant operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// int _mm256_extract_epi8(__m256i X, const int N);
/// \endcode
///
/// This intrinsic corresponds to the <c> VEXTRACTF128+COMPOSITE </c>
///   instruction.
///
/// \param X
///    A 256-bit integer vector of [32 x i8].
/// \param N
///    An immediate integer operand with bits [4:0] determining which vector
///    element is extracted and returned.
/// \returns A 32-bit integer containing the extracted 8 bits of zero extended
///    packed data. The element is converted through <c>unsigned char</c>
///    before being widened to <c>int</c>, so the upper 24 bits of the result
///    are zero.
#define _mm256_extract_epi8(X, N) \
  ((int)(unsigned char)__builtin_ia32_vec_ext_v32qi((__v32qi)(__m256i)(X), \
                                                    (int)(N)))
2023*bed243d3SAndroid Build Coastguard Worker 
#ifdef __x86_64__
/// Takes a [4 x i64] vector and returns the vector element value
///    indexed by the immediate constant operand.
///
///    This macro is only defined when <c>__x86_64__</c> is defined.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// long long _mm256_extract_epi64(__m256i X, const int N);
/// \endcode
///
/// This intrinsic corresponds to the <c> VEXTRACTF128+COMPOSITE </c>
///   instruction.
///
/// \param X
///    A 256-bit integer vector of [4 x i64].
/// \param N
///    An immediate integer operand with bits [1:0] determining which vector
///    element is extracted and returned.
/// \returns A 64-bit integer containing the extracted 64 bits of extended
///    packed data.
#define _mm256_extract_epi64(X, N) \
  ((long long)__builtin_ia32_vec_ext_v4di((__v4di)(__m256i)(X), (int)(N)))
#endif
2047*bed243d3SAndroid Build Coastguard Worker 
/// Takes a [8 x i32] vector and replaces the vector element value
///    indexed by the immediate constant operand with a new value. Returns the
///    modified vector.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m256i _mm256_insert_epi32(__m256i X, int I, const int N);
/// \endcode
///
/// This intrinsic corresponds to the <c> VINSERTF128+COMPOSITE </c>
///   instruction.
///
/// \param X
///    A vector of [8 x i32] to be used by the insert operation.
/// \param I
///    An integer value. The replacement value for the insert operation.
/// \param N
///    An immediate integer specifying the index of the vector element to be
///    replaced.
/// \returns A copy of vector \a X, after replacing its element indexed by
///    \a N with \a I.
#define _mm256_insert_epi32(X, I, N) \
  ((__m256i)__builtin_ia32_vec_set_v8si((__v8si)(__m256i)(X), \
                                        (int)(I), (int)(N)))
2073*bed243d3SAndroid Build Coastguard Worker 
2074*bed243d3SAndroid Build Coastguard Worker 
/// Takes a [16 x i16] vector and replaces the vector element value
///    indexed by the immediate constant operand with a new value. Returns the
///    modified vector.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m256i _mm256_insert_epi16(__m256i X, int I, const int N);
/// \endcode
///
/// This intrinsic corresponds to the <c> VINSERTF128+COMPOSITE </c>
///   instruction.
///
/// \param X
///    A vector of [16 x i16] to be used by the insert operation.
/// \param I
///    An i16 integer value. The replacement value for the insert operation.
///    The argument is passed to the builtin through an <c>(int)</c> cast.
/// \param N
///    An immediate integer specifying the index of the vector element to be
///    replaced.
/// \returns A copy of vector \a X, after replacing its element indexed by
///    \a N with \a I.
#define _mm256_insert_epi16(X, I, N) \
  ((__m256i)__builtin_ia32_vec_set_v16hi((__v16hi)(__m256i)(X), \
                                         (int)(I), (int)(N)))
2100*bed243d3SAndroid Build Coastguard Worker 
/// Takes a [32 x i8] vector and replaces the vector element value
///    indexed by the immediate constant operand with a new value. Returns the
///    modified vector.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m256i _mm256_insert_epi8(__m256i X, int I, const int N);
/// \endcode
///
/// This intrinsic corresponds to the <c> VINSERTF128+COMPOSITE </c>
///   instruction.
///
/// \param X
///    A vector of [32 x i8] to be used by the insert operation.
/// \param I
///    An i8 integer value. The replacement value for the insert operation.
///    The argument is passed to the builtin through an <c>(int)</c> cast.
/// \param N
///    An immediate integer specifying the index of the vector element to be
///    replaced.
/// \returns A copy of vector \a X, after replacing its element indexed by
///    \a N with \a I.
#define _mm256_insert_epi8(X, I, N) \
  ((__m256i)__builtin_ia32_vec_set_v32qi((__v32qi)(__m256i)(X), \
                                         (int)(I), (int)(N)))
2126*bed243d3SAndroid Build Coastguard Worker 
#ifdef __x86_64__
/// Takes a [4 x i64] vector and replaces the vector element value
///    indexed by the immediate constant operand with a new value. Returns the
///    modified vector.
///
///    This macro is only defined when <c>__x86_64__</c> is defined.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m256i _mm256_insert_epi64(__m256i X, long long I, const int N);
/// \endcode
///
/// This intrinsic corresponds to the <c> VINSERTF128+COMPOSITE </c>
///   instruction.
///
/// \param X
///    A vector of [4 x i64] to be used by the insert operation.
/// \param I
///    A 64-bit integer value. The replacement value for the insert operation.
/// \param N
///    An immediate integer specifying the index of the vector element to be
///    replaced.
/// \returns A copy of vector \a X, after replacing its element indexed by
///    \a N with \a I.
#define _mm256_insert_epi64(X, I, N) \
  ((__m256i)__builtin_ia32_vec_set_v4di((__v4di)(__m256i)(X), \
                                        (long long)(I), (int)(N)))
#endif
2154*bed243d3SAndroid Build Coastguard Worker 
2155*bed243d3SAndroid Build Coastguard Worker /* Conversion */
2156*bed243d3SAndroid Build Coastguard Worker /// Converts a vector of [4 x i32] into a vector of [4 x double].
2157*bed243d3SAndroid Build Coastguard Worker ///
2158*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
2159*bed243d3SAndroid Build Coastguard Worker ///
2160*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VCVTDQ2PD </c> instruction.
2161*bed243d3SAndroid Build Coastguard Worker ///
2162*bed243d3SAndroid Build Coastguard Worker /// \param __a
2163*bed243d3SAndroid Build Coastguard Worker ///    A 128-bit integer vector of [4 x i32].
2164*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [4 x double] containing the converted values.
2165*bed243d3SAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_cvtepi32_pd(__m128i __a)2166*bed243d3SAndroid Build Coastguard Worker _mm256_cvtepi32_pd(__m128i __a)
2167*bed243d3SAndroid Build Coastguard Worker {
2168*bed243d3SAndroid Build Coastguard Worker   return (__m256d)__builtin_convertvector((__v4si)__a, __v4df);
2169*bed243d3SAndroid Build Coastguard Worker }
2170*bed243d3SAndroid Build Coastguard Worker 
2171*bed243d3SAndroid Build Coastguard Worker /// Converts a vector of [8 x i32] into a vector of [8 x float].
2172*bed243d3SAndroid Build Coastguard Worker ///
2173*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
2174*bed243d3SAndroid Build Coastguard Worker ///
2175*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VCVTDQ2PS </c> instruction.
2176*bed243d3SAndroid Build Coastguard Worker ///
2177*bed243d3SAndroid Build Coastguard Worker /// \param __a
2178*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit integer vector.
2179*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] containing the converted values.
2180*bed243d3SAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_cvtepi32_ps(__m256i __a)2181*bed243d3SAndroid Build Coastguard Worker _mm256_cvtepi32_ps(__m256i __a)
2182*bed243d3SAndroid Build Coastguard Worker {
2183*bed243d3SAndroid Build Coastguard Worker   return (__m256)__builtin_convertvector((__v8si)__a, __v8sf);
2184*bed243d3SAndroid Build Coastguard Worker }
2185*bed243d3SAndroid Build Coastguard Worker 
2186*bed243d3SAndroid Build Coastguard Worker /// Converts a 256-bit vector of [4 x double] into a 128-bit vector of
2187*bed243d3SAndroid Build Coastguard Worker ///    [4 x float].
2188*bed243d3SAndroid Build Coastguard Worker ///
2189*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
2190*bed243d3SAndroid Build Coastguard Worker ///
2191*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VCVTPD2PS </c> instruction.
2192*bed243d3SAndroid Build Coastguard Worker ///
2193*bed243d3SAndroid Build Coastguard Worker /// \param __a
2194*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double].
2195*bed243d3SAndroid Build Coastguard Worker /// \returns A 128-bit vector of [4 x float] containing the converted values.
2196*bed243d3SAndroid Build Coastguard Worker static __inline __m128 __DEFAULT_FN_ATTRS
_mm256_cvtpd_ps(__m256d __a)2197*bed243d3SAndroid Build Coastguard Worker _mm256_cvtpd_ps(__m256d __a)
2198*bed243d3SAndroid Build Coastguard Worker {
2199*bed243d3SAndroid Build Coastguard Worker   return (__m128)__builtin_ia32_cvtpd2ps256((__v4df) __a);
2200*bed243d3SAndroid Build Coastguard Worker }
2201*bed243d3SAndroid Build Coastguard Worker 
2202*bed243d3SAndroid Build Coastguard Worker /// Converts a vector of [8 x float] into a vector of [8 x i32].
2203*bed243d3SAndroid Build Coastguard Worker ///
2204*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
2205*bed243d3SAndroid Build Coastguard Worker ///
2206*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VCVTPS2DQ </c> instruction.
2207*bed243d3SAndroid Build Coastguard Worker ///
2208*bed243d3SAndroid Build Coastguard Worker /// \param __a
2209*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float].
2210*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit integer vector containing the converted values.
2211*bed243d3SAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_cvtps_epi32(__m256 __a)2212*bed243d3SAndroid Build Coastguard Worker _mm256_cvtps_epi32(__m256 __a)
2213*bed243d3SAndroid Build Coastguard Worker {
2214*bed243d3SAndroid Build Coastguard Worker   return (__m256i)__builtin_ia32_cvtps2dq256((__v8sf) __a);
2215*bed243d3SAndroid Build Coastguard Worker }
2216*bed243d3SAndroid Build Coastguard Worker 
2217*bed243d3SAndroid Build Coastguard Worker /// Converts a 128-bit vector of [4 x float] into a 256-bit vector of [4
2218*bed243d3SAndroid Build Coastguard Worker ///    x double].
2219*bed243d3SAndroid Build Coastguard Worker ///
2220*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
2221*bed243d3SAndroid Build Coastguard Worker ///
2222*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VCVTPS2PD </c> instruction.
2223*bed243d3SAndroid Build Coastguard Worker ///
2224*bed243d3SAndroid Build Coastguard Worker /// \param __a
2225*bed243d3SAndroid Build Coastguard Worker ///    A 128-bit vector of [4 x float].
2226*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [4 x double] containing the converted values.
2227*bed243d3SAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_cvtps_pd(__m128 __a)2228*bed243d3SAndroid Build Coastguard Worker _mm256_cvtps_pd(__m128 __a)
2229*bed243d3SAndroid Build Coastguard Worker {
2230*bed243d3SAndroid Build Coastguard Worker   return (__m256d)__builtin_convertvector((__v4sf)__a, __v4df);
2231*bed243d3SAndroid Build Coastguard Worker }
2232*bed243d3SAndroid Build Coastguard Worker 
2233*bed243d3SAndroid Build Coastguard Worker /// Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4
2234*bed243d3SAndroid Build Coastguard Worker ///    x i32], truncating the result by rounding towards zero when it is
2235*bed243d3SAndroid Build Coastguard Worker ///    inexact.
2236*bed243d3SAndroid Build Coastguard Worker ///
2237*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
2238*bed243d3SAndroid Build Coastguard Worker ///
2239*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VCVTTPD2DQ </c> instruction.
2240*bed243d3SAndroid Build Coastguard Worker ///
2241*bed243d3SAndroid Build Coastguard Worker /// \param __a
2242*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double].
2243*bed243d3SAndroid Build Coastguard Worker /// \returns A 128-bit integer vector containing the converted values.
2244*bed243d3SAndroid Build Coastguard Worker static __inline __m128i __DEFAULT_FN_ATTRS
_mm256_cvttpd_epi32(__m256d __a)2245*bed243d3SAndroid Build Coastguard Worker _mm256_cvttpd_epi32(__m256d __a)
2246*bed243d3SAndroid Build Coastguard Worker {
2247*bed243d3SAndroid Build Coastguard Worker   return (__m128i)__builtin_ia32_cvttpd2dq256((__v4df) __a);
2248*bed243d3SAndroid Build Coastguard Worker }
2249*bed243d3SAndroid Build Coastguard Worker 
2250*bed243d3SAndroid Build Coastguard Worker /// Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4
2251*bed243d3SAndroid Build Coastguard Worker ///    x i32]. When a conversion is inexact, the value returned is rounded
2252*bed243d3SAndroid Build Coastguard Worker ///    according to the rounding control bits in the MXCSR register.
2253*bed243d3SAndroid Build Coastguard Worker ///
2254*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
2255*bed243d3SAndroid Build Coastguard Worker ///
2256*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VCVTPD2DQ </c> instruction.
2257*bed243d3SAndroid Build Coastguard Worker ///
2258*bed243d3SAndroid Build Coastguard Worker /// \param __a
2259*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double].
2260*bed243d3SAndroid Build Coastguard Worker /// \returns A 128-bit integer vector containing the converted values.
2261*bed243d3SAndroid Build Coastguard Worker static __inline __m128i __DEFAULT_FN_ATTRS
_mm256_cvtpd_epi32(__m256d __a)2262*bed243d3SAndroid Build Coastguard Worker _mm256_cvtpd_epi32(__m256d __a)
2263*bed243d3SAndroid Build Coastguard Worker {
2264*bed243d3SAndroid Build Coastguard Worker   return (__m128i)__builtin_ia32_cvtpd2dq256((__v4df) __a);
2265*bed243d3SAndroid Build Coastguard Worker }
2266*bed243d3SAndroid Build Coastguard Worker 
2267*bed243d3SAndroid Build Coastguard Worker /// Converts a vector of [8 x float] into a vector of [8 x i32],
2268*bed243d3SAndroid Build Coastguard Worker ///    truncating the result by rounding towards zero when it is inexact.
2269*bed243d3SAndroid Build Coastguard Worker ///
2270*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
2271*bed243d3SAndroid Build Coastguard Worker ///
2272*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VCVTTPS2DQ </c> instruction.
2273*bed243d3SAndroid Build Coastguard Worker ///
2274*bed243d3SAndroid Build Coastguard Worker /// \param __a
2275*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float].
2276*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit integer vector containing the converted values.
2277*bed243d3SAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_cvttps_epi32(__m256 __a)2278*bed243d3SAndroid Build Coastguard Worker _mm256_cvttps_epi32(__m256 __a)
2279*bed243d3SAndroid Build Coastguard Worker {
2280*bed243d3SAndroid Build Coastguard Worker   return (__m256i)__builtin_ia32_cvttps2dq256((__v8sf) __a);
2281*bed243d3SAndroid Build Coastguard Worker }
2282*bed243d3SAndroid Build Coastguard Worker 
2283*bed243d3SAndroid Build Coastguard Worker /// Returns the first element of the input vector of [4 x double].
2284*bed243d3SAndroid Build Coastguard Worker ///
2285*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
2286*bed243d3SAndroid Build Coastguard Worker ///
2287*bed243d3SAndroid Build Coastguard Worker /// This intrinsic is a utility function and does not correspond to a specific
2288*bed243d3SAndroid Build Coastguard Worker ///    instruction.
2289*bed243d3SAndroid Build Coastguard Worker ///
2290*bed243d3SAndroid Build Coastguard Worker /// \param __a
2291*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double].
2292*bed243d3SAndroid Build Coastguard Worker /// \returns A 64 bit double containing the first element of the input vector.
2293*bed243d3SAndroid Build Coastguard Worker static __inline double __DEFAULT_FN_ATTRS
_mm256_cvtsd_f64(__m256d __a)2294*bed243d3SAndroid Build Coastguard Worker _mm256_cvtsd_f64(__m256d __a)
2295*bed243d3SAndroid Build Coastguard Worker {
2296*bed243d3SAndroid Build Coastguard Worker  return __a[0];
2297*bed243d3SAndroid Build Coastguard Worker }
2298*bed243d3SAndroid Build Coastguard Worker 
2299*bed243d3SAndroid Build Coastguard Worker /// Returns the first element of the input vector of [8 x i32].
2300*bed243d3SAndroid Build Coastguard Worker ///
2301*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
2302*bed243d3SAndroid Build Coastguard Worker ///
2303*bed243d3SAndroid Build Coastguard Worker /// This intrinsic is a utility function and does not correspond to a specific
2304*bed243d3SAndroid Build Coastguard Worker ///    instruction.
2305*bed243d3SAndroid Build Coastguard Worker ///
2306*bed243d3SAndroid Build Coastguard Worker /// \param __a
2307*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x i32].
2308*bed243d3SAndroid Build Coastguard Worker /// \returns A 32 bit integer containing the first element of the input vector.
2309*bed243d3SAndroid Build Coastguard Worker static __inline int __DEFAULT_FN_ATTRS
_mm256_cvtsi256_si32(__m256i __a)2310*bed243d3SAndroid Build Coastguard Worker _mm256_cvtsi256_si32(__m256i __a)
2311*bed243d3SAndroid Build Coastguard Worker {
2312*bed243d3SAndroid Build Coastguard Worker  __v8si __b = (__v8si)__a;
2313*bed243d3SAndroid Build Coastguard Worker  return __b[0];
2314*bed243d3SAndroid Build Coastguard Worker }
2315*bed243d3SAndroid Build Coastguard Worker 
2316*bed243d3SAndroid Build Coastguard Worker /// Returns the first element of the input vector of [8 x float].
2317*bed243d3SAndroid Build Coastguard Worker ///
2318*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
2319*bed243d3SAndroid Build Coastguard Worker ///
2320*bed243d3SAndroid Build Coastguard Worker /// This intrinsic is a utility function and does not correspond to a specific
2321*bed243d3SAndroid Build Coastguard Worker ///    instruction.
2322*bed243d3SAndroid Build Coastguard Worker ///
2323*bed243d3SAndroid Build Coastguard Worker /// \param __a
2324*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float].
2325*bed243d3SAndroid Build Coastguard Worker /// \returns A 32 bit float containing the first element of the input vector.
2326*bed243d3SAndroid Build Coastguard Worker static __inline float __DEFAULT_FN_ATTRS
_mm256_cvtss_f32(__m256 __a)2327*bed243d3SAndroid Build Coastguard Worker _mm256_cvtss_f32(__m256 __a)
2328*bed243d3SAndroid Build Coastguard Worker {
2329*bed243d3SAndroid Build Coastguard Worker  return __a[0];
2330*bed243d3SAndroid Build Coastguard Worker }
2331*bed243d3SAndroid Build Coastguard Worker 
2332*bed243d3SAndroid Build Coastguard Worker /* Vector replicate */
2333*bed243d3SAndroid Build Coastguard Worker /// Moves and duplicates odd-indexed values from a 256-bit vector of
2334*bed243d3SAndroid Build Coastguard Worker ///    [8 x float] to float values in a 256-bit vector of [8 x float].
2335*bed243d3SAndroid Build Coastguard Worker ///
2336*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
2337*bed243d3SAndroid Build Coastguard Worker ///
2338*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VMOVSHDUP </c> instruction.
2339*bed243d3SAndroid Build Coastguard Worker ///
2340*bed243d3SAndroid Build Coastguard Worker /// \param __a
2341*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float]. \n
2342*bed243d3SAndroid Build Coastguard Worker ///    Bits [255:224] of \a __a are written to bits [255:224] and [223:192] of
2343*bed243d3SAndroid Build Coastguard Worker ///    the return value. \n
2344*bed243d3SAndroid Build Coastguard Worker ///    Bits [191:160] of \a __a are written to bits [191:160] and [159:128] of
2345*bed243d3SAndroid Build Coastguard Worker ///    the return value. \n
2346*bed243d3SAndroid Build Coastguard Worker ///    Bits [127:96] of \a __a are written to bits [127:96] and [95:64] of the
2347*bed243d3SAndroid Build Coastguard Worker ///    return value. \n
2348*bed243d3SAndroid Build Coastguard Worker ///    Bits [63:32] of \a __a are written to bits [63:32] and [31:0] of the
2349*bed243d3SAndroid Build Coastguard Worker ///    return value.
2350*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] containing the moved and duplicated
2351*bed243d3SAndroid Build Coastguard Worker ///    values.
2352*bed243d3SAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_movehdup_ps(__m256 __a)2353*bed243d3SAndroid Build Coastguard Worker _mm256_movehdup_ps(__m256 __a)
2354*bed243d3SAndroid Build Coastguard Worker {
2355*bed243d3SAndroid Build Coastguard Worker   return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 1, 1, 3, 3, 5, 5, 7, 7);
2356*bed243d3SAndroid Build Coastguard Worker }
2357*bed243d3SAndroid Build Coastguard Worker 
2358*bed243d3SAndroid Build Coastguard Worker /// Moves and duplicates even-indexed values from a 256-bit vector of
2359*bed243d3SAndroid Build Coastguard Worker ///    [8 x float] to float values in a 256-bit vector of [8 x float].
2360*bed243d3SAndroid Build Coastguard Worker ///
2361*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
2362*bed243d3SAndroid Build Coastguard Worker ///
2363*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VMOVSLDUP </c> instruction.
2364*bed243d3SAndroid Build Coastguard Worker ///
2365*bed243d3SAndroid Build Coastguard Worker /// \param __a
2366*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float]. \n
2367*bed243d3SAndroid Build Coastguard Worker ///    Bits [223:192] of \a __a are written to bits [255:224] and [223:192] of
2368*bed243d3SAndroid Build Coastguard Worker ///    the return value. \n
2369*bed243d3SAndroid Build Coastguard Worker ///    Bits [159:128] of \a __a are written to bits [191:160] and [159:128] of
2370*bed243d3SAndroid Build Coastguard Worker ///    the return value. \n
2371*bed243d3SAndroid Build Coastguard Worker ///    Bits [95:64] of \a __a are written to bits [127:96] and [95:64] of the
2372*bed243d3SAndroid Build Coastguard Worker ///    return value. \n
2373*bed243d3SAndroid Build Coastguard Worker ///    Bits [31:0] of \a __a are written to bits [63:32] and [31:0] of the
2374*bed243d3SAndroid Build Coastguard Worker ///    return value.
2375*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] containing the moved and duplicated
2376*bed243d3SAndroid Build Coastguard Worker ///    values.
2377*bed243d3SAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_moveldup_ps(__m256 __a)2378*bed243d3SAndroid Build Coastguard Worker _mm256_moveldup_ps(__m256 __a)
2379*bed243d3SAndroid Build Coastguard Worker {
2380*bed243d3SAndroid Build Coastguard Worker   return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 0, 0, 2, 2, 4, 4, 6, 6);
2381*bed243d3SAndroid Build Coastguard Worker }
2382*bed243d3SAndroid Build Coastguard Worker 
2383*bed243d3SAndroid Build Coastguard Worker /// Moves and duplicates double-precision floating point values from a
2384*bed243d3SAndroid Build Coastguard Worker ///    256-bit vector of [4 x double] to double-precision values in a 256-bit
2385*bed243d3SAndroid Build Coastguard Worker ///    vector of [4 x double].
2386*bed243d3SAndroid Build Coastguard Worker ///
2387*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
2388*bed243d3SAndroid Build Coastguard Worker ///
2389*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VMOVDDUP </c> instruction.
2390*bed243d3SAndroid Build Coastguard Worker ///
2391*bed243d3SAndroid Build Coastguard Worker /// \param __a
2392*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double]. \n
2393*bed243d3SAndroid Build Coastguard Worker ///    Bits [63:0] of \a __a are written to bits [127:64] and [63:0] of the
2394*bed243d3SAndroid Build Coastguard Worker ///    return value. \n
2395*bed243d3SAndroid Build Coastguard Worker ///    Bits [191:128] of \a __a are written to bits [255:192] and [191:128] of
2396*bed243d3SAndroid Build Coastguard Worker ///    the return value.
2397*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [4 x double] containing the moved and
2398*bed243d3SAndroid Build Coastguard Worker ///    duplicated values.
2399*bed243d3SAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_movedup_pd(__m256d __a)2400*bed243d3SAndroid Build Coastguard Worker _mm256_movedup_pd(__m256d __a)
2401*bed243d3SAndroid Build Coastguard Worker {
2402*bed243d3SAndroid Build Coastguard Worker   return __builtin_shufflevector((__v4df)__a, (__v4df)__a, 0, 0, 2, 2);
2403*bed243d3SAndroid Build Coastguard Worker }
2404*bed243d3SAndroid Build Coastguard Worker 
2405*bed243d3SAndroid Build Coastguard Worker /* Unpack and Interleave */
2406*bed243d3SAndroid Build Coastguard Worker /// Unpacks the odd-indexed vector elements from two 256-bit vectors of
2407*bed243d3SAndroid Build Coastguard Worker ///    [4 x double] and interleaves them into a 256-bit vector of [4 x double].
2408*bed243d3SAndroid Build Coastguard Worker ///
2409*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
2410*bed243d3SAndroid Build Coastguard Worker ///
2411*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VUNPCKHPD </c> instruction.
2412*bed243d3SAndroid Build Coastguard Worker ///
2413*bed243d3SAndroid Build Coastguard Worker /// \param __a
2414*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit floating-point vector of [4 x double]. \n
2415*bed243d3SAndroid Build Coastguard Worker ///    Bits [127:64] are written to bits [63:0] of the return value. \n
2416*bed243d3SAndroid Build Coastguard Worker ///    Bits [255:192] are written to bits [191:128] of the return value. \n
2417*bed243d3SAndroid Build Coastguard Worker /// \param __b
2418*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit floating-point vector of [4 x double]. \n
2419*bed243d3SAndroid Build Coastguard Worker ///    Bits [127:64] are written to bits [127:64] of the return value. \n
2420*bed243d3SAndroid Build Coastguard Worker ///    Bits [255:192] are written to bits [255:192] of the return value. \n
2421*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [4 x double] containing the interleaved values.
2422*bed243d3SAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_unpackhi_pd(__m256d __a,__m256d __b)2423*bed243d3SAndroid Build Coastguard Worker _mm256_unpackhi_pd(__m256d __a, __m256d __b)
2424*bed243d3SAndroid Build Coastguard Worker {
2425*bed243d3SAndroid Build Coastguard Worker   return __builtin_shufflevector((__v4df)__a, (__v4df)__b, 1, 5, 1+2, 5+2);
2426*bed243d3SAndroid Build Coastguard Worker }
2427*bed243d3SAndroid Build Coastguard Worker 
2428*bed243d3SAndroid Build Coastguard Worker /// Unpacks the even-indexed vector elements from two 256-bit vectors of
2429*bed243d3SAndroid Build Coastguard Worker ///    [4 x double] and interleaves them into a 256-bit vector of [4 x double].
2430*bed243d3SAndroid Build Coastguard Worker ///
2431*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
2432*bed243d3SAndroid Build Coastguard Worker ///
2433*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VUNPCKLPD </c> instruction.
2434*bed243d3SAndroid Build Coastguard Worker ///
2435*bed243d3SAndroid Build Coastguard Worker /// \param __a
2436*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit floating-point vector of [4 x double]. \n
2437*bed243d3SAndroid Build Coastguard Worker ///    Bits [63:0] are written to bits [63:0] of the return value. \n
2438*bed243d3SAndroid Build Coastguard Worker ///    Bits [191:128] are written to bits [191:128] of the return value.
2439*bed243d3SAndroid Build Coastguard Worker /// \param __b
2440*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit floating-point vector of [4 x double]. \n
2441*bed243d3SAndroid Build Coastguard Worker ///    Bits [63:0] are written to bits [127:64] of the return value. \n
2442*bed243d3SAndroid Build Coastguard Worker ///    Bits [191:128] are written to bits [255:192] of the return value. \n
2443*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [4 x double] containing the interleaved values.
2444*bed243d3SAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_unpacklo_pd(__m256d __a,__m256d __b)2445*bed243d3SAndroid Build Coastguard Worker _mm256_unpacklo_pd(__m256d __a, __m256d __b)
2446*bed243d3SAndroid Build Coastguard Worker {
2447*bed243d3SAndroid Build Coastguard Worker   return __builtin_shufflevector((__v4df)__a, (__v4df)__b, 0, 4, 0+2, 4+2);
2448*bed243d3SAndroid Build Coastguard Worker }
2449*bed243d3SAndroid Build Coastguard Worker 
2450*bed243d3SAndroid Build Coastguard Worker /// Unpacks the 32-bit vector elements 2, 3, 6 and 7 from each of the
2451*bed243d3SAndroid Build Coastguard Worker ///    two 256-bit vectors of [8 x float] and interleaves them into a 256-bit
2452*bed243d3SAndroid Build Coastguard Worker ///    vector of [8 x float].
2453*bed243d3SAndroid Build Coastguard Worker ///
2454*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
2455*bed243d3SAndroid Build Coastguard Worker ///
2456*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VUNPCKHPS </c> instruction.
2457*bed243d3SAndroid Build Coastguard Worker ///
2458*bed243d3SAndroid Build Coastguard Worker /// \param __a
2459*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float]. \n
2460*bed243d3SAndroid Build Coastguard Worker ///    Bits [95:64] are written to bits [31:0] of the return value. \n
2461*bed243d3SAndroid Build Coastguard Worker ///    Bits [127:96] are written to bits [95:64] of the return value. \n
2462*bed243d3SAndroid Build Coastguard Worker ///    Bits [223:192] are written to bits [159:128] of the return value. \n
2463*bed243d3SAndroid Build Coastguard Worker ///    Bits [255:224] are written to bits [223:192] of the return value.
2464*bed243d3SAndroid Build Coastguard Worker /// \param __b
2465*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float]. \n
2466*bed243d3SAndroid Build Coastguard Worker ///    Bits [95:64] are written to bits [63:32] of the return value. \n
2467*bed243d3SAndroid Build Coastguard Worker ///    Bits [127:96] are written to bits [127:96] of the return value. \n
2468*bed243d3SAndroid Build Coastguard Worker ///    Bits [223:192] are written to bits [191:160] of the return value. \n
2469*bed243d3SAndroid Build Coastguard Worker ///    Bits [255:224] are written to bits [255:224] of the return value.
2470*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] containing the interleaved values.
2471*bed243d3SAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_unpackhi_ps(__m256 __a,__m256 __b)2472*bed243d3SAndroid Build Coastguard Worker _mm256_unpackhi_ps(__m256 __a, __m256 __b)
2473*bed243d3SAndroid Build Coastguard Worker {
2474*bed243d3SAndroid Build Coastguard Worker   return __builtin_shufflevector((__v8sf)__a, (__v8sf)__b, 2, 10, 2+1, 10+1, 6, 14, 6+1, 14+1);
2475*bed243d3SAndroid Build Coastguard Worker }
2476*bed243d3SAndroid Build Coastguard Worker 
2477*bed243d3SAndroid Build Coastguard Worker /// Unpacks the 32-bit vector elements 0, 1, 4 and 5 from each of the
2478*bed243d3SAndroid Build Coastguard Worker ///    two 256-bit vectors of [8 x float] and interleaves them into a 256-bit
2479*bed243d3SAndroid Build Coastguard Worker ///    vector of [8 x float].
2480*bed243d3SAndroid Build Coastguard Worker ///
2481*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
2482*bed243d3SAndroid Build Coastguard Worker ///
2483*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VUNPCKLPS </c> instruction.
2484*bed243d3SAndroid Build Coastguard Worker ///
2485*bed243d3SAndroid Build Coastguard Worker /// \param __a
2486*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float]. \n
2487*bed243d3SAndroid Build Coastguard Worker ///    Bits [31:0] are written to bits [31:0] of the return value. \n
2488*bed243d3SAndroid Build Coastguard Worker ///    Bits [63:32] are written to bits [95:64] of the return value. \n
2489*bed243d3SAndroid Build Coastguard Worker ///    Bits [159:128] are written to bits [159:128] of the return value. \n
2490*bed243d3SAndroid Build Coastguard Worker ///    Bits [191:160] are written to bits [223:192] of the return value.
2491*bed243d3SAndroid Build Coastguard Worker /// \param __b
2492*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float]. \n
2493*bed243d3SAndroid Build Coastguard Worker ///    Bits [31:0] are written to bits [63:32] of the return value. \n
2494*bed243d3SAndroid Build Coastguard Worker ///    Bits [63:32] are written to bits [127:96] of the return value. \n
2495*bed243d3SAndroid Build Coastguard Worker ///    Bits [159:128] are written to bits [191:160] of the return value. \n
2496*bed243d3SAndroid Build Coastguard Worker ///    Bits [191:160] are written to bits [255:224] of the return value.
2497*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] containing the interleaved values.
2498*bed243d3SAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_unpacklo_ps(__m256 __a,__m256 __b)2499*bed243d3SAndroid Build Coastguard Worker _mm256_unpacklo_ps(__m256 __a, __m256 __b)
2500*bed243d3SAndroid Build Coastguard Worker {
2501*bed243d3SAndroid Build Coastguard Worker   return __builtin_shufflevector((__v8sf)__a, (__v8sf)__b, 0, 8, 0+1, 8+1, 4, 12, 4+1, 12+1);
2502*bed243d3SAndroid Build Coastguard Worker }
2503*bed243d3SAndroid Build Coastguard Worker 
2504*bed243d3SAndroid Build Coastguard Worker /* Bit Test */
2505*bed243d3SAndroid Build Coastguard Worker /// Given two 128-bit floating-point vectors of [2 x double], perform an
2506*bed243d3SAndroid Build Coastguard Worker ///    element-by-element comparison of the double-precision element in the
2507*bed243d3SAndroid Build Coastguard Worker ///    first source vector and the corresponding element in the second source
2508*bed243d3SAndroid Build Coastguard Worker ///    vector.
2509*bed243d3SAndroid Build Coastguard Worker ///
2510*bed243d3SAndroid Build Coastguard Worker ///    The EFLAGS register is updated as follows: \n
2511*bed243d3SAndroid Build Coastguard Worker ///    If there is at least one pair of double-precision elements where the
2512*bed243d3SAndroid Build Coastguard Worker ///    sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the
2513*bed243d3SAndroid Build Coastguard Worker ///    ZF flag is set to 1. \n
2514*bed243d3SAndroid Build Coastguard Worker ///    If there is at least one pair of double-precision elements where the
2515*bed243d3SAndroid Build Coastguard Worker ///    sign-bit of the first element is 0 and the sign-bit of the second element
2516*bed243d3SAndroid Build Coastguard Worker ///    is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \n
2517*bed243d3SAndroid Build Coastguard Worker ///    This intrinsic returns the value of the ZF flag.
2518*bed243d3SAndroid Build Coastguard Worker ///
2519*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
2520*bed243d3SAndroid Build Coastguard Worker ///
2521*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VTESTPD </c> instruction.
2522*bed243d3SAndroid Build Coastguard Worker ///
2523*bed243d3SAndroid Build Coastguard Worker /// \param __a
2524*bed243d3SAndroid Build Coastguard Worker ///    A 128-bit vector of [2 x double].
2525*bed243d3SAndroid Build Coastguard Worker /// \param __b
2526*bed243d3SAndroid Build Coastguard Worker ///    A 128-bit vector of [2 x double].
2527*bed243d3SAndroid Build Coastguard Worker /// \returns the ZF flag in the EFLAGS register.
2528*bed243d3SAndroid Build Coastguard Worker static __inline int __DEFAULT_FN_ATTRS128
_mm_testz_pd(__m128d __a,__m128d __b)2529*bed243d3SAndroid Build Coastguard Worker _mm_testz_pd(__m128d __a, __m128d __b)
2530*bed243d3SAndroid Build Coastguard Worker {
2531*bed243d3SAndroid Build Coastguard Worker   return __builtin_ia32_vtestzpd((__v2df)__a, (__v2df)__b);
2532*bed243d3SAndroid Build Coastguard Worker }
2533*bed243d3SAndroid Build Coastguard Worker 
2534*bed243d3SAndroid Build Coastguard Worker /// Given two 128-bit floating-point vectors of [2 x double], perform an
2535*bed243d3SAndroid Build Coastguard Worker ///    element-by-element comparison of the double-precision element in the
2536*bed243d3SAndroid Build Coastguard Worker ///    first source vector and the corresponding element in the second source
2537*bed243d3SAndroid Build Coastguard Worker ///    vector.
2538*bed243d3SAndroid Build Coastguard Worker ///
2539*bed243d3SAndroid Build Coastguard Worker ///    The EFLAGS register is updated as follows: \n
2540*bed243d3SAndroid Build Coastguard Worker ///    If there is at least one pair of double-precision elements where the
2541*bed243d3SAndroid Build Coastguard Worker ///    sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the
2542*bed243d3SAndroid Build Coastguard Worker ///    ZF flag is set to 1. \n
2543*bed243d3SAndroid Build Coastguard Worker ///    If there is at least one pair of double-precision elements where the
2544*bed243d3SAndroid Build Coastguard Worker ///    sign-bit of the first element is 0 and the sign-bit of the second element
2545*bed243d3SAndroid Build Coastguard Worker ///    is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \n
2546*bed243d3SAndroid Build Coastguard Worker ///    This intrinsic returns the value of the CF flag.
2547*bed243d3SAndroid Build Coastguard Worker ///
2548*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
2549*bed243d3SAndroid Build Coastguard Worker ///
2550*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VTESTPD </c> instruction.
2551*bed243d3SAndroid Build Coastguard Worker ///
2552*bed243d3SAndroid Build Coastguard Worker /// \param __a
2553*bed243d3SAndroid Build Coastguard Worker ///    A 128-bit vector of [2 x double].
2554*bed243d3SAndroid Build Coastguard Worker /// \param __b
2555*bed243d3SAndroid Build Coastguard Worker ///    A 128-bit vector of [2 x double].
2556*bed243d3SAndroid Build Coastguard Worker /// \returns the CF flag in the EFLAGS register.
2557*bed243d3SAndroid Build Coastguard Worker static __inline int __DEFAULT_FN_ATTRS128
_mm_testc_pd(__m128d __a,__m128d __b)2558*bed243d3SAndroid Build Coastguard Worker _mm_testc_pd(__m128d __a, __m128d __b)
2559*bed243d3SAndroid Build Coastguard Worker {
2560*bed243d3SAndroid Build Coastguard Worker   return __builtin_ia32_vtestcpd((__v2df)__a, (__v2df)__b);
2561*bed243d3SAndroid Build Coastguard Worker }
2562*bed243d3SAndroid Build Coastguard Worker 
2563*bed243d3SAndroid Build Coastguard Worker /// Given two 128-bit floating-point vectors of [2 x double], perform an
2564*bed243d3SAndroid Build Coastguard Worker ///    element-by-element comparison of the double-precision element in the
2565*bed243d3SAndroid Build Coastguard Worker ///    first source vector and the corresponding element in the second source
2566*bed243d3SAndroid Build Coastguard Worker ///    vector.
2567*bed243d3SAndroid Build Coastguard Worker ///
2568*bed243d3SAndroid Build Coastguard Worker ///    The EFLAGS register is updated as follows: \n
2569*bed243d3SAndroid Build Coastguard Worker ///    If there is at least one pair of double-precision elements where the
2570*bed243d3SAndroid Build Coastguard Worker ///    sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the
2571*bed243d3SAndroid Build Coastguard Worker ///    ZF flag is set to 1. \n
2572*bed243d3SAndroid Build Coastguard Worker ///    If there is at least one pair of double-precision elements where the
2573*bed243d3SAndroid Build Coastguard Worker ///    sign-bit of the first element is 0 and the sign-bit of the second element
2574*bed243d3SAndroid Build Coastguard Worker ///    is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \n
2575*bed243d3SAndroid Build Coastguard Worker ///    This intrinsic returns 1 if both the ZF and CF flags are set to 0,
2576*bed243d3SAndroid Build Coastguard Worker ///    otherwise it returns 0.
2577*bed243d3SAndroid Build Coastguard Worker ///
2578*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
2579*bed243d3SAndroid Build Coastguard Worker ///
2580*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VTESTPD </c> instruction.
2581*bed243d3SAndroid Build Coastguard Worker ///
2582*bed243d3SAndroid Build Coastguard Worker /// \param __a
2583*bed243d3SAndroid Build Coastguard Worker ///    A 128-bit vector of [2 x double].
2584*bed243d3SAndroid Build Coastguard Worker /// \param __b
2585*bed243d3SAndroid Build Coastguard Worker ///    A 128-bit vector of [2 x double].
2586*bed243d3SAndroid Build Coastguard Worker /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.
2587*bed243d3SAndroid Build Coastguard Worker static __inline int __DEFAULT_FN_ATTRS128
_mm_testnzc_pd(__m128d __a,__m128d __b)2588*bed243d3SAndroid Build Coastguard Worker _mm_testnzc_pd(__m128d __a, __m128d __b)
2589*bed243d3SAndroid Build Coastguard Worker {
2590*bed243d3SAndroid Build Coastguard Worker   return __builtin_ia32_vtestnzcpd((__v2df)__a, (__v2df)__b);
2591*bed243d3SAndroid Build Coastguard Worker }
2592*bed243d3SAndroid Build Coastguard Worker 
2593*bed243d3SAndroid Build Coastguard Worker /// Given two 128-bit floating-point vectors of [4 x float], perform an
2594*bed243d3SAndroid Build Coastguard Worker ///    element-by-element comparison of the single-precision element in the
2595*bed243d3SAndroid Build Coastguard Worker ///    first source vector and the corresponding element in the second source
2596*bed243d3SAndroid Build Coastguard Worker ///    vector.
2597*bed243d3SAndroid Build Coastguard Worker ///
2598*bed243d3SAndroid Build Coastguard Worker ///    The EFLAGS register is updated as follows: \n
2599*bed243d3SAndroid Build Coastguard Worker ///    If there is at least one pair of single-precision elements where the
2600*bed243d3SAndroid Build Coastguard Worker ///    sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the
2601*bed243d3SAndroid Build Coastguard Worker ///    ZF flag is set to 1. \n
2602*bed243d3SAndroid Build Coastguard Worker ///    If there is at least one pair of single-precision elements where the
2603*bed243d3SAndroid Build Coastguard Worker ///    sign-bit of the first element is 0 and the sign-bit of the second element
2604*bed243d3SAndroid Build Coastguard Worker ///    is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \n
2605*bed243d3SAndroid Build Coastguard Worker ///    This intrinsic returns the value of the ZF flag.
2606*bed243d3SAndroid Build Coastguard Worker ///
2607*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
2608*bed243d3SAndroid Build Coastguard Worker ///
2609*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VTESTPS </c> instruction.
2610*bed243d3SAndroid Build Coastguard Worker ///
2611*bed243d3SAndroid Build Coastguard Worker /// \param __a
2612*bed243d3SAndroid Build Coastguard Worker ///    A 128-bit vector of [4 x float].
2613*bed243d3SAndroid Build Coastguard Worker /// \param __b
2614*bed243d3SAndroid Build Coastguard Worker ///    A 128-bit vector of [4 x float].
2615*bed243d3SAndroid Build Coastguard Worker /// \returns the ZF flag.
2616*bed243d3SAndroid Build Coastguard Worker static __inline int __DEFAULT_FN_ATTRS128
_mm_testz_ps(__m128 __a,__m128 __b)2617*bed243d3SAndroid Build Coastguard Worker _mm_testz_ps(__m128 __a, __m128 __b)
2618*bed243d3SAndroid Build Coastguard Worker {
2619*bed243d3SAndroid Build Coastguard Worker   return __builtin_ia32_vtestzps((__v4sf)__a, (__v4sf)__b);
2620*bed243d3SAndroid Build Coastguard Worker }
2621*bed243d3SAndroid Build Coastguard Worker 
2622*bed243d3SAndroid Build Coastguard Worker /// Given two 128-bit floating-point vectors of [4 x float], perform an
2623*bed243d3SAndroid Build Coastguard Worker ///    element-by-element comparison of the single-precision element in the
2624*bed243d3SAndroid Build Coastguard Worker ///    first source vector and the corresponding element in the second source
2625*bed243d3SAndroid Build Coastguard Worker ///    vector.
2626*bed243d3SAndroid Build Coastguard Worker ///
2627*bed243d3SAndroid Build Coastguard Worker ///    The EFLAGS register is updated as follows: \n
2628*bed243d3SAndroid Build Coastguard Worker ///    If there is at least one pair of single-precision elements where the
2629*bed243d3SAndroid Build Coastguard Worker ///    sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the
2630*bed243d3SAndroid Build Coastguard Worker ///    ZF flag is set to 1. \n
2631*bed243d3SAndroid Build Coastguard Worker ///    If there is at least one pair of single-precision elements where the
2632*bed243d3SAndroid Build Coastguard Worker ///    sign-bit of the first element is 0 and the sign-bit of the second element
2633*bed243d3SAndroid Build Coastguard Worker ///    is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \n
2634*bed243d3SAndroid Build Coastguard Worker ///    This intrinsic returns the value of the CF flag.
2635*bed243d3SAndroid Build Coastguard Worker ///
2636*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
2637*bed243d3SAndroid Build Coastguard Worker ///
2638*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VTESTPS </c> instruction.
2639*bed243d3SAndroid Build Coastguard Worker ///
2640*bed243d3SAndroid Build Coastguard Worker /// \param __a
2641*bed243d3SAndroid Build Coastguard Worker ///    A 128-bit vector of [4 x float].
2642*bed243d3SAndroid Build Coastguard Worker /// \param __b
2643*bed243d3SAndroid Build Coastguard Worker ///    A 128-bit vector of [4 x float].
2644*bed243d3SAndroid Build Coastguard Worker /// \returns the CF flag.
2645*bed243d3SAndroid Build Coastguard Worker static __inline int __DEFAULT_FN_ATTRS128
_mm_testc_ps(__m128 __a,__m128 __b)2646*bed243d3SAndroid Build Coastguard Worker _mm_testc_ps(__m128 __a, __m128 __b)
2647*bed243d3SAndroid Build Coastguard Worker {
2648*bed243d3SAndroid Build Coastguard Worker   return __builtin_ia32_vtestcps((__v4sf)__a, (__v4sf)__b);
2649*bed243d3SAndroid Build Coastguard Worker }
2650*bed243d3SAndroid Build Coastguard Worker 
2651*bed243d3SAndroid Build Coastguard Worker /// Given two 128-bit floating-point vectors of [4 x float], perform an
2652*bed243d3SAndroid Build Coastguard Worker ///    element-by-element comparison of the single-precision element in the
2653*bed243d3SAndroid Build Coastguard Worker ///    first source vector and the corresponding element in the second source
2654*bed243d3SAndroid Build Coastguard Worker ///    vector.
2655*bed243d3SAndroid Build Coastguard Worker ///
2656*bed243d3SAndroid Build Coastguard Worker ///    The EFLAGS register is updated as follows: \n
2657*bed243d3SAndroid Build Coastguard Worker ///    If there is at least one pair of single-precision elements where the
2658*bed243d3SAndroid Build Coastguard Worker ///    sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the
2659*bed243d3SAndroid Build Coastguard Worker ///    ZF flag is set to 1. \n
2660*bed243d3SAndroid Build Coastguard Worker ///    If there is at least one pair of single-precision elements where the
2661*bed243d3SAndroid Build Coastguard Worker ///    sign-bit of the first element is 0 and the sign-bit of the second element
2662*bed243d3SAndroid Build Coastguard Worker ///    is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \n
2663*bed243d3SAndroid Build Coastguard Worker ///    This intrinsic returns 1 if both the ZF and CF flags are set to 0,
2664*bed243d3SAndroid Build Coastguard Worker ///    otherwise it returns 0.
2665*bed243d3SAndroid Build Coastguard Worker ///
2666*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
2667*bed243d3SAndroid Build Coastguard Worker ///
2668*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VTESTPS </c> instruction.
2669*bed243d3SAndroid Build Coastguard Worker ///
2670*bed243d3SAndroid Build Coastguard Worker /// \param __a
2671*bed243d3SAndroid Build Coastguard Worker ///    A 128-bit vector of [4 x float].
2672*bed243d3SAndroid Build Coastguard Worker /// \param __b
2673*bed243d3SAndroid Build Coastguard Worker ///    A 128-bit vector of [4 x float].
2674*bed243d3SAndroid Build Coastguard Worker /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.
2675*bed243d3SAndroid Build Coastguard Worker static __inline int __DEFAULT_FN_ATTRS128
_mm_testnzc_ps(__m128 __a,__m128 __b)2676*bed243d3SAndroid Build Coastguard Worker _mm_testnzc_ps(__m128 __a, __m128 __b)
2677*bed243d3SAndroid Build Coastguard Worker {
2678*bed243d3SAndroid Build Coastguard Worker   return __builtin_ia32_vtestnzcps((__v4sf)__a, (__v4sf)__b);
2679*bed243d3SAndroid Build Coastguard Worker }
2680*bed243d3SAndroid Build Coastguard Worker 
2681*bed243d3SAndroid Build Coastguard Worker /// Given two 256-bit floating-point vectors of [4 x double], perform an
2682*bed243d3SAndroid Build Coastguard Worker ///    element-by-element comparison of the double-precision elements in the
2683*bed243d3SAndroid Build Coastguard Worker ///    first source vector and the corresponding elements in the second source
2684*bed243d3SAndroid Build Coastguard Worker ///    vector.
2685*bed243d3SAndroid Build Coastguard Worker ///
2686*bed243d3SAndroid Build Coastguard Worker ///    The EFLAGS register is updated as follows: \n
2687*bed243d3SAndroid Build Coastguard Worker ///    If there is at least one pair of double-precision elements where the
2688*bed243d3SAndroid Build Coastguard Worker ///    sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the
2689*bed243d3SAndroid Build Coastguard Worker ///    ZF flag is set to 1. \n
2690*bed243d3SAndroid Build Coastguard Worker ///    If there is at least one pair of double-precision elements where the
2691*bed243d3SAndroid Build Coastguard Worker ///    sign-bit of the first element is 0 and the sign-bit of the second element
2692*bed243d3SAndroid Build Coastguard Worker ///    is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \n
2693*bed243d3SAndroid Build Coastguard Worker ///    This intrinsic returns the value of the ZF flag.
2694*bed243d3SAndroid Build Coastguard Worker ///
2695*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
2696*bed243d3SAndroid Build Coastguard Worker ///
2697*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VTESTPD </c> instruction.
2698*bed243d3SAndroid Build Coastguard Worker ///
2699*bed243d3SAndroid Build Coastguard Worker /// \param __a
2700*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double].
2701*bed243d3SAndroid Build Coastguard Worker /// \param __b
2702*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double].
2703*bed243d3SAndroid Build Coastguard Worker /// \returns the ZF flag.
2704*bed243d3SAndroid Build Coastguard Worker static __inline int __DEFAULT_FN_ATTRS
_mm256_testz_pd(__m256d __a,__m256d __b)2705*bed243d3SAndroid Build Coastguard Worker _mm256_testz_pd(__m256d __a, __m256d __b)
2706*bed243d3SAndroid Build Coastguard Worker {
2707*bed243d3SAndroid Build Coastguard Worker   return __builtin_ia32_vtestzpd256((__v4df)__a, (__v4df)__b);
2708*bed243d3SAndroid Build Coastguard Worker }
2709*bed243d3SAndroid Build Coastguard Worker 
2710*bed243d3SAndroid Build Coastguard Worker /// Given two 256-bit floating-point vectors of [4 x double], perform an
2711*bed243d3SAndroid Build Coastguard Worker ///    element-by-element comparison of the double-precision elements in the
2712*bed243d3SAndroid Build Coastguard Worker ///    first source vector and the corresponding elements in the second source
2713*bed243d3SAndroid Build Coastguard Worker ///    vector.
2714*bed243d3SAndroid Build Coastguard Worker ///
2715*bed243d3SAndroid Build Coastguard Worker ///    The EFLAGS register is updated as follows: \n
2716*bed243d3SAndroid Build Coastguard Worker ///    If there is at least one pair of double-precision elements where the
2717*bed243d3SAndroid Build Coastguard Worker ///    sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the
2718*bed243d3SAndroid Build Coastguard Worker ///    ZF flag is set to 1. \n
2719*bed243d3SAndroid Build Coastguard Worker ///    If there is at least one pair of double-precision elements where the
2720*bed243d3SAndroid Build Coastguard Worker ///    sign-bit of the first element is 0 and the sign-bit of the second element
2721*bed243d3SAndroid Build Coastguard Worker ///    is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \n
2722*bed243d3SAndroid Build Coastguard Worker ///    This intrinsic returns the value of the CF flag.
2723*bed243d3SAndroid Build Coastguard Worker ///
2724*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
2725*bed243d3SAndroid Build Coastguard Worker ///
2726*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VTESTPD </c> instruction.
2727*bed243d3SAndroid Build Coastguard Worker ///
2728*bed243d3SAndroid Build Coastguard Worker /// \param __a
2729*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double].
2730*bed243d3SAndroid Build Coastguard Worker /// \param __b
2731*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double].
2732*bed243d3SAndroid Build Coastguard Worker /// \returns the CF flag.
2733*bed243d3SAndroid Build Coastguard Worker static __inline int __DEFAULT_FN_ATTRS
_mm256_testc_pd(__m256d __a,__m256d __b)2734*bed243d3SAndroid Build Coastguard Worker _mm256_testc_pd(__m256d __a, __m256d __b)
2735*bed243d3SAndroid Build Coastguard Worker {
2736*bed243d3SAndroid Build Coastguard Worker   return __builtin_ia32_vtestcpd256((__v4df)__a, (__v4df)__b);
2737*bed243d3SAndroid Build Coastguard Worker }
2738*bed243d3SAndroid Build Coastguard Worker 
2739*bed243d3SAndroid Build Coastguard Worker /// Given two 256-bit floating-point vectors of [4 x double], perform an
2740*bed243d3SAndroid Build Coastguard Worker ///    element-by-element comparison of the double-precision elements in the
2741*bed243d3SAndroid Build Coastguard Worker ///    first source vector and the corresponding elements in the second source
2742*bed243d3SAndroid Build Coastguard Worker ///    vector.
2743*bed243d3SAndroid Build Coastguard Worker ///
2744*bed243d3SAndroid Build Coastguard Worker ///    The EFLAGS register is updated as follows: \n
2745*bed243d3SAndroid Build Coastguard Worker ///    If there is at least one pair of double-precision elements where the
2746*bed243d3SAndroid Build Coastguard Worker ///    sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the
2747*bed243d3SAndroid Build Coastguard Worker ///    ZF flag is set to 1. \n
2748*bed243d3SAndroid Build Coastguard Worker ///    If there is at least one pair of double-precision elements where the
2749*bed243d3SAndroid Build Coastguard Worker ///    sign-bit of the first element is 0 and the sign-bit of the second element
2750*bed243d3SAndroid Build Coastguard Worker ///    is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \n
2751*bed243d3SAndroid Build Coastguard Worker ///    This intrinsic returns 1 if both the ZF and CF flags are set to 0,
2752*bed243d3SAndroid Build Coastguard Worker ///    otherwise it returns 0.
2753*bed243d3SAndroid Build Coastguard Worker ///
2754*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
2755*bed243d3SAndroid Build Coastguard Worker ///
2756*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VTESTPD </c> instruction.
2757*bed243d3SAndroid Build Coastguard Worker ///
2758*bed243d3SAndroid Build Coastguard Worker /// \param __a
2759*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double].
2760*bed243d3SAndroid Build Coastguard Worker /// \param __b
2761*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double].
2762*bed243d3SAndroid Build Coastguard Worker /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.
2763*bed243d3SAndroid Build Coastguard Worker static __inline int __DEFAULT_FN_ATTRS
_mm256_testnzc_pd(__m256d __a,__m256d __b)2764*bed243d3SAndroid Build Coastguard Worker _mm256_testnzc_pd(__m256d __a, __m256d __b)
2765*bed243d3SAndroid Build Coastguard Worker {
2766*bed243d3SAndroid Build Coastguard Worker   return __builtin_ia32_vtestnzcpd256((__v4df)__a, (__v4df)__b);
2767*bed243d3SAndroid Build Coastguard Worker }
2768*bed243d3SAndroid Build Coastguard Worker 
2769*bed243d3SAndroid Build Coastguard Worker /// Given two 256-bit floating-point vectors of [8 x float], perform an
2770*bed243d3SAndroid Build Coastguard Worker ///    element-by-element comparison of the single-precision element in the
2771*bed243d3SAndroid Build Coastguard Worker ///    first source vector and the corresponding element in the second source
2772*bed243d3SAndroid Build Coastguard Worker ///    vector.
2773*bed243d3SAndroid Build Coastguard Worker ///
2774*bed243d3SAndroid Build Coastguard Worker ///    The EFLAGS register is updated as follows: \n
2775*bed243d3SAndroid Build Coastguard Worker ///    If there is at least one pair of single-precision elements where the
2776*bed243d3SAndroid Build Coastguard Worker ///    sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the
2777*bed243d3SAndroid Build Coastguard Worker ///    ZF flag is set to 1. \n
2778*bed243d3SAndroid Build Coastguard Worker ///    If there is at least one pair of single-precision elements where the
2779*bed243d3SAndroid Build Coastguard Worker ///    sign-bit of the first element is 0 and the sign-bit of the second element
2780*bed243d3SAndroid Build Coastguard Worker ///    is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \n
2781*bed243d3SAndroid Build Coastguard Worker ///    This intrinsic returns the value of the ZF flag.
2782*bed243d3SAndroid Build Coastguard Worker ///
2783*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
2784*bed243d3SAndroid Build Coastguard Worker ///
2785*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VTESTPS </c> instruction.
2786*bed243d3SAndroid Build Coastguard Worker ///
2787*bed243d3SAndroid Build Coastguard Worker /// \param __a
2788*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float].
2789*bed243d3SAndroid Build Coastguard Worker /// \param __b
2790*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float].
2791*bed243d3SAndroid Build Coastguard Worker /// \returns the ZF flag.
2792*bed243d3SAndroid Build Coastguard Worker static __inline int __DEFAULT_FN_ATTRS
_mm256_testz_ps(__m256 __a,__m256 __b)2793*bed243d3SAndroid Build Coastguard Worker _mm256_testz_ps(__m256 __a, __m256 __b)
2794*bed243d3SAndroid Build Coastguard Worker {
2795*bed243d3SAndroid Build Coastguard Worker   return __builtin_ia32_vtestzps256((__v8sf)__a, (__v8sf)__b);
2796*bed243d3SAndroid Build Coastguard Worker }
2797*bed243d3SAndroid Build Coastguard Worker 
2798*bed243d3SAndroid Build Coastguard Worker /// Given two 256-bit floating-point vectors of [8 x float], perform an
2799*bed243d3SAndroid Build Coastguard Worker ///    element-by-element comparison of the single-precision element in the
2800*bed243d3SAndroid Build Coastguard Worker ///    first source vector and the corresponding element in the second source
2801*bed243d3SAndroid Build Coastguard Worker ///    vector.
2802*bed243d3SAndroid Build Coastguard Worker ///
2803*bed243d3SAndroid Build Coastguard Worker ///    The EFLAGS register is updated as follows: \n
2804*bed243d3SAndroid Build Coastguard Worker ///    If there is at least one pair of single-precision elements where the
2805*bed243d3SAndroid Build Coastguard Worker ///    sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the
2806*bed243d3SAndroid Build Coastguard Worker ///    ZF flag is set to 1. \n
2807*bed243d3SAndroid Build Coastguard Worker ///    If there is at least one pair of single-precision elements where the
2808*bed243d3SAndroid Build Coastguard Worker ///    sign-bit of the first element is 0 and the sign-bit of the second element
2809*bed243d3SAndroid Build Coastguard Worker ///    is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \n
2810*bed243d3SAndroid Build Coastguard Worker ///    This intrinsic returns the value of the CF flag.
2811*bed243d3SAndroid Build Coastguard Worker ///
2812*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
2813*bed243d3SAndroid Build Coastguard Worker ///
2814*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VTESTPS </c> instruction.
2815*bed243d3SAndroid Build Coastguard Worker ///
2816*bed243d3SAndroid Build Coastguard Worker /// \param __a
2817*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float].
2818*bed243d3SAndroid Build Coastguard Worker /// \param __b
2819*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float].
2820*bed243d3SAndroid Build Coastguard Worker /// \returns the CF flag.
2821*bed243d3SAndroid Build Coastguard Worker static __inline int __DEFAULT_FN_ATTRS
_mm256_testc_ps(__m256 __a,__m256 __b)2822*bed243d3SAndroid Build Coastguard Worker _mm256_testc_ps(__m256 __a, __m256 __b)
2823*bed243d3SAndroid Build Coastguard Worker {
2824*bed243d3SAndroid Build Coastguard Worker   return __builtin_ia32_vtestcps256((__v8sf)__a, (__v8sf)__b);
2825*bed243d3SAndroid Build Coastguard Worker }
2826*bed243d3SAndroid Build Coastguard Worker 
2827*bed243d3SAndroid Build Coastguard Worker /// Given two 256-bit floating-point vectors of [8 x float], perform an
2828*bed243d3SAndroid Build Coastguard Worker ///    element-by-element comparison of the single-precision elements in the
2829*bed243d3SAndroid Build Coastguard Worker ///    first source vector and the corresponding elements in the second source
2830*bed243d3SAndroid Build Coastguard Worker ///    vector.
2831*bed243d3SAndroid Build Coastguard Worker ///
2832*bed243d3SAndroid Build Coastguard Worker ///    The EFLAGS register is updated as follows: \n
2833*bed243d3SAndroid Build Coastguard Worker ///    If there is at least one pair of single-precision elements where the
2834*bed243d3SAndroid Build Coastguard Worker ///    sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the
2835*bed243d3SAndroid Build Coastguard Worker ///    ZF flag is set to 1. \n
2836*bed243d3SAndroid Build Coastguard Worker ///    If there is at least one pair of single-precision elements where the
2837*bed243d3SAndroid Build Coastguard Worker ///    sign-bit of the first element is 0 and the sign-bit of the second element
2838*bed243d3SAndroid Build Coastguard Worker ///    is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \n
2839*bed243d3SAndroid Build Coastguard Worker ///    This intrinsic returns 1 if both the ZF and CF flags are set to 0,
2840*bed243d3SAndroid Build Coastguard Worker ///    otherwise it returns 0.
2841*bed243d3SAndroid Build Coastguard Worker ///
2842*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
2843*bed243d3SAndroid Build Coastguard Worker ///
2844*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VTESTPS </c> instruction.
2845*bed243d3SAndroid Build Coastguard Worker ///
2846*bed243d3SAndroid Build Coastguard Worker /// \param __a
2847*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float].
2848*bed243d3SAndroid Build Coastguard Worker /// \param __b
2849*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float].
2850*bed243d3SAndroid Build Coastguard Worker /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.
2851*bed243d3SAndroid Build Coastguard Worker static __inline int __DEFAULT_FN_ATTRS
_mm256_testnzc_ps(__m256 __a,__m256 __b)2852*bed243d3SAndroid Build Coastguard Worker _mm256_testnzc_ps(__m256 __a, __m256 __b)
2853*bed243d3SAndroid Build Coastguard Worker {
2854*bed243d3SAndroid Build Coastguard Worker   return __builtin_ia32_vtestnzcps256((__v8sf)__a, (__v8sf)__b);
2855*bed243d3SAndroid Build Coastguard Worker }
2856*bed243d3SAndroid Build Coastguard Worker 
2857*bed243d3SAndroid Build Coastguard Worker /// Given two 256-bit integer vectors, perform a bit-by-bit comparison
2858*bed243d3SAndroid Build Coastguard Worker ///    of the two source vectors.
2859*bed243d3SAndroid Build Coastguard Worker ///
2860*bed243d3SAndroid Build Coastguard Worker ///    The EFLAGS register is updated as follows: \n
2861*bed243d3SAndroid Build Coastguard Worker ///    If there is at least one pair of bits where both bits are 1, the ZF flag
2862*bed243d3SAndroid Build Coastguard Worker ///    is set to 0. Otherwise the ZF flag is set to 1. \n
2863*bed243d3SAndroid Build Coastguard Worker ///    If there is at least one pair of bits where the bit from the first source
2864*bed243d3SAndroid Build Coastguard Worker ///    vector is 0 and the bit from the second source vector is 1, the CF flag
2865*bed243d3SAndroid Build Coastguard Worker ///    is set to 0. Otherwise the CF flag is set to 1. \n
2866*bed243d3SAndroid Build Coastguard Worker ///    This intrinsic returns the value of the ZF flag.
2867*bed243d3SAndroid Build Coastguard Worker ///
2868*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
2869*bed243d3SAndroid Build Coastguard Worker ///
2870*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPTEST </c> instruction.
2871*bed243d3SAndroid Build Coastguard Worker ///
2872*bed243d3SAndroid Build Coastguard Worker /// \param __a
2873*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit integer vector.
2874*bed243d3SAndroid Build Coastguard Worker /// \param __b
2875*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit integer vector.
2876*bed243d3SAndroid Build Coastguard Worker /// \returns the ZF flag.
2877*bed243d3SAndroid Build Coastguard Worker static __inline int __DEFAULT_FN_ATTRS
_mm256_testz_si256(__m256i __a,__m256i __b)2878*bed243d3SAndroid Build Coastguard Worker _mm256_testz_si256(__m256i __a, __m256i __b)
2879*bed243d3SAndroid Build Coastguard Worker {
2880*bed243d3SAndroid Build Coastguard Worker   return __builtin_ia32_ptestz256((__v4di)__a, (__v4di)__b);
2881*bed243d3SAndroid Build Coastguard Worker }
2882*bed243d3SAndroid Build Coastguard Worker 
2883*bed243d3SAndroid Build Coastguard Worker /// Given two 256-bit integer vectors, perform a bit-by-bit comparison
2884*bed243d3SAndroid Build Coastguard Worker ///    of the two source vectors.
2885*bed243d3SAndroid Build Coastguard Worker ///
2886*bed243d3SAndroid Build Coastguard Worker ///    The EFLAGS register is updated as follows: \n
2887*bed243d3SAndroid Build Coastguard Worker ///    If there is at least one pair of bits where both bits are 1, the ZF flag
2888*bed243d3SAndroid Build Coastguard Worker ///    is set to 0. Otherwise the ZF flag is set to 1. \n
2889*bed243d3SAndroid Build Coastguard Worker ///    If there is at least one pair of bits where the bit from the first source
2890*bed243d3SAndroid Build Coastguard Worker ///    vector is 0 and the bit from the second source vector is 1, the CF flag
2891*bed243d3SAndroid Build Coastguard Worker ///    is set to 0. Otherwise the CF flag is set to 1. \n
2892*bed243d3SAndroid Build Coastguard Worker ///    This intrinsic returns the value of the CF flag.
2893*bed243d3SAndroid Build Coastguard Worker ///
2894*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
2895*bed243d3SAndroid Build Coastguard Worker ///
2896*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPTEST </c> instruction.
2897*bed243d3SAndroid Build Coastguard Worker ///
2898*bed243d3SAndroid Build Coastguard Worker /// \param __a
2899*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit integer vector.
2900*bed243d3SAndroid Build Coastguard Worker /// \param __b
2901*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit integer vector.
2902*bed243d3SAndroid Build Coastguard Worker /// \returns the CF flag.
2903*bed243d3SAndroid Build Coastguard Worker static __inline int __DEFAULT_FN_ATTRS
_mm256_testc_si256(__m256i __a,__m256i __b)2904*bed243d3SAndroid Build Coastguard Worker _mm256_testc_si256(__m256i __a, __m256i __b)
2905*bed243d3SAndroid Build Coastguard Worker {
2906*bed243d3SAndroid Build Coastguard Worker   return __builtin_ia32_ptestc256((__v4di)__a, (__v4di)__b);
2907*bed243d3SAndroid Build Coastguard Worker }
2908*bed243d3SAndroid Build Coastguard Worker 
2909*bed243d3SAndroid Build Coastguard Worker /// Given two 256-bit integer vectors, perform a bit-by-bit comparison
2910*bed243d3SAndroid Build Coastguard Worker ///    of the two source vectors.
2911*bed243d3SAndroid Build Coastguard Worker ///
2912*bed243d3SAndroid Build Coastguard Worker ///    The EFLAGS register is updated as follows: \n
2913*bed243d3SAndroid Build Coastguard Worker ///    If there is at least one pair of bits where both bits are 1, the ZF flag
2914*bed243d3SAndroid Build Coastguard Worker ///    is set to 0. Otherwise the ZF flag is set to 1. \n
2915*bed243d3SAndroid Build Coastguard Worker ///    If there is at least one pair of bits where the bit from the first source
2916*bed243d3SAndroid Build Coastguard Worker ///    vector is 0 and the bit from the second source vector is 1, the CF flag
2917*bed243d3SAndroid Build Coastguard Worker ///    is set to 0. Otherwise the CF flag is set to 1. \n
2918*bed243d3SAndroid Build Coastguard Worker ///    This intrinsic returns 1 if both the ZF and CF flags are set to 0,
2919*bed243d3SAndroid Build Coastguard Worker ///    otherwise it returns 0.
2920*bed243d3SAndroid Build Coastguard Worker ///
2921*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
2922*bed243d3SAndroid Build Coastguard Worker ///
2923*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPTEST </c> instruction.
2924*bed243d3SAndroid Build Coastguard Worker ///
2925*bed243d3SAndroid Build Coastguard Worker /// \param __a
2926*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit integer vector.
2927*bed243d3SAndroid Build Coastguard Worker /// \param __b
2928*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit integer vector.
2929*bed243d3SAndroid Build Coastguard Worker /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.
2930*bed243d3SAndroid Build Coastguard Worker static __inline int __DEFAULT_FN_ATTRS
_mm256_testnzc_si256(__m256i __a,__m256i __b)2931*bed243d3SAndroid Build Coastguard Worker _mm256_testnzc_si256(__m256i __a, __m256i __b)
2932*bed243d3SAndroid Build Coastguard Worker {
2933*bed243d3SAndroid Build Coastguard Worker   return __builtin_ia32_ptestnzc256((__v4di)__a, (__v4di)__b);
2934*bed243d3SAndroid Build Coastguard Worker }
2935*bed243d3SAndroid Build Coastguard Worker 
2936*bed243d3SAndroid Build Coastguard Worker /* Vector extract sign mask */
2937*bed243d3SAndroid Build Coastguard Worker /// Extracts the sign bits of double-precision floating point elements
2938*bed243d3SAndroid Build Coastguard Worker ///    in a 256-bit vector of [4 x double] and writes them to the lower order
2939*bed243d3SAndroid Build Coastguard Worker ///    bits of the return value.
2940*bed243d3SAndroid Build Coastguard Worker ///
2941*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
2942*bed243d3SAndroid Build Coastguard Worker ///
2943*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VMOVMSKPD </c> instruction.
2944*bed243d3SAndroid Build Coastguard Worker ///
2945*bed243d3SAndroid Build Coastguard Worker /// \param __a
2946*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing the double-precision
2947*bed243d3SAndroid Build Coastguard Worker ///    floating point values with sign bits to be extracted.
2948*bed243d3SAndroid Build Coastguard Worker /// \returns The sign bits from the operand, written to bits [3:0].
2949*bed243d3SAndroid Build Coastguard Worker static __inline int __DEFAULT_FN_ATTRS
_mm256_movemask_pd(__m256d __a)2950*bed243d3SAndroid Build Coastguard Worker _mm256_movemask_pd(__m256d __a)
2951*bed243d3SAndroid Build Coastguard Worker {
2952*bed243d3SAndroid Build Coastguard Worker   return __builtin_ia32_movmskpd256((__v4df)__a);
2953*bed243d3SAndroid Build Coastguard Worker }
2954*bed243d3SAndroid Build Coastguard Worker 
2955*bed243d3SAndroid Build Coastguard Worker /// Extracts the sign bits of single-precision floating point elements
2956*bed243d3SAndroid Build Coastguard Worker ///    in a 256-bit vector of [8 x float] and writes them to the lower order
2957*bed243d3SAndroid Build Coastguard Worker ///    bits of the return value.
2958*bed243d3SAndroid Build Coastguard Worker ///
2959*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
2960*bed243d3SAndroid Build Coastguard Worker ///
2961*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VMOVMSKPS </c> instruction.
2962*bed243d3SAndroid Build Coastguard Worker ///
2963*bed243d3SAndroid Build Coastguard Worker /// \param __a
2964*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing the single-precision floating
2965*bed243d3SAndroid Build Coastguard Worker ///    point values with sign bits to be extracted.
2966*bed243d3SAndroid Build Coastguard Worker /// \returns The sign bits from the operand, written to bits [7:0].
2967*bed243d3SAndroid Build Coastguard Worker static __inline int __DEFAULT_FN_ATTRS
_mm256_movemask_ps(__m256 __a)2968*bed243d3SAndroid Build Coastguard Worker _mm256_movemask_ps(__m256 __a)
2969*bed243d3SAndroid Build Coastguard Worker {
2970*bed243d3SAndroid Build Coastguard Worker   return __builtin_ia32_movmskps256((__v8sf)__a);
2971*bed243d3SAndroid Build Coastguard Worker }
2972*bed243d3SAndroid Build Coastguard Worker 
2973*bed243d3SAndroid Build Coastguard Worker /* Vector __zero */
/// Clears the contents of every XMM or YMM register to zero.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VZEROALL </c> instruction.
static __inline void
__attribute__((__always_inline__, __nodebug__, __target__("avx")))
_mm256_zeroall(void)
{
  /* Takes no operands and produces no value; the effect is on the
     register file only. */
  __builtin_ia32_vzeroall();
}
2984*bed243d3SAndroid Build Coastguard Worker 
/// Clears the upper 128 bits (bits 255:128) of every YMM register to zero.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VZEROUPPER </c> instruction.
static __inline void
__attribute__((__always_inline__, __nodebug__, __target__("avx")))
_mm256_zeroupper(void)
{
  /* Takes no operands and produces no value; the effect is on the
     register file only. */
  __builtin_ia32_vzeroupper();
}
2995*bed243d3SAndroid Build Coastguard Worker 
2996*bed243d3SAndroid Build Coastguard Worker /* Vector load with broadcast */
2997*bed243d3SAndroid Build Coastguard Worker /// Loads a scalar single-precision floating point value from the
2998*bed243d3SAndroid Build Coastguard Worker ///    specified address pointed to by \a __a and broadcasts it to the elements
2999*bed243d3SAndroid Build Coastguard Worker ///    of a [4 x float] vector.
3000*bed243d3SAndroid Build Coastguard Worker ///
3001*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
3002*bed243d3SAndroid Build Coastguard Worker ///
3003*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VBROADCASTSS </c> instruction.
3004*bed243d3SAndroid Build Coastguard Worker ///
3005*bed243d3SAndroid Build Coastguard Worker /// \param __a
3006*bed243d3SAndroid Build Coastguard Worker ///    The single-precision floating point value to be broadcast.
3007*bed243d3SAndroid Build Coastguard Worker /// \returns A 128-bit vector of [4 x float] whose 32-bit elements are set
3008*bed243d3SAndroid Build Coastguard Worker ///    equal to the broadcast value.
3009*bed243d3SAndroid Build Coastguard Worker static __inline __m128 __DEFAULT_FN_ATTRS128
_mm_broadcast_ss(float const * __a)3010*bed243d3SAndroid Build Coastguard Worker _mm_broadcast_ss(float const *__a)
3011*bed243d3SAndroid Build Coastguard Worker {
3012*bed243d3SAndroid Build Coastguard Worker   struct __mm_broadcast_ss_struct {
3013*bed243d3SAndroid Build Coastguard Worker     float __f;
3014*bed243d3SAndroid Build Coastguard Worker   } __attribute__((__packed__, __may_alias__));
3015*bed243d3SAndroid Build Coastguard Worker   float __f = ((const struct __mm_broadcast_ss_struct*)__a)->__f;
3016*bed243d3SAndroid Build Coastguard Worker   return __extension__ (__m128){ __f, __f, __f, __f };
3017*bed243d3SAndroid Build Coastguard Worker }
3018*bed243d3SAndroid Build Coastguard Worker 
3019*bed243d3SAndroid Build Coastguard Worker /// Loads a scalar double-precision floating point value from the
3020*bed243d3SAndroid Build Coastguard Worker ///    specified address pointed to by \a __a and broadcasts it to the elements
3021*bed243d3SAndroid Build Coastguard Worker ///    of a [4 x double] vector.
3022*bed243d3SAndroid Build Coastguard Worker ///
3023*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
3024*bed243d3SAndroid Build Coastguard Worker ///
3025*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VBROADCASTSD </c> instruction.
3026*bed243d3SAndroid Build Coastguard Worker ///
3027*bed243d3SAndroid Build Coastguard Worker /// \param __a
3028*bed243d3SAndroid Build Coastguard Worker ///    The double-precision floating point value to be broadcast.
3029*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [4 x double] whose 64-bit elements are set
3030*bed243d3SAndroid Build Coastguard Worker ///    equal to the broadcast value.
3031*bed243d3SAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_broadcast_sd(double const * __a)3032*bed243d3SAndroid Build Coastguard Worker _mm256_broadcast_sd(double const *__a)
3033*bed243d3SAndroid Build Coastguard Worker {
3034*bed243d3SAndroid Build Coastguard Worker   struct __mm256_broadcast_sd_struct {
3035*bed243d3SAndroid Build Coastguard Worker     double __d;
3036*bed243d3SAndroid Build Coastguard Worker   } __attribute__((__packed__, __may_alias__));
3037*bed243d3SAndroid Build Coastguard Worker   double __d = ((const struct __mm256_broadcast_sd_struct*)__a)->__d;
3038*bed243d3SAndroid Build Coastguard Worker   return __extension__ (__m256d)(__v4df){ __d, __d, __d, __d };
3039*bed243d3SAndroid Build Coastguard Worker }
3040*bed243d3SAndroid Build Coastguard Worker 
3041*bed243d3SAndroid Build Coastguard Worker /// Loads a scalar single-precision floating point value from the
3042*bed243d3SAndroid Build Coastguard Worker ///    specified address pointed to by \a __a and broadcasts it to the elements
3043*bed243d3SAndroid Build Coastguard Worker ///    of a [8 x float] vector.
3044*bed243d3SAndroid Build Coastguard Worker ///
3045*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
3046*bed243d3SAndroid Build Coastguard Worker ///
3047*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VBROADCASTSS </c> instruction.
3048*bed243d3SAndroid Build Coastguard Worker ///
3049*bed243d3SAndroid Build Coastguard Worker /// \param __a
3050*bed243d3SAndroid Build Coastguard Worker ///    The single-precision floating point value to be broadcast.
3051*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] whose 32-bit elements are set
3052*bed243d3SAndroid Build Coastguard Worker ///    equal to the broadcast value.
3053*bed243d3SAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_broadcast_ss(float const * __a)3054*bed243d3SAndroid Build Coastguard Worker _mm256_broadcast_ss(float const *__a)
3055*bed243d3SAndroid Build Coastguard Worker {
3056*bed243d3SAndroid Build Coastguard Worker   struct __mm256_broadcast_ss_struct {
3057*bed243d3SAndroid Build Coastguard Worker     float __f;
3058*bed243d3SAndroid Build Coastguard Worker   } __attribute__((__packed__, __may_alias__));
3059*bed243d3SAndroid Build Coastguard Worker   float __f = ((const struct __mm256_broadcast_ss_struct*)__a)->__f;
3060*bed243d3SAndroid Build Coastguard Worker   return __extension__ (__m256)(__v8sf){ __f, __f, __f, __f, __f, __f, __f, __f };
3061*bed243d3SAndroid Build Coastguard Worker }
3062*bed243d3SAndroid Build Coastguard Worker 
3063*bed243d3SAndroid Build Coastguard Worker /// Loads the data from a 128-bit vector of [2 x double] from the
3064*bed243d3SAndroid Build Coastguard Worker ///    specified address pointed to by \a __a and broadcasts it to 128-bit
3065*bed243d3SAndroid Build Coastguard Worker ///    elements in a 256-bit vector of [4 x double].
3066*bed243d3SAndroid Build Coastguard Worker ///
3067*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
3068*bed243d3SAndroid Build Coastguard Worker ///
3069*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VBROADCASTF128 </c> instruction.
3070*bed243d3SAndroid Build Coastguard Worker ///
3071*bed243d3SAndroid Build Coastguard Worker /// \param __a
3072*bed243d3SAndroid Build Coastguard Worker ///    The 128-bit vector of [2 x double] to be broadcast.
3073*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [4 x double] whose 128-bit elements are set
3074*bed243d3SAndroid Build Coastguard Worker ///    equal to the broadcast value.
3075*bed243d3SAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_broadcast_pd(__m128d const * __a)3076*bed243d3SAndroid Build Coastguard Worker _mm256_broadcast_pd(__m128d const *__a)
3077*bed243d3SAndroid Build Coastguard Worker {
3078*bed243d3SAndroid Build Coastguard Worker   __m128d __b = _mm_loadu_pd((const double *)__a);
3079*bed243d3SAndroid Build Coastguard Worker   return (__m256d)__builtin_shufflevector((__v2df)__b, (__v2df)__b,
3080*bed243d3SAndroid Build Coastguard Worker                                           0, 1, 0, 1);
3081*bed243d3SAndroid Build Coastguard Worker }
3082*bed243d3SAndroid Build Coastguard Worker 
3083*bed243d3SAndroid Build Coastguard Worker /// Loads the data from a 128-bit vector of [4 x float] from the
3084*bed243d3SAndroid Build Coastguard Worker ///    specified address pointed to by \a __a and broadcasts it to 128-bit
3085*bed243d3SAndroid Build Coastguard Worker ///    elements in a 256-bit vector of [8 x float].
3086*bed243d3SAndroid Build Coastguard Worker ///
3087*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
3088*bed243d3SAndroid Build Coastguard Worker ///
3089*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VBROADCASTF128 </c> instruction.
3090*bed243d3SAndroid Build Coastguard Worker ///
3091*bed243d3SAndroid Build Coastguard Worker /// \param __a
3092*bed243d3SAndroid Build Coastguard Worker ///    The 128-bit vector of [4 x float] to be broadcast.
3093*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] whose 128-bit elements are set
3094*bed243d3SAndroid Build Coastguard Worker ///    equal to the broadcast value.
3095*bed243d3SAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_broadcast_ps(__m128 const * __a)3096*bed243d3SAndroid Build Coastguard Worker _mm256_broadcast_ps(__m128 const *__a)
3097*bed243d3SAndroid Build Coastguard Worker {
3098*bed243d3SAndroid Build Coastguard Worker   __m128 __b = _mm_loadu_ps((const float *)__a);
3099*bed243d3SAndroid Build Coastguard Worker   return (__m256)__builtin_shufflevector((__v4sf)__b, (__v4sf)__b,
3100*bed243d3SAndroid Build Coastguard Worker                                          0, 1, 2, 3, 0, 1, 2, 3);
3101*bed243d3SAndroid Build Coastguard Worker }
3102*bed243d3SAndroid Build Coastguard Worker 
3103*bed243d3SAndroid Build Coastguard Worker /* SIMD load ops */
3104*bed243d3SAndroid Build Coastguard Worker /// Loads 4 double-precision floating point values from a 32-byte aligned
3105*bed243d3SAndroid Build Coastguard Worker ///    memory location pointed to by \a __p into a vector of [4 x double].
3106*bed243d3SAndroid Build Coastguard Worker ///
3107*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
3108*bed243d3SAndroid Build Coastguard Worker ///
3109*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VMOVAPD </c> instruction.
3110*bed243d3SAndroid Build Coastguard Worker ///
3111*bed243d3SAndroid Build Coastguard Worker /// \param __p
3112*bed243d3SAndroid Build Coastguard Worker ///    A 32-byte aligned pointer to a memory location containing
3113*bed243d3SAndroid Build Coastguard Worker ///    double-precision floating point values.
3114*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [4 x double] containing the moved values.
3115*bed243d3SAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_load_pd(double const * __p)3116*bed243d3SAndroid Build Coastguard Worker _mm256_load_pd(double const *__p)
3117*bed243d3SAndroid Build Coastguard Worker {
3118*bed243d3SAndroid Build Coastguard Worker   return *(const __m256d *)__p;
3119*bed243d3SAndroid Build Coastguard Worker }
3120*bed243d3SAndroid Build Coastguard Worker 
3121*bed243d3SAndroid Build Coastguard Worker /// Loads 8 single-precision floating point values from a 32-byte aligned
3122*bed243d3SAndroid Build Coastguard Worker ///    memory location pointed to by \a __p into a vector of [8 x float].
3123*bed243d3SAndroid Build Coastguard Worker ///
3124*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
3125*bed243d3SAndroid Build Coastguard Worker ///
3126*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VMOVAPS </c> instruction.
3127*bed243d3SAndroid Build Coastguard Worker ///
3128*bed243d3SAndroid Build Coastguard Worker /// \param __p
3129*bed243d3SAndroid Build Coastguard Worker ///    A 32-byte aligned pointer to a memory location containing float values.
3130*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] containing the moved values.
3131*bed243d3SAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_load_ps(float const * __p)3132*bed243d3SAndroid Build Coastguard Worker _mm256_load_ps(float const *__p)
3133*bed243d3SAndroid Build Coastguard Worker {
3134*bed243d3SAndroid Build Coastguard Worker   return *(const __m256 *)__p;
3135*bed243d3SAndroid Build Coastguard Worker }
3136*bed243d3SAndroid Build Coastguard Worker 
3137*bed243d3SAndroid Build Coastguard Worker /// Loads 4 double-precision floating point values from an unaligned
3138*bed243d3SAndroid Build Coastguard Worker ///    memory location pointed to by \a __p into a vector of [4 x double].
3139*bed243d3SAndroid Build Coastguard Worker ///
3140*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
3141*bed243d3SAndroid Build Coastguard Worker ///
3142*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VMOVUPD </c> instruction.
3143*bed243d3SAndroid Build Coastguard Worker ///
3144*bed243d3SAndroid Build Coastguard Worker /// \param __p
3145*bed243d3SAndroid Build Coastguard Worker ///    A pointer to a memory location containing double-precision floating
3146*bed243d3SAndroid Build Coastguard Worker ///    point values.
3147*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [4 x double] containing the moved values.
3148*bed243d3SAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_loadu_pd(double const * __p)3149*bed243d3SAndroid Build Coastguard Worker _mm256_loadu_pd(double const *__p)
3150*bed243d3SAndroid Build Coastguard Worker {
3151*bed243d3SAndroid Build Coastguard Worker   struct __loadu_pd {
3152*bed243d3SAndroid Build Coastguard Worker     __m256d_u __v;
3153*bed243d3SAndroid Build Coastguard Worker   } __attribute__((__packed__, __may_alias__));
3154*bed243d3SAndroid Build Coastguard Worker   return ((const struct __loadu_pd*)__p)->__v;
3155*bed243d3SAndroid Build Coastguard Worker }
3156*bed243d3SAndroid Build Coastguard Worker 
3157*bed243d3SAndroid Build Coastguard Worker /// Loads 8 single-precision floating point values from an unaligned
3158*bed243d3SAndroid Build Coastguard Worker ///    memory location pointed to by \a __p into a vector of [8 x float].
3159*bed243d3SAndroid Build Coastguard Worker ///
3160*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
3161*bed243d3SAndroid Build Coastguard Worker ///
3162*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VMOVUPS </c> instruction.
3163*bed243d3SAndroid Build Coastguard Worker ///
3164*bed243d3SAndroid Build Coastguard Worker /// \param __p
3165*bed243d3SAndroid Build Coastguard Worker ///    A pointer to a memory location containing single-precision floating
3166*bed243d3SAndroid Build Coastguard Worker ///    point values.
3167*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] containing the moved values.
3168*bed243d3SAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_loadu_ps(float const * __p)3169*bed243d3SAndroid Build Coastguard Worker _mm256_loadu_ps(float const *__p)
3170*bed243d3SAndroid Build Coastguard Worker {
3171*bed243d3SAndroid Build Coastguard Worker   struct __loadu_ps {
3172*bed243d3SAndroid Build Coastguard Worker     __m256_u __v;
3173*bed243d3SAndroid Build Coastguard Worker   } __attribute__((__packed__, __may_alias__));
3174*bed243d3SAndroid Build Coastguard Worker   return ((const struct __loadu_ps*)__p)->__v;
3175*bed243d3SAndroid Build Coastguard Worker }
3176*bed243d3SAndroid Build Coastguard Worker 
3177*bed243d3SAndroid Build Coastguard Worker /// Loads 256 bits of integer data from a 32-byte aligned memory
3178*bed243d3SAndroid Build Coastguard Worker ///    location pointed to by \a __p into elements of a 256-bit integer vector.
3179*bed243d3SAndroid Build Coastguard Worker ///
3180*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
3181*bed243d3SAndroid Build Coastguard Worker ///
3182*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VMOVDQA </c> instruction.
3183*bed243d3SAndroid Build Coastguard Worker ///
3184*bed243d3SAndroid Build Coastguard Worker /// \param __p
3185*bed243d3SAndroid Build Coastguard Worker ///    A 32-byte aligned pointer to a 256-bit integer vector containing integer
3186*bed243d3SAndroid Build Coastguard Worker ///    values.
3187*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit integer vector containing the moved values.
3188*bed243d3SAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_load_si256(__m256i const * __p)3189*bed243d3SAndroid Build Coastguard Worker _mm256_load_si256(__m256i const *__p)
3190*bed243d3SAndroid Build Coastguard Worker {
3191*bed243d3SAndroid Build Coastguard Worker   return *__p;
3192*bed243d3SAndroid Build Coastguard Worker }
3193*bed243d3SAndroid Build Coastguard Worker 
3194*bed243d3SAndroid Build Coastguard Worker /// Loads 256 bits of integer data from an unaligned memory location
3195*bed243d3SAndroid Build Coastguard Worker ///    pointed to by \a __p into a 256-bit integer vector.
3196*bed243d3SAndroid Build Coastguard Worker ///
3197*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
3198*bed243d3SAndroid Build Coastguard Worker ///
3199*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VMOVDQU </c> instruction.
3200*bed243d3SAndroid Build Coastguard Worker ///
3201*bed243d3SAndroid Build Coastguard Worker /// \param __p
3202*bed243d3SAndroid Build Coastguard Worker ///    A pointer to a 256-bit integer vector containing integer values.
3203*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit integer vector containing the moved values.
3204*bed243d3SAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_loadu_si256(__m256i_u const * __p)3205*bed243d3SAndroid Build Coastguard Worker _mm256_loadu_si256(__m256i_u const *__p)
3206*bed243d3SAndroid Build Coastguard Worker {
3207*bed243d3SAndroid Build Coastguard Worker   struct __loadu_si256 {
3208*bed243d3SAndroid Build Coastguard Worker     __m256i_u __v;
3209*bed243d3SAndroid Build Coastguard Worker   } __attribute__((__packed__, __may_alias__));
3210*bed243d3SAndroid Build Coastguard Worker   return ((const struct __loadu_si256*)__p)->__v;
3211*bed243d3SAndroid Build Coastguard Worker }
3212*bed243d3SAndroid Build Coastguard Worker 
3213*bed243d3SAndroid Build Coastguard Worker /// Loads 256 bits of integer data from an unaligned memory location
3214*bed243d3SAndroid Build Coastguard Worker ///    pointed to by \a __p into a 256-bit integer vector. This intrinsic may
3215*bed243d3SAndroid Build Coastguard Worker ///    perform better than \c _mm256_loadu_si256 when the data crosses a cache
3216*bed243d3SAndroid Build Coastguard Worker ///    line boundary.
3217*bed243d3SAndroid Build Coastguard Worker ///
3218*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
3219*bed243d3SAndroid Build Coastguard Worker ///
3220*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VLDDQU </c> instruction.
3221*bed243d3SAndroid Build Coastguard Worker ///
3222*bed243d3SAndroid Build Coastguard Worker /// \param __p
3223*bed243d3SAndroid Build Coastguard Worker ///    A pointer to a 256-bit integer vector containing integer values.
3224*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit integer vector containing the moved values.
3225*bed243d3SAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_lddqu_si256(__m256i_u const * __p)3226*bed243d3SAndroid Build Coastguard Worker _mm256_lddqu_si256(__m256i_u const *__p)
3227*bed243d3SAndroid Build Coastguard Worker {
3228*bed243d3SAndroid Build Coastguard Worker   return (__m256i)__builtin_ia32_lddqu256((char const *)__p);
3229*bed243d3SAndroid Build Coastguard Worker }
3230*bed243d3SAndroid Build Coastguard Worker 
3231*bed243d3SAndroid Build Coastguard Worker /* SIMD store ops */
3232*bed243d3SAndroid Build Coastguard Worker /// Stores double-precision floating point values from a 256-bit vector
3233*bed243d3SAndroid Build Coastguard Worker ///    of [4 x double] to a 32-byte aligned memory location pointed to by
3234*bed243d3SAndroid Build Coastguard Worker ///    \a __p.
3235*bed243d3SAndroid Build Coastguard Worker ///
3236*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
3237*bed243d3SAndroid Build Coastguard Worker ///
3238*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VMOVAPD </c> instruction.
3239*bed243d3SAndroid Build Coastguard Worker ///
3240*bed243d3SAndroid Build Coastguard Worker /// \param __p
3241*bed243d3SAndroid Build Coastguard Worker ///    A 32-byte aligned pointer to a memory location that will receive the
3242*bed243d3SAndroid Build Coastguard Worker ///    double-precision floaing point values.
3243*bed243d3SAndroid Build Coastguard Worker /// \param __a
3244*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing the values to be moved.
3245*bed243d3SAndroid Build Coastguard Worker static __inline void __DEFAULT_FN_ATTRS
_mm256_store_pd(double * __p,__m256d __a)3246*bed243d3SAndroid Build Coastguard Worker _mm256_store_pd(double *__p, __m256d __a)
3247*bed243d3SAndroid Build Coastguard Worker {
3248*bed243d3SAndroid Build Coastguard Worker   *(__m256d *)__p = __a;
3249*bed243d3SAndroid Build Coastguard Worker }
3250*bed243d3SAndroid Build Coastguard Worker 
3251*bed243d3SAndroid Build Coastguard Worker /// Stores single-precision floating point values from a 256-bit vector
3252*bed243d3SAndroid Build Coastguard Worker ///    of [8 x float] to a 32-byte aligned memory location pointed to by \a __p.
3253*bed243d3SAndroid Build Coastguard Worker ///
3254*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
3255*bed243d3SAndroid Build Coastguard Worker ///
3256*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VMOVAPS </c> instruction.
3257*bed243d3SAndroid Build Coastguard Worker ///
3258*bed243d3SAndroid Build Coastguard Worker /// \param __p
3259*bed243d3SAndroid Build Coastguard Worker ///    A 32-byte aligned pointer to a memory location that will receive the
3260*bed243d3SAndroid Build Coastguard Worker ///    float values.
3261*bed243d3SAndroid Build Coastguard Worker /// \param __a
3262*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing the values to be moved.
3263*bed243d3SAndroid Build Coastguard Worker static __inline void __DEFAULT_FN_ATTRS
_mm256_store_ps(float * __p,__m256 __a)3264*bed243d3SAndroid Build Coastguard Worker _mm256_store_ps(float *__p, __m256 __a)
3265*bed243d3SAndroid Build Coastguard Worker {
3266*bed243d3SAndroid Build Coastguard Worker   *(__m256 *)__p = __a;
3267*bed243d3SAndroid Build Coastguard Worker }
3268*bed243d3SAndroid Build Coastguard Worker 
3269*bed243d3SAndroid Build Coastguard Worker /// Stores double-precision floating point values from a 256-bit vector
3270*bed243d3SAndroid Build Coastguard Worker ///    of [4 x double] to an unaligned memory location pointed to by \a __p.
3271*bed243d3SAndroid Build Coastguard Worker ///
3272*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
3273*bed243d3SAndroid Build Coastguard Worker ///
3274*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VMOVUPD </c> instruction.
3275*bed243d3SAndroid Build Coastguard Worker ///
3276*bed243d3SAndroid Build Coastguard Worker /// \param __p
3277*bed243d3SAndroid Build Coastguard Worker ///    A pointer to a memory location that will receive the double-precision
3278*bed243d3SAndroid Build Coastguard Worker ///    floating point values.
3279*bed243d3SAndroid Build Coastguard Worker /// \param __a
3280*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing the values to be moved.
3281*bed243d3SAndroid Build Coastguard Worker static __inline void __DEFAULT_FN_ATTRS
_mm256_storeu_pd(double * __p,__m256d __a)3282*bed243d3SAndroid Build Coastguard Worker _mm256_storeu_pd(double *__p, __m256d __a)
3283*bed243d3SAndroid Build Coastguard Worker {
3284*bed243d3SAndroid Build Coastguard Worker   struct __storeu_pd {
3285*bed243d3SAndroid Build Coastguard Worker     __m256d_u __v;
3286*bed243d3SAndroid Build Coastguard Worker   } __attribute__((__packed__, __may_alias__));
3287*bed243d3SAndroid Build Coastguard Worker   ((struct __storeu_pd*)__p)->__v = __a;
3288*bed243d3SAndroid Build Coastguard Worker }
3289*bed243d3SAndroid Build Coastguard Worker 
3290*bed243d3SAndroid Build Coastguard Worker /// Stores single-precision floating point values from a 256-bit vector
3291*bed243d3SAndroid Build Coastguard Worker ///    of [8 x float] to an unaligned memory location pointed to by \a __p.
3292*bed243d3SAndroid Build Coastguard Worker ///
3293*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
3294*bed243d3SAndroid Build Coastguard Worker ///
3295*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VMOVUPS </c> instruction.
3296*bed243d3SAndroid Build Coastguard Worker ///
3297*bed243d3SAndroid Build Coastguard Worker /// \param __p
3298*bed243d3SAndroid Build Coastguard Worker ///    A pointer to a memory location that will receive the float values.
3299*bed243d3SAndroid Build Coastguard Worker /// \param __a
3300*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing the values to be moved.
3301*bed243d3SAndroid Build Coastguard Worker static __inline void __DEFAULT_FN_ATTRS
_mm256_storeu_ps(float * __p,__m256 __a)3302*bed243d3SAndroid Build Coastguard Worker _mm256_storeu_ps(float *__p, __m256 __a)
3303*bed243d3SAndroid Build Coastguard Worker {
3304*bed243d3SAndroid Build Coastguard Worker   struct __storeu_ps {
3305*bed243d3SAndroid Build Coastguard Worker     __m256_u __v;
3306*bed243d3SAndroid Build Coastguard Worker   } __attribute__((__packed__, __may_alias__));
3307*bed243d3SAndroid Build Coastguard Worker   ((struct __storeu_ps*)__p)->__v = __a;
3308*bed243d3SAndroid Build Coastguard Worker }
3309*bed243d3SAndroid Build Coastguard Worker 
3310*bed243d3SAndroid Build Coastguard Worker /// Stores integer values from a 256-bit integer vector to a 32-byte
3311*bed243d3SAndroid Build Coastguard Worker ///    aligned memory location pointed to by \a __p.
3312*bed243d3SAndroid Build Coastguard Worker ///
3313*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
3314*bed243d3SAndroid Build Coastguard Worker ///
3315*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VMOVDQA </c> instruction.
3316*bed243d3SAndroid Build Coastguard Worker ///
3317*bed243d3SAndroid Build Coastguard Worker /// \param __p
3318*bed243d3SAndroid Build Coastguard Worker ///    A 32-byte aligned pointer to a memory location that will receive the
3319*bed243d3SAndroid Build Coastguard Worker ///    integer values.
3320*bed243d3SAndroid Build Coastguard Worker /// \param __a
3321*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit integer vector containing the values to be moved.
3322*bed243d3SAndroid Build Coastguard Worker static __inline void __DEFAULT_FN_ATTRS
_mm256_store_si256(__m256i * __p,__m256i __a)3323*bed243d3SAndroid Build Coastguard Worker _mm256_store_si256(__m256i *__p, __m256i __a)
3324*bed243d3SAndroid Build Coastguard Worker {
3325*bed243d3SAndroid Build Coastguard Worker   *__p = __a;
3326*bed243d3SAndroid Build Coastguard Worker }
3327*bed243d3SAndroid Build Coastguard Worker 
3328*bed243d3SAndroid Build Coastguard Worker /// Stores integer values from a 256-bit integer vector to an unaligned
3329*bed243d3SAndroid Build Coastguard Worker ///    memory location pointed to by \a __p.
3330*bed243d3SAndroid Build Coastguard Worker ///
3331*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
3332*bed243d3SAndroid Build Coastguard Worker ///
3333*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VMOVDQU </c> instruction.
3334*bed243d3SAndroid Build Coastguard Worker ///
3335*bed243d3SAndroid Build Coastguard Worker /// \param __p
3336*bed243d3SAndroid Build Coastguard Worker ///    A pointer to a memory location that will receive the integer values.
3337*bed243d3SAndroid Build Coastguard Worker /// \param __a
3338*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit integer vector containing the values to be moved.
3339*bed243d3SAndroid Build Coastguard Worker static __inline void __DEFAULT_FN_ATTRS
_mm256_storeu_si256(__m256i_u * __p,__m256i __a)3340*bed243d3SAndroid Build Coastguard Worker _mm256_storeu_si256(__m256i_u *__p, __m256i __a)
3341*bed243d3SAndroid Build Coastguard Worker {
3342*bed243d3SAndroid Build Coastguard Worker   struct __storeu_si256 {
3343*bed243d3SAndroid Build Coastguard Worker     __m256i_u __v;
3344*bed243d3SAndroid Build Coastguard Worker   } __attribute__((__packed__, __may_alias__));
3345*bed243d3SAndroid Build Coastguard Worker   ((struct __storeu_si256*)__p)->__v = __a;
3346*bed243d3SAndroid Build Coastguard Worker }
3347*bed243d3SAndroid Build Coastguard Worker 
3348*bed243d3SAndroid Build Coastguard Worker /* Conditional load ops */
3349*bed243d3SAndroid Build Coastguard Worker /// Conditionally loads double-precision floating point elements from a
3350*bed243d3SAndroid Build Coastguard Worker ///    memory location pointed to by \a __p into a 128-bit vector of
3351*bed243d3SAndroid Build Coastguard Worker ///    [2 x double], depending on the mask bits associated with each data
3352*bed243d3SAndroid Build Coastguard Worker ///    element.
3353*bed243d3SAndroid Build Coastguard Worker ///
3354*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
3355*bed243d3SAndroid Build Coastguard Worker ///
3356*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VMASKMOVPD </c> instruction.
3357*bed243d3SAndroid Build Coastguard Worker ///
3358*bed243d3SAndroid Build Coastguard Worker /// \param __p
3359*bed243d3SAndroid Build Coastguard Worker ///    A pointer to a memory location that contains the double-precision
3360*bed243d3SAndroid Build Coastguard Worker ///    floating point values.
3361*bed243d3SAndroid Build Coastguard Worker /// \param __m
3362*bed243d3SAndroid Build Coastguard Worker ///    A 128-bit integer vector containing the mask. The most significant bit of
3363*bed243d3SAndroid Build Coastguard Worker ///    each data element represents the mask bits. If a mask bit is zero, the
3364*bed243d3SAndroid Build Coastguard Worker ///    corresponding value in the memory location is not loaded and the
3365*bed243d3SAndroid Build Coastguard Worker ///    corresponding field in the return value is set to zero.
3366*bed243d3SAndroid Build Coastguard Worker /// \returns A 128-bit vector of [2 x double] containing the loaded values.
3367*bed243d3SAndroid Build Coastguard Worker static __inline __m128d __DEFAULT_FN_ATTRS128
_mm_maskload_pd(double const * __p,__m128i __m)3368*bed243d3SAndroid Build Coastguard Worker _mm_maskload_pd(double const *__p, __m128i __m)
3369*bed243d3SAndroid Build Coastguard Worker {
3370*bed243d3SAndroid Build Coastguard Worker   return (__m128d)__builtin_ia32_maskloadpd((const __v2df *)__p, (__v2di)__m);
3371*bed243d3SAndroid Build Coastguard Worker }
3372*bed243d3SAndroid Build Coastguard Worker 
3373*bed243d3SAndroid Build Coastguard Worker /// Conditionally loads double-precision floating point elements from a
3374*bed243d3SAndroid Build Coastguard Worker ///    memory location pointed to by \a __p into a 256-bit vector of
3375*bed243d3SAndroid Build Coastguard Worker ///    [4 x double], depending on the mask bits associated with each data
3376*bed243d3SAndroid Build Coastguard Worker ///    element.
3377*bed243d3SAndroid Build Coastguard Worker ///
3378*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
3379*bed243d3SAndroid Build Coastguard Worker ///
3380*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VMASKMOVPD </c> instruction.
3381*bed243d3SAndroid Build Coastguard Worker ///
3382*bed243d3SAndroid Build Coastguard Worker /// \param __p
3383*bed243d3SAndroid Build Coastguard Worker ///    A pointer to a memory location that contains the double-precision
3384*bed243d3SAndroid Build Coastguard Worker ///    floating point values.
3385*bed243d3SAndroid Build Coastguard Worker /// \param __m
3386*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit integer vector of [4 x quadword] containing the mask. The most
3387*bed243d3SAndroid Build Coastguard Worker ///    significant bit of each quadword element represents the mask bits. If a
3388*bed243d3SAndroid Build Coastguard Worker ///    mask bit is zero, the corresponding value in the memory location is not
3389*bed243d3SAndroid Build Coastguard Worker ///    loaded and the corresponding field in the return value is set to zero.
3390*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [4 x double] containing the loaded values.
3391*bed243d3SAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_maskload_pd(double const * __p,__m256i __m)3392*bed243d3SAndroid Build Coastguard Worker _mm256_maskload_pd(double const *__p, __m256i __m)
3393*bed243d3SAndroid Build Coastguard Worker {
3394*bed243d3SAndroid Build Coastguard Worker   return (__m256d)__builtin_ia32_maskloadpd256((const __v4df *)__p,
3395*bed243d3SAndroid Build Coastguard Worker                                                (__v4di)__m);
3396*bed243d3SAndroid Build Coastguard Worker }
3397*bed243d3SAndroid Build Coastguard Worker 
3398*bed243d3SAndroid Build Coastguard Worker /// Conditionally loads single-precision floating point elements from a
3399*bed243d3SAndroid Build Coastguard Worker ///    memory location pointed to by \a __p into a 128-bit vector of
3400*bed243d3SAndroid Build Coastguard Worker ///    [4 x float], depending on the mask bits associated with each data
3401*bed243d3SAndroid Build Coastguard Worker ///    element.
3402*bed243d3SAndroid Build Coastguard Worker ///
3403*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
3404*bed243d3SAndroid Build Coastguard Worker ///
3405*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VMASKMOVPS </c> instruction.
3406*bed243d3SAndroid Build Coastguard Worker ///
3407*bed243d3SAndroid Build Coastguard Worker /// \param __p
3408*bed243d3SAndroid Build Coastguard Worker ///    A pointer to a memory location that contains the single-precision
3409*bed243d3SAndroid Build Coastguard Worker ///    floating point values.
3410*bed243d3SAndroid Build Coastguard Worker /// \param __m
3411*bed243d3SAndroid Build Coastguard Worker ///    A 128-bit integer vector containing the mask. The most significant bit of
3412*bed243d3SAndroid Build Coastguard Worker ///    each data element represents the mask bits. If a mask bit is zero, the
3413*bed243d3SAndroid Build Coastguard Worker ///    corresponding value in the memory location is not loaded and the
3414*bed243d3SAndroid Build Coastguard Worker ///    corresponding field in the return value is set to zero.
3415*bed243d3SAndroid Build Coastguard Worker /// \returns A 128-bit vector of [4 x float] containing the loaded values.
3416*bed243d3SAndroid Build Coastguard Worker static __inline __m128 __DEFAULT_FN_ATTRS128
_mm_maskload_ps(float const * __p,__m128i __m)3417*bed243d3SAndroid Build Coastguard Worker _mm_maskload_ps(float const *__p, __m128i __m)
3418*bed243d3SAndroid Build Coastguard Worker {
3419*bed243d3SAndroid Build Coastguard Worker   return (__m128)__builtin_ia32_maskloadps((const __v4sf *)__p, (__v4si)__m);
3420*bed243d3SAndroid Build Coastguard Worker }
3421*bed243d3SAndroid Build Coastguard Worker 
3422*bed243d3SAndroid Build Coastguard Worker /// Conditionally loads single-precision floating point elements from a
3423*bed243d3SAndroid Build Coastguard Worker ///    memory location pointed to by \a __p into a 256-bit vector of
3424*bed243d3SAndroid Build Coastguard Worker ///    [8 x float], depending on the mask bits associated with each data
3425*bed243d3SAndroid Build Coastguard Worker ///    element.
3426*bed243d3SAndroid Build Coastguard Worker ///
3427*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
3428*bed243d3SAndroid Build Coastguard Worker ///
3429*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VMASKMOVPS </c> instruction.
3430*bed243d3SAndroid Build Coastguard Worker ///
3431*bed243d3SAndroid Build Coastguard Worker /// \param __p
3432*bed243d3SAndroid Build Coastguard Worker ///    A pointer to a memory location that contains the single-precision
3433*bed243d3SAndroid Build Coastguard Worker ///    floating point values.
3434*bed243d3SAndroid Build Coastguard Worker /// \param __m
3435*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit integer vector of [8 x dword] containing the mask. The most
3436*bed243d3SAndroid Build Coastguard Worker ///    significant bit of each dword element represents the mask bits. If a mask
3437*bed243d3SAndroid Build Coastguard Worker ///    bit is zero, the corresponding value in the memory location is not loaded
3438*bed243d3SAndroid Build Coastguard Worker ///    and the corresponding field in the return value is set to zero.
3439*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] containing the loaded values.
3440*bed243d3SAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_maskload_ps(float const * __p,__m256i __m)3441*bed243d3SAndroid Build Coastguard Worker _mm256_maskload_ps(float const *__p, __m256i __m)
3442*bed243d3SAndroid Build Coastguard Worker {
3443*bed243d3SAndroid Build Coastguard Worker   return (__m256)__builtin_ia32_maskloadps256((const __v8sf *)__p, (__v8si)__m);
3444*bed243d3SAndroid Build Coastguard Worker }
3445*bed243d3SAndroid Build Coastguard Worker 
/* Conditional store ops */
3447*bed243d3SAndroid Build Coastguard Worker /// Moves single-precision floating point values from a 256-bit vector
3448*bed243d3SAndroid Build Coastguard Worker ///    of [8 x float] to a memory location pointed to by \a __p, according to
3449*bed243d3SAndroid Build Coastguard Worker ///    the specified mask.
3450*bed243d3SAndroid Build Coastguard Worker ///
3451*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
3452*bed243d3SAndroid Build Coastguard Worker ///
3453*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VMASKMOVPS </c> instruction.
3454*bed243d3SAndroid Build Coastguard Worker ///
3455*bed243d3SAndroid Build Coastguard Worker /// \param __p
3456*bed243d3SAndroid Build Coastguard Worker ///    A pointer to a memory location that will receive the float values.
3457*bed243d3SAndroid Build Coastguard Worker /// \param __m
3458*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit integer vector of [8 x dword] containing the mask. The most
3459*bed243d3SAndroid Build Coastguard Worker ///    significant bit of each dword element in the mask vector represents the
3460*bed243d3SAndroid Build Coastguard Worker ///    mask bits. If a mask bit is zero, the corresponding value from vector
3461*bed243d3SAndroid Build Coastguard Worker ///    \a __a is not stored and the corresponding field in the memory location
3462*bed243d3SAndroid Build Coastguard Worker ///    pointed to by \a __p is not changed.
3463*bed243d3SAndroid Build Coastguard Worker /// \param __a
3464*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing the values to be stored.
3465*bed243d3SAndroid Build Coastguard Worker static __inline void __DEFAULT_FN_ATTRS
_mm256_maskstore_ps(float * __p,__m256i __m,__m256 __a)3466*bed243d3SAndroid Build Coastguard Worker _mm256_maskstore_ps(float *__p, __m256i __m, __m256 __a)
3467*bed243d3SAndroid Build Coastguard Worker {
3468*bed243d3SAndroid Build Coastguard Worker   __builtin_ia32_maskstoreps256((__v8sf *)__p, (__v8si)__m, (__v8sf)__a);
3469*bed243d3SAndroid Build Coastguard Worker }
3470*bed243d3SAndroid Build Coastguard Worker 
3471*bed243d3SAndroid Build Coastguard Worker /// Moves double-precision values from a 128-bit vector of [2 x double]
3472*bed243d3SAndroid Build Coastguard Worker ///    to a memory location pointed to by \a __p, according to the specified
3473*bed243d3SAndroid Build Coastguard Worker ///    mask.
3474*bed243d3SAndroid Build Coastguard Worker ///
3475*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
3476*bed243d3SAndroid Build Coastguard Worker ///
3477*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VMASKMOVPD </c> instruction.
3478*bed243d3SAndroid Build Coastguard Worker ///
3479*bed243d3SAndroid Build Coastguard Worker /// \param __p
3480*bed243d3SAndroid Build Coastguard Worker ///    A pointer to a memory location that will receive the float values.
3481*bed243d3SAndroid Build Coastguard Worker /// \param __m
3482*bed243d3SAndroid Build Coastguard Worker ///    A 128-bit integer vector containing the mask. The most significant bit of
3483*bed243d3SAndroid Build Coastguard Worker ///    each field in the mask vector represents the mask bits. If a mask bit is
3484*bed243d3SAndroid Build Coastguard Worker ///    zero, the corresponding value from vector \a __a is not stored and the
3485*bed243d3SAndroid Build Coastguard Worker ///    corresponding field in the memory location pointed to by \a __p is not
3486*bed243d3SAndroid Build Coastguard Worker ///    changed.
3487*bed243d3SAndroid Build Coastguard Worker /// \param __a
3488*bed243d3SAndroid Build Coastguard Worker ///    A 128-bit vector of [2 x double] containing the values to be stored.
3489*bed243d3SAndroid Build Coastguard Worker static __inline void __DEFAULT_FN_ATTRS128
_mm_maskstore_pd(double * __p,__m128i __m,__m128d __a)3490*bed243d3SAndroid Build Coastguard Worker _mm_maskstore_pd(double *__p, __m128i __m, __m128d __a)
3491*bed243d3SAndroid Build Coastguard Worker {
3492*bed243d3SAndroid Build Coastguard Worker   __builtin_ia32_maskstorepd((__v2df *)__p, (__v2di)__m, (__v2df)__a);
3493*bed243d3SAndroid Build Coastguard Worker }
3494*bed243d3SAndroid Build Coastguard Worker 
3495*bed243d3SAndroid Build Coastguard Worker /// Moves double-precision values from a 256-bit vector of [4 x double]
3496*bed243d3SAndroid Build Coastguard Worker ///    to a memory location pointed to by \a __p, according to the specified
3497*bed243d3SAndroid Build Coastguard Worker ///    mask.
3498*bed243d3SAndroid Build Coastguard Worker ///
3499*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
3500*bed243d3SAndroid Build Coastguard Worker ///
3501*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VMASKMOVPD </c> instruction.
3502*bed243d3SAndroid Build Coastguard Worker ///
3503*bed243d3SAndroid Build Coastguard Worker /// \param __p
3504*bed243d3SAndroid Build Coastguard Worker ///    A pointer to a memory location that will receive the float values.
3505*bed243d3SAndroid Build Coastguard Worker /// \param __m
3506*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit integer vector of [4 x quadword] containing the mask. The most
3507*bed243d3SAndroid Build Coastguard Worker ///    significant bit of each quadword element in the mask vector represents
3508*bed243d3SAndroid Build Coastguard Worker ///    the mask bits. If a mask bit is zero, the corresponding value from vector
3509*bed243d3SAndroid Build Coastguard Worker ///    __a is not stored and the corresponding field in the memory location
3510*bed243d3SAndroid Build Coastguard Worker ///    pointed to by \a __p is not changed.
3511*bed243d3SAndroid Build Coastguard Worker /// \param __a
3512*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing the values to be stored.
3513*bed243d3SAndroid Build Coastguard Worker static __inline void __DEFAULT_FN_ATTRS
_mm256_maskstore_pd(double * __p,__m256i __m,__m256d __a)3514*bed243d3SAndroid Build Coastguard Worker _mm256_maskstore_pd(double *__p, __m256i __m, __m256d __a)
3515*bed243d3SAndroid Build Coastguard Worker {
3516*bed243d3SAndroid Build Coastguard Worker   __builtin_ia32_maskstorepd256((__v4df *)__p, (__v4di)__m, (__v4df)__a);
3517*bed243d3SAndroid Build Coastguard Worker }
3518*bed243d3SAndroid Build Coastguard Worker 
3519*bed243d3SAndroid Build Coastguard Worker /// Moves single-precision floating point values from a 128-bit vector
3520*bed243d3SAndroid Build Coastguard Worker ///    of [4 x float] to a memory location pointed to by \a __p, according to
3521*bed243d3SAndroid Build Coastguard Worker ///    the specified mask.
3522*bed243d3SAndroid Build Coastguard Worker ///
3523*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
3524*bed243d3SAndroid Build Coastguard Worker ///
3525*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VMASKMOVPS </c> instruction.
3526*bed243d3SAndroid Build Coastguard Worker ///
3527*bed243d3SAndroid Build Coastguard Worker /// \param __p
3528*bed243d3SAndroid Build Coastguard Worker ///    A pointer to a memory location that will receive the float values.
3529*bed243d3SAndroid Build Coastguard Worker /// \param __m
3530*bed243d3SAndroid Build Coastguard Worker ///    A 128-bit integer vector containing the mask. The most significant bit of
3531*bed243d3SAndroid Build Coastguard Worker ///    each field in the mask vector represents the mask bits. If a mask bit is
3532*bed243d3SAndroid Build Coastguard Worker ///    zero, the corresponding value from vector __a is not stored and the
3533*bed243d3SAndroid Build Coastguard Worker ///    corresponding field in the memory location pointed to by \a __p is not
3534*bed243d3SAndroid Build Coastguard Worker ///    changed.
3535*bed243d3SAndroid Build Coastguard Worker /// \param __a
3536*bed243d3SAndroid Build Coastguard Worker ///    A 128-bit vector of [4 x float] containing the values to be stored.
3537*bed243d3SAndroid Build Coastguard Worker static __inline void __DEFAULT_FN_ATTRS128
_mm_maskstore_ps(float * __p,__m128i __m,__m128 __a)3538*bed243d3SAndroid Build Coastguard Worker _mm_maskstore_ps(float *__p, __m128i __m, __m128 __a)
3539*bed243d3SAndroid Build Coastguard Worker {
3540*bed243d3SAndroid Build Coastguard Worker   __builtin_ia32_maskstoreps((__v4sf *)__p, (__v4si)__m, (__v4sf)__a);
3541*bed243d3SAndroid Build Coastguard Worker }
3542*bed243d3SAndroid Build Coastguard Worker 
/* Cacheability support ops */
3544*bed243d3SAndroid Build Coastguard Worker /// Moves integer data from a 256-bit integer vector to a 32-byte
3545*bed243d3SAndroid Build Coastguard Worker ///    aligned memory location. To minimize caching, the data is flagged as
3546*bed243d3SAndroid Build Coastguard Worker ///    non-temporal (unlikely to be used again soon).
3547*bed243d3SAndroid Build Coastguard Worker ///
3548*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
3549*bed243d3SAndroid Build Coastguard Worker ///
3550*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VMOVNTDQ </c> instruction.
3551*bed243d3SAndroid Build Coastguard Worker ///
3552*bed243d3SAndroid Build Coastguard Worker /// \param __a
3553*bed243d3SAndroid Build Coastguard Worker ///    A pointer to a 32-byte aligned memory location that will receive the
3554*bed243d3SAndroid Build Coastguard Worker ///    integer values.
3555*bed243d3SAndroid Build Coastguard Worker /// \param __b
3556*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit integer vector containing the values to be moved.
3557*bed243d3SAndroid Build Coastguard Worker static __inline void __DEFAULT_FN_ATTRS
_mm256_stream_si256(void * __a,__m256i __b)3558*bed243d3SAndroid Build Coastguard Worker _mm256_stream_si256(void *__a, __m256i __b)
3559*bed243d3SAndroid Build Coastguard Worker {
3560*bed243d3SAndroid Build Coastguard Worker   typedef __v4di __v4di_aligned __attribute__((aligned(32)));
3561*bed243d3SAndroid Build Coastguard Worker   __builtin_nontemporal_store((__v4di_aligned)__b, (__v4di_aligned*)__a);
3562*bed243d3SAndroid Build Coastguard Worker }
3563*bed243d3SAndroid Build Coastguard Worker 
3564*bed243d3SAndroid Build Coastguard Worker /// Moves double-precision values from a 256-bit vector of [4 x double]
3565*bed243d3SAndroid Build Coastguard Worker ///    to a 32-byte aligned memory location. To minimize caching, the data is
3566*bed243d3SAndroid Build Coastguard Worker ///    flagged as non-temporal (unlikely to be used again soon).
3567*bed243d3SAndroid Build Coastguard Worker ///
3568*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
3569*bed243d3SAndroid Build Coastguard Worker ///
3570*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VMOVNTPD </c> instruction.
3571*bed243d3SAndroid Build Coastguard Worker ///
3572*bed243d3SAndroid Build Coastguard Worker /// \param __a
3573*bed243d3SAndroid Build Coastguard Worker ///    A pointer to a 32-byte aligned memory location that will receive the
3574*bed243d3SAndroid Build Coastguard Worker ///    double-precision floating-point values.
3575*bed243d3SAndroid Build Coastguard Worker /// \param __b
3576*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [4 x double] containing the values to be moved.
3577*bed243d3SAndroid Build Coastguard Worker static __inline void __DEFAULT_FN_ATTRS
_mm256_stream_pd(void * __a,__m256d __b)3578*bed243d3SAndroid Build Coastguard Worker _mm256_stream_pd(void *__a, __m256d __b)
3579*bed243d3SAndroid Build Coastguard Worker {
3580*bed243d3SAndroid Build Coastguard Worker   typedef __v4df __v4df_aligned __attribute__((aligned(32)));
3581*bed243d3SAndroid Build Coastguard Worker   __builtin_nontemporal_store((__v4df_aligned)__b, (__v4df_aligned*)__a);
3582*bed243d3SAndroid Build Coastguard Worker }
3583*bed243d3SAndroid Build Coastguard Worker 
3584*bed243d3SAndroid Build Coastguard Worker /// Moves single-precision floating point values from a 256-bit vector
3585*bed243d3SAndroid Build Coastguard Worker ///    of [8 x float] to a 32-byte aligned memory location. To minimize
3586*bed243d3SAndroid Build Coastguard Worker ///    caching, the data is flagged as non-temporal (unlikely to be used again
3587*bed243d3SAndroid Build Coastguard Worker ///    soon).
3588*bed243d3SAndroid Build Coastguard Worker ///
3589*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
3590*bed243d3SAndroid Build Coastguard Worker ///
3591*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VMOVNTPS </c> instruction.
3592*bed243d3SAndroid Build Coastguard Worker ///
3593*bed243d3SAndroid Build Coastguard Worker /// \param __p
3594*bed243d3SAndroid Build Coastguard Worker ///    A pointer to a 32-byte aligned memory location that will receive the
3595*bed243d3SAndroid Build Coastguard Worker ///    single-precision floating point values.
3596*bed243d3SAndroid Build Coastguard Worker /// \param __a
3597*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit vector of [8 x float] containing the values to be moved.
3598*bed243d3SAndroid Build Coastguard Worker static __inline void __DEFAULT_FN_ATTRS
_mm256_stream_ps(void * __p,__m256 __a)3599*bed243d3SAndroid Build Coastguard Worker _mm256_stream_ps(void *__p, __m256 __a)
3600*bed243d3SAndroid Build Coastguard Worker {
3601*bed243d3SAndroid Build Coastguard Worker   typedef __v8sf __v8sf_aligned __attribute__((aligned(32)));
3602*bed243d3SAndroid Build Coastguard Worker   __builtin_nontemporal_store((__v8sf_aligned)__a, (__v8sf_aligned*)__p);
3603*bed243d3SAndroid Build Coastguard Worker }
3604*bed243d3SAndroid Build Coastguard Worker 
/* Create vectors */
3606*bed243d3SAndroid Build Coastguard Worker /// Create a 256-bit vector of [4 x double] with undefined values.
3607*bed243d3SAndroid Build Coastguard Worker ///
3608*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
3609*bed243d3SAndroid Build Coastguard Worker ///
3610*bed243d3SAndroid Build Coastguard Worker /// This intrinsic has no corresponding instruction.
3611*bed243d3SAndroid Build Coastguard Worker ///
3612*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [4 x double] containing undefined values.
3613*bed243d3SAndroid Build Coastguard Worker static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_undefined_pd(void)3614*bed243d3SAndroid Build Coastguard Worker _mm256_undefined_pd(void)
3615*bed243d3SAndroid Build Coastguard Worker {
3616*bed243d3SAndroid Build Coastguard Worker   return (__m256d)__builtin_ia32_undef256();
3617*bed243d3SAndroid Build Coastguard Worker }
3618*bed243d3SAndroid Build Coastguard Worker 
3619*bed243d3SAndroid Build Coastguard Worker /// Create a 256-bit vector of [8 x float] with undefined values.
3620*bed243d3SAndroid Build Coastguard Worker ///
3621*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
3622*bed243d3SAndroid Build Coastguard Worker ///
3623*bed243d3SAndroid Build Coastguard Worker /// This intrinsic has no corresponding instruction.
3624*bed243d3SAndroid Build Coastguard Worker ///
3625*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] containing undefined values.
3626*bed243d3SAndroid Build Coastguard Worker static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_undefined_ps(void)3627*bed243d3SAndroid Build Coastguard Worker _mm256_undefined_ps(void)
3628*bed243d3SAndroid Build Coastguard Worker {
3629*bed243d3SAndroid Build Coastguard Worker   return (__m256)__builtin_ia32_undef256();
3630*bed243d3SAndroid Build Coastguard Worker }
3631*bed243d3SAndroid Build Coastguard Worker 
3632*bed243d3SAndroid Build Coastguard Worker /// Create a 256-bit integer vector with undefined values.
3633*bed243d3SAndroid Build Coastguard Worker ///
3634*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
3635*bed243d3SAndroid Build Coastguard Worker ///
3636*bed243d3SAndroid Build Coastguard Worker /// This intrinsic has no corresponding instruction.
3637*bed243d3SAndroid Build Coastguard Worker ///
3638*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit integer vector containing undefined values.
3639*bed243d3SAndroid Build Coastguard Worker static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_undefined_si256(void)3640*bed243d3SAndroid Build Coastguard Worker _mm256_undefined_si256(void)
3641*bed243d3SAndroid Build Coastguard Worker {
3642*bed243d3SAndroid Build Coastguard Worker   return (__m256i)__builtin_ia32_undef256();
3643*bed243d3SAndroid Build Coastguard Worker }
3644*bed243d3SAndroid Build Coastguard Worker 
3645*bed243d3SAndroid Build Coastguard Worker /// Constructs a 256-bit floating-point vector of [4 x double]
3646*bed243d3SAndroid Build Coastguard Worker ///    initialized with the specified double-precision floating-point values.
3647*bed243d3SAndroid Build Coastguard Worker ///
3648*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
3649*bed243d3SAndroid Build Coastguard Worker ///
3650*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VUNPCKLPD+VINSERTF128 </c>
3651*bed243d3SAndroid Build Coastguard Worker ///   instruction.
3652*bed243d3SAndroid Build Coastguard Worker ///
3653*bed243d3SAndroid Build Coastguard Worker /// \param __a
3654*bed243d3SAndroid Build Coastguard Worker ///    A double-precision floating-point value used to initialize bits [255:192]
3655*bed243d3SAndroid Build Coastguard Worker ///    of the result.
3656*bed243d3SAndroid Build Coastguard Worker /// \param __b
3657*bed243d3SAndroid Build Coastguard Worker ///    A double-precision floating-point value used to initialize bits [191:128]
3658*bed243d3SAndroid Build Coastguard Worker ///    of the result.
3659*bed243d3SAndroid Build Coastguard Worker /// \param __c
3660*bed243d3SAndroid Build Coastguard Worker ///    A double-precision floating-point value used to initialize bits [127:64]
3661*bed243d3SAndroid Build Coastguard Worker ///    of the result.
3662*bed243d3SAndroid Build Coastguard Worker /// \param __d
3663*bed243d3SAndroid Build Coastguard Worker ///    A double-precision floating-point value used to initialize bits [63:0]
3664*bed243d3SAndroid Build Coastguard Worker ///    of the result.
3665*bed243d3SAndroid Build Coastguard Worker /// \returns An initialized 256-bit floating-point vector of [4 x double].
3666*bed243d3SAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_set_pd(double __a,double __b,double __c,double __d)3667*bed243d3SAndroid Build Coastguard Worker _mm256_set_pd(double __a, double __b, double __c, double __d)
3668*bed243d3SAndroid Build Coastguard Worker {
3669*bed243d3SAndroid Build Coastguard Worker   return __extension__ (__m256d){ __d, __c, __b, __a };
3670*bed243d3SAndroid Build Coastguard Worker }
3671*bed243d3SAndroid Build Coastguard Worker 
3672*bed243d3SAndroid Build Coastguard Worker /// Constructs a 256-bit floating-point vector of [8 x float] initialized
3673*bed243d3SAndroid Build Coastguard Worker ///    with the specified single-precision floating-point values.
3674*bed243d3SAndroid Build Coastguard Worker ///
3675*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
3676*bed243d3SAndroid Build Coastguard Worker ///
3677*bed243d3SAndroid Build Coastguard Worker /// This intrinsic is a utility function and does not correspond to a specific
3678*bed243d3SAndroid Build Coastguard Worker ///   instruction.
3679*bed243d3SAndroid Build Coastguard Worker ///
3680*bed243d3SAndroid Build Coastguard Worker /// \param __a
3681*bed243d3SAndroid Build Coastguard Worker ///    A single-precision floating-point value used to initialize bits [255:224]
3682*bed243d3SAndroid Build Coastguard Worker ///    of the result.
3683*bed243d3SAndroid Build Coastguard Worker /// \param __b
3684*bed243d3SAndroid Build Coastguard Worker ///    A single-precision floating-point value used to initialize bits [223:192]
3685*bed243d3SAndroid Build Coastguard Worker ///    of the result.
3686*bed243d3SAndroid Build Coastguard Worker /// \param __c
3687*bed243d3SAndroid Build Coastguard Worker ///    A single-precision floating-point value used to initialize bits [191:160]
3688*bed243d3SAndroid Build Coastguard Worker ///    of the result.
3689*bed243d3SAndroid Build Coastguard Worker /// \param __d
3690*bed243d3SAndroid Build Coastguard Worker ///    A single-precision floating-point value used to initialize bits [159:128]
3691*bed243d3SAndroid Build Coastguard Worker ///    of the result.
3692*bed243d3SAndroid Build Coastguard Worker /// \param __e
3693*bed243d3SAndroid Build Coastguard Worker ///    A single-precision floating-point value used to initialize bits [127:96]
3694*bed243d3SAndroid Build Coastguard Worker ///    of the result.
3695*bed243d3SAndroid Build Coastguard Worker /// \param __f
3696*bed243d3SAndroid Build Coastguard Worker ///    A single-precision floating-point value used to initialize bits [95:64]
3697*bed243d3SAndroid Build Coastguard Worker ///    of the result.
3698*bed243d3SAndroid Build Coastguard Worker /// \param __g
3699*bed243d3SAndroid Build Coastguard Worker ///    A single-precision floating-point value used to initialize bits [63:32]
3700*bed243d3SAndroid Build Coastguard Worker ///    of the result.
3701*bed243d3SAndroid Build Coastguard Worker /// \param __h
3702*bed243d3SAndroid Build Coastguard Worker ///    A single-precision floating-point value used to initialize bits [31:0]
3703*bed243d3SAndroid Build Coastguard Worker ///    of the result.
3704*bed243d3SAndroid Build Coastguard Worker /// \returns An initialized 256-bit floating-point vector of [8 x float].
3705*bed243d3SAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_set_ps(float __a,float __b,float __c,float __d,float __e,float __f,float __g,float __h)3706*bed243d3SAndroid Build Coastguard Worker _mm256_set_ps(float __a, float __b, float __c, float __d,
3707*bed243d3SAndroid Build Coastguard Worker               float __e, float __f, float __g, float __h)
3708*bed243d3SAndroid Build Coastguard Worker {
3709*bed243d3SAndroid Build Coastguard Worker   return __extension__ (__m256){ __h, __g, __f, __e, __d, __c, __b, __a };
3710*bed243d3SAndroid Build Coastguard Worker }
3711*bed243d3SAndroid Build Coastguard Worker 
3712*bed243d3SAndroid Build Coastguard Worker /// Constructs a 256-bit integer vector initialized with the specified
3713*bed243d3SAndroid Build Coastguard Worker ///    32-bit integral values.
3714*bed243d3SAndroid Build Coastguard Worker ///
3715*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
3716*bed243d3SAndroid Build Coastguard Worker ///
3717*bed243d3SAndroid Build Coastguard Worker /// This intrinsic is a utility function and does not correspond to a specific
3718*bed243d3SAndroid Build Coastguard Worker ///   instruction.
3719*bed243d3SAndroid Build Coastguard Worker ///
3720*bed243d3SAndroid Build Coastguard Worker /// \param __i0
3721*bed243d3SAndroid Build Coastguard Worker ///    A 32-bit integral value used to initialize bits [255:224] of the result.
3722*bed243d3SAndroid Build Coastguard Worker /// \param __i1
3723*bed243d3SAndroid Build Coastguard Worker ///    A 32-bit integral value used to initialize bits [223:192] of the result.
3724*bed243d3SAndroid Build Coastguard Worker /// \param __i2
3725*bed243d3SAndroid Build Coastguard Worker ///    A 32-bit integral value used to initialize bits [191:160] of the result.
3726*bed243d3SAndroid Build Coastguard Worker /// \param __i3
3727*bed243d3SAndroid Build Coastguard Worker ///    A 32-bit integral value used to initialize bits [159:128] of the result.
3728*bed243d3SAndroid Build Coastguard Worker /// \param __i4
3729*bed243d3SAndroid Build Coastguard Worker ///    A 32-bit integral value used to initialize bits [127:96] of the result.
3730*bed243d3SAndroid Build Coastguard Worker /// \param __i5
3731*bed243d3SAndroid Build Coastguard Worker ///    A 32-bit integral value used to initialize bits [95:64] of the result.
3732*bed243d3SAndroid Build Coastguard Worker /// \param __i6
3733*bed243d3SAndroid Build Coastguard Worker ///    A 32-bit integral value used to initialize bits [63:32] of the result.
3734*bed243d3SAndroid Build Coastguard Worker /// \param __i7
3735*bed243d3SAndroid Build Coastguard Worker ///    A 32-bit integral value used to initialize bits [31:0] of the result.
3736*bed243d3SAndroid Build Coastguard Worker /// \returns An initialized 256-bit integer vector.
3737*bed243d3SAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_set_epi32(int __i0,int __i1,int __i2,int __i3,int __i4,int __i5,int __i6,int __i7)3738*bed243d3SAndroid Build Coastguard Worker _mm256_set_epi32(int __i0, int __i1, int __i2, int __i3,
3739*bed243d3SAndroid Build Coastguard Worker                  int __i4, int __i5, int __i6, int __i7)
3740*bed243d3SAndroid Build Coastguard Worker {
3741*bed243d3SAndroid Build Coastguard Worker   return __extension__ (__m256i)(__v8si){ __i7, __i6, __i5, __i4, __i3, __i2, __i1, __i0 };
3742*bed243d3SAndroid Build Coastguard Worker }
3743*bed243d3SAndroid Build Coastguard Worker 
3744*bed243d3SAndroid Build Coastguard Worker /// Constructs a 256-bit integer vector initialized with the specified
3745*bed243d3SAndroid Build Coastguard Worker ///    16-bit integral values.
3746*bed243d3SAndroid Build Coastguard Worker ///
3747*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
3748*bed243d3SAndroid Build Coastguard Worker ///
3749*bed243d3SAndroid Build Coastguard Worker /// This intrinsic is a utility function and does not correspond to a specific
3750*bed243d3SAndroid Build Coastguard Worker ///   instruction.
3751*bed243d3SAndroid Build Coastguard Worker ///
3752*bed243d3SAndroid Build Coastguard Worker /// \param __w15
3753*bed243d3SAndroid Build Coastguard Worker ///    A 16-bit integral value used to initialize bits [255:240] of the result.
3754*bed243d3SAndroid Build Coastguard Worker /// \param __w14
3755*bed243d3SAndroid Build Coastguard Worker ///    A 16-bit integral value used to initialize bits [239:224] of the result.
3756*bed243d3SAndroid Build Coastguard Worker /// \param __w13
3757*bed243d3SAndroid Build Coastguard Worker ///    A 16-bit integral value used to initialize bits [223:208] of the result.
3758*bed243d3SAndroid Build Coastguard Worker /// \param __w12
3759*bed243d3SAndroid Build Coastguard Worker ///    A 16-bit integral value used to initialize bits [207:192] of the result.
3760*bed243d3SAndroid Build Coastguard Worker /// \param __w11
3761*bed243d3SAndroid Build Coastguard Worker ///    A 16-bit integral value used to initialize bits [191:176] of the result.
3762*bed243d3SAndroid Build Coastguard Worker /// \param __w10
3763*bed243d3SAndroid Build Coastguard Worker ///    A 16-bit integral value used to initialize bits [175:160] of the result.
3764*bed243d3SAndroid Build Coastguard Worker /// \param __w09
3765*bed243d3SAndroid Build Coastguard Worker ///    A 16-bit integral value used to initialize bits [159:144] of the result.
3766*bed243d3SAndroid Build Coastguard Worker /// \param __w08
3767*bed243d3SAndroid Build Coastguard Worker ///    A 16-bit integral value used to initialize bits [143:128] of the result.
3768*bed243d3SAndroid Build Coastguard Worker /// \param __w07
3769*bed243d3SAndroid Build Coastguard Worker ///    A 16-bit integral value used to initialize bits [127:112] of the result.
3770*bed243d3SAndroid Build Coastguard Worker /// \param __w06
3771*bed243d3SAndroid Build Coastguard Worker ///    A 16-bit integral value used to initialize bits [111:96] of the result.
3772*bed243d3SAndroid Build Coastguard Worker /// \param __w05
3773*bed243d3SAndroid Build Coastguard Worker ///    A 16-bit integral value used to initialize bits [95:80] of the result.
3774*bed243d3SAndroid Build Coastguard Worker /// \param __w04
3775*bed243d3SAndroid Build Coastguard Worker ///    A 16-bit integral value used to initialize bits [79:64] of the result.
3776*bed243d3SAndroid Build Coastguard Worker /// \param __w03
3777*bed243d3SAndroid Build Coastguard Worker ///    A 16-bit integral value used to initialize bits [63:48] of the result.
3778*bed243d3SAndroid Build Coastguard Worker /// \param __w02
3779*bed243d3SAndroid Build Coastguard Worker ///    A 16-bit integral value used to initialize bits [47:32] of the result.
3780*bed243d3SAndroid Build Coastguard Worker /// \param __w01
3781*bed243d3SAndroid Build Coastguard Worker ///    A 16-bit integral value used to initialize bits [31:16] of the result.
3782*bed243d3SAndroid Build Coastguard Worker /// \param __w00
3783*bed243d3SAndroid Build Coastguard Worker ///    A 16-bit integral value used to initialize bits [15:0] of the result.
3784*bed243d3SAndroid Build Coastguard Worker /// \returns An initialized 256-bit integer vector.
3785*bed243d3SAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_set_epi16(short __w15,short __w14,short __w13,short __w12,short __w11,short __w10,short __w09,short __w08,short __w07,short __w06,short __w05,short __w04,short __w03,short __w02,short __w01,short __w00)3786*bed243d3SAndroid Build Coastguard Worker _mm256_set_epi16(short __w15, short __w14, short __w13, short __w12,
3787*bed243d3SAndroid Build Coastguard Worker                  short __w11, short __w10, short __w09, short __w08,
3788*bed243d3SAndroid Build Coastguard Worker                  short __w07, short __w06, short __w05, short __w04,
3789*bed243d3SAndroid Build Coastguard Worker                  short __w03, short __w02, short __w01, short __w00)
3790*bed243d3SAndroid Build Coastguard Worker {
3791*bed243d3SAndroid Build Coastguard Worker   return __extension__ (__m256i)(__v16hi){ __w00, __w01, __w02, __w03, __w04, __w05, __w06,
3792*bed243d3SAndroid Build Coastguard Worker     __w07, __w08, __w09, __w10, __w11, __w12, __w13, __w14, __w15 };
3793*bed243d3SAndroid Build Coastguard Worker }
3794*bed243d3SAndroid Build Coastguard Worker 
3795*bed243d3SAndroid Build Coastguard Worker /// Constructs a 256-bit integer vector initialized with the specified
3796*bed243d3SAndroid Build Coastguard Worker ///    8-bit integral values.
3797*bed243d3SAndroid Build Coastguard Worker ///
3798*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
3799*bed243d3SAndroid Build Coastguard Worker ///
3800*bed243d3SAndroid Build Coastguard Worker /// This intrinsic is a utility function and does not correspond to a specific
3801*bed243d3SAndroid Build Coastguard Worker ///   instruction.
3802*bed243d3SAndroid Build Coastguard Worker ///
3803*bed243d3SAndroid Build Coastguard Worker /// \param __b31
3804*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [255:248] of the result.
3805*bed243d3SAndroid Build Coastguard Worker /// \param __b30
3806*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [247:240] of the result.
3807*bed243d3SAndroid Build Coastguard Worker /// \param __b29
3808*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [239:232] of the result.
3809*bed243d3SAndroid Build Coastguard Worker /// \param __b28
3810*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [231:224] of the result.
3811*bed243d3SAndroid Build Coastguard Worker /// \param __b27
3812*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [223:216] of the result.
3813*bed243d3SAndroid Build Coastguard Worker /// \param __b26
3814*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [215:208] of the result.
3815*bed243d3SAndroid Build Coastguard Worker /// \param __b25
3816*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [207:200] of the result.
3817*bed243d3SAndroid Build Coastguard Worker /// \param __b24
3818*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [199:192] of the result.
3819*bed243d3SAndroid Build Coastguard Worker /// \param __b23
3820*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [191:184] of the result.
3821*bed243d3SAndroid Build Coastguard Worker /// \param __b22
3822*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [183:176] of the result.
3823*bed243d3SAndroid Build Coastguard Worker /// \param __b21
3824*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [175:168] of the result.
3825*bed243d3SAndroid Build Coastguard Worker /// \param __b20
3826*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [167:160] of the result.
3827*bed243d3SAndroid Build Coastguard Worker /// \param __b19
3828*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [159:152] of the result.
3829*bed243d3SAndroid Build Coastguard Worker /// \param __b18
3830*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [151:144] of the result.
3831*bed243d3SAndroid Build Coastguard Worker /// \param __b17
3832*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [143:136] of the result.
3833*bed243d3SAndroid Build Coastguard Worker /// \param __b16
3834*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [135:128] of the result.
3835*bed243d3SAndroid Build Coastguard Worker /// \param __b15
3836*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [127:120] of the result.
3837*bed243d3SAndroid Build Coastguard Worker /// \param __b14
3838*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [119:112] of the result.
3839*bed243d3SAndroid Build Coastguard Worker /// \param __b13
3840*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [111:104] of the result.
3841*bed243d3SAndroid Build Coastguard Worker /// \param __b12
3842*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [103:96] of the result.
3843*bed243d3SAndroid Build Coastguard Worker /// \param __b11
3844*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [95:88] of the result.
3845*bed243d3SAndroid Build Coastguard Worker /// \param __b10
3846*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [87:80] of the result.
3847*bed243d3SAndroid Build Coastguard Worker /// \param __b09
3848*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [79:72] of the result.
3849*bed243d3SAndroid Build Coastguard Worker /// \param __b08
3850*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [71:64] of the result.
3851*bed243d3SAndroid Build Coastguard Worker /// \param __b07
3852*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [63:56] of the result.
3853*bed243d3SAndroid Build Coastguard Worker /// \param __b06
3854*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [55:48] of the result.
3855*bed243d3SAndroid Build Coastguard Worker /// \param __b05
3856*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [47:40] of the result.
3857*bed243d3SAndroid Build Coastguard Worker /// \param __b04
3858*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [39:32] of the result.
3859*bed243d3SAndroid Build Coastguard Worker /// \param __b03
3860*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [31:24] of the result.
3861*bed243d3SAndroid Build Coastguard Worker /// \param __b02
3862*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [23:16] of the result.
3863*bed243d3SAndroid Build Coastguard Worker /// \param __b01
3864*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [15:8] of the result.
3865*bed243d3SAndroid Build Coastguard Worker /// \param __b00
3866*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [7:0] of the result.
3867*bed243d3SAndroid Build Coastguard Worker /// \returns An initialized 256-bit integer vector.
3868*bed243d3SAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_set_epi8(char __b31,char __b30,char __b29,char __b28,char __b27,char __b26,char __b25,char __b24,char __b23,char __b22,char __b21,char __b20,char __b19,char __b18,char __b17,char __b16,char __b15,char __b14,char __b13,char __b12,char __b11,char __b10,char __b09,char __b08,char __b07,char __b06,char __b05,char __b04,char __b03,char __b02,char __b01,char __b00)3869*bed243d3SAndroid Build Coastguard Worker _mm256_set_epi8(char __b31, char __b30, char __b29, char __b28,
3870*bed243d3SAndroid Build Coastguard Worker                 char __b27, char __b26, char __b25, char __b24,
3871*bed243d3SAndroid Build Coastguard Worker                 char __b23, char __b22, char __b21, char __b20,
3872*bed243d3SAndroid Build Coastguard Worker                 char __b19, char __b18, char __b17, char __b16,
3873*bed243d3SAndroid Build Coastguard Worker                 char __b15, char __b14, char __b13, char __b12,
3874*bed243d3SAndroid Build Coastguard Worker                 char __b11, char __b10, char __b09, char __b08,
3875*bed243d3SAndroid Build Coastguard Worker                 char __b07, char __b06, char __b05, char __b04,
3876*bed243d3SAndroid Build Coastguard Worker                 char __b03, char __b02, char __b01, char __b00)
3877*bed243d3SAndroid Build Coastguard Worker {
3878*bed243d3SAndroid Build Coastguard Worker   return __extension__ (__m256i)(__v32qi){
3879*bed243d3SAndroid Build Coastguard Worker     __b00, __b01, __b02, __b03, __b04, __b05, __b06, __b07,
3880*bed243d3SAndroid Build Coastguard Worker     __b08, __b09, __b10, __b11, __b12, __b13, __b14, __b15,
3881*bed243d3SAndroid Build Coastguard Worker     __b16, __b17, __b18, __b19, __b20, __b21, __b22, __b23,
3882*bed243d3SAndroid Build Coastguard Worker     __b24, __b25, __b26, __b27, __b28, __b29, __b30, __b31
3883*bed243d3SAndroid Build Coastguard Worker   };
3884*bed243d3SAndroid Build Coastguard Worker }
3885*bed243d3SAndroid Build Coastguard Worker 
3886*bed243d3SAndroid Build Coastguard Worker /// Constructs a 256-bit integer vector initialized with the specified
3887*bed243d3SAndroid Build Coastguard Worker ///    64-bit integral values.
3888*bed243d3SAndroid Build Coastguard Worker ///
3889*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
3890*bed243d3SAndroid Build Coastguard Worker ///
3891*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPUNPCKLQDQ+VINSERTF128 </c>
3892*bed243d3SAndroid Build Coastguard Worker ///   instruction.
3893*bed243d3SAndroid Build Coastguard Worker ///
3894*bed243d3SAndroid Build Coastguard Worker /// \param __a
3895*bed243d3SAndroid Build Coastguard Worker ///    A 64-bit integral value used to initialize bits [255:192] of the result.
3896*bed243d3SAndroid Build Coastguard Worker /// \param __b
3897*bed243d3SAndroid Build Coastguard Worker ///    A 64-bit integral value used to initialize bits [191:128] of the result.
3898*bed243d3SAndroid Build Coastguard Worker /// \param __c
3899*bed243d3SAndroid Build Coastguard Worker ///    A 64-bit integral value used to initialize bits [127:64] of the result.
3900*bed243d3SAndroid Build Coastguard Worker /// \param __d
3901*bed243d3SAndroid Build Coastguard Worker ///    A 64-bit integral value used to initialize bits [63:0] of the result.
3902*bed243d3SAndroid Build Coastguard Worker /// \returns An initialized 256-bit integer vector.
3903*bed243d3SAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_set_epi64x(long long __a,long long __b,long long __c,long long __d)3904*bed243d3SAndroid Build Coastguard Worker _mm256_set_epi64x(long long __a, long long __b, long long __c, long long __d)
3905*bed243d3SAndroid Build Coastguard Worker {
3906*bed243d3SAndroid Build Coastguard Worker   return __extension__ (__m256i)(__v4di){ __d, __c, __b, __a };
3907*bed243d3SAndroid Build Coastguard Worker }
3908*bed243d3SAndroid Build Coastguard Worker 
3909*bed243d3SAndroid Build Coastguard Worker /* Create vectors with elements in reverse order */
3910*bed243d3SAndroid Build Coastguard Worker /// Constructs a 256-bit floating-point vector of [4 x double],
3911*bed243d3SAndroid Build Coastguard Worker ///    initialized in reverse order with the specified double-precision
3912*bed243d3SAndroid Build Coastguard Worker ///    floating-point values.
3913*bed243d3SAndroid Build Coastguard Worker ///
3914*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
3915*bed243d3SAndroid Build Coastguard Worker ///
3916*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VUNPCKLPD+VINSERTF128 </c>
3917*bed243d3SAndroid Build Coastguard Worker ///   instruction.
3918*bed243d3SAndroid Build Coastguard Worker ///
3919*bed243d3SAndroid Build Coastguard Worker /// \param __a
3920*bed243d3SAndroid Build Coastguard Worker ///    A double-precision floating-point value used to initialize bits [63:0]
3921*bed243d3SAndroid Build Coastguard Worker ///    of the result.
3922*bed243d3SAndroid Build Coastguard Worker /// \param __b
3923*bed243d3SAndroid Build Coastguard Worker ///    A double-precision floating-point value used to initialize bits [127:64]
3924*bed243d3SAndroid Build Coastguard Worker ///    of the result.
3925*bed243d3SAndroid Build Coastguard Worker /// \param __c
3926*bed243d3SAndroid Build Coastguard Worker ///    A double-precision floating-point value used to initialize bits [191:128]
3927*bed243d3SAndroid Build Coastguard Worker ///    of the result.
3928*bed243d3SAndroid Build Coastguard Worker /// \param __d
3929*bed243d3SAndroid Build Coastguard Worker ///    A double-precision floating-point value used to initialize bits [255:192]
3930*bed243d3SAndroid Build Coastguard Worker ///    of the result.
3931*bed243d3SAndroid Build Coastguard Worker /// \returns An initialized 256-bit floating-point vector of [4 x double].
3932*bed243d3SAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_setr_pd(double __a,double __b,double __c,double __d)3933*bed243d3SAndroid Build Coastguard Worker _mm256_setr_pd(double __a, double __b, double __c, double __d)
3934*bed243d3SAndroid Build Coastguard Worker {
3935*bed243d3SAndroid Build Coastguard Worker   return _mm256_set_pd(__d, __c, __b, __a);
3936*bed243d3SAndroid Build Coastguard Worker }
3937*bed243d3SAndroid Build Coastguard Worker 
3938*bed243d3SAndroid Build Coastguard Worker /// Constructs a 256-bit floating-point vector of [8 x float],
3939*bed243d3SAndroid Build Coastguard Worker ///    initialized in reverse order with the specified single-precision
3940*bed243d3SAndroid Build Coastguard Worker ///    float-point values.
3941*bed243d3SAndroid Build Coastguard Worker ///
3942*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
3943*bed243d3SAndroid Build Coastguard Worker ///
3944*bed243d3SAndroid Build Coastguard Worker /// This intrinsic is a utility function and does not correspond to a specific
3945*bed243d3SAndroid Build Coastguard Worker ///   instruction.
3946*bed243d3SAndroid Build Coastguard Worker ///
3947*bed243d3SAndroid Build Coastguard Worker /// \param __a
3948*bed243d3SAndroid Build Coastguard Worker ///    A single-precision floating-point value used to initialize bits [31:0]
3949*bed243d3SAndroid Build Coastguard Worker ///    of the result.
3950*bed243d3SAndroid Build Coastguard Worker /// \param __b
3951*bed243d3SAndroid Build Coastguard Worker ///    A single-precision floating-point value used to initialize bits [63:32]
3952*bed243d3SAndroid Build Coastguard Worker ///    of the result.
3953*bed243d3SAndroid Build Coastguard Worker /// \param __c
3954*bed243d3SAndroid Build Coastguard Worker ///    A single-precision floating-point value used to initialize bits [95:64]
3955*bed243d3SAndroid Build Coastguard Worker ///    of the result.
3956*bed243d3SAndroid Build Coastguard Worker /// \param __d
3957*bed243d3SAndroid Build Coastguard Worker ///    A single-precision floating-point value used to initialize bits [127:96]
3958*bed243d3SAndroid Build Coastguard Worker ///    of the result.
3959*bed243d3SAndroid Build Coastguard Worker /// \param __e
3960*bed243d3SAndroid Build Coastguard Worker ///    A single-precision floating-point value used to initialize bits [159:128]
3961*bed243d3SAndroid Build Coastguard Worker ///    of the result.
3962*bed243d3SAndroid Build Coastguard Worker /// \param __f
3963*bed243d3SAndroid Build Coastguard Worker ///    A single-precision floating-point value used to initialize bits [191:160]
3964*bed243d3SAndroid Build Coastguard Worker ///    of the result.
3965*bed243d3SAndroid Build Coastguard Worker /// \param __g
3966*bed243d3SAndroid Build Coastguard Worker ///    A single-precision floating-point value used to initialize bits [223:192]
3967*bed243d3SAndroid Build Coastguard Worker ///    of the result.
3968*bed243d3SAndroid Build Coastguard Worker /// \param __h
3969*bed243d3SAndroid Build Coastguard Worker ///    A single-precision floating-point value used to initialize bits [255:224]
3970*bed243d3SAndroid Build Coastguard Worker ///    of the result.
3971*bed243d3SAndroid Build Coastguard Worker /// \returns An initialized 256-bit floating-point vector of [8 x float].
3972*bed243d3SAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_setr_ps(float __a,float __b,float __c,float __d,float __e,float __f,float __g,float __h)3973*bed243d3SAndroid Build Coastguard Worker _mm256_setr_ps(float __a, float __b, float __c, float __d,
3974*bed243d3SAndroid Build Coastguard Worker                float __e, float __f, float __g, float __h)
3975*bed243d3SAndroid Build Coastguard Worker {
3976*bed243d3SAndroid Build Coastguard Worker   return _mm256_set_ps(__h, __g, __f, __e, __d, __c, __b, __a);
3977*bed243d3SAndroid Build Coastguard Worker }
3978*bed243d3SAndroid Build Coastguard Worker 
3979*bed243d3SAndroid Build Coastguard Worker /// Constructs a 256-bit integer vector, initialized in reverse order
3980*bed243d3SAndroid Build Coastguard Worker ///    with the specified 32-bit integral values.
3981*bed243d3SAndroid Build Coastguard Worker ///
3982*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
3983*bed243d3SAndroid Build Coastguard Worker ///
3984*bed243d3SAndroid Build Coastguard Worker /// This intrinsic is a utility function and does not correspond to a specific
3985*bed243d3SAndroid Build Coastguard Worker ///   instruction.
3986*bed243d3SAndroid Build Coastguard Worker ///
3987*bed243d3SAndroid Build Coastguard Worker /// \param __i0
3988*bed243d3SAndroid Build Coastguard Worker ///    A 32-bit integral value used to initialize bits [31:0] of the result.
3989*bed243d3SAndroid Build Coastguard Worker /// \param __i1
3990*bed243d3SAndroid Build Coastguard Worker ///    A 32-bit integral value used to initialize bits [63:32] of the result.
3991*bed243d3SAndroid Build Coastguard Worker /// \param __i2
3992*bed243d3SAndroid Build Coastguard Worker ///    A 32-bit integral value used to initialize bits [95:64] of the result.
3993*bed243d3SAndroid Build Coastguard Worker /// \param __i3
3994*bed243d3SAndroid Build Coastguard Worker ///    A 32-bit integral value used to initialize bits [127:96] of the result.
3995*bed243d3SAndroid Build Coastguard Worker /// \param __i4
3996*bed243d3SAndroid Build Coastguard Worker ///    A 32-bit integral value used to initialize bits [159:128] of the result.
3997*bed243d3SAndroid Build Coastguard Worker /// \param __i5
3998*bed243d3SAndroid Build Coastguard Worker ///    A 32-bit integral value used to initialize bits [191:160] of the result.
3999*bed243d3SAndroid Build Coastguard Worker /// \param __i6
4000*bed243d3SAndroid Build Coastguard Worker ///    A 32-bit integral value used to initialize bits [223:192] of the result.
4001*bed243d3SAndroid Build Coastguard Worker /// \param __i7
4002*bed243d3SAndroid Build Coastguard Worker ///    A 32-bit integral value used to initialize bits [255:224] of the result.
4003*bed243d3SAndroid Build Coastguard Worker /// \returns An initialized 256-bit integer vector.
4004*bed243d3SAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_setr_epi32(int __i0,int __i1,int __i2,int __i3,int __i4,int __i5,int __i6,int __i7)4005*bed243d3SAndroid Build Coastguard Worker _mm256_setr_epi32(int __i0, int __i1, int __i2, int __i3,
4006*bed243d3SAndroid Build Coastguard Worker                   int __i4, int __i5, int __i6, int __i7)
4007*bed243d3SAndroid Build Coastguard Worker {
4008*bed243d3SAndroid Build Coastguard Worker   return _mm256_set_epi32(__i7, __i6, __i5, __i4, __i3, __i2, __i1, __i0);
4009*bed243d3SAndroid Build Coastguard Worker }
4010*bed243d3SAndroid Build Coastguard Worker 
4011*bed243d3SAndroid Build Coastguard Worker /// Constructs a 256-bit integer vector, initialized in reverse order
4012*bed243d3SAndroid Build Coastguard Worker ///    with the specified 16-bit integral values.
4013*bed243d3SAndroid Build Coastguard Worker ///
4014*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
4015*bed243d3SAndroid Build Coastguard Worker ///
4016*bed243d3SAndroid Build Coastguard Worker /// This intrinsic is a utility function and does not correspond to a specific
4017*bed243d3SAndroid Build Coastguard Worker ///   instruction.
4018*bed243d3SAndroid Build Coastguard Worker ///
4019*bed243d3SAndroid Build Coastguard Worker /// \param __w15
4020*bed243d3SAndroid Build Coastguard Worker ///    A 16-bit integral value used to initialize bits [15:0] of the result.
4021*bed243d3SAndroid Build Coastguard Worker /// \param __w14
4022*bed243d3SAndroid Build Coastguard Worker ///    A 16-bit integral value used to initialize bits [31:16] of the result.
4023*bed243d3SAndroid Build Coastguard Worker /// \param __w13
4024*bed243d3SAndroid Build Coastguard Worker ///    A 16-bit integral value used to initialize bits [47:32] of the result.
4025*bed243d3SAndroid Build Coastguard Worker /// \param __w12
4026*bed243d3SAndroid Build Coastguard Worker ///    A 16-bit integral value used to initialize bits [63:48] of the result.
4027*bed243d3SAndroid Build Coastguard Worker /// \param __w11
4028*bed243d3SAndroid Build Coastguard Worker ///    A 16-bit integral value used to initialize bits [79:64] of the result.
4029*bed243d3SAndroid Build Coastguard Worker /// \param __w10
4030*bed243d3SAndroid Build Coastguard Worker ///    A 16-bit integral value used to initialize bits [95:80] of the result.
4031*bed243d3SAndroid Build Coastguard Worker /// \param __w09
4032*bed243d3SAndroid Build Coastguard Worker ///    A 16-bit integral value used to initialize bits [111:96] of the result.
4033*bed243d3SAndroid Build Coastguard Worker /// \param __w08
4034*bed243d3SAndroid Build Coastguard Worker ///    A 16-bit integral value used to initialize bits [127:112] of the result.
4035*bed243d3SAndroid Build Coastguard Worker /// \param __w07
4036*bed243d3SAndroid Build Coastguard Worker ///    A 16-bit integral value used to initialize bits [143:128] of the result.
4037*bed243d3SAndroid Build Coastguard Worker /// \param __w06
4038*bed243d3SAndroid Build Coastguard Worker ///    A 16-bit integral value used to initialize bits [159:144] of the result.
4039*bed243d3SAndroid Build Coastguard Worker /// \param __w05
4040*bed243d3SAndroid Build Coastguard Worker ///    A 16-bit integral value used to initialize bits [175:160] of the result.
4041*bed243d3SAndroid Build Coastguard Worker /// \param __w04
4042*bed243d3SAndroid Build Coastguard Worker ///    A 16-bit integral value used to initialize bits [191:176] of the result.
4043*bed243d3SAndroid Build Coastguard Worker /// \param __w03
4044*bed243d3SAndroid Build Coastguard Worker ///    A 16-bit integral value used to initialize bits [207:192] of the result.
4045*bed243d3SAndroid Build Coastguard Worker /// \param __w02
4046*bed243d3SAndroid Build Coastguard Worker ///    A 16-bit integral value used to initialize bits [223:208] of the result.
4047*bed243d3SAndroid Build Coastguard Worker /// \param __w01
4048*bed243d3SAndroid Build Coastguard Worker ///    A 16-bit integral value used to initialize bits [239:224] of the result.
4049*bed243d3SAndroid Build Coastguard Worker /// \param __w00
4050*bed243d3SAndroid Build Coastguard Worker ///    A 16-bit integral value used to initialize bits [255:240] of the result.
4051*bed243d3SAndroid Build Coastguard Worker /// \returns An initialized 256-bit integer vector.
4052*bed243d3SAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_setr_epi16(short __w15,short __w14,short __w13,short __w12,short __w11,short __w10,short __w09,short __w08,short __w07,short __w06,short __w05,short __w04,short __w03,short __w02,short __w01,short __w00)4053*bed243d3SAndroid Build Coastguard Worker _mm256_setr_epi16(short __w15, short __w14, short __w13, short __w12,
4054*bed243d3SAndroid Build Coastguard Worker        short __w11, short __w10, short __w09, short __w08,
4055*bed243d3SAndroid Build Coastguard Worker        short __w07, short __w06, short __w05, short __w04,
4056*bed243d3SAndroid Build Coastguard Worker        short __w03, short __w02, short __w01, short __w00)
4057*bed243d3SAndroid Build Coastguard Worker {
4058*bed243d3SAndroid Build Coastguard Worker   return _mm256_set_epi16(__w00, __w01, __w02, __w03,
4059*bed243d3SAndroid Build Coastguard Worker                           __w04, __w05, __w06, __w07,
4060*bed243d3SAndroid Build Coastguard Worker                           __w08, __w09, __w10, __w11,
4061*bed243d3SAndroid Build Coastguard Worker                           __w12, __w13, __w14, __w15);
4062*bed243d3SAndroid Build Coastguard Worker }
4063*bed243d3SAndroid Build Coastguard Worker 
4064*bed243d3SAndroid Build Coastguard Worker /// Constructs a 256-bit integer vector, initialized in reverse order
4065*bed243d3SAndroid Build Coastguard Worker ///    with the specified 8-bit integral values.
4066*bed243d3SAndroid Build Coastguard Worker ///
4067*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
4068*bed243d3SAndroid Build Coastguard Worker ///
4069*bed243d3SAndroid Build Coastguard Worker /// This intrinsic is a utility function and does not correspond to a specific
4070*bed243d3SAndroid Build Coastguard Worker ///   instruction.
4071*bed243d3SAndroid Build Coastguard Worker ///
4072*bed243d3SAndroid Build Coastguard Worker /// \param __b31
4073*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [7:0] of the result.
4074*bed243d3SAndroid Build Coastguard Worker /// \param __b30
4075*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [15:8] of the result.
4076*bed243d3SAndroid Build Coastguard Worker /// \param __b29
4077*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [23:16] of the result.
4078*bed243d3SAndroid Build Coastguard Worker /// \param __b28
4079*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [31:24] of the result.
4080*bed243d3SAndroid Build Coastguard Worker /// \param __b27
4081*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [39:32] of the result.
4082*bed243d3SAndroid Build Coastguard Worker /// \param __b26
4083*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [47:40] of the result.
4084*bed243d3SAndroid Build Coastguard Worker /// \param __b25
4085*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [55:48] of the result.
4086*bed243d3SAndroid Build Coastguard Worker /// \param __b24
4087*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [63:56] of the result.
4088*bed243d3SAndroid Build Coastguard Worker /// \param __b23
4089*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [71:64] of the result.
4090*bed243d3SAndroid Build Coastguard Worker /// \param __b22
4091*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [79:72] of the result.
4092*bed243d3SAndroid Build Coastguard Worker /// \param __b21
4093*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [87:80] of the result.
4094*bed243d3SAndroid Build Coastguard Worker /// \param __b20
4095*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [95:88] of the result.
4096*bed243d3SAndroid Build Coastguard Worker /// \param __b19
4097*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [103:96] of the result.
4098*bed243d3SAndroid Build Coastguard Worker /// \param __b18
4099*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [111:104] of the result.
4100*bed243d3SAndroid Build Coastguard Worker /// \param __b17
4101*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [119:112] of the result.
4102*bed243d3SAndroid Build Coastguard Worker /// \param __b16
4103*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [127:120] of the result.
4104*bed243d3SAndroid Build Coastguard Worker /// \param __b15
4105*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [135:128] of the result.
4106*bed243d3SAndroid Build Coastguard Worker /// \param __b14
4107*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [143:136] of the result.
4108*bed243d3SAndroid Build Coastguard Worker /// \param __b13
4109*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [151:144] of the result.
4110*bed243d3SAndroid Build Coastguard Worker /// \param __b12
4111*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [159:152] of the result.
4112*bed243d3SAndroid Build Coastguard Worker /// \param __b11
4113*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [167:160] of the result.
4114*bed243d3SAndroid Build Coastguard Worker /// \param __b10
4115*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [175:168] of the result.
4116*bed243d3SAndroid Build Coastguard Worker /// \param __b09
4117*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [183:176] of the result.
4118*bed243d3SAndroid Build Coastguard Worker /// \param __b08
4119*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [191:184] of the result.
4120*bed243d3SAndroid Build Coastguard Worker /// \param __b07
4121*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [199:192] of the result.
4122*bed243d3SAndroid Build Coastguard Worker /// \param __b06
4123*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [207:200] of the result.
4124*bed243d3SAndroid Build Coastguard Worker /// \param __b05
4125*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [215:208] of the result.
4126*bed243d3SAndroid Build Coastguard Worker /// \param __b04
4127*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [223:216] of the result.
4128*bed243d3SAndroid Build Coastguard Worker /// \param __b03
4129*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [231:224] of the result.
4130*bed243d3SAndroid Build Coastguard Worker /// \param __b02
4131*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [239:232] of the result.
4132*bed243d3SAndroid Build Coastguard Worker /// \param __b01
4133*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [247:240] of the result.
4134*bed243d3SAndroid Build Coastguard Worker /// \param __b00
4135*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize bits [255:248] of the result.
4136*bed243d3SAndroid Build Coastguard Worker /// \returns An initialized 256-bit integer vector.
4137*bed243d3SAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_setr_epi8(char __b31,char __b30,char __b29,char __b28,char __b27,char __b26,char __b25,char __b24,char __b23,char __b22,char __b21,char __b20,char __b19,char __b18,char __b17,char __b16,char __b15,char __b14,char __b13,char __b12,char __b11,char __b10,char __b09,char __b08,char __b07,char __b06,char __b05,char __b04,char __b03,char __b02,char __b01,char __b00)4138*bed243d3SAndroid Build Coastguard Worker _mm256_setr_epi8(char __b31, char __b30, char __b29, char __b28,
4139*bed243d3SAndroid Build Coastguard Worker                  char __b27, char __b26, char __b25, char __b24,
4140*bed243d3SAndroid Build Coastguard Worker                  char __b23, char __b22, char __b21, char __b20,
4141*bed243d3SAndroid Build Coastguard Worker                  char __b19, char __b18, char __b17, char __b16,
4142*bed243d3SAndroid Build Coastguard Worker                  char __b15, char __b14, char __b13, char __b12,
4143*bed243d3SAndroid Build Coastguard Worker                  char __b11, char __b10, char __b09, char __b08,
4144*bed243d3SAndroid Build Coastguard Worker                  char __b07, char __b06, char __b05, char __b04,
4145*bed243d3SAndroid Build Coastguard Worker                  char __b03, char __b02, char __b01, char __b00)
4146*bed243d3SAndroid Build Coastguard Worker {
4147*bed243d3SAndroid Build Coastguard Worker   return _mm256_set_epi8(__b00, __b01, __b02, __b03, __b04, __b05, __b06, __b07,
4148*bed243d3SAndroid Build Coastguard Worker                          __b08, __b09, __b10, __b11, __b12, __b13, __b14, __b15,
4149*bed243d3SAndroid Build Coastguard Worker                          __b16, __b17, __b18, __b19, __b20, __b21, __b22, __b23,
4150*bed243d3SAndroid Build Coastguard Worker                          __b24, __b25, __b26, __b27, __b28, __b29, __b30, __b31);
4151*bed243d3SAndroid Build Coastguard Worker }
4152*bed243d3SAndroid Build Coastguard Worker 
4153*bed243d3SAndroid Build Coastguard Worker /// Constructs a 256-bit integer vector, initialized in reverse order
4154*bed243d3SAndroid Build Coastguard Worker ///    with the specified 64-bit integral values.
4155*bed243d3SAndroid Build Coastguard Worker ///
4156*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
4157*bed243d3SAndroid Build Coastguard Worker ///
4158*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPUNPCKLQDQ+VINSERTF128 </c>
4159*bed243d3SAndroid Build Coastguard Worker ///   instruction.
4160*bed243d3SAndroid Build Coastguard Worker ///
4161*bed243d3SAndroid Build Coastguard Worker /// \param __a
4162*bed243d3SAndroid Build Coastguard Worker ///    A 64-bit integral value used to initialize bits [63:0] of the result.
4163*bed243d3SAndroid Build Coastguard Worker /// \param __b
4164*bed243d3SAndroid Build Coastguard Worker ///    A 64-bit integral value used to initialize bits [127:64] of the result.
4165*bed243d3SAndroid Build Coastguard Worker /// \param __c
4166*bed243d3SAndroid Build Coastguard Worker ///    A 64-bit integral value used to initialize bits [191:128] of the result.
4167*bed243d3SAndroid Build Coastguard Worker /// \param __d
4168*bed243d3SAndroid Build Coastguard Worker ///    A 64-bit integral value used to initialize bits [255:192] of the result.
4169*bed243d3SAndroid Build Coastguard Worker /// \returns An initialized 256-bit integer vector.
4170*bed243d3SAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_setr_epi64x(long long __a,long long __b,long long __c,long long __d)4171*bed243d3SAndroid Build Coastguard Worker _mm256_setr_epi64x(long long __a, long long __b, long long __c, long long __d)
4172*bed243d3SAndroid Build Coastguard Worker {
4173*bed243d3SAndroid Build Coastguard Worker   return _mm256_set_epi64x(__d, __c, __b, __a);
4174*bed243d3SAndroid Build Coastguard Worker }
4175*bed243d3SAndroid Build Coastguard Worker 
/* Create vectors with repeated elements */
4177*bed243d3SAndroid Build Coastguard Worker /// Constructs a 256-bit floating-point vector of [4 x double], with each
4178*bed243d3SAndroid Build Coastguard Worker ///    of the four double-precision floating-point vector elements set to the
4179*bed243d3SAndroid Build Coastguard Worker ///    specified double-precision floating-point value.
4180*bed243d3SAndroid Build Coastguard Worker ///
4181*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
4182*bed243d3SAndroid Build Coastguard Worker ///
4183*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VMOVDDUP+VINSERTF128 </c> instruction.
4184*bed243d3SAndroid Build Coastguard Worker ///
4185*bed243d3SAndroid Build Coastguard Worker /// \param __w
4186*bed243d3SAndroid Build Coastguard Worker ///    A double-precision floating-point value used to initialize each vector
4187*bed243d3SAndroid Build Coastguard Worker ///    element of the result.
4188*bed243d3SAndroid Build Coastguard Worker /// \returns An initialized 256-bit floating-point vector of [4 x double].
4189*bed243d3SAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_set1_pd(double __w)4190*bed243d3SAndroid Build Coastguard Worker _mm256_set1_pd(double __w)
4191*bed243d3SAndroid Build Coastguard Worker {
4192*bed243d3SAndroid Build Coastguard Worker   return _mm256_set_pd(__w, __w, __w, __w);
4193*bed243d3SAndroid Build Coastguard Worker }
4194*bed243d3SAndroid Build Coastguard Worker 
4195*bed243d3SAndroid Build Coastguard Worker /// Constructs a 256-bit floating-point vector of [8 x float], with each
4196*bed243d3SAndroid Build Coastguard Worker ///    of the eight single-precision floating-point vector elements set to the
4197*bed243d3SAndroid Build Coastguard Worker ///    specified single-precision floating-point value.
4198*bed243d3SAndroid Build Coastguard Worker ///
4199*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
4200*bed243d3SAndroid Build Coastguard Worker ///
4201*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPERMILPS+VINSERTF128 </c>
4202*bed243d3SAndroid Build Coastguard Worker ///   instruction.
4203*bed243d3SAndroid Build Coastguard Worker ///
4204*bed243d3SAndroid Build Coastguard Worker /// \param __w
4205*bed243d3SAndroid Build Coastguard Worker ///    A single-precision floating-point value used to initialize each vector
4206*bed243d3SAndroid Build Coastguard Worker ///    element of the result.
4207*bed243d3SAndroid Build Coastguard Worker /// \returns An initialized 256-bit floating-point vector of [8 x float].
4208*bed243d3SAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_set1_ps(float __w)4209*bed243d3SAndroid Build Coastguard Worker _mm256_set1_ps(float __w)
4210*bed243d3SAndroid Build Coastguard Worker {
4211*bed243d3SAndroid Build Coastguard Worker   return _mm256_set_ps(__w, __w, __w, __w, __w, __w, __w, __w);
4212*bed243d3SAndroid Build Coastguard Worker }
4213*bed243d3SAndroid Build Coastguard Worker 
4214*bed243d3SAndroid Build Coastguard Worker /// Constructs a 256-bit integer vector of [8 x i32], with each of the
4215*bed243d3SAndroid Build Coastguard Worker ///    32-bit integral vector elements set to the specified 32-bit integral
4216*bed243d3SAndroid Build Coastguard Worker ///    value.
4217*bed243d3SAndroid Build Coastguard Worker ///
4218*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
4219*bed243d3SAndroid Build Coastguard Worker ///
4220*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPERMILPS+VINSERTF128 </c>
4221*bed243d3SAndroid Build Coastguard Worker ///   instruction.
4222*bed243d3SAndroid Build Coastguard Worker ///
4223*bed243d3SAndroid Build Coastguard Worker /// \param __i
4224*bed243d3SAndroid Build Coastguard Worker ///    A 32-bit integral value used to initialize each vector element of the
4225*bed243d3SAndroid Build Coastguard Worker ///    result.
4226*bed243d3SAndroid Build Coastguard Worker /// \returns An initialized 256-bit integer vector of [8 x i32].
4227*bed243d3SAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_set1_epi32(int __i)4228*bed243d3SAndroid Build Coastguard Worker _mm256_set1_epi32(int __i)
4229*bed243d3SAndroid Build Coastguard Worker {
4230*bed243d3SAndroid Build Coastguard Worker   return _mm256_set_epi32(__i, __i, __i, __i, __i, __i, __i, __i);
4231*bed243d3SAndroid Build Coastguard Worker }
4232*bed243d3SAndroid Build Coastguard Worker 
4233*bed243d3SAndroid Build Coastguard Worker /// Constructs a 256-bit integer vector of [16 x i16], with each of the
4234*bed243d3SAndroid Build Coastguard Worker ///    16-bit integral vector elements set to the specified 16-bit integral
4235*bed243d3SAndroid Build Coastguard Worker ///    value.
4236*bed243d3SAndroid Build Coastguard Worker ///
4237*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
4238*bed243d3SAndroid Build Coastguard Worker ///
4239*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPSHUFB+VINSERTF128 </c> instruction.
4240*bed243d3SAndroid Build Coastguard Worker ///
4241*bed243d3SAndroid Build Coastguard Worker /// \param __w
4242*bed243d3SAndroid Build Coastguard Worker ///    A 16-bit integral value used to initialize each vector element of the
4243*bed243d3SAndroid Build Coastguard Worker ///    result.
4244*bed243d3SAndroid Build Coastguard Worker /// \returns An initialized 256-bit integer vector of [16 x i16].
4245*bed243d3SAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_set1_epi16(short __w)4246*bed243d3SAndroid Build Coastguard Worker _mm256_set1_epi16(short __w)
4247*bed243d3SAndroid Build Coastguard Worker {
4248*bed243d3SAndroid Build Coastguard Worker   return _mm256_set_epi16(__w, __w, __w, __w, __w, __w, __w, __w,
4249*bed243d3SAndroid Build Coastguard Worker                           __w, __w, __w, __w, __w, __w, __w, __w);
4250*bed243d3SAndroid Build Coastguard Worker }
4251*bed243d3SAndroid Build Coastguard Worker 
4252*bed243d3SAndroid Build Coastguard Worker /// Constructs a 256-bit integer vector of [32 x i8], with each of the
4253*bed243d3SAndroid Build Coastguard Worker ///    8-bit integral vector elements set to the specified 8-bit integral value.
4254*bed243d3SAndroid Build Coastguard Worker ///
4255*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
4256*bed243d3SAndroid Build Coastguard Worker ///
4257*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VPSHUFB+VINSERTF128 </c> instruction.
4258*bed243d3SAndroid Build Coastguard Worker ///
4259*bed243d3SAndroid Build Coastguard Worker /// \param __b
4260*bed243d3SAndroid Build Coastguard Worker ///    An 8-bit integral value used to initialize each vector element of the
4261*bed243d3SAndroid Build Coastguard Worker ///    result.
4262*bed243d3SAndroid Build Coastguard Worker /// \returns An initialized 256-bit integer vector of [32 x i8].
4263*bed243d3SAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_set1_epi8(char __b)4264*bed243d3SAndroid Build Coastguard Worker _mm256_set1_epi8(char __b)
4265*bed243d3SAndroid Build Coastguard Worker {
4266*bed243d3SAndroid Build Coastguard Worker   return _mm256_set_epi8(__b, __b, __b, __b, __b, __b, __b, __b,
4267*bed243d3SAndroid Build Coastguard Worker                          __b, __b, __b, __b, __b, __b, __b, __b,
4268*bed243d3SAndroid Build Coastguard Worker                          __b, __b, __b, __b, __b, __b, __b, __b,
4269*bed243d3SAndroid Build Coastguard Worker                          __b, __b, __b, __b, __b, __b, __b, __b);
4270*bed243d3SAndroid Build Coastguard Worker }
4271*bed243d3SAndroid Build Coastguard Worker 
4272*bed243d3SAndroid Build Coastguard Worker /// Constructs a 256-bit integer vector of [4 x i64], with each of the
4273*bed243d3SAndroid Build Coastguard Worker ///    64-bit integral vector elements set to the specified 64-bit integral
4274*bed243d3SAndroid Build Coastguard Worker ///    value.
4275*bed243d3SAndroid Build Coastguard Worker ///
4276*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
4277*bed243d3SAndroid Build Coastguard Worker ///
4278*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VMOVDDUP+VINSERTF128 </c> instruction.
4279*bed243d3SAndroid Build Coastguard Worker ///
4280*bed243d3SAndroid Build Coastguard Worker /// \param __q
4281*bed243d3SAndroid Build Coastguard Worker ///    A 64-bit integral value used to initialize each vector element of the
4282*bed243d3SAndroid Build Coastguard Worker ///    result.
4283*bed243d3SAndroid Build Coastguard Worker /// \returns An initialized 256-bit integer vector of [4 x i64].
4284*bed243d3SAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_set1_epi64x(long long __q)4285*bed243d3SAndroid Build Coastguard Worker _mm256_set1_epi64x(long long __q)
4286*bed243d3SAndroid Build Coastguard Worker {
4287*bed243d3SAndroid Build Coastguard Worker   return _mm256_set_epi64x(__q, __q, __q, __q);
4288*bed243d3SAndroid Build Coastguard Worker }
4289*bed243d3SAndroid Build Coastguard Worker 
/* Create zeroed vectors */
4291*bed243d3SAndroid Build Coastguard Worker /// Constructs a 256-bit floating-point vector of [4 x double] with all
4292*bed243d3SAndroid Build Coastguard Worker ///    vector elements initialized to zero.
4293*bed243d3SAndroid Build Coastguard Worker ///
4294*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
4295*bed243d3SAndroid Build Coastguard Worker ///
4296*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VXORPS </c> instruction.
4297*bed243d3SAndroid Build Coastguard Worker ///
4298*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [4 x double] with all elements set to zero.
4299*bed243d3SAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_setzero_pd(void)4300*bed243d3SAndroid Build Coastguard Worker _mm256_setzero_pd(void)
4301*bed243d3SAndroid Build Coastguard Worker {
4302*bed243d3SAndroid Build Coastguard Worker   return __extension__ (__m256d){ 0.0, 0.0, 0.0, 0.0 };
4303*bed243d3SAndroid Build Coastguard Worker }
4304*bed243d3SAndroid Build Coastguard Worker 
4305*bed243d3SAndroid Build Coastguard Worker /// Constructs a 256-bit floating-point vector of [8 x float] with all
4306*bed243d3SAndroid Build Coastguard Worker ///    vector elements initialized to zero.
4307*bed243d3SAndroid Build Coastguard Worker ///
4308*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
4309*bed243d3SAndroid Build Coastguard Worker ///
4310*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VXORPS </c> instruction.
4311*bed243d3SAndroid Build Coastguard Worker ///
4312*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit vector of [8 x float] with all elements set to zero.
4313*bed243d3SAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_setzero_ps(void)4314*bed243d3SAndroid Build Coastguard Worker _mm256_setzero_ps(void)
4315*bed243d3SAndroid Build Coastguard Worker {
4316*bed243d3SAndroid Build Coastguard Worker   return __extension__ (__m256){ 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f };
4317*bed243d3SAndroid Build Coastguard Worker }
4318*bed243d3SAndroid Build Coastguard Worker 
4319*bed243d3SAndroid Build Coastguard Worker /// Constructs a 256-bit integer vector initialized to zero.
4320*bed243d3SAndroid Build Coastguard Worker ///
4321*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
4322*bed243d3SAndroid Build Coastguard Worker ///
4323*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VXORPS </c> instruction.
4324*bed243d3SAndroid Build Coastguard Worker ///
4325*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit integer vector initialized to zero.
4326*bed243d3SAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_setzero_si256(void)4327*bed243d3SAndroid Build Coastguard Worker _mm256_setzero_si256(void)
4328*bed243d3SAndroid Build Coastguard Worker {
4329*bed243d3SAndroid Build Coastguard Worker   return __extension__ (__m256i)(__v4di){ 0, 0, 0, 0 };
4330*bed243d3SAndroid Build Coastguard Worker }
4331*bed243d3SAndroid Build Coastguard Worker 
/* Cast between vector types */
4333*bed243d3SAndroid Build Coastguard Worker /// Casts a 256-bit floating-point vector of [4 x double] into a 256-bit
4334*bed243d3SAndroid Build Coastguard Worker ///    floating-point vector of [8 x float].
4335*bed243d3SAndroid Build Coastguard Worker ///
4336*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
4337*bed243d3SAndroid Build Coastguard Worker ///
4338*bed243d3SAndroid Build Coastguard Worker /// This intrinsic has no corresponding instruction.
4339*bed243d3SAndroid Build Coastguard Worker ///
4340*bed243d3SAndroid Build Coastguard Worker /// \param __a
4341*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit floating-point vector of [4 x double].
4342*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit floating-point vector of [8 x float] containing the same
4343*bed243d3SAndroid Build Coastguard Worker ///    bitwise pattern as the parameter.
4344*bed243d3SAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_castpd_ps(__m256d __a)4345*bed243d3SAndroid Build Coastguard Worker _mm256_castpd_ps(__m256d __a)
4346*bed243d3SAndroid Build Coastguard Worker {
4347*bed243d3SAndroid Build Coastguard Worker   return (__m256)__a;
4348*bed243d3SAndroid Build Coastguard Worker }
4349*bed243d3SAndroid Build Coastguard Worker 
4350*bed243d3SAndroid Build Coastguard Worker /// Casts a 256-bit floating-point vector of [4 x double] into a 256-bit
4351*bed243d3SAndroid Build Coastguard Worker ///    integer vector.
4352*bed243d3SAndroid Build Coastguard Worker ///
4353*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
4354*bed243d3SAndroid Build Coastguard Worker ///
4355*bed243d3SAndroid Build Coastguard Worker /// This intrinsic has no corresponding instruction.
4356*bed243d3SAndroid Build Coastguard Worker ///
4357*bed243d3SAndroid Build Coastguard Worker /// \param __a
4358*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit floating-point vector of [4 x double].
4359*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit integer vector containing the same bitwise pattern as the
4360*bed243d3SAndroid Build Coastguard Worker ///    parameter.
4361*bed243d3SAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_castpd_si256(__m256d __a)4362*bed243d3SAndroid Build Coastguard Worker _mm256_castpd_si256(__m256d __a)
4363*bed243d3SAndroid Build Coastguard Worker {
4364*bed243d3SAndroid Build Coastguard Worker   return (__m256i)__a;
4365*bed243d3SAndroid Build Coastguard Worker }
4366*bed243d3SAndroid Build Coastguard Worker 
4367*bed243d3SAndroid Build Coastguard Worker /// Casts a 256-bit floating-point vector of [8 x float] into a 256-bit
4368*bed243d3SAndroid Build Coastguard Worker ///    floating-point vector of [4 x double].
4369*bed243d3SAndroid Build Coastguard Worker ///
4370*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
4371*bed243d3SAndroid Build Coastguard Worker ///
4372*bed243d3SAndroid Build Coastguard Worker /// This intrinsic has no corresponding instruction.
4373*bed243d3SAndroid Build Coastguard Worker ///
4374*bed243d3SAndroid Build Coastguard Worker /// \param __a
4375*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit floating-point vector of [8 x float].
4376*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit floating-point vector of [4 x double] containing the same
4377*bed243d3SAndroid Build Coastguard Worker ///    bitwise pattern as the parameter.
4378*bed243d3SAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_castps_pd(__m256 __a)4379*bed243d3SAndroid Build Coastguard Worker _mm256_castps_pd(__m256 __a)
4380*bed243d3SAndroid Build Coastguard Worker {
4381*bed243d3SAndroid Build Coastguard Worker   return (__m256d)__a;
4382*bed243d3SAndroid Build Coastguard Worker }
4383*bed243d3SAndroid Build Coastguard Worker 
4384*bed243d3SAndroid Build Coastguard Worker /// Casts a 256-bit floating-point vector of [8 x float] into a 256-bit
4385*bed243d3SAndroid Build Coastguard Worker ///    integer vector.
4386*bed243d3SAndroid Build Coastguard Worker ///
4387*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
4388*bed243d3SAndroid Build Coastguard Worker ///
4389*bed243d3SAndroid Build Coastguard Worker /// This intrinsic has no corresponding instruction.
4390*bed243d3SAndroid Build Coastguard Worker ///
4391*bed243d3SAndroid Build Coastguard Worker /// \param __a
4392*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit floating-point vector of [8 x float].
4393*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit integer vector containing the same bitwise pattern as the
4394*bed243d3SAndroid Build Coastguard Worker ///    parameter.
4395*bed243d3SAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_castps_si256(__m256 __a)4396*bed243d3SAndroid Build Coastguard Worker _mm256_castps_si256(__m256 __a)
4397*bed243d3SAndroid Build Coastguard Worker {
4398*bed243d3SAndroid Build Coastguard Worker   return (__m256i)__a;
4399*bed243d3SAndroid Build Coastguard Worker }
4400*bed243d3SAndroid Build Coastguard Worker 
4401*bed243d3SAndroid Build Coastguard Worker /// Casts a 256-bit integer vector into a 256-bit floating-point vector
4402*bed243d3SAndroid Build Coastguard Worker ///    of [8 x float].
4403*bed243d3SAndroid Build Coastguard Worker ///
4404*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
4405*bed243d3SAndroid Build Coastguard Worker ///
4406*bed243d3SAndroid Build Coastguard Worker /// This intrinsic has no corresponding instruction.
4407*bed243d3SAndroid Build Coastguard Worker ///
4408*bed243d3SAndroid Build Coastguard Worker /// \param __a
4409*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit integer vector.
4410*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit floating-point vector of [8 x float] containing the same
4411*bed243d3SAndroid Build Coastguard Worker ///    bitwise pattern as the parameter.
4412*bed243d3SAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_castsi256_ps(__m256i __a)4413*bed243d3SAndroid Build Coastguard Worker _mm256_castsi256_ps(__m256i __a)
4414*bed243d3SAndroid Build Coastguard Worker {
4415*bed243d3SAndroid Build Coastguard Worker   return (__m256)__a;
4416*bed243d3SAndroid Build Coastguard Worker }
4417*bed243d3SAndroid Build Coastguard Worker 
4418*bed243d3SAndroid Build Coastguard Worker /// Casts a 256-bit integer vector into a 256-bit floating-point vector
4419*bed243d3SAndroid Build Coastguard Worker ///    of [4 x double].
4420*bed243d3SAndroid Build Coastguard Worker ///
4421*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
4422*bed243d3SAndroid Build Coastguard Worker ///
4423*bed243d3SAndroid Build Coastguard Worker /// This intrinsic has no corresponding instruction.
4424*bed243d3SAndroid Build Coastguard Worker ///
4425*bed243d3SAndroid Build Coastguard Worker /// \param __a
4426*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit integer vector.
4427*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit floating-point vector of [4 x double] containing the same
4428*bed243d3SAndroid Build Coastguard Worker ///    bitwise pattern as the parameter.
4429*bed243d3SAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_castsi256_pd(__m256i __a)4430*bed243d3SAndroid Build Coastguard Worker _mm256_castsi256_pd(__m256i __a)
4431*bed243d3SAndroid Build Coastguard Worker {
4432*bed243d3SAndroid Build Coastguard Worker   return (__m256d)__a;
4433*bed243d3SAndroid Build Coastguard Worker }
4434*bed243d3SAndroid Build Coastguard Worker 
4435*bed243d3SAndroid Build Coastguard Worker /// Returns the lower 128 bits of a 256-bit floating-point vector of
4436*bed243d3SAndroid Build Coastguard Worker ///    [4 x double] as a 128-bit floating-point vector of [2 x double].
4437*bed243d3SAndroid Build Coastguard Worker ///
4438*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
4439*bed243d3SAndroid Build Coastguard Worker ///
4440*bed243d3SAndroid Build Coastguard Worker /// This intrinsic has no corresponding instruction.
4441*bed243d3SAndroid Build Coastguard Worker ///
4442*bed243d3SAndroid Build Coastguard Worker /// \param __a
4443*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit floating-point vector of [4 x double].
4444*bed243d3SAndroid Build Coastguard Worker /// \returns A 128-bit floating-point vector of [2 x double] containing the
4445*bed243d3SAndroid Build Coastguard Worker ///    lower 128 bits of the parameter.
4446*bed243d3SAndroid Build Coastguard Worker static __inline __m128d __DEFAULT_FN_ATTRS
_mm256_castpd256_pd128(__m256d __a)4447*bed243d3SAndroid Build Coastguard Worker _mm256_castpd256_pd128(__m256d __a)
4448*bed243d3SAndroid Build Coastguard Worker {
4449*bed243d3SAndroid Build Coastguard Worker   return __builtin_shufflevector((__v4df)__a, (__v4df)__a, 0, 1);
4450*bed243d3SAndroid Build Coastguard Worker }
4451*bed243d3SAndroid Build Coastguard Worker 
4452*bed243d3SAndroid Build Coastguard Worker /// Returns the lower 128 bits of a 256-bit floating-point vector of
4453*bed243d3SAndroid Build Coastguard Worker ///    [8 x float] as a 128-bit floating-point vector of [4 x float].
4454*bed243d3SAndroid Build Coastguard Worker ///
4455*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
4456*bed243d3SAndroid Build Coastguard Worker ///
4457*bed243d3SAndroid Build Coastguard Worker /// This intrinsic has no corresponding instruction.
4458*bed243d3SAndroid Build Coastguard Worker ///
4459*bed243d3SAndroid Build Coastguard Worker /// \param __a
4460*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit floating-point vector of [8 x float].
4461*bed243d3SAndroid Build Coastguard Worker /// \returns A 128-bit floating-point vector of [4 x float] containing the
4462*bed243d3SAndroid Build Coastguard Worker ///    lower 128 bits of the parameter.
4463*bed243d3SAndroid Build Coastguard Worker static __inline __m128 __DEFAULT_FN_ATTRS
_mm256_castps256_ps128(__m256 __a)4464*bed243d3SAndroid Build Coastguard Worker _mm256_castps256_ps128(__m256 __a)
4465*bed243d3SAndroid Build Coastguard Worker {
4466*bed243d3SAndroid Build Coastguard Worker   return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 0, 1, 2, 3);
4467*bed243d3SAndroid Build Coastguard Worker }
4468*bed243d3SAndroid Build Coastguard Worker 
4469*bed243d3SAndroid Build Coastguard Worker /// Truncates a 256-bit integer vector into a 128-bit integer vector.
4470*bed243d3SAndroid Build Coastguard Worker ///
4471*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
4472*bed243d3SAndroid Build Coastguard Worker ///
4473*bed243d3SAndroid Build Coastguard Worker /// This intrinsic has no corresponding instruction.
4474*bed243d3SAndroid Build Coastguard Worker ///
4475*bed243d3SAndroid Build Coastguard Worker /// \param __a
4476*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit integer vector.
4477*bed243d3SAndroid Build Coastguard Worker /// \returns A 128-bit integer vector containing the lower 128 bits of the
4478*bed243d3SAndroid Build Coastguard Worker ///    parameter.
4479*bed243d3SAndroid Build Coastguard Worker static __inline __m128i __DEFAULT_FN_ATTRS
_mm256_castsi256_si128(__m256i __a)4480*bed243d3SAndroid Build Coastguard Worker _mm256_castsi256_si128(__m256i __a)
4481*bed243d3SAndroid Build Coastguard Worker {
4482*bed243d3SAndroid Build Coastguard Worker   return __builtin_shufflevector((__v4di)__a, (__v4di)__a, 0, 1);
4483*bed243d3SAndroid Build Coastguard Worker }
4484*bed243d3SAndroid Build Coastguard Worker 
4485*bed243d3SAndroid Build Coastguard Worker /// Constructs a 256-bit floating-point vector of [4 x double] from a
4486*bed243d3SAndroid Build Coastguard Worker ///    128-bit floating-point vector of [2 x double].
4487*bed243d3SAndroid Build Coastguard Worker ///
4488*bed243d3SAndroid Build Coastguard Worker ///    The lower 128 bits contain the value of the source vector. The contents
4489*bed243d3SAndroid Build Coastguard Worker ///    of the upper 128 bits are undefined.
4490*bed243d3SAndroid Build Coastguard Worker ///
4491*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
4492*bed243d3SAndroid Build Coastguard Worker ///
4493*bed243d3SAndroid Build Coastguard Worker /// This intrinsic has no corresponding instruction.
4494*bed243d3SAndroid Build Coastguard Worker ///
4495*bed243d3SAndroid Build Coastguard Worker /// \param __a
4496*bed243d3SAndroid Build Coastguard Worker ///    A 128-bit vector of [2 x double].
4497*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit floating-point vector of [4 x double]. The lower 128 bits
4498*bed243d3SAndroid Build Coastguard Worker ///    contain the value of the parameter. The contents of the upper 128 bits
4499*bed243d3SAndroid Build Coastguard Worker ///    are undefined.
4500*bed243d3SAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_castpd128_pd256(__m128d __a)4501*bed243d3SAndroid Build Coastguard Worker _mm256_castpd128_pd256(__m128d __a)
4502*bed243d3SAndroid Build Coastguard Worker {
4503*bed243d3SAndroid Build Coastguard Worker   return __builtin_shufflevector(
4504*bed243d3SAndroid Build Coastguard Worker       (__v2df)__a, (__v2df)__builtin_nondeterministic_value(__a), 0, 1, 2, 3);
4505*bed243d3SAndroid Build Coastguard Worker }
4506*bed243d3SAndroid Build Coastguard Worker 
4507*bed243d3SAndroid Build Coastguard Worker /// Constructs a 256-bit floating-point vector of [8 x float] from a
4508*bed243d3SAndroid Build Coastguard Worker ///    128-bit floating-point vector of [4 x float].
4509*bed243d3SAndroid Build Coastguard Worker ///
4510*bed243d3SAndroid Build Coastguard Worker ///    The lower 128 bits contain the value of the source vector. The contents
4511*bed243d3SAndroid Build Coastguard Worker ///    of the upper 128 bits are undefined.
4512*bed243d3SAndroid Build Coastguard Worker ///
4513*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
4514*bed243d3SAndroid Build Coastguard Worker ///
4515*bed243d3SAndroid Build Coastguard Worker /// This intrinsic has no corresponding instruction.
4516*bed243d3SAndroid Build Coastguard Worker ///
4517*bed243d3SAndroid Build Coastguard Worker /// \param __a
4518*bed243d3SAndroid Build Coastguard Worker ///    A 128-bit vector of [4 x float].
4519*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit floating-point vector of [8 x float]. The lower 128 bits
4520*bed243d3SAndroid Build Coastguard Worker ///    contain the value of the parameter. The contents of the upper 128 bits
4521*bed243d3SAndroid Build Coastguard Worker ///    are undefined.
4522*bed243d3SAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_castps128_ps256(__m128 __a)4523*bed243d3SAndroid Build Coastguard Worker _mm256_castps128_ps256(__m128 __a)
4524*bed243d3SAndroid Build Coastguard Worker {
4525*bed243d3SAndroid Build Coastguard Worker   return __builtin_shufflevector((__v4sf)__a,
4526*bed243d3SAndroid Build Coastguard Worker                                  (__v4sf)__builtin_nondeterministic_value(__a),
4527*bed243d3SAndroid Build Coastguard Worker                                  0, 1, 2, 3, 4, 5, 6, 7);
4528*bed243d3SAndroid Build Coastguard Worker }
4529*bed243d3SAndroid Build Coastguard Worker 
4530*bed243d3SAndroid Build Coastguard Worker /// Constructs a 256-bit integer vector from a 128-bit integer vector.
4531*bed243d3SAndroid Build Coastguard Worker ///
4532*bed243d3SAndroid Build Coastguard Worker ///    The lower 128 bits contain the value of the source vector. The contents
4533*bed243d3SAndroid Build Coastguard Worker ///    of the upper 128 bits are undefined.
4534*bed243d3SAndroid Build Coastguard Worker ///
4535*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
4536*bed243d3SAndroid Build Coastguard Worker ///
4537*bed243d3SAndroid Build Coastguard Worker /// This intrinsic has no corresponding instruction.
4538*bed243d3SAndroid Build Coastguard Worker ///
4539*bed243d3SAndroid Build Coastguard Worker /// \param __a
4540*bed243d3SAndroid Build Coastguard Worker ///    A 128-bit integer vector.
4541*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit integer vector. The lower 128 bits contain the value of
4542*bed243d3SAndroid Build Coastguard Worker ///    the parameter. The contents of the upper 128 bits are undefined.
4543*bed243d3SAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_castsi128_si256(__m128i __a)4544*bed243d3SAndroid Build Coastguard Worker _mm256_castsi128_si256(__m128i __a)
4545*bed243d3SAndroid Build Coastguard Worker {
4546*bed243d3SAndroid Build Coastguard Worker   return __builtin_shufflevector(
4547*bed243d3SAndroid Build Coastguard Worker       (__v2di)__a, (__v2di)__builtin_nondeterministic_value(__a), 0, 1, 2, 3);
4548*bed243d3SAndroid Build Coastguard Worker }
4549*bed243d3SAndroid Build Coastguard Worker 
4550*bed243d3SAndroid Build Coastguard Worker /// Constructs a 256-bit floating-point vector of [4 x double] from a
4551*bed243d3SAndroid Build Coastguard Worker ///    128-bit floating-point vector of [2 x double]. The lower 128 bits
4552*bed243d3SAndroid Build Coastguard Worker ///    contain the value of the source vector. The upper 128 bits are set
4553*bed243d3SAndroid Build Coastguard Worker ///    to zero.
4554*bed243d3SAndroid Build Coastguard Worker ///
4555*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
4556*bed243d3SAndroid Build Coastguard Worker ///
4557*bed243d3SAndroid Build Coastguard Worker /// This intrinsic has no corresponding instruction.
4558*bed243d3SAndroid Build Coastguard Worker ///
4559*bed243d3SAndroid Build Coastguard Worker /// \param __a
4560*bed243d3SAndroid Build Coastguard Worker ///    A 128-bit vector of [2 x double].
4561*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit floating-point vector of [4 x double]. The lower 128 bits
4562*bed243d3SAndroid Build Coastguard Worker ///    contain the value of the parameter. The upper 128 bits are set to zero.
4563*bed243d3SAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_zextpd128_pd256(__m128d __a)4564*bed243d3SAndroid Build Coastguard Worker _mm256_zextpd128_pd256(__m128d __a)
4565*bed243d3SAndroid Build Coastguard Worker {
4566*bed243d3SAndroid Build Coastguard Worker   return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3);
4567*bed243d3SAndroid Build Coastguard Worker }
4568*bed243d3SAndroid Build Coastguard Worker 
4569*bed243d3SAndroid Build Coastguard Worker /// Constructs a 256-bit floating-point vector of [8 x float] from a
4570*bed243d3SAndroid Build Coastguard Worker ///    128-bit floating-point vector of [4 x float]. The lower 128 bits contain
4571*bed243d3SAndroid Build Coastguard Worker ///    the value of the source vector. The upper 128 bits are set to zero.
4572*bed243d3SAndroid Build Coastguard Worker ///
4573*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
4574*bed243d3SAndroid Build Coastguard Worker ///
4575*bed243d3SAndroid Build Coastguard Worker /// This intrinsic has no corresponding instruction.
4576*bed243d3SAndroid Build Coastguard Worker ///
4577*bed243d3SAndroid Build Coastguard Worker /// \param __a
4578*bed243d3SAndroid Build Coastguard Worker ///    A 128-bit vector of [4 x float].
4579*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit floating-point vector of [8 x float]. The lower 128 bits
4580*bed243d3SAndroid Build Coastguard Worker ///    contain the value of the parameter. The upper 128 bits are set to zero.
4581*bed243d3SAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_zextps128_ps256(__m128 __a)4582*bed243d3SAndroid Build Coastguard Worker _mm256_zextps128_ps256(__m128 __a)
4583*bed243d3SAndroid Build Coastguard Worker {
4584*bed243d3SAndroid Build Coastguard Worker   return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7);
4585*bed243d3SAndroid Build Coastguard Worker }
4586*bed243d3SAndroid Build Coastguard Worker 
4587*bed243d3SAndroid Build Coastguard Worker /// Constructs a 256-bit integer vector from a 128-bit integer vector.
4588*bed243d3SAndroid Build Coastguard Worker ///    The lower 128 bits contain the value of the source vector. The upper
4589*bed243d3SAndroid Build Coastguard Worker ///    128 bits are set to zero.
4590*bed243d3SAndroid Build Coastguard Worker ///
4591*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
4592*bed243d3SAndroid Build Coastguard Worker ///
4593*bed243d3SAndroid Build Coastguard Worker /// This intrinsic has no corresponding instruction.
4594*bed243d3SAndroid Build Coastguard Worker ///
4595*bed243d3SAndroid Build Coastguard Worker /// \param __a
4596*bed243d3SAndroid Build Coastguard Worker ///    A 128-bit integer vector.
4597*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit integer vector. The lower 128 bits contain the value of
4598*bed243d3SAndroid Build Coastguard Worker ///    the parameter. The upper 128 bits are set to zero.
4599*bed243d3SAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_zextsi128_si256(__m128i __a)4600*bed243d3SAndroid Build Coastguard Worker _mm256_zextsi128_si256(__m128i __a)
4601*bed243d3SAndroid Build Coastguard Worker {
4602*bed243d3SAndroid Build Coastguard Worker   return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3);
4603*bed243d3SAndroid Build Coastguard Worker }
4604*bed243d3SAndroid Build Coastguard Worker 
/*
   Vector insert.
   We use macros rather than inlines because we only want to accept
   invocations where the immediate M is a constant expression.
*/
/// Constructs a new 256-bit vector of [8 x float] by first duplicating
///    a 256-bit vector of [8 x float] given in the first parameter, and then
///    replacing either the upper or the lower 128 bits with the contents of a
///    128-bit vector of [4 x float] in the second parameter.
///
///    The immediate integer parameter selects between the upper and the lower
///    128 bits.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m256 _mm256_insertf128_ps(__m256 V1, __m128 V2, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.
///
/// \param V1
///    A 256-bit vector of [8 x float]. This vector is copied to the result
///    first, and then either the upper or the lower 128 bits of the result will
///    be replaced by the contents of \a V2.
/// \param V2
///    A 128-bit vector of [4 x float]. The contents of this parameter are
///    written to either the upper or the lower 128 bits of the result depending
///    on the value of parameter \a M.
/// \param M
///    An immediate integer. The least significant bit determines how the values
///    from the two parameters are interleaved: \n
///    If bit [0] of \a M is 0, \a V2 is copied to bits [127:0] of the result,
///    and bits [255:128] of \a V1 are copied to bits [255:128] of the
///    result. \n
///    If bit [0] of \a M is 1, \a V2 is copied to bits [255:128] of the
///    result, and bits [127:0] of \a V1 are copied to bits [127:0] of the
///    result.
/// \returns A 256-bit vector of [8 x float] containing the interleaved values.
#define _mm256_insertf128_ps(V1, V2, M) \
  ((__m256)__builtin_ia32_vinsertf128_ps256((__v8sf)(__m256)(V1), \
                                            (__v4sf)(__m128)(V2), (int)(M)))
4647*bed243d3SAndroid Build Coastguard Worker 
/// Constructs a new 256-bit vector of [4 x double] by first duplicating
///    a 256-bit vector of [4 x double] given in the first parameter, and then
///    replacing either the upper or the lower 128 bits with the contents of a
///    128-bit vector of [2 x double] in the second parameter.
///
///    The immediate integer parameter selects between the upper and the lower
///    128 bits.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m256d _mm256_insertf128_pd(__m256d V1, __m128d V2, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.
///
/// \param V1
///    A 256-bit vector of [4 x double]. This vector is copied to the result
///    first, and then either the upper or the lower 128 bits of the result will
///    be replaced by the contents of \a V2.
/// \param V2
///    A 128-bit vector of [2 x double]. The contents of this parameter are
///    written to either the upper or the lower 128 bits of the result depending
///    on the value of parameter \a M.
/// \param M
///    An immediate integer. The least significant bit determines how the values
///    from the two parameters are interleaved: \n
///    If bit [0] of \a M is 0, \a V2 is copied to bits [127:0] of the result,
///    and bits [255:128] of \a V1 are copied to bits [255:128] of the
///    result. \n
///    If bit [0] of \a M is 1, \a V2 is copied to bits [255:128] of the
///    result, and bits [127:0] of \a V1 are copied to bits [127:0] of the
///    result.
/// \returns A 256-bit vector of [4 x double] containing the interleaved values.
#define _mm256_insertf128_pd(V1, V2, M) \
  ((__m256d)__builtin_ia32_vinsertf128_pd256((__v4df)(__m256d)(V1), \
                                             (__v2df)(__m128d)(V2), (int)(M)))
4685*bed243d3SAndroid Build Coastguard Worker 
/// Constructs a new 256-bit integer vector by first duplicating a
///    256-bit integer vector given in the first parameter, and then replacing
///    either the upper or the lower 128 bits with the contents of a 128-bit
///    integer vector in the second parameter.
///
///    The immediate integer parameter selects between the upper and the lower
///    128 bits.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m256i _mm256_insertf128_si256(__m256i V1, __m128i V2, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.
///
/// \param V1
///    A 256-bit integer vector. This vector is copied to the result first, and
///    then either the upper or the lower 128 bits of the result will be
///    replaced by the contents of \a V2.
/// \param V2
///    A 128-bit integer vector. The contents of this parameter are written to
///    either the upper or the lower 128 bits of the result depending on the
///    value of parameter \a M.
/// \param M
///    An immediate integer. The least significant bit determines how the values
///    from the two parameters are interleaved: \n
///    If bit [0] of \a M is 0, \a V2 is copied to bits [127:0] of the result,
///    and bits [255:128] of \a V1 are copied to bits [255:128] of the
///    result. \n
///    If bit [0] of \a M is 1, \a V2 is copied to bits [255:128] of the
///    result, and bits [127:0] of \a V1 are copied to bits [127:0] of the
///    result.
/// \returns A 256-bit integer vector containing the interleaved values.
#define _mm256_insertf128_si256(V1, V2, M) \
  ((__m256i)__builtin_ia32_vinsertf128_si256((__v8si)(__m256i)(V1), \
                                             (__v4si)(__m128i)(V2), (int)(M)))
4723*bed243d3SAndroid Build Coastguard Worker 
/*
   Vector extract.
   We use macros rather than inlines because we only want to accept
   invocations where the immediate M is a constant expression.
*/
/// Extracts either the upper or the lower 128 bits from a 256-bit vector
///    of [8 x float], as determined by the immediate integer parameter, and
///    returns the extracted bits as a 128-bit vector of [4 x float].
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128 _mm256_extractf128_ps(__m256 V, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VEXTRACTF128 </c> instruction.
///
/// \param V
///    A 256-bit vector of [8 x float].
/// \param M
///    An immediate integer. The least significant bit determines which bits are
///    extracted from the first parameter: \n
///    If bit [0] of \a M is 0, bits [127:0] of \a V are copied to the
///    result. \n
///    If bit [0] of \a M is 1, bits [255:128] of \a V are copied to the result.
/// \returns A 128-bit vector of [4 x float] containing the extracted bits.
#define _mm256_extractf128_ps(V, M) \
  ((__m128)__builtin_ia32_vextractf128_ps256((__v8sf)(__m256)(V), (int)(M)))
4752*bed243d3SAndroid Build Coastguard Worker 
/// Extracts either the upper or the lower 128 bits from a 256-bit vector
///    of [4 x double], as determined by the immediate integer parameter, and
///    returns the extracted bits as a 128-bit vector of [2 x double].
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128d _mm256_extractf128_pd(__m256d V, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VEXTRACTF128 </c> instruction.
///
/// \param V
///    A 256-bit vector of [4 x double].
/// \param M
///    An immediate integer. The least significant bit determines which bits are
///    extracted from the first parameter: \n
///    If bit [0] of \a M is 0, bits [127:0] of \a V are copied to the
///    result. \n
///    If bit [0] of \a M is 1, bits [255:128] of \a V are copied to the result.
/// \returns A 128-bit vector of [2 x double] containing the extracted bits.
#define _mm256_extractf128_pd(V, M) \
  ((__m128d)__builtin_ia32_vextractf128_pd256((__v4df)(__m256d)(V), (int)(M)))
4776*bed243d3SAndroid Build Coastguard Worker 
/// Extracts either the upper or the lower 128 bits from a 256-bit
///    integer vector, as determined by the immediate integer parameter, and
///    returns the extracted bits as a 128-bit integer vector.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm256_extractf128_si256(__m256i V, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VEXTRACTF128 </c> instruction.
///
/// \param V
///    A 256-bit integer vector.
/// \param M
///    An immediate integer. The least significant bit determines which bits are
///    extracted from the first parameter: \n
///    If bit [0] of \a M is 0, bits [127:0] of \a V are copied to the
///    result. \n
///    If bit [0] of \a M is 1, bits [255:128] of \a V are copied to the result.
/// \returns A 128-bit integer vector containing the extracted bits.
#define _mm256_extractf128_si256(V, M) \
  ((__m128i)__builtin_ia32_vextractf128_si256((__v8si)(__m256i)(V), (int)(M)))
4800*bed243d3SAndroid Build Coastguard Worker 
4801*bed243d3SAndroid Build Coastguard Worker /// Constructs a 256-bit floating-point vector of [8 x float] by
4802*bed243d3SAndroid Build Coastguard Worker ///    concatenating two 128-bit floating-point vectors of [4 x float].
4803*bed243d3SAndroid Build Coastguard Worker ///
4804*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
4805*bed243d3SAndroid Build Coastguard Worker ///
4806*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.
4807*bed243d3SAndroid Build Coastguard Worker ///
4808*bed243d3SAndroid Build Coastguard Worker /// \param __hi
4809*bed243d3SAndroid Build Coastguard Worker ///    A 128-bit floating-point vector of [4 x float] to be copied to the upper
4810*bed243d3SAndroid Build Coastguard Worker ///    128 bits of the result.
4811*bed243d3SAndroid Build Coastguard Worker /// \param __lo
4812*bed243d3SAndroid Build Coastguard Worker ///    A 128-bit floating-point vector of [4 x float] to be copied to the lower
4813*bed243d3SAndroid Build Coastguard Worker ///    128 bits of the result.
4814*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit floating-point vector of [8 x float] containing the
4815*bed243d3SAndroid Build Coastguard Worker ///    concatenated result.
4816*bed243d3SAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_set_m128(__m128 __hi,__m128 __lo)4817*bed243d3SAndroid Build Coastguard Worker _mm256_set_m128 (__m128 __hi, __m128 __lo)
4818*bed243d3SAndroid Build Coastguard Worker {
4819*bed243d3SAndroid Build Coastguard Worker   return (__m256) __builtin_shufflevector((__v4sf)__lo, (__v4sf)__hi, 0, 1, 2, 3, 4, 5, 6, 7);
4820*bed243d3SAndroid Build Coastguard Worker }
4821*bed243d3SAndroid Build Coastguard Worker 
4822*bed243d3SAndroid Build Coastguard Worker /// Constructs a 256-bit floating-point vector of [4 x double] by
4823*bed243d3SAndroid Build Coastguard Worker ///    concatenating two 128-bit floating-point vectors of [2 x double].
4824*bed243d3SAndroid Build Coastguard Worker ///
4825*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
4826*bed243d3SAndroid Build Coastguard Worker ///
4827*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.
4828*bed243d3SAndroid Build Coastguard Worker ///
4829*bed243d3SAndroid Build Coastguard Worker /// \param __hi
4830*bed243d3SAndroid Build Coastguard Worker ///    A 128-bit floating-point vector of [2 x double] to be copied to the upper
4831*bed243d3SAndroid Build Coastguard Worker ///    128 bits of the result.
4832*bed243d3SAndroid Build Coastguard Worker /// \param __lo
4833*bed243d3SAndroid Build Coastguard Worker ///    A 128-bit floating-point vector of [2 x double] to be copied to the lower
4834*bed243d3SAndroid Build Coastguard Worker ///    128 bits of the result.
4835*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit floating-point vector of [4 x double] containing the
4836*bed243d3SAndroid Build Coastguard Worker ///    concatenated result.
4837*bed243d3SAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_set_m128d(__m128d __hi,__m128d __lo)4838*bed243d3SAndroid Build Coastguard Worker _mm256_set_m128d (__m128d __hi, __m128d __lo)
4839*bed243d3SAndroid Build Coastguard Worker {
4840*bed243d3SAndroid Build Coastguard Worker   return (__m256d) __builtin_shufflevector((__v2df)__lo, (__v2df)__hi, 0, 1, 2, 3);
4841*bed243d3SAndroid Build Coastguard Worker }
4842*bed243d3SAndroid Build Coastguard Worker 
4843*bed243d3SAndroid Build Coastguard Worker /// Constructs a 256-bit integer vector by concatenating two 128-bit
4844*bed243d3SAndroid Build Coastguard Worker ///    integer vectors.
4845*bed243d3SAndroid Build Coastguard Worker ///
4846*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
4847*bed243d3SAndroid Build Coastguard Worker ///
4848*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.
4849*bed243d3SAndroid Build Coastguard Worker ///
4850*bed243d3SAndroid Build Coastguard Worker /// \param __hi
4851*bed243d3SAndroid Build Coastguard Worker ///    A 128-bit integer vector to be copied to the upper 128 bits of the
4852*bed243d3SAndroid Build Coastguard Worker ///    result.
4853*bed243d3SAndroid Build Coastguard Worker /// \param __lo
4854*bed243d3SAndroid Build Coastguard Worker ///    A 128-bit integer vector to be copied to the lower 128 bits of the
4855*bed243d3SAndroid Build Coastguard Worker ///    result.
4856*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit integer vector containing the concatenated result.
4857*bed243d3SAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_set_m128i(__m128i __hi,__m128i __lo)4858*bed243d3SAndroid Build Coastguard Worker _mm256_set_m128i (__m128i __hi, __m128i __lo)
4859*bed243d3SAndroid Build Coastguard Worker {
4860*bed243d3SAndroid Build Coastguard Worker   return (__m256i) __builtin_shufflevector((__v2di)__lo, (__v2di)__hi, 0, 1, 2, 3);
4861*bed243d3SAndroid Build Coastguard Worker }
4862*bed243d3SAndroid Build Coastguard Worker 
4863*bed243d3SAndroid Build Coastguard Worker /// Constructs a 256-bit floating-point vector of [8 x float] by
4864*bed243d3SAndroid Build Coastguard Worker ///    concatenating two 128-bit floating-point vectors of [4 x float]. This is
4865*bed243d3SAndroid Build Coastguard Worker ///    similar to _mm256_set_m128, but the order of the input parameters is
4866*bed243d3SAndroid Build Coastguard Worker ///    swapped.
4867*bed243d3SAndroid Build Coastguard Worker ///
4868*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
4869*bed243d3SAndroid Build Coastguard Worker ///
4870*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.
4871*bed243d3SAndroid Build Coastguard Worker ///
4872*bed243d3SAndroid Build Coastguard Worker /// \param __lo
4873*bed243d3SAndroid Build Coastguard Worker ///    A 128-bit floating-point vector of [4 x float] to be copied to the lower
4874*bed243d3SAndroid Build Coastguard Worker ///    128 bits of the result.
4875*bed243d3SAndroid Build Coastguard Worker /// \param __hi
4876*bed243d3SAndroid Build Coastguard Worker ///    A 128-bit floating-point vector of [4 x float] to be copied to the upper
4877*bed243d3SAndroid Build Coastguard Worker ///    128 bits of the result.
4878*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit floating-point vector of [8 x float] containing the
4879*bed243d3SAndroid Build Coastguard Worker ///    concatenated result.
4880*bed243d3SAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_setr_m128(__m128 __lo,__m128 __hi)4881*bed243d3SAndroid Build Coastguard Worker _mm256_setr_m128 (__m128 __lo, __m128 __hi)
4882*bed243d3SAndroid Build Coastguard Worker {
4883*bed243d3SAndroid Build Coastguard Worker   return _mm256_set_m128(__hi, __lo);
4884*bed243d3SAndroid Build Coastguard Worker }
4885*bed243d3SAndroid Build Coastguard Worker 
4886*bed243d3SAndroid Build Coastguard Worker /// Constructs a 256-bit floating-point vector of [4 x double] by
4887*bed243d3SAndroid Build Coastguard Worker ///    concatenating two 128-bit floating-point vectors of [2 x double]. This is
4888*bed243d3SAndroid Build Coastguard Worker ///    similar to _mm256_set_m128d, but the order of the input parameters is
4889*bed243d3SAndroid Build Coastguard Worker ///    swapped.
4890*bed243d3SAndroid Build Coastguard Worker ///
4891*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
4892*bed243d3SAndroid Build Coastguard Worker ///
4893*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.
4894*bed243d3SAndroid Build Coastguard Worker ///
4895*bed243d3SAndroid Build Coastguard Worker /// \param __lo
4896*bed243d3SAndroid Build Coastguard Worker ///    A 128-bit floating-point vector of [2 x double] to be copied to the lower
4897*bed243d3SAndroid Build Coastguard Worker ///    128 bits of the result.
4898*bed243d3SAndroid Build Coastguard Worker /// \param __hi
4899*bed243d3SAndroid Build Coastguard Worker ///    A 128-bit floating-point vector of [2 x double] to be copied to the upper
4900*bed243d3SAndroid Build Coastguard Worker ///    128 bits of the result.
4901*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit floating-point vector of [4 x double] containing the
4902*bed243d3SAndroid Build Coastguard Worker ///    concatenated result.
4903*bed243d3SAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_setr_m128d(__m128d __lo,__m128d __hi)4904*bed243d3SAndroid Build Coastguard Worker _mm256_setr_m128d (__m128d __lo, __m128d __hi)
4905*bed243d3SAndroid Build Coastguard Worker {
4906*bed243d3SAndroid Build Coastguard Worker   return (__m256d)_mm256_set_m128d(__hi, __lo);
4907*bed243d3SAndroid Build Coastguard Worker }
4908*bed243d3SAndroid Build Coastguard Worker 
4909*bed243d3SAndroid Build Coastguard Worker /// Constructs a 256-bit integer vector by concatenating two 128-bit
4910*bed243d3SAndroid Build Coastguard Worker ///    integer vectors. This is similar to _mm256_set_m128i, but the order of
4911*bed243d3SAndroid Build Coastguard Worker ///    the input parameters is swapped.
4912*bed243d3SAndroid Build Coastguard Worker ///
4913*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
4914*bed243d3SAndroid Build Coastguard Worker ///
4915*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.
4916*bed243d3SAndroid Build Coastguard Worker ///
4917*bed243d3SAndroid Build Coastguard Worker /// \param __lo
4918*bed243d3SAndroid Build Coastguard Worker ///    A 128-bit integer vector to be copied to the lower 128 bits of the
4919*bed243d3SAndroid Build Coastguard Worker ///    result.
4920*bed243d3SAndroid Build Coastguard Worker /// \param __hi
4921*bed243d3SAndroid Build Coastguard Worker ///    A 128-bit integer vector to be copied to the upper 128 bits of the
4922*bed243d3SAndroid Build Coastguard Worker ///    result.
4923*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit integer vector containing the concatenated result.
4924*bed243d3SAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_setr_m128i(__m128i __lo,__m128i __hi)4925*bed243d3SAndroid Build Coastguard Worker _mm256_setr_m128i (__m128i __lo, __m128i __hi)
4926*bed243d3SAndroid Build Coastguard Worker {
4927*bed243d3SAndroid Build Coastguard Worker   return (__m256i)_mm256_set_m128i(__hi, __lo);
4928*bed243d3SAndroid Build Coastguard Worker }
4929*bed243d3SAndroid Build Coastguard Worker 
4930*bed243d3SAndroid Build Coastguard Worker /* SIMD load ops (unaligned) */
4931*bed243d3SAndroid Build Coastguard Worker /// Loads two 128-bit floating-point vectors of [4 x float] from
4932*bed243d3SAndroid Build Coastguard Worker ///    unaligned memory locations and constructs a 256-bit floating-point vector
4933*bed243d3SAndroid Build Coastguard Worker ///    of [8 x float] by concatenating the two 128-bit vectors.
4934*bed243d3SAndroid Build Coastguard Worker ///
4935*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
4936*bed243d3SAndroid Build Coastguard Worker ///
4937*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to load instructions followed by the
4938*bed243d3SAndroid Build Coastguard Worker ///   <c> VINSERTF128 </c> instruction.
4939*bed243d3SAndroid Build Coastguard Worker ///
4940*bed243d3SAndroid Build Coastguard Worker /// \param __addr_hi
4941*bed243d3SAndroid Build Coastguard Worker ///    A pointer to a 128-bit memory location containing 4 consecutive
4942*bed243d3SAndroid Build Coastguard Worker ///    single-precision floating-point values. These values are to be copied to
4943*bed243d3SAndroid Build Coastguard Worker ///    bits[255:128] of the result. The address of the memory location does not
4944*bed243d3SAndroid Build Coastguard Worker ///    have to be aligned.
4945*bed243d3SAndroid Build Coastguard Worker /// \param __addr_lo
4946*bed243d3SAndroid Build Coastguard Worker ///    A pointer to a 128-bit memory location containing 4 consecutive
4947*bed243d3SAndroid Build Coastguard Worker ///    single-precision floating-point values. These values are to be copied to
4948*bed243d3SAndroid Build Coastguard Worker ///    bits[127:0] of the result. The address of the memory location does not
4949*bed243d3SAndroid Build Coastguard Worker ///    have to be aligned.
4950*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit floating-point vector of [8 x float] containing the
4951*bed243d3SAndroid Build Coastguard Worker ///    concatenated result.
4952*bed243d3SAndroid Build Coastguard Worker static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_loadu2_m128(float const * __addr_hi,float const * __addr_lo)4953*bed243d3SAndroid Build Coastguard Worker _mm256_loadu2_m128(float const *__addr_hi, float const *__addr_lo)
4954*bed243d3SAndroid Build Coastguard Worker {
4955*bed243d3SAndroid Build Coastguard Worker   return _mm256_set_m128(_mm_loadu_ps(__addr_hi), _mm_loadu_ps(__addr_lo));
4956*bed243d3SAndroid Build Coastguard Worker }
4957*bed243d3SAndroid Build Coastguard Worker 
4958*bed243d3SAndroid Build Coastguard Worker /// Loads two 128-bit floating-point vectors of [2 x double] from
4959*bed243d3SAndroid Build Coastguard Worker ///    unaligned memory locations and constructs a 256-bit floating-point vector
4960*bed243d3SAndroid Build Coastguard Worker ///    of [4 x double] by concatenating the two 128-bit vectors.
4961*bed243d3SAndroid Build Coastguard Worker ///
4962*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
4963*bed243d3SAndroid Build Coastguard Worker ///
4964*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to load instructions followed by the
4965*bed243d3SAndroid Build Coastguard Worker ///   <c> VINSERTF128 </c> instruction.
4966*bed243d3SAndroid Build Coastguard Worker ///
4967*bed243d3SAndroid Build Coastguard Worker /// \param __addr_hi
4968*bed243d3SAndroid Build Coastguard Worker ///    A pointer to a 128-bit memory location containing two consecutive
4969*bed243d3SAndroid Build Coastguard Worker ///    double-precision floating-point values. These values are to be copied to
4970*bed243d3SAndroid Build Coastguard Worker ///    bits[255:128] of the result. The address of the memory location does not
4971*bed243d3SAndroid Build Coastguard Worker ///    have to be aligned.
4972*bed243d3SAndroid Build Coastguard Worker /// \param __addr_lo
4973*bed243d3SAndroid Build Coastguard Worker ///    A pointer to a 128-bit memory location containing two consecutive
4974*bed243d3SAndroid Build Coastguard Worker ///    double-precision floating-point values. These values are to be copied to
4975*bed243d3SAndroid Build Coastguard Worker ///    bits[127:0] of the result. The address of the memory location does not
4976*bed243d3SAndroid Build Coastguard Worker ///    have to be aligned.
4977*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit floating-point vector of [4 x double] containing the
4978*bed243d3SAndroid Build Coastguard Worker ///    concatenated result.
4979*bed243d3SAndroid Build Coastguard Worker static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_loadu2_m128d(double const * __addr_hi,double const * __addr_lo)4980*bed243d3SAndroid Build Coastguard Worker _mm256_loadu2_m128d(double const *__addr_hi, double const *__addr_lo)
4981*bed243d3SAndroid Build Coastguard Worker {
4982*bed243d3SAndroid Build Coastguard Worker   return _mm256_set_m128d(_mm_loadu_pd(__addr_hi), _mm_loadu_pd(__addr_lo));
4983*bed243d3SAndroid Build Coastguard Worker }
4984*bed243d3SAndroid Build Coastguard Worker 
4985*bed243d3SAndroid Build Coastguard Worker /// Loads two 128-bit integer vectors from unaligned memory locations and
4986*bed243d3SAndroid Build Coastguard Worker ///    constructs a 256-bit integer vector by concatenating the two 128-bit
4987*bed243d3SAndroid Build Coastguard Worker ///    vectors.
4988*bed243d3SAndroid Build Coastguard Worker ///
4989*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
4990*bed243d3SAndroid Build Coastguard Worker ///
4991*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to load instructions followed by the
4992*bed243d3SAndroid Build Coastguard Worker ///   <c> VINSERTF128 </c> instruction.
4993*bed243d3SAndroid Build Coastguard Worker ///
4994*bed243d3SAndroid Build Coastguard Worker /// \param __addr_hi
4995*bed243d3SAndroid Build Coastguard Worker ///    A pointer to a 128-bit memory location containing a 128-bit integer
4996*bed243d3SAndroid Build Coastguard Worker ///    vector. This vector is to be copied to bits[255:128] of the result. The
4997*bed243d3SAndroid Build Coastguard Worker ///    address of the memory location does not have to be aligned.
4998*bed243d3SAndroid Build Coastguard Worker /// \param __addr_lo
4999*bed243d3SAndroid Build Coastguard Worker ///    A pointer to a 128-bit memory location containing a 128-bit integer
5000*bed243d3SAndroid Build Coastguard Worker ///    vector. This vector is to be copied to bits[127:0] of the result. The
5001*bed243d3SAndroid Build Coastguard Worker ///    address of the memory location does not have to be aligned.
5002*bed243d3SAndroid Build Coastguard Worker /// \returns A 256-bit integer vector containing the concatenated result.
5003*bed243d3SAndroid Build Coastguard Worker static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_loadu2_m128i(__m128i_u const * __addr_hi,__m128i_u const * __addr_lo)5004*bed243d3SAndroid Build Coastguard Worker _mm256_loadu2_m128i(__m128i_u const *__addr_hi, __m128i_u const *__addr_lo)
5005*bed243d3SAndroid Build Coastguard Worker {
5006*bed243d3SAndroid Build Coastguard Worker    return _mm256_set_m128i(_mm_loadu_si128(__addr_hi), _mm_loadu_si128(__addr_lo));
5007*bed243d3SAndroid Build Coastguard Worker }
5008*bed243d3SAndroid Build Coastguard Worker 
5009*bed243d3SAndroid Build Coastguard Worker /* SIMD store ops (unaligned) */
5010*bed243d3SAndroid Build Coastguard Worker /// Stores the upper and lower 128 bits of a 256-bit floating-point
5011*bed243d3SAndroid Build Coastguard Worker ///    vector of [8 x float] into two different unaligned memory locations.
5012*bed243d3SAndroid Build Coastguard Worker ///
5013*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
5014*bed243d3SAndroid Build Coastguard Worker ///
5015*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VEXTRACTF128 </c> instruction and the
5016*bed243d3SAndroid Build Coastguard Worker ///   store instructions.
5017*bed243d3SAndroid Build Coastguard Worker ///
5018*bed243d3SAndroid Build Coastguard Worker /// \param __addr_hi
5019*bed243d3SAndroid Build Coastguard Worker ///    A pointer to a 128-bit memory location. Bits[255:128] of \a __a are to be
5020*bed243d3SAndroid Build Coastguard Worker ///    copied to this memory location. The address of this memory location does
5021*bed243d3SAndroid Build Coastguard Worker ///    not have to be aligned.
5022*bed243d3SAndroid Build Coastguard Worker /// \param __addr_lo
5023*bed243d3SAndroid Build Coastguard Worker ///    A pointer to a 128-bit memory location. Bits[127:0] of \a __a are to be
5024*bed243d3SAndroid Build Coastguard Worker ///    copied to this memory location. The address of this memory location does
5025*bed243d3SAndroid Build Coastguard Worker ///    not have to be aligned.
5026*bed243d3SAndroid Build Coastguard Worker /// \param __a
5027*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit floating-point vector of [8 x float].
5028*bed243d3SAndroid Build Coastguard Worker static __inline void __DEFAULT_FN_ATTRS
_mm256_storeu2_m128(float * __addr_hi,float * __addr_lo,__m256 __a)5029*bed243d3SAndroid Build Coastguard Worker _mm256_storeu2_m128(float *__addr_hi, float *__addr_lo, __m256 __a)
5030*bed243d3SAndroid Build Coastguard Worker {
5031*bed243d3SAndroid Build Coastguard Worker   __m128 __v128;
5032*bed243d3SAndroid Build Coastguard Worker 
5033*bed243d3SAndroid Build Coastguard Worker   __v128 = _mm256_castps256_ps128(__a);
5034*bed243d3SAndroid Build Coastguard Worker   _mm_storeu_ps(__addr_lo, __v128);
5035*bed243d3SAndroid Build Coastguard Worker   __v128 = _mm256_extractf128_ps(__a, 1);
5036*bed243d3SAndroid Build Coastguard Worker   _mm_storeu_ps(__addr_hi, __v128);
5037*bed243d3SAndroid Build Coastguard Worker }
5038*bed243d3SAndroid Build Coastguard Worker 
5039*bed243d3SAndroid Build Coastguard Worker /// Stores the upper and lower 128 bits of a 256-bit floating-point
5040*bed243d3SAndroid Build Coastguard Worker ///    vector of [4 x double] into two different unaligned memory locations.
5041*bed243d3SAndroid Build Coastguard Worker ///
5042*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
5043*bed243d3SAndroid Build Coastguard Worker ///
5044*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VEXTRACTF128 </c> instruction and the
5045*bed243d3SAndroid Build Coastguard Worker ///   store instructions.
5046*bed243d3SAndroid Build Coastguard Worker ///
5047*bed243d3SAndroid Build Coastguard Worker /// \param __addr_hi
5048*bed243d3SAndroid Build Coastguard Worker ///    A pointer to a 128-bit memory location. Bits[255:128] of \a __a are to be
5049*bed243d3SAndroid Build Coastguard Worker ///    copied to this memory location. The address of this memory location does
5050*bed243d3SAndroid Build Coastguard Worker ///    not have to be aligned.
5051*bed243d3SAndroid Build Coastguard Worker /// \param __addr_lo
5052*bed243d3SAndroid Build Coastguard Worker ///    A pointer to a 128-bit memory location. Bits[127:0] of \a __a are to be
5053*bed243d3SAndroid Build Coastguard Worker ///    copied to this memory location. The address of this memory location does
5054*bed243d3SAndroid Build Coastguard Worker ///    not have to be aligned.
5055*bed243d3SAndroid Build Coastguard Worker /// \param __a
5056*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit floating-point vector of [4 x double].
5057*bed243d3SAndroid Build Coastguard Worker static __inline void __DEFAULT_FN_ATTRS
_mm256_storeu2_m128d(double * __addr_hi,double * __addr_lo,__m256d __a)5058*bed243d3SAndroid Build Coastguard Worker _mm256_storeu2_m128d(double *__addr_hi, double *__addr_lo, __m256d __a)
5059*bed243d3SAndroid Build Coastguard Worker {
5060*bed243d3SAndroid Build Coastguard Worker   __m128d __v128;
5061*bed243d3SAndroid Build Coastguard Worker 
5062*bed243d3SAndroid Build Coastguard Worker   __v128 = _mm256_castpd256_pd128(__a);
5063*bed243d3SAndroid Build Coastguard Worker   _mm_storeu_pd(__addr_lo, __v128);
5064*bed243d3SAndroid Build Coastguard Worker   __v128 = _mm256_extractf128_pd(__a, 1);
5065*bed243d3SAndroid Build Coastguard Worker   _mm_storeu_pd(__addr_hi, __v128);
5066*bed243d3SAndroid Build Coastguard Worker }
5067*bed243d3SAndroid Build Coastguard Worker 
5068*bed243d3SAndroid Build Coastguard Worker /// Stores the upper and lower 128 bits of a 256-bit integer vector into
5069*bed243d3SAndroid Build Coastguard Worker ///    two different unaligned memory locations.
5070*bed243d3SAndroid Build Coastguard Worker ///
5071*bed243d3SAndroid Build Coastguard Worker /// \headerfile <x86intrin.h>
5072*bed243d3SAndroid Build Coastguard Worker ///
5073*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the <c> VEXTRACTF128 </c> instruction and the
5074*bed243d3SAndroid Build Coastguard Worker ///   store instructions.
5075*bed243d3SAndroid Build Coastguard Worker ///
5076*bed243d3SAndroid Build Coastguard Worker /// \param __addr_hi
5077*bed243d3SAndroid Build Coastguard Worker ///    A pointer to a 128-bit memory location. Bits[255:128] of \a __a are to be
5078*bed243d3SAndroid Build Coastguard Worker ///    copied to this memory location. The address of this memory location does
5079*bed243d3SAndroid Build Coastguard Worker ///    not have to be aligned.
5080*bed243d3SAndroid Build Coastguard Worker /// \param __addr_lo
5081*bed243d3SAndroid Build Coastguard Worker ///    A pointer to a 128-bit memory location. Bits[127:0] of \a __a are to be
5082*bed243d3SAndroid Build Coastguard Worker ///    copied to this memory location. The address of this memory location does
5083*bed243d3SAndroid Build Coastguard Worker ///    not have to be aligned.
5084*bed243d3SAndroid Build Coastguard Worker /// \param __a
5085*bed243d3SAndroid Build Coastguard Worker ///    A 256-bit integer vector.
5086*bed243d3SAndroid Build Coastguard Worker static __inline void __DEFAULT_FN_ATTRS
_mm256_storeu2_m128i(__m128i_u * __addr_hi,__m128i_u * __addr_lo,__m256i __a)5087*bed243d3SAndroid Build Coastguard Worker _mm256_storeu2_m128i(__m128i_u *__addr_hi, __m128i_u *__addr_lo, __m256i __a)
5088*bed243d3SAndroid Build Coastguard Worker {
5089*bed243d3SAndroid Build Coastguard Worker   __m128i __v128;
5090*bed243d3SAndroid Build Coastguard Worker 
5091*bed243d3SAndroid Build Coastguard Worker   __v128 = _mm256_castsi256_si128(__a);
5092*bed243d3SAndroid Build Coastguard Worker   _mm_storeu_si128(__addr_lo, __v128);
5093*bed243d3SAndroid Build Coastguard Worker   __v128 = _mm256_extractf128_si256(__a, 1);
5094*bed243d3SAndroid Build Coastguard Worker   _mm_storeu_si128(__addr_hi, __v128);
5095*bed243d3SAndroid Build Coastguard Worker }
5096*bed243d3SAndroid Build Coastguard Worker 
5097*bed243d3SAndroid Build Coastguard Worker #undef __DEFAULT_FN_ATTRS
5098*bed243d3SAndroid Build Coastguard Worker #undef __DEFAULT_FN_ATTRS128
5099*bed243d3SAndroid Build Coastguard Worker 
5100*bed243d3SAndroid Build Coastguard Worker #endif /* __AVXINTRIN_H */
5101