xref: /aosp_15_r20/external/clang/lib/Headers/avx512dqintrin.h (revision 67e74705e28f6214e480b399dd47ea732279e315)
/*===---- avx512dqintrin.h - AVX512DQ intrinsics ---------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
23*67e74705SXin Li 
24*67e74705SXin Li #ifndef __IMMINTRIN_H
25*67e74705SXin Li #error "Never use <avx512dqintrin.h> directly; include <immintrin.h> instead."
26*67e74705SXin Li #endif
27*67e74705SXin Li 
28*67e74705SXin Li #ifndef __AVX512DQINTRIN_H
29*67e74705SXin Li #define __AVX512DQINTRIN_H
30*67e74705SXin Li 
31*67e74705SXin Li /* Define the default attributes for the functions in this file. */
32*67e74705SXin Li #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512dq")))
33*67e74705SXin Li 
34*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mullo_epi64(__m512i __A,__m512i __B)35*67e74705SXin Li _mm512_mullo_epi64 (__m512i __A, __m512i __B) {
36*67e74705SXin Li   return (__m512i) ((__v8du) __A * (__v8du) __B);
37*67e74705SXin Li }
38*67e74705SXin Li 
39*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_mullo_epi64(__m512i __W,__mmask8 __U,__m512i __A,__m512i __B)40*67e74705SXin Li _mm512_mask_mullo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
41*67e74705SXin Li   return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A,
42*67e74705SXin Li               (__v8di) __B,
43*67e74705SXin Li               (__v8di) __W,
44*67e74705SXin Li               (__mmask8) __U);
45*67e74705SXin Li }
46*67e74705SXin Li 
47*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_mullo_epi64(__mmask8 __U,__m512i __A,__m512i __B)48*67e74705SXin Li _mm512_maskz_mullo_epi64 (__mmask8 __U, __m512i __A, __m512i __B) {
49*67e74705SXin Li   return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A,
50*67e74705SXin Li               (__v8di) __B,
51*67e74705SXin Li               (__v8di)
52*67e74705SXin Li               _mm512_setzero_si512 (),
53*67e74705SXin Li               (__mmask8) __U);
54*67e74705SXin Li }
55*67e74705SXin Li 
56*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_xor_pd(__m512d __A,__m512d __B)57*67e74705SXin Li _mm512_xor_pd (__m512d __A, __m512d __B) {
58*67e74705SXin Li   return (__m512d) ((__v8du) __A ^ (__v8du) __B);
59*67e74705SXin Li }
60*67e74705SXin Li 
61*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_xor_pd(__m512d __W,__mmask8 __U,__m512d __A,__m512d __B)62*67e74705SXin Li _mm512_mask_xor_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
63*67e74705SXin Li   return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
64*67e74705SXin Li              (__v8df) __B,
65*67e74705SXin Li              (__v8df) __W,
66*67e74705SXin Li              (__mmask8) __U);
67*67e74705SXin Li }
68*67e74705SXin Li 
69*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_xor_pd(__mmask8 __U,__m512d __A,__m512d __B)70*67e74705SXin Li _mm512_maskz_xor_pd (__mmask8 __U, __m512d __A, __m512d __B) {
71*67e74705SXin Li   return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
72*67e74705SXin Li              (__v8df) __B,
73*67e74705SXin Li              (__v8df)
74*67e74705SXin Li              _mm512_setzero_pd (),
75*67e74705SXin Li              (__mmask8) __U);
76*67e74705SXin Li }
77*67e74705SXin Li 
78*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_xor_ps(__m512 __A,__m512 __B)79*67e74705SXin Li _mm512_xor_ps (__m512 __A, __m512 __B) {
80*67e74705SXin Li   return (__m512) ((__v16su) __A ^ (__v16su) __B);
81*67e74705SXin Li }
82*67e74705SXin Li 
83*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_xor_ps(__m512 __W,__mmask16 __U,__m512 __A,__m512 __B)84*67e74705SXin Li _mm512_mask_xor_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
85*67e74705SXin Li   return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
86*67e74705SXin Li             (__v16sf) __B,
87*67e74705SXin Li             (__v16sf) __W,
88*67e74705SXin Li             (__mmask16) __U);
89*67e74705SXin Li }
90*67e74705SXin Li 
91*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_xor_ps(__mmask16 __U,__m512 __A,__m512 __B)92*67e74705SXin Li _mm512_maskz_xor_ps (__mmask16 __U, __m512 __A, __m512 __B) {
93*67e74705SXin Li   return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
94*67e74705SXin Li             (__v16sf) __B,
95*67e74705SXin Li             (__v16sf)
96*67e74705SXin Li             _mm512_setzero_ps (),
97*67e74705SXin Li             (__mmask16) __U);
98*67e74705SXin Li }
99*67e74705SXin Li 
100*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_or_pd(__m512d __A,__m512d __B)101*67e74705SXin Li _mm512_or_pd (__m512d __A, __m512d __B) {
102*67e74705SXin Li   return (__m512d) ((__v8du) __A | (__v8du) __B);
103*67e74705SXin Li }
104*67e74705SXin Li 
105*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_or_pd(__m512d __W,__mmask8 __U,__m512d __A,__m512d __B)106*67e74705SXin Li _mm512_mask_or_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
107*67e74705SXin Li   return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
108*67e74705SXin Li             (__v8df) __B,
109*67e74705SXin Li             (__v8df) __W,
110*67e74705SXin Li             (__mmask8) __U);
111*67e74705SXin Li }
112*67e74705SXin Li 
113*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_or_pd(__mmask8 __U,__m512d __A,__m512d __B)114*67e74705SXin Li _mm512_maskz_or_pd (__mmask8 __U, __m512d __A, __m512d __B) {
115*67e74705SXin Li   return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
116*67e74705SXin Li             (__v8df) __B,
117*67e74705SXin Li             (__v8df)
118*67e74705SXin Li             _mm512_setzero_pd (),
119*67e74705SXin Li             (__mmask8) __U);
120*67e74705SXin Li }
121*67e74705SXin Li 
122*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_or_ps(__m512 __A,__m512 __B)123*67e74705SXin Li _mm512_or_ps (__m512 __A, __m512 __B) {
124*67e74705SXin Li   return (__m512) ((__v16su) __A | (__v16su) __B);
125*67e74705SXin Li }
126*67e74705SXin Li 
127*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_or_ps(__m512 __W,__mmask16 __U,__m512 __A,__m512 __B)128*67e74705SXin Li _mm512_mask_or_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
129*67e74705SXin Li   return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
130*67e74705SXin Li                  (__v16sf) __B,
131*67e74705SXin Li                  (__v16sf) __W,
132*67e74705SXin Li                  (__mmask16) __U);
133*67e74705SXin Li }
134*67e74705SXin Li 
135*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_or_ps(__mmask16 __U,__m512 __A,__m512 __B)136*67e74705SXin Li _mm512_maskz_or_ps (__mmask16 __U, __m512 __A, __m512 __B) {
137*67e74705SXin Li   return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
138*67e74705SXin Li                  (__v16sf) __B,
139*67e74705SXin Li                  (__v16sf)
140*67e74705SXin Li                  _mm512_setzero_ps (),
141*67e74705SXin Li                  (__mmask16) __U);
142*67e74705SXin Li }
143*67e74705SXin Li 
144*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_and_pd(__m512d __A,__m512d __B)145*67e74705SXin Li _mm512_and_pd (__m512d __A, __m512d __B) {
146*67e74705SXin Li   return (__m512d) ((__v8du) __A & (__v8du) __B);
147*67e74705SXin Li }
148*67e74705SXin Li 
149*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_and_pd(__m512d __W,__mmask8 __U,__m512d __A,__m512d __B)150*67e74705SXin Li _mm512_mask_and_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
151*67e74705SXin Li   return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
152*67e74705SXin Li              (__v8df) __B,
153*67e74705SXin Li              (__v8df) __W,
154*67e74705SXin Li              (__mmask8) __U);
155*67e74705SXin Li }
156*67e74705SXin Li 
157*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_and_pd(__mmask8 __U,__m512d __A,__m512d __B)158*67e74705SXin Li _mm512_maskz_and_pd (__mmask8 __U, __m512d __A, __m512d __B) {
159*67e74705SXin Li   return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
160*67e74705SXin Li              (__v8df) __B,
161*67e74705SXin Li              (__v8df)
162*67e74705SXin Li              _mm512_setzero_pd (),
163*67e74705SXin Li              (__mmask8) __U);
164*67e74705SXin Li }
165*67e74705SXin Li 
166*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_and_ps(__m512 __A,__m512 __B)167*67e74705SXin Li _mm512_and_ps (__m512 __A, __m512 __B) {
168*67e74705SXin Li   return (__m512) ((__v16su) __A & (__v16su) __B);
169*67e74705SXin Li }
170*67e74705SXin Li 
171*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_and_ps(__m512 __W,__mmask16 __U,__m512 __A,__m512 __B)172*67e74705SXin Li _mm512_mask_and_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
173*67e74705SXin Li   return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
174*67e74705SXin Li             (__v16sf) __B,
175*67e74705SXin Li             (__v16sf) __W,
176*67e74705SXin Li             (__mmask16) __U);
177*67e74705SXin Li }
178*67e74705SXin Li 
179*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_and_ps(__mmask16 __U,__m512 __A,__m512 __B)180*67e74705SXin Li _mm512_maskz_and_ps (__mmask16 __U, __m512 __A, __m512 __B) {
181*67e74705SXin Li   return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
182*67e74705SXin Li             (__v16sf) __B,
183*67e74705SXin Li             (__v16sf)
184*67e74705SXin Li             _mm512_setzero_ps (),
185*67e74705SXin Li             (__mmask16) __U);
186*67e74705SXin Li }
187*67e74705SXin Li 
188*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_andnot_pd(__m512d __A,__m512d __B)189*67e74705SXin Li _mm512_andnot_pd (__m512d __A, __m512d __B) {
190*67e74705SXin Li   return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
191*67e74705SXin Li               (__v8df) __B,
192*67e74705SXin Li               (__v8df)
193*67e74705SXin Li               _mm512_setzero_pd (),
194*67e74705SXin Li               (__mmask8) -1);
195*67e74705SXin Li }
196*67e74705SXin Li 
197*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_andnot_pd(__m512d __W,__mmask8 __U,__m512d __A,__m512d __B)198*67e74705SXin Li _mm512_mask_andnot_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
199*67e74705SXin Li   return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
200*67e74705SXin Li               (__v8df) __B,
201*67e74705SXin Li               (__v8df) __W,
202*67e74705SXin Li               (__mmask8) __U);
203*67e74705SXin Li }
204*67e74705SXin Li 
205*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_andnot_pd(__mmask8 __U,__m512d __A,__m512d __B)206*67e74705SXin Li _mm512_maskz_andnot_pd (__mmask8 __U, __m512d __A, __m512d __B) {
207*67e74705SXin Li   return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
208*67e74705SXin Li               (__v8df) __B,
209*67e74705SXin Li               (__v8df)
210*67e74705SXin Li               _mm512_setzero_pd (),
211*67e74705SXin Li               (__mmask8) __U);
212*67e74705SXin Li }
213*67e74705SXin Li 
214*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_andnot_ps(__m512 __A,__m512 __B)215*67e74705SXin Li _mm512_andnot_ps (__m512 __A, __m512 __B) {
216*67e74705SXin Li   return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
217*67e74705SXin Li              (__v16sf) __B,
218*67e74705SXin Li              (__v16sf)
219*67e74705SXin Li              _mm512_setzero_ps (),
220*67e74705SXin Li              (__mmask16) -1);
221*67e74705SXin Li }
222*67e74705SXin Li 
223*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_andnot_ps(__m512 __W,__mmask16 __U,__m512 __A,__m512 __B)224*67e74705SXin Li _mm512_mask_andnot_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
225*67e74705SXin Li   return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
226*67e74705SXin Li              (__v16sf) __B,
227*67e74705SXin Li              (__v16sf) __W,
228*67e74705SXin Li              (__mmask16) __U);
229*67e74705SXin Li }
230*67e74705SXin Li 
231*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_andnot_ps(__mmask16 __U,__m512 __A,__m512 __B)232*67e74705SXin Li _mm512_maskz_andnot_ps (__mmask16 __U, __m512 __A, __m512 __B) {
233*67e74705SXin Li   return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
234*67e74705SXin Li              (__v16sf) __B,
235*67e74705SXin Li              (__v16sf)
236*67e74705SXin Li              _mm512_setzero_ps (),
237*67e74705SXin Li              (__mmask16) __U);
238*67e74705SXin Li }
239*67e74705SXin Li 
240*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtpd_epi64(__m512d __A)241*67e74705SXin Li _mm512_cvtpd_epi64 (__m512d __A) {
242*67e74705SXin Li   return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
243*67e74705SXin Li                 (__v8di) _mm512_setzero_si512(),
244*67e74705SXin Li                 (__mmask8) -1,
245*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
246*67e74705SXin Li }
247*67e74705SXin Li 
248*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtpd_epi64(__m512i __W,__mmask8 __U,__m512d __A)249*67e74705SXin Li _mm512_mask_cvtpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) {
250*67e74705SXin Li   return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
251*67e74705SXin Li                 (__v8di) __W,
252*67e74705SXin Li                 (__mmask8) __U,
253*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
254*67e74705SXin Li }
255*67e74705SXin Li 
256*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtpd_epi64(__mmask8 __U,__m512d __A)257*67e74705SXin Li _mm512_maskz_cvtpd_epi64 (__mmask8 __U, __m512d __A) {
258*67e74705SXin Li   return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
259*67e74705SXin Li                 (__v8di) _mm512_setzero_si512(),
260*67e74705SXin Li                 (__mmask8) __U,
261*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
262*67e74705SXin Li }
263*67e74705SXin Li 
/* Unmasked form taking an explicit rounding argument R: expands to
 * __builtin_ia32_cvtpd2qq512_mask with a zero source vector and mask -1. */
#define _mm512_cvt_roundpd_epi64(A, R) \
  __extension__ ({ \
    (__m512i)__builtin_ia32_cvtpd2qq512_mask( \
        (__v8df)(__m512d)(A), (__v8di)_mm512_setzero_si512(), \
        (__mmask8)-1, (int)(R)); \
  })
268*67e74705SXin Li 
/* Masked form taking an explicit rounding argument R: expands to
 * __builtin_ia32_cvtpd2qq512_mask with W as the source vector and U as the mask. */
#define _mm512_mask_cvt_roundpd_epi64(W, U, A, R) \
  __extension__ ({ \
    (__m512i)__builtin_ia32_cvtpd2qq512_mask( \
        (__v8df)(__m512d)(A), (__v8di)(__m512i)(W), \
        (__mmask8)(U), (int)(R)); \
  })
273*67e74705SXin Li 
/* Zero-source masked form taking an explicit rounding argument R: expands to
 * __builtin_ia32_cvtpd2qq512_mask with a zero source vector and U as the mask. */
#define _mm512_maskz_cvt_roundpd_epi64(U, A, R) \
  __extension__ ({ \
    (__m512i)__builtin_ia32_cvtpd2qq512_mask( \
        (__v8df)(__m512d)(A), (__v8di)_mm512_setzero_si512(), \
        (__mmask8)(U), (int)(R)); \
  })
278*67e74705SXin Li 
279*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtpd_epu64(__m512d __A)280*67e74705SXin Li _mm512_cvtpd_epu64 (__m512d __A) {
281*67e74705SXin Li   return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
282*67e74705SXin Li                  (__v8di) _mm512_setzero_si512(),
283*67e74705SXin Li                  (__mmask8) -1,
284*67e74705SXin Li                  _MM_FROUND_CUR_DIRECTION);
285*67e74705SXin Li }
286*67e74705SXin Li 
287*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtpd_epu64(__m512i __W,__mmask8 __U,__m512d __A)288*67e74705SXin Li _mm512_mask_cvtpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) {
289*67e74705SXin Li   return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
290*67e74705SXin Li                  (__v8di) __W,
291*67e74705SXin Li                  (__mmask8) __U,
292*67e74705SXin Li                  _MM_FROUND_CUR_DIRECTION);
293*67e74705SXin Li }
294*67e74705SXin Li 
295*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtpd_epu64(__mmask8 __U,__m512d __A)296*67e74705SXin Li _mm512_maskz_cvtpd_epu64 (__mmask8 __U, __m512d __A) {
297*67e74705SXin Li   return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
298*67e74705SXin Li                  (__v8di) _mm512_setzero_si512(),
299*67e74705SXin Li                  (__mmask8) __U,
300*67e74705SXin Li                  _MM_FROUND_CUR_DIRECTION);
301*67e74705SXin Li }
302*67e74705SXin Li 
/* Unmasked form taking an explicit rounding argument R: expands to
 * __builtin_ia32_cvtpd2uqq512_mask with a zero source vector and mask -1. */
#define _mm512_cvt_roundpd_epu64(A, R) \
  __extension__ ({ \
    (__m512i)__builtin_ia32_cvtpd2uqq512_mask( \
        (__v8df)(__m512d)(A), (__v8di)_mm512_setzero_si512(), \
        (__mmask8)-1, (int)(R)); \
  })
307*67e74705SXin Li 
/* Masked form taking an explicit rounding argument R: expands to
 * __builtin_ia32_cvtpd2uqq512_mask with W as the source vector and U as the mask. */
#define _mm512_mask_cvt_roundpd_epu64(W, U, A, R) \
  __extension__ ({ \
    (__m512i)__builtin_ia32_cvtpd2uqq512_mask( \
        (__v8df)(__m512d)(A), (__v8di)(__m512i)(W), \
        (__mmask8)(U), (int)(R)); \
  })
312*67e74705SXin Li 
/* Zero-source masked form taking an explicit rounding argument R: expands to
 * __builtin_ia32_cvtpd2uqq512_mask with a zero source vector and U as the mask. */
#define _mm512_maskz_cvt_roundpd_epu64(U, A, R) \
  __extension__ ({ \
    (__m512i)__builtin_ia32_cvtpd2uqq512_mask( \
        (__v8df)(__m512d)(A), (__v8di)_mm512_setzero_si512(), \
        (__mmask8)(U), (int)(R)); \
  })
317*67e74705SXin Li 
318*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtps_epi64(__m256 __A)319*67e74705SXin Li _mm512_cvtps_epi64 (__m256 __A) {
320*67e74705SXin Li   return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
321*67e74705SXin Li                 (__v8di) _mm512_setzero_si512(),
322*67e74705SXin Li                 (__mmask8) -1,
323*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
324*67e74705SXin Li }
325*67e74705SXin Li 
326*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtps_epi64(__m512i __W,__mmask8 __U,__m256 __A)327*67e74705SXin Li _mm512_mask_cvtps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) {
328*67e74705SXin Li   return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
329*67e74705SXin Li                 (__v8di) __W,
330*67e74705SXin Li                 (__mmask8) __U,
331*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
332*67e74705SXin Li }
333*67e74705SXin Li 
334*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtps_epi64(__mmask8 __U,__m256 __A)335*67e74705SXin Li _mm512_maskz_cvtps_epi64 (__mmask8 __U, __m256 __A) {
336*67e74705SXin Li   return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
337*67e74705SXin Li                 (__v8di) _mm512_setzero_si512(),
338*67e74705SXin Li                 (__mmask8) __U,
339*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
340*67e74705SXin Li }
341*67e74705SXin Li 
/* Unmasked form taking an explicit rounding argument R: expands to
 * __builtin_ia32_cvtps2qq512_mask with a zero source vector and mask -1. */
#define _mm512_cvt_roundps_epi64(A, R) \
  __extension__ ({ \
    (__m512i)__builtin_ia32_cvtps2qq512_mask( \
        (__v8sf)(__m256)(A), (__v8di)_mm512_setzero_si512(), \
        (__mmask8)-1, (int)(R)); \
  })
346*67e74705SXin Li 
/* Masked form taking an explicit rounding argument R: expands to
 * __builtin_ia32_cvtps2qq512_mask with W as the source vector and U as the mask. */
#define _mm512_mask_cvt_roundps_epi64(W, U, A, R) \
  __extension__ ({ \
    (__m512i)__builtin_ia32_cvtps2qq512_mask( \
        (__v8sf)(__m256)(A), (__v8di)(__m512i)(W), \
        (__mmask8)(U), (int)(R)); \
  })
351*67e74705SXin Li 
/* Zero-source masked form taking an explicit rounding argument R: expands to
 * __builtin_ia32_cvtps2qq512_mask with a zero source vector and U as the mask. */
#define _mm512_maskz_cvt_roundps_epi64(U, A, R) \
  __extension__ ({ \
    (__m512i)__builtin_ia32_cvtps2qq512_mask( \
        (__v8sf)(__m256)(A), (__v8di)_mm512_setzero_si512(), \
        (__mmask8)(U), (int)(R)); \
  })
356*67e74705SXin Li 
357*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtps_epu64(__m256 __A)358*67e74705SXin Li _mm512_cvtps_epu64 (__m256 __A) {
359*67e74705SXin Li   return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
360*67e74705SXin Li                  (__v8di) _mm512_setzero_si512(),
361*67e74705SXin Li                  (__mmask8) -1,
362*67e74705SXin Li                  _MM_FROUND_CUR_DIRECTION);
363*67e74705SXin Li }
364*67e74705SXin Li 
365*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtps_epu64(__m512i __W,__mmask8 __U,__m256 __A)366*67e74705SXin Li _mm512_mask_cvtps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) {
367*67e74705SXin Li   return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
368*67e74705SXin Li                  (__v8di) __W,
369*67e74705SXin Li                  (__mmask8) __U,
370*67e74705SXin Li                  _MM_FROUND_CUR_DIRECTION);
371*67e74705SXin Li }
372*67e74705SXin Li 
373*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtps_epu64(__mmask8 __U,__m256 __A)374*67e74705SXin Li _mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A) {
375*67e74705SXin Li   return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
376*67e74705SXin Li                  (__v8di) _mm512_setzero_si512(),
377*67e74705SXin Li                  (__mmask8) __U,
378*67e74705SXin Li                  _MM_FROUND_CUR_DIRECTION);
379*67e74705SXin Li }
380*67e74705SXin Li 
/* Unmasked form taking an explicit rounding argument R: expands to
 * __builtin_ia32_cvtps2uqq512_mask with a zero source vector and mask -1. */
#define _mm512_cvt_roundps_epu64(A, R) \
  __extension__ ({ \
    (__m512i)__builtin_ia32_cvtps2uqq512_mask( \
        (__v8sf)(__m256)(A), (__v8di)_mm512_setzero_si512(), \
        (__mmask8)-1, (int)(R)); \
  })
385*67e74705SXin Li 
/* Masked form taking an explicit rounding argument R: expands to
 * __builtin_ia32_cvtps2uqq512_mask with W as the source vector and U as the mask. */
#define _mm512_mask_cvt_roundps_epu64(W, U, A, R) \
  __extension__ ({ \
    (__m512i)__builtin_ia32_cvtps2uqq512_mask( \
        (__v8sf)(__m256)(A), (__v8di)(__m512i)(W), \
        (__mmask8)(U), (int)(R)); \
  })
390*67e74705SXin Li 
/* Zero-source masked form taking an explicit rounding argument R: expands to
 * __builtin_ia32_cvtps2uqq512_mask with a zero source vector and U as the mask. */
#define _mm512_maskz_cvt_roundps_epu64(U, A, R) \
  __extension__ ({ \
    (__m512i)__builtin_ia32_cvtps2uqq512_mask( \
        (__v8sf)(__m256)(A), (__v8di)_mm512_setzero_si512(), \
        (__mmask8)(U), (int)(R)); \
  })
395*67e74705SXin Li 
396*67e74705SXin Li 
397*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_cvtepi64_pd(__m512i __A)398*67e74705SXin Li _mm512_cvtepi64_pd (__m512i __A) {
399*67e74705SXin Li   return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
400*67e74705SXin Li                 (__v8df) _mm512_setzero_pd(),
401*67e74705SXin Li                 (__mmask8) -1,
402*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
403*67e74705SXin Li }
404*67e74705SXin Li 
405*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi64_pd(__m512d __W,__mmask8 __U,__m512i __A)406*67e74705SXin Li _mm512_mask_cvtepi64_pd (__m512d __W, __mmask8 __U, __m512i __A) {
407*67e74705SXin Li   return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
408*67e74705SXin Li                 (__v8df) __W,
409*67e74705SXin Li                 (__mmask8) __U,
410*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
411*67e74705SXin Li }
412*67e74705SXin Li 
413*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi64_pd(__mmask8 __U,__m512i __A)414*67e74705SXin Li _mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A) {
415*67e74705SXin Li   return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
416*67e74705SXin Li                 (__v8df) _mm512_setzero_pd(),
417*67e74705SXin Li                 (__mmask8) __U,
418*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
419*67e74705SXin Li }
420*67e74705SXin Li 
/* Unmasked form taking an explicit rounding argument R: expands to
 * __builtin_ia32_cvtqq2pd512_mask with a zero source vector and mask -1. */
#define _mm512_cvt_roundepi64_pd(A, R) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_cvtqq2pd512_mask( \
        (__v8di)(__m512i)(A), (__v8df)_mm512_setzero_pd(), \
        (__mmask8)-1, (int)(R)); \
  })
425*67e74705SXin Li 
/* Masked form taking an explicit rounding argument R: expands to
 * __builtin_ia32_cvtqq2pd512_mask with W as the source vector and U as the mask. */
#define _mm512_mask_cvt_roundepi64_pd(W, U, A, R) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_cvtqq2pd512_mask( \
        (__v8di)(__m512i)(A), (__v8df)(__m512d)(W), \
        (__mmask8)(U), (int)(R)); \
  })
430*67e74705SXin Li 
/* Zero-source masked form taking an explicit rounding argument R: expands to
 * __builtin_ia32_cvtqq2pd512_mask with a zero source vector and U as the mask. */
#define _mm512_maskz_cvt_roundepi64_pd(U, A, R) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_cvtqq2pd512_mask( \
        (__v8di)(__m512i)(A), (__v8df)_mm512_setzero_pd(), \
        (__mmask8)(U), (int)(R)); \
  })
435*67e74705SXin Li 
436*67e74705SXin Li static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm512_cvtepi64_ps(__m512i __A)437*67e74705SXin Li _mm512_cvtepi64_ps (__m512i __A) {
438*67e74705SXin Li   return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
439*67e74705SXin Li                (__v8sf) _mm256_setzero_ps(),
440*67e74705SXin Li                (__mmask8) -1,
441*67e74705SXin Li                _MM_FROUND_CUR_DIRECTION);
442*67e74705SXin Li }
443*67e74705SXin Li 
444*67e74705SXin Li static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi64_ps(__m256 __W,__mmask8 __U,__m512i __A)445*67e74705SXin Li _mm512_mask_cvtepi64_ps (__m256 __W, __mmask8 __U, __m512i __A) {
446*67e74705SXin Li   return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
447*67e74705SXin Li                (__v8sf) __W,
448*67e74705SXin Li                (__mmask8) __U,
449*67e74705SXin Li                _MM_FROUND_CUR_DIRECTION);
450*67e74705SXin Li }
451*67e74705SXin Li 
452*67e74705SXin Li static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi64_ps(__mmask8 __U,__m512i __A)453*67e74705SXin Li _mm512_maskz_cvtepi64_ps (__mmask8 __U, __m512i __A) {
454*67e74705SXin Li   return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
455*67e74705SXin Li                (__v8sf) _mm256_setzero_ps(),
456*67e74705SXin Li                (__mmask8) __U,
457*67e74705SXin Li                _MM_FROUND_CUR_DIRECTION);
458*67e74705SXin Li }
459*67e74705SXin Li 
/* Unmasked form taking an explicit rounding argument R: expands to
 * __builtin_ia32_cvtqq2ps512_mask with a zero source vector and mask -1. */
#define _mm512_cvt_roundepi64_ps(A, R) \
  __extension__ ({ \
    (__m256)__builtin_ia32_cvtqq2ps512_mask( \
        (__v8di)(__m512i)(A), (__v8sf)_mm256_setzero_ps(), \
        (__mmask8)-1, (int)(R)); \
  })
464*67e74705SXin Li 
/* Masked form taking an explicit rounding argument R: expands to
 * __builtin_ia32_cvtqq2ps512_mask with W as the source vector and U as the mask. */
#define _mm512_mask_cvt_roundepi64_ps(W, U, A, R) \
  __extension__ ({ \
    (__m256)__builtin_ia32_cvtqq2ps512_mask( \
        (__v8di)(__m512i)(A), (__v8sf)(__m256)(W), \
        (__mmask8)(U), (int)(R)); \
  })
469*67e74705SXin Li 
/* Zero-source masked form taking an explicit rounding argument R: expands to
 * __builtin_ia32_cvtqq2ps512_mask with a zero source vector and U as the mask. */
#define _mm512_maskz_cvt_roundepi64_ps(U, A, R) \
  __extension__ ({ \
    (__m256)__builtin_ia32_cvtqq2ps512_mask( \
        (__v8di)(__m512i)(A), (__v8sf)_mm256_setzero_ps(), \
        (__mmask8)(U), (int)(R)); \
  })
474*67e74705SXin Li 
475*67e74705SXin Li 
476*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvttpd_epi64(__m512d __A)477*67e74705SXin Li _mm512_cvttpd_epi64 (__m512d __A) {
478*67e74705SXin Li   return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
479*67e74705SXin Li                  (__v8di) _mm512_setzero_si512(),
480*67e74705SXin Li                  (__mmask8) -1,
481*67e74705SXin Li                  _MM_FROUND_CUR_DIRECTION);
482*67e74705SXin Li }
483*67e74705SXin Li 
484*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvttpd_epi64(__m512i __W,__mmask8 __U,__m512d __A)485*67e74705SXin Li _mm512_mask_cvttpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) {
486*67e74705SXin Li   return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
487*67e74705SXin Li                  (__v8di) __W,
488*67e74705SXin Li                  (__mmask8) __U,
489*67e74705SXin Li                  _MM_FROUND_CUR_DIRECTION);
490*67e74705SXin Li }
491*67e74705SXin Li 
492*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvttpd_epi64(__mmask8 __U,__m512d __A)493*67e74705SXin Li _mm512_maskz_cvttpd_epi64 (__mmask8 __U, __m512d __A) {
494*67e74705SXin Li   return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
495*67e74705SXin Li                  (__v8di) _mm512_setzero_si512(),
496*67e74705SXin Li                  (__mmask8) __U,
497*67e74705SXin Li                  _MM_FROUND_CUR_DIRECTION);
498*67e74705SXin Li }
499*67e74705SXin Li 
/* Unmasked form taking an explicit final argument R: expands to
 * __builtin_ia32_cvttpd2qq512_mask with a zero source vector and mask -1. */
#define _mm512_cvtt_roundpd_epi64(A, R) \
  __extension__ ({ \
    (__m512i)__builtin_ia32_cvttpd2qq512_mask( \
        (__v8df)(__m512d)(A), (__v8di)_mm512_setzero_si512(), \
        (__mmask8)-1, (int)(R)); \
  })
504*67e74705SXin Li 
/* Mask form with caller-supplied rounding argument R: W is passed as the
 * second operand and U as the mask of __builtin_ia32_cvttpd2qq512_mask. */
#define _mm512_mask_cvtt_roundpd_epi64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U), (int)(R)); })
509*67e74705SXin Li 
/* Zero-mask form with caller-supplied rounding argument R: zeroed second
 * operand, U as the mask of __builtin_ia32_cvttpd2qq512_mask. */
#define _mm512_maskz_cvtt_roundpd_epi64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)(U), (int)(R)); })
514*67e74705SXin Li 
515*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvttpd_epu64(__m512d __A)516*67e74705SXin Li _mm512_cvttpd_epu64 (__m512d __A) {
517*67e74705SXin Li   return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
518*67e74705SXin Li                   (__v8di) _mm512_setzero_si512(),
519*67e74705SXin Li                   (__mmask8) -1,
520*67e74705SXin Li                   _MM_FROUND_CUR_DIRECTION);
521*67e74705SXin Li }
522*67e74705SXin Li 
523*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvttpd_epu64(__m512i __W,__mmask8 __U,__m512d __A)524*67e74705SXin Li _mm512_mask_cvttpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) {
525*67e74705SXin Li   return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
526*67e74705SXin Li                   (__v8di) __W,
527*67e74705SXin Li                   (__mmask8) __U,
528*67e74705SXin Li                   _MM_FROUND_CUR_DIRECTION);
529*67e74705SXin Li }
530*67e74705SXin Li 
531*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvttpd_epu64(__mmask8 __U,__m512d __A)532*67e74705SXin Li _mm512_maskz_cvttpd_epu64 (__mmask8 __U, __m512d __A) {
533*67e74705SXin Li   return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
534*67e74705SXin Li                   (__v8di) _mm512_setzero_si512(),
535*67e74705SXin Li                   (__mmask8) __U,
536*67e74705SXin Li                   _MM_FROUND_CUR_DIRECTION);
537*67e74705SXin Li }
538*67e74705SXin Li 
/* Unmasked call to __builtin_ia32_cvttpd2uqq512_mask where the caller supplies
 * the last (rounding) argument R; zeroed second operand, all-ones mask. */
#define _mm512_cvtt_roundpd_epu64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)-1, (int)(R)); })
543*67e74705SXin Li 
/* Mask form with caller-supplied rounding argument R: W is passed as the
 * second operand and U as the mask of __builtin_ia32_cvttpd2uqq512_mask. */
#define _mm512_mask_cvtt_roundpd_epu64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)(__m512i)(W), \
                                             (__mmask8)(U), (int)(R)); })
548*67e74705SXin Li 
/* Zero-mask form with caller-supplied rounding argument R: zeroed second
 * operand, U as the mask of __builtin_ia32_cvttpd2uqq512_mask. */
#define _mm512_maskz_cvtt_roundpd_epu64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)(U), (int)(R)); })
553*67e74705SXin Li 
554*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvttps_epi64(__m256 __A)555*67e74705SXin Li _mm512_cvttps_epi64 (__m256 __A) {
556*67e74705SXin Li   return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
557*67e74705SXin Li                  (__v8di) _mm512_setzero_si512(),
558*67e74705SXin Li                  (__mmask8) -1,
559*67e74705SXin Li                  _MM_FROUND_CUR_DIRECTION);
560*67e74705SXin Li }
561*67e74705SXin Li 
562*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvttps_epi64(__m512i __W,__mmask8 __U,__m256 __A)563*67e74705SXin Li _mm512_mask_cvttps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) {
564*67e74705SXin Li   return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
565*67e74705SXin Li                  (__v8di) __W,
566*67e74705SXin Li                  (__mmask8) __U,
567*67e74705SXin Li                  _MM_FROUND_CUR_DIRECTION);
568*67e74705SXin Li }
569*67e74705SXin Li 
570*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvttps_epi64(__mmask8 __U,__m256 __A)571*67e74705SXin Li _mm512_maskz_cvttps_epi64 (__mmask8 __U, __m256 __A) {
572*67e74705SXin Li   return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
573*67e74705SXin Li                  (__v8di) _mm512_setzero_si512(),
574*67e74705SXin Li                  (__mmask8) __U,
575*67e74705SXin Li                  _MM_FROUND_CUR_DIRECTION);
576*67e74705SXin Li }
577*67e74705SXin Li 
/* Unmasked call to __builtin_ia32_cvttps2qq512_mask where the caller supplies
 * the last (rounding) argument R; zeroed second operand, all-ones mask. */
#define _mm512_cvtt_roundps_epi64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)-1, (int)(R)); })
582*67e74705SXin Li 
/* Mask form with caller-supplied rounding argument R: W is passed as the
 * second operand and U as the mask of __builtin_ia32_cvttps2qq512_mask. */
#define _mm512_mask_cvtt_roundps_epi64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U), (int)(R)); })
587*67e74705SXin Li 
/* Zero-mask form with caller-supplied rounding argument R: zeroed second
 * operand, U as the mask of __builtin_ia32_cvttps2qq512_mask. */
#define _mm512_maskz_cvtt_roundps_epi64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)(U), (int)(R)); })
592*67e74705SXin Li 
593*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvttps_epu64(__m256 __A)594*67e74705SXin Li _mm512_cvttps_epu64 (__m256 __A) {
595*67e74705SXin Li   return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
596*67e74705SXin Li                   (__v8di) _mm512_setzero_si512(),
597*67e74705SXin Li                   (__mmask8) -1,
598*67e74705SXin Li                   _MM_FROUND_CUR_DIRECTION);
599*67e74705SXin Li }
600*67e74705SXin Li 
601*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvttps_epu64(__m512i __W,__mmask8 __U,__m256 __A)602*67e74705SXin Li _mm512_mask_cvttps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) {
603*67e74705SXin Li   return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
604*67e74705SXin Li                   (__v8di) __W,
605*67e74705SXin Li                   (__mmask8) __U,
606*67e74705SXin Li                   _MM_FROUND_CUR_DIRECTION);
607*67e74705SXin Li }
608*67e74705SXin Li 
609*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvttps_epu64(__mmask8 __U,__m256 __A)610*67e74705SXin Li _mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A) {
611*67e74705SXin Li   return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
612*67e74705SXin Li                   (__v8di) _mm512_setzero_si512(),
613*67e74705SXin Li                   (__mmask8) __U,
614*67e74705SXin Li                   _MM_FROUND_CUR_DIRECTION);
615*67e74705SXin Li }
616*67e74705SXin Li 
/* Unmasked call to __builtin_ia32_cvttps2uqq512_mask where the caller supplies
 * the last (rounding) argument R; zeroed second operand, all-ones mask. */
#define _mm512_cvtt_roundps_epu64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)-1, (int)(R)); })
621*67e74705SXin Li 
/* Mask form with caller-supplied rounding argument R: W is passed as the
 * second operand and U as the mask of __builtin_ia32_cvttps2uqq512_mask. */
#define _mm512_mask_cvtt_roundps_epu64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)(__m512i)(W), \
                                             (__mmask8)(U), (int)(R)); })
626*67e74705SXin Li 
/* Zero-mask form with caller-supplied rounding argument R: zeroed second
 * operand, U as the mask of __builtin_ia32_cvttps2uqq512_mask. */
#define _mm512_maskz_cvtt_roundps_epu64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)(U), (int)(R)); })
631*67e74705SXin Li 
632*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_cvtepu64_pd(__m512i __A)633*67e74705SXin Li _mm512_cvtepu64_pd (__m512i __A) {
634*67e74705SXin Li   return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
635*67e74705SXin Li                  (__v8df) _mm512_setzero_pd(),
636*67e74705SXin Li                  (__mmask8) -1,
637*67e74705SXin Li                  _MM_FROUND_CUR_DIRECTION);
638*67e74705SXin Li }
639*67e74705SXin Li 
640*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_cvtepu64_pd(__m512d __W,__mmask8 __U,__m512i __A)641*67e74705SXin Li _mm512_mask_cvtepu64_pd (__m512d __W, __mmask8 __U, __m512i __A) {
642*67e74705SXin Li   return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
643*67e74705SXin Li                  (__v8df) __W,
644*67e74705SXin Li                  (__mmask8) __U,
645*67e74705SXin Li                  _MM_FROUND_CUR_DIRECTION);
646*67e74705SXin Li }
647*67e74705SXin Li 
648*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepu64_pd(__mmask8 __U,__m512i __A)649*67e74705SXin Li _mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A) {
650*67e74705SXin Li   return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
651*67e74705SXin Li                  (__v8df) _mm512_setzero_pd(),
652*67e74705SXin Li                  (__mmask8) __U,
653*67e74705SXin Li                  _MM_FROUND_CUR_DIRECTION);
654*67e74705SXin Li }
655*67e74705SXin Li 
/* Unmasked call to __builtin_ia32_cvtuqq2pd512_mask where the caller supplies
 * the last (rounding) argument R; zeroed second operand, all-ones mask. */
#define _mm512_cvt_roundepu64_pd(A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)); })
660*67e74705SXin Li 
/* Mask form with caller-supplied rounding argument R: W is passed as the
 * second operand and U as the mask of __builtin_ia32_cvtuqq2pd512_mask. */
#define _mm512_mask_cvt_roundepu64_pd(W, U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)); })
665*67e74705SXin Li 
666*67e74705SXin Li 
/* Zero-mask form with caller-supplied rounding argument R: zeroed second
 * operand, U as the mask of __builtin_ia32_cvtuqq2pd512_mask. */
#define _mm512_maskz_cvt_roundepu64_pd(U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)); })
671*67e74705SXin Li 
672*67e74705SXin Li 
673*67e74705SXin Li static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm512_cvtepu64_ps(__m512i __A)674*67e74705SXin Li _mm512_cvtepu64_ps (__m512i __A) {
675*67e74705SXin Li   return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
676*67e74705SXin Li                 (__v8sf) _mm256_setzero_ps(),
677*67e74705SXin Li                 (__mmask8) -1,
678*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
679*67e74705SXin Li }
680*67e74705SXin Li 
681*67e74705SXin Li static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm512_mask_cvtepu64_ps(__m256 __W,__mmask8 __U,__m512i __A)682*67e74705SXin Li _mm512_mask_cvtepu64_ps (__m256 __W, __mmask8 __U, __m512i __A) {
683*67e74705SXin Li   return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
684*67e74705SXin Li                 (__v8sf) __W,
685*67e74705SXin Li                 (__mmask8) __U,
686*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
687*67e74705SXin Li }
688*67e74705SXin Li 
689*67e74705SXin Li static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepu64_ps(__mmask8 __U,__m512i __A)690*67e74705SXin Li _mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A) {
691*67e74705SXin Li   return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
692*67e74705SXin Li                 (__v8sf) _mm256_setzero_ps(),
693*67e74705SXin Li                 (__mmask8) __U,
694*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
695*67e74705SXin Li }
696*67e74705SXin Li 
/* Unmasked call to __builtin_ia32_cvtuqq2ps512_mask where the caller supplies
 * the last (rounding) argument R; zeroed second operand, all-ones mask. */
#define _mm512_cvt_roundepu64_ps(A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)); })
701*67e74705SXin Li 
/* Mask form with caller-supplied rounding argument R: W is passed as the
 * second operand and U as the mask of __builtin_ia32_cvtuqq2ps512_mask. */
#define _mm512_mask_cvt_roundepu64_ps(W, U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
                                           (__v8sf)(__m256)(W), \
                                           (__mmask8)(U), (int)(R)); })
706*67e74705SXin Li 
/* Zero-mask form with caller-supplied rounding argument R: zeroed second
 * operand, U as the mask of __builtin_ia32_cvtuqq2ps512_mask. */
#define _mm512_maskz_cvt_roundepu64_ps(U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)); })
711*67e74705SXin Li 
/* Unmasked call to __builtin_ia32_rangepd512_mask on A and B with the integer
 * selector C (cast to int); zeroed fourth operand, all-ones mask,
 * _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm512_range_pd(A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)-1, \
                                          _MM_FROUND_CUR_DIRECTION); })
718*67e74705SXin Li 
/* Mask form of _mm512_range_pd: W is passed as the fourth operand and U as the
 * mask of __builtin_ia32_rangepd512_mask; _MM_FROUND_CUR_DIRECTION is last. */
#define _mm512_mask_range_pd(W, U, A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)(__m512d)(W), \
                                          (__mmask8)(U), \
                                          _MM_FROUND_CUR_DIRECTION); })
724*67e74705SXin Li 
/* Zero-mask form of _mm512_range_pd: zeroed fourth operand, U as the mask of
 * __builtin_ia32_rangepd512_mask; _MM_FROUND_CUR_DIRECTION is last. */
#define _mm512_maskz_range_pd(U, A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(U), \
                                          _MM_FROUND_CUR_DIRECTION); })
731*67e74705SXin Li 
/* Unmasked call to __builtin_ia32_rangepd512_mask with selector C and a
 * caller-supplied final argument R (cast to int); zeroed fourth operand,
 * all-ones mask. */
#define _mm512_range_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)-1, (int)(R)); })
737*67e74705SXin Li 
/* Mask form of _mm512_range_round_pd: W is the fourth operand and U the mask
 * of __builtin_ia32_rangepd512_mask; R is passed as the final argument. */
#define _mm512_mask_range_round_pd(W, U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)(__m512d)(W), \
                                          (__mmask8)(U), (int)(R)); })
743*67e74705SXin Li 
/* Zero-mask form of _mm512_range_round_pd: zeroed fourth operand, U as the
 * mask of __builtin_ia32_rangepd512_mask; R is passed as the final argument. */
#define _mm512_maskz_range_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(U), (int)(R)); })
749*67e74705SXin Li 
/* Unmasked call to __builtin_ia32_rangeps512_mask on A and B with the integer
 * selector C (cast to int); zeroed fourth operand, all-ones __mmask16,
 * _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm512_range_ps(A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)-1, \
                                         _MM_FROUND_CUR_DIRECTION); })
756*67e74705SXin Li 
/* Mask form of _mm512_range_ps: W is passed as the fourth operand and U as the
 * __mmask16 of __builtin_ia32_rangeps512_mask; _MM_FROUND_CUR_DIRECTION is
 * last. */
#define _mm512_mask_range_ps(W, U, A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)(__m512)(W), \
                                         (__mmask16)(U), \
                                         _MM_FROUND_CUR_DIRECTION); })
762*67e74705SXin Li 
/* Zero-mask form of _mm512_range_ps: zeroed fourth operand, U as the
 * __mmask16 of __builtin_ia32_rangeps512_mask; _MM_FROUND_CUR_DIRECTION is
 * last. */
#define _mm512_maskz_range_ps(U, A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(U), \
                                         _MM_FROUND_CUR_DIRECTION); })
769*67e74705SXin Li 
/* Unmasked call to __builtin_ia32_rangeps512_mask with selector C and a
 * caller-supplied final argument R (cast to int); zeroed fourth operand,
 * all-ones __mmask16. */
#define _mm512_range_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)-1, (int)(R)); })
775*67e74705SXin Li 
/* Mask form of _mm512_range_round_ps: W is the fourth operand and U the
 * __mmask16 of __builtin_ia32_rangeps512_mask; R is the final argument. */
#define _mm512_mask_range_round_ps(W, U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)(__m512)(W), \
                                         (__mmask16)(U), (int)(R)); })
781*67e74705SXin Li 
/* Zero-mask form of _mm512_range_round_ps: zeroed fourth operand, U as the
 * __mmask16 of __builtin_ia32_rangeps512_mask; R is the final argument. */
#define _mm512_maskz_range_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(U), (int)(R)); })
787*67e74705SXin Li 
/* Unmasked call to __builtin_ia32_rangess128_round_mask on the __m128 operands
 * A and B: zeroed third operand, all-ones mask, then selector C and the
 * caller-supplied argument R (both cast to int). */
#define _mm_range_round_ss(A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)-1, (int)(C), \
                                               (int)(R)); })
794*67e74705SXin Li 
/* Convenience form of _mm_range_round_ss that supplies
 * _MM_FROUND_CUR_DIRECTION as the R argument. */
#define _mm_range_ss(A, B, C) \
  _mm_range_round_ss(A, B, C, _MM_FROUND_CUR_DIRECTION)
796*67e74705SXin Li 
/* Mask form of _mm_range_round_ss: W is passed as the third operand and U as
 * the mask of __builtin_ia32_rangess128_round_mask, followed by C and R. */
#define _mm_mask_range_round_ss(W, U, A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)(__m128)(W), \
                                               (__mmask8)(U), (int)(C), \
                                               (int)(R)); })
803*67e74705SXin Li 
/* Convenience form of _mm_mask_range_round_ss that supplies
 * _MM_FROUND_CUR_DIRECTION as the R argument. */
#define _mm_mask_range_ss(W, U, A, B, C) \
  _mm_mask_range_round_ss(W, U, A, B, C, _MM_FROUND_CUR_DIRECTION)
805*67e74705SXin Li 
/* Zero-mask form of _mm_range_round_ss: zeroed third operand, U as the mask of
 * __builtin_ia32_rangess128_round_mask, followed by C and R. */
#define _mm_maskz_range_round_ss(U, A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), (int)(C), \
                                               (int)(R)); })
812*67e74705SXin Li 
/* Convenience form of _mm_maskz_range_round_ss that supplies
 * _MM_FROUND_CUR_DIRECTION as the R argument. */
#define _mm_maskz_range_ss(U, A, B, C) \
  _mm_maskz_range_round_ss(U, A, B, C, _MM_FROUND_CUR_DIRECTION)
814*67e74705SXin Li 
/* Unmasked call to __builtin_ia32_rangesd128_round_mask on the __m128d
 * operands A and B: zeroed third operand, all-ones mask, then selector C and
 * the caller-supplied argument R (both cast to int). */
#define _mm_range_round_sd(A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1, (int)(C), \
                                                (int)(R)); })
821*67e74705SXin Li 
/* Convenience form of _mm_range_round_sd that supplies
 * _MM_FROUND_CUR_DIRECTION as the R argument. */
#define _mm_range_sd(A, B, C) \
  _mm_range_round_sd(A, B, C, _MM_FROUND_CUR_DIRECTION)
823*67e74705SXin Li 
/* Mask form of _mm_range_round_sd: W is passed as the third operand and U as
 * the mask of __builtin_ia32_rangesd128_round_mask, followed by C and R. */
#define _mm_mask_range_round_sd(W, U, A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U), (int)(C), \
                                                (int)(R)); })
830*67e74705SXin Li 
/* Convenience form of _mm_mask_range_round_sd that supplies
 * _MM_FROUND_CUR_DIRECTION as the R argument. */
#define _mm_mask_range_sd(W, U, A, B, C) \
  _mm_mask_range_round_sd(W, U, A, B, C, _MM_FROUND_CUR_DIRECTION)
832*67e74705SXin Li 
/* Zero-mask form of _mm_range_round_sd: zeroed third operand, U as the mask of
 * __builtin_ia32_rangesd128_round_mask, followed by C and R. */
#define _mm_maskz_range_round_sd(U, A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), (int)(C), \
                                                (int)(R)); })
839*67e74705SXin Li 
/* Convenience form of _mm_maskz_range_round_sd that supplies
 * _MM_FROUND_CUR_DIRECTION as the R argument. */
#define _mm_maskz_range_sd(U, A, B, C) \
  _mm_maskz_range_round_sd(U, A, B, C, _MM_FROUND_CUR_DIRECTION)
841*67e74705SXin Li 
/* Unmasked call to __builtin_ia32_reducepd512_mask on A with the integer
 * control B (cast to int); zeroed third operand, all-ones mask,
 * _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm512_reduce_pd(A, B) __extension__ ({ \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)-1, \
                                           _MM_FROUND_CUR_DIRECTION); })
847*67e74705SXin Li 
/* Mask form of _mm512_reduce_pd: W is passed as the third operand and U as the
 * mask of __builtin_ia32_reducepd512_mask; _MM_FROUND_CUR_DIRECTION is last. */
#define _mm512_mask_reduce_pd(W, U, A, B) __extension__ ({ \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION); })
853*67e74705SXin Li 
/* Zero-mask form of _mm512_reduce_pd: zeroed third operand, U as the mask of
 * __builtin_ia32_reducepd512_mask; _MM_FROUND_CUR_DIRECTION is last. */
#define _mm512_maskz_reduce_pd(U, A, B) __extension__ ({ \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION); })
859*67e74705SXin Li 
/* Unmasked call to __builtin_ia32_reduceps512_mask on A with the integer
 * control B (cast to int); zeroed third operand, all-ones __mmask16,
 * _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm512_reduce_ps(A, B) __extension__ ({ \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)-1, \
                                          _MM_FROUND_CUR_DIRECTION); })
865*67e74705SXin Li 
/* Mask form of _mm512_reduce_ps: W is passed as the third operand and U as the
 * __mmask16 of __builtin_ia32_reduceps512_mask; _MM_FROUND_CUR_DIRECTION is
 * last. */
#define _mm512_mask_reduce_ps(W, U, A, B) __extension__ ({ \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)(__m512)(W), \
                                          (__mmask16)(U), \
                                          _MM_FROUND_CUR_DIRECTION); })
871*67e74705SXin Li 
/* Zero-mask form of _mm512_reduce_ps: zeroed third operand, U as the
 * __mmask16 of __builtin_ia32_reduceps512_mask; _MM_FROUND_CUR_DIRECTION is
 * last. */
#define _mm512_maskz_reduce_ps(U, A, B) __extension__ ({ \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), \
                                          _MM_FROUND_CUR_DIRECTION); })
877*67e74705SXin Li 
/* Unmasked call to __builtin_ia32_reducepd512_mask with control B and a
 * caller-supplied final argument R (cast to int); zeroed third operand,
 * all-ones mask. */
#define _mm512_reduce_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)-1, (int)(R)); })
882*67e74705SXin Li 
/* Mask form of _mm512_reduce_round_pd: W is the third operand and U the mask
 * of __builtin_ia32_reducepd512_mask; R is passed as the final argument. */
#define _mm512_mask_reduce_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), (int)(R)); })
887*67e74705SXin Li 
/* Zero-mask form of _mm512_reduce_round_pd: zeroed third operand, U as the
 * mask of __builtin_ia32_reducepd512_mask; R is passed as the final argument. */
#define _mm512_maskz_reduce_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })
892*67e74705SXin Li 
/* Unmasked call to __builtin_ia32_reduceps512_mask with control B and a
 * caller-supplied final argument R (cast to int); zeroed third operand,
 * all-ones __mmask16. */
#define _mm512_reduce_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)-1, (int)(R)); })
897*67e74705SXin Li 
/* Mask form of _mm512_reduce_round_ps: W is the third operand and U the
 * __mmask16 of __builtin_ia32_reduceps512_mask; R is the final argument. */
#define _mm512_mask_reduce_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)(__m512)(W), \
                                          (__mmask16)(U), (int)(R)); })
902*67e74705SXin Li 
/* Zero-mask form of _mm512_reduce_round_ps: zeroed third operand, U as the
 * __mmask16 of __builtin_ia32_reduceps512_mask; R is the final argument. */
#define _mm512_maskz_reduce_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), (int)(R)); })
907*67e74705SXin Li 
/* Unmasked call to __builtin_ia32_reducess_mask on the __m128 operands A and
 * B: zeroed third operand, all-ones mask, control C (cast to int), and
 * _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm_reduce_ss(A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)_mm_setzero_ps(), \
                                       (__mmask8)-1, (int)(C), \
                                       _MM_FROUND_CUR_DIRECTION); })
913*67e74705SXin Li 
/* Mask form of _mm_reduce_ss: W is passed as the third operand and U as the
 * mask of __builtin_ia32_reducess_mask, then C and _MM_FROUND_CUR_DIRECTION. */
#define _mm_mask_reduce_ss(W, U, A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)(__m128)(W), \
                                       (__mmask8)(U), (int)(C), \
                                       _MM_FROUND_CUR_DIRECTION); })
919*67e74705SXin Li 
/* Zero-mask form of _mm_reduce_ss: zeroed third operand, U as the mask of
 * __builtin_ia32_reducess_mask, then C and _MM_FROUND_CUR_DIRECTION. */
#define _mm_maskz_reduce_ss(U, A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)_mm_setzero_ps(), \
                                       (__mmask8)(U), (int)(C), \
                                       _MM_FROUND_CUR_DIRECTION); })
926*67e74705SXin Li 
/* Unmasked call to __builtin_ia32_reducess_mask with control C and a
 * caller-supplied final argument R (both cast to int); zeroed third operand,
 * all-ones mask. */
#define _mm_reduce_round_ss(A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)_mm_setzero_ps(), \
                                       (__mmask8)-1, (int)(C), (int)(R)); })
932*67e74705SXin Li 
/* Mask form of _mm_reduce_round_ss: W is the third operand and U the mask of
 * __builtin_ia32_reducess_mask, followed by C and R. */
#define _mm_mask_reduce_round_ss(W, U, A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)(__m128)(W), \
                                       (__mmask8)(U), (int)(C), (int)(R)); })
938*67e74705SXin Li 
/* Zero-mask form of _mm_reduce_round_ss: zeroed third operand, U as the mask
 * of __builtin_ia32_reducess_mask, followed by C and R. */
#define _mm_maskz_reduce_round_ss(U, A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)_mm_setzero_ps(), \
                                       (__mmask8)(U), (int)(C), (int)(R)); })
944*67e74705SXin Li 
/* Expands to __builtin_ia32_reducesd_mask on A and B (viewed as __v2df) with
 * a zero vector as the third operand, an all-ones mask, C as the integer
 * immediate, and _MM_FROUND_CUR_DIRECTION as the final argument.
 */
#define _mm_reduce_sd(A, B, C) __extension__ ({ \
  (__m128d)__builtin_ia32_reducesd_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, \
      (int)(C), \
      _MM_FROUND_CUR_DIRECTION); })
951*67e74705SXin Li 
/* Expands to __builtin_ia32_reducesd_mask on A and B (viewed as __v2df) with
 * W as the third operand, U as the mask, C as the integer immediate, and
 * _MM_FROUND_CUR_DIRECTION as the final argument.
 */
#define _mm_mask_reduce_sd(W, U, A, B, C) __extension__ ({ \
  (__m128d)__builtin_ia32_reducesd_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), \
      (int)(C), \
      _MM_FROUND_CUR_DIRECTION); })
957*67e74705SXin Li 
/* Expands to __builtin_ia32_reducesd_mask on A and B (viewed as __v2df) with
 * a zero vector as the third operand, U as the mask, C as the integer
 * immediate, and _MM_FROUND_CUR_DIRECTION as the final argument.
 */
#define _mm_maskz_reduce_sd(U, A, B, C) __extension__ ({ \
  (__m128d)__builtin_ia32_reducesd_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), \
      (int)(C), \
      _MM_FROUND_CUR_DIRECTION); })
964*67e74705SXin Li 
/* Expands to __builtin_ia32_reducesd_mask on A and B (viewed as __v2df) with
 * a zero vector as the third operand, an all-ones mask, C as the integer
 * immediate, and the caller-supplied R as the final argument.
 */
#define _mm_reduce_round_sd(A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_reducesd_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, \
      (int)(C), \
      (int)(R)); })
970*67e74705SXin Li 
/* Expands to __builtin_ia32_reducesd_mask on A and B (viewed as __v2df) with
 * W as the third operand, U as the mask, C as the integer immediate, and the
 * caller-supplied R as the final argument.
 */
#define _mm_mask_reduce_round_sd(W, U, A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_reducesd_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), \
      (int)(C), \
      (int)(R)); })
976*67e74705SXin Li 
/* Expands to __builtin_ia32_reducesd_mask on A and B (viewed as __v2df) with
 * a zero vector as the third operand, U as the mask, C as the integer
 * immediate, and the caller-supplied R as the final argument.
 */
#define _mm_maskz_reduce_round_sd(U, A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_reducesd_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), \
      (int)(C), \
      (int)(R)); })
982*67e74705SXin Li 
983*67e74705SXin Li static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_movepi32_mask(__m512i __A)984*67e74705SXin Li _mm512_movepi32_mask (__m512i __A)
985*67e74705SXin Li {
986*67e74705SXin Li   return (__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A);
987*67e74705SXin Li }
988*67e74705SXin Li 
989*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_movm_epi32(__mmask16 __A)990*67e74705SXin Li _mm512_movm_epi32 (__mmask16 __A)
991*67e74705SXin Li {
992*67e74705SXin Li   return (__m512i) __builtin_ia32_cvtmask2d512 (__A);
993*67e74705SXin Li }
994*67e74705SXin Li 
995*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_movm_epi64(__mmask8 __A)996*67e74705SXin Li _mm512_movm_epi64 (__mmask8 __A)
997*67e74705SXin Li {
998*67e74705SXin Li   return (__m512i) __builtin_ia32_cvtmask2q512 (__A);
999*67e74705SXin Li }
1000*67e74705SXin Li 
1001*67e74705SXin Li static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_movepi64_mask(__m512i __A)1002*67e74705SXin Li _mm512_movepi64_mask (__m512i __A)
1003*67e74705SXin Li {
1004*67e74705SXin Li   return (__mmask8) __builtin_ia32_cvtq2mask512 ((__v8di) __A);
1005*67e74705SXin Li }
1006*67e74705SXin Li 
1007*67e74705SXin Li 
1008*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_broadcast_f32x2(__m128 __A)1009*67e74705SXin Li _mm512_broadcast_f32x2 (__m128 __A)
1010*67e74705SXin Li {
1011*67e74705SXin Li   return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
1012*67e74705SXin Li                 (__v16sf)_mm512_undefined_ps(),
1013*67e74705SXin Li                 (__mmask16) -1);
1014*67e74705SXin Li }
1015*67e74705SXin Li 
1016*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_f32x2(__m512 __O,__mmask16 __M,__m128 __A)1017*67e74705SXin Li _mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A)
1018*67e74705SXin Li {
1019*67e74705SXin Li   return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
1020*67e74705SXin Li                 (__v16sf)
1021*67e74705SXin Li                 __O, __M);
1022*67e74705SXin Li }
1023*67e74705SXin Li 
1024*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_broadcast_f32x2(__mmask16 __M,__m128 __A)1025*67e74705SXin Li _mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A)
1026*67e74705SXin Li {
1027*67e74705SXin Li   return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
1028*67e74705SXin Li                 (__v16sf)_mm512_setzero_ps (),
1029*67e74705SXin Li                 __M);
1030*67e74705SXin Li }
1031*67e74705SXin Li 
1032*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_broadcast_f32x8(__m256 __A)1033*67e74705SXin Li _mm512_broadcast_f32x8 (__m256 __A)
1034*67e74705SXin Li {
1035*67e74705SXin Li   return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
1036*67e74705SXin Li                 _mm512_undefined_ps(),
1037*67e74705SXin Li                 (__mmask16) -1);
1038*67e74705SXin Li }
1039*67e74705SXin Li 
1040*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_f32x8(__m512 __O,__mmask16 __M,__m256 __A)1041*67e74705SXin Li _mm512_mask_broadcast_f32x8 (__m512 __O, __mmask16 __M, __m256 __A)
1042*67e74705SXin Li {
1043*67e74705SXin Li   return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
1044*67e74705SXin Li                 (__v16sf)__O,
1045*67e74705SXin Li                 __M);
1046*67e74705SXin Li }
1047*67e74705SXin Li 
1048*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_broadcast_f32x8(__mmask16 __M,__m256 __A)1049*67e74705SXin Li _mm512_maskz_broadcast_f32x8 (__mmask16 __M, __m256 __A)
1050*67e74705SXin Li {
1051*67e74705SXin Li   return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
1052*67e74705SXin Li                 (__v16sf)_mm512_setzero_ps (),
1053*67e74705SXin Li                 __M);
1054*67e74705SXin Li }
1055*67e74705SXin Li 
1056*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_broadcast_f64x2(__m128d __A)1057*67e74705SXin Li _mm512_broadcast_f64x2 (__m128d __A)
1058*67e74705SXin Li {
1059*67e74705SXin Li   return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df) __A,
1060*67e74705SXin Li                  (__v8df)_mm512_undefined_pd(),
1061*67e74705SXin Li                  (__mmask8) -1);
1062*67e74705SXin Li }
1063*67e74705SXin Li 
1064*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_f64x2(__m512d __O,__mmask8 __M,__m128d __A)1065*67e74705SXin Li _mm512_mask_broadcast_f64x2 (__m512d __O, __mmask8 __M, __m128d __A)
1066*67e74705SXin Li {
1067*67e74705SXin Li   return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df) __A,
1068*67e74705SXin Li                  (__v8df)
1069*67e74705SXin Li                  __O, __M);
1070*67e74705SXin Li }
1071*67e74705SXin Li 
1072*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_broadcast_f64x2(__mmask8 __M,__m128d __A)1073*67e74705SXin Li _mm512_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
1074*67e74705SXin Li {
1075*67e74705SXin Li   return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df) __A,
1076*67e74705SXin Li                  (__v8df)_mm512_setzero_ps (),
1077*67e74705SXin Li                  __M);
1078*67e74705SXin Li }
1079*67e74705SXin Li 
1080*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_broadcast_i32x2(__m128i __A)1081*67e74705SXin Li _mm512_broadcast_i32x2 (__m128i __A)
1082*67e74705SXin Li {
1083*67e74705SXin Li   return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A,
1084*67e74705SXin Li                  (__v16si)_mm512_setzero_si512(),
1085*67e74705SXin Li                  (__mmask16) -1);
1086*67e74705SXin Li }
1087*67e74705SXin Li 
1088*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_i32x2(__m512i __O,__mmask16 __M,__m128i __A)1089*67e74705SXin Li _mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A)
1090*67e74705SXin Li {
1091*67e74705SXin Li   return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A,
1092*67e74705SXin Li                  (__v16si)
1093*67e74705SXin Li                  __O, __M);
1094*67e74705SXin Li }
1095*67e74705SXin Li 
1096*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_broadcast_i32x2(__mmask16 __M,__m128i __A)1097*67e74705SXin Li _mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A)
1098*67e74705SXin Li {
1099*67e74705SXin Li   return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A,
1100*67e74705SXin Li                  (__v16si)_mm512_setzero_si512 (),
1101*67e74705SXin Li                  __M);
1102*67e74705SXin Li }
1103*67e74705SXin Li 
1104*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_broadcast_i32x8(__m256i __A)1105*67e74705SXin Li _mm512_broadcast_i32x8 (__m256i __A)
1106*67e74705SXin Li {
1107*67e74705SXin Li   return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si) __A,
1108*67e74705SXin Li                  (__v16si)_mm512_setzero_si512(),
1109*67e74705SXin Li                  (__mmask16) -1);
1110*67e74705SXin Li }
1111*67e74705SXin Li 
1112*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_i32x8(__m512i __O,__mmask16 __M,__m256i __A)1113*67e74705SXin Li _mm512_mask_broadcast_i32x8 (__m512i __O, __mmask16 __M, __m256i __A)
1114*67e74705SXin Li {
1115*67e74705SXin Li   return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si) __A,
1116*67e74705SXin Li                  (__v16si)__O,
1117*67e74705SXin Li                  __M);
1118*67e74705SXin Li }
1119*67e74705SXin Li 
1120*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_broadcast_i32x8(__mmask16 __M,__m256i __A)1121*67e74705SXin Li _mm512_maskz_broadcast_i32x8 (__mmask16 __M, __m256i __A)
1122*67e74705SXin Li {
1123*67e74705SXin Li   return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si) __A,
1124*67e74705SXin Li                  (__v16si)
1125*67e74705SXin Li                  _mm512_setzero_si512 (),
1126*67e74705SXin Li                  __M);
1127*67e74705SXin Li }
1128*67e74705SXin Li 
1129*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_broadcast_i64x2(__m128i __A)1130*67e74705SXin Li _mm512_broadcast_i64x2 (__m128i __A)
1131*67e74705SXin Li {
1132*67e74705SXin Li   return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di) __A,
1133*67e74705SXin Li                  (__v8di)_mm512_setzero_si512(),
1134*67e74705SXin Li                  (__mmask8) -1);
1135*67e74705SXin Li }
1136*67e74705SXin Li 
1137*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_i64x2(__m512i __O,__mmask8 __M,__m128i __A)1138*67e74705SXin Li _mm512_mask_broadcast_i64x2 (__m512i __O, __mmask8 __M, __m128i __A)
1139*67e74705SXin Li {
1140*67e74705SXin Li   return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di) __A,
1141*67e74705SXin Li                  (__v8di)
1142*67e74705SXin Li                  __O, __M);
1143*67e74705SXin Li }
1144*67e74705SXin Li 
1145*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_broadcast_i64x2(__mmask8 __M,__m128i __A)1146*67e74705SXin Li _mm512_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
1147*67e74705SXin Li {
1148*67e74705SXin Li   return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di) __A,
1149*67e74705SXin Li                  (__v8di)_mm512_setzero_si512 (),
1150*67e74705SXin Li                  __M);
1151*67e74705SXin Li }
1152*67e74705SXin Li 
/* Expands to __builtin_ia32_extractf32x8_mask on A (viewed as __v16sf) with
 * imm as the integer immediate, a zero 256-bit vector as the third operand,
 * and an all-ones mask.
 */
#define _mm512_extractf32x8_ps(A, imm) __extension__ ({ \
  (__m256)__builtin_ia32_extractf32x8_mask( \
      (__v16sf)(__m512)(A), \
      (int)(imm), \
      (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)-1); })
1157*67e74705SXin Li 
/* Expands to __builtin_ia32_extractf32x8_mask on A (viewed as __v16sf) with
 * imm as the integer immediate, W as the third operand, and U as the mask.
 */
#define _mm512_mask_extractf32x8_ps(W, U, A, imm) __extension__ ({ \
  (__m256)__builtin_ia32_extractf32x8_mask( \
      (__v16sf)(__m512)(A), \
      (int)(imm), \
      (__v8sf)(__m256)(W), \
      (__mmask8)(U)); })
1162*67e74705SXin Li 
/* Expands to __builtin_ia32_extractf32x8_mask on A (viewed as __v16sf) with
 * imm as the integer immediate, a zero 256-bit vector as the third operand,
 * and U as the mask.
 */
#define _mm512_maskz_extractf32x8_ps(U, A, imm) __extension__ ({ \
  (__m256)__builtin_ia32_extractf32x8_mask( \
      (__v16sf)(__m512)(A), \
      (int)(imm), \
      (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)(U)); })
1167*67e74705SXin Li 
/* Expands to __builtin_ia32_extractf64x2_512_mask on A (viewed as __v8df)
 * with imm as the integer immediate, a zero 128-bit vector as the third
 * operand, and an all-ones mask.
 */
#define _mm512_extractf64x2_pd(A, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_extractf64x2_512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(imm), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1); })
1173*67e74705SXin Li 
/* Expands to __builtin_ia32_extractf64x2_512_mask on A (viewed as __v8df)
 * with imm as the integer immediate, W as the third operand, and U as the
 * mask.
 */
#define _mm512_mask_extractf64x2_pd(W, U, A, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_extractf64x2_512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(imm), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U)); })
1179*67e74705SXin Li 
/* Expands to __builtin_ia32_extractf64x2_512_mask on A (viewed as __v8df)
 * with imm as the integer immediate, a zero 128-bit vector as the third
 * operand, and U as the mask.
 */
#define _mm512_maskz_extractf64x2_pd(U, A, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_extractf64x2_512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(imm), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U)); })
1185*67e74705SXin Li 
/* Expands to __builtin_ia32_extracti32x8_mask on A (viewed as __v16si) with
 * imm as the integer immediate, a zero 256-bit vector as the third operand,
 * and an all-ones mask.
 */
#define _mm512_extracti32x8_epi32(A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_extracti32x8_mask( \
      (__v16si)(__m512i)(A), \
      (int)(imm), \
      (__v8si)_mm256_setzero_si256(), \
      (__mmask8)-1); })
1190*67e74705SXin Li 
/* Expands to __builtin_ia32_extracti32x8_mask on A (viewed as __v16si) with
 * imm as the integer immediate, W as the third operand, and U as the mask.
 */
#define _mm512_mask_extracti32x8_epi32(W, U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_extracti32x8_mask( \
      (__v16si)(__m512i)(A), \
      (int)(imm), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U)); })
1195*67e74705SXin Li 
/* Expands to __builtin_ia32_extracti32x8_mask on A (viewed as __v16si) with
 * imm as the integer immediate, a zero 256-bit vector as the third operand,
 * and U as the mask.
 */
#define _mm512_maskz_extracti32x8_epi32(U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_extracti32x8_mask( \
      (__v16si)(__m512i)(A), \
      (int)(imm), \
      (__v8si)_mm256_setzero_si256(), \
      (__mmask8)(U)); })
1200*67e74705SXin Li 
/* Expands to __builtin_ia32_extracti64x2_512_mask on A (viewed as __v8di)
 * with imm as the integer immediate, the result of _mm_setzero_di() as the
 * third operand, and an all-ones mask.
 */
#define _mm512_extracti64x2_epi64(A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_extracti64x2_512_mask( \
      (__v8di)(__m512i)(A), \
      (int)(imm), \
      (__v2di)_mm_setzero_di(), \
      (__mmask8)-1); })
1206*67e74705SXin Li 
/* Expands to __builtin_ia32_extracti64x2_512_mask on A (viewed as __v8di)
 * with imm as the integer immediate, W as the third operand, and U as the
 * mask.
 */
#define _mm512_mask_extracti64x2_epi64(W, U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_extracti64x2_512_mask( \
      (__v8di)(__m512i)(A), \
      (int)(imm), \
      (__v2di)(__m128i)(W), \
      (__mmask8)(U)); })
1212*67e74705SXin Li 
/* Expands to __builtin_ia32_extracti64x2_512_mask on A (viewed as __v8di)
 * with imm as the integer immediate, the result of _mm_setzero_di() as the
 * third operand, and U as the mask.
 */
#define _mm512_maskz_extracti64x2_epi64(U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_extracti64x2_512_mask( \
      (__v8di)(__m512i)(A), \
      (int)(imm), \
      (__v2di)_mm_setzero_di(), \
      (__mmask8)(U)); })
1218*67e74705SXin Li 
/* Expands to __builtin_ia32_insertf32x8_mask on A (viewed as __v16sf) and B
 * (viewed as __v8sf) with imm as the integer immediate, a zero vector as the
 * fourth operand, and an all-ones mask.
 */
#define _mm512_insertf32x8(A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_insertf32x8_mask( \
      (__v16sf)(__m512)(A), \
      (__v8sf)(__m256)(B), \
      (int)(imm), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)-1); })
1224*67e74705SXin Li 
/* Expands to __builtin_ia32_insertf32x8_mask on A (viewed as __v16sf) and B
 * (viewed as __v8sf) with imm as the integer immediate, W as the fourth
 * operand, and U as the mask.
 */
#define _mm512_mask_insertf32x8(W, U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_insertf32x8_mask( \
      (__v16sf)(__m512)(A), \
      (__v8sf)(__m256)(B), \
      (int)(imm), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U)); })
1230*67e74705SXin Li 
/* Expands to __builtin_ia32_insertf32x8_mask on A (viewed as __v16sf) and B
 * (viewed as __v8sf) with imm as the integer immediate, a zero vector as the
 * fourth operand, and U as the mask.
 */
#define _mm512_maskz_insertf32x8(U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_insertf32x8_mask( \
      (__v16sf)(__m512)(A), \
      (__v8sf)(__m256)(B), \
      (int)(imm), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U)); })
1236*67e74705SXin Li 
/* Expands to __builtin_ia32_insertf64x2_512_mask on A (viewed as __v8df) and
 * B (viewed as __v2df) with imm as the integer immediate, a zero vector as
 * the fourth operand, and an all-ones mask.
 */
#define _mm512_insertf64x2(A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_insertf64x2_512_mask( \
      (__v8df)(__m512d)(A), \
      (__v2df)(__m128d)(B), \
      (int)(imm), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)-1); })
1243*67e74705SXin Li 
/* Expands to __builtin_ia32_insertf64x2_512_mask on A (viewed as __v8df) and
 * B (viewed as __v2df) with imm as the integer immediate, W as the fourth
 * operand, and U as the mask.
 */
#define _mm512_mask_insertf64x2(W, U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_insertf64x2_512_mask( \
      (__v8df)(__m512d)(A), \
      (__v2df)(__m128d)(B), \
      (int)(imm), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U)); })
1250*67e74705SXin Li 
/* Expands to __builtin_ia32_insertf64x2_512_mask on A (viewed as __v8df) and
 * B (viewed as __v2df) with imm as the integer immediate, a zero vector as
 * the fourth operand, and U as the mask.
 */
#define _mm512_maskz_insertf64x2(U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_insertf64x2_512_mask( \
      (__v8df)(__m512d)(A), \
      (__v2df)(__m128d)(B), \
      (int)(imm), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U)); })
1257*67e74705SXin Li 
/* Expands to __builtin_ia32_inserti32x8_mask on A (viewed as __v16si) and B
 * (viewed as __v8si) with imm as the integer immediate, a zero vector as the
 * fourth operand, and an all-ones mask.
 */
#define _mm512_inserti32x8(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_inserti32x8_mask( \
      (__v16si)(__m512i)(A), \
      (__v8si)(__m256i)(B), \
      (int)(imm), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)-1); })
1263*67e74705SXin Li 
/* Expands to __builtin_ia32_inserti32x8_mask on A (viewed as __v16si) and B
 * (viewed as __v8si) with imm as the integer immediate, W as the fourth
 * operand, and U as the mask.
 */
#define _mm512_mask_inserti32x8(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_inserti32x8_mask( \
      (__v16si)(__m512i)(A), \
      (__v8si)(__m256i)(B), \
      (int)(imm), \
      (__v16si)(__m512i)(W), \
      (__mmask16)(U)); })
1269*67e74705SXin Li 
/* Expands to __builtin_ia32_inserti32x8_mask on A (viewed as __v16si) and B
 * (viewed as __v8si) with imm as the integer immediate, a zero vector as the
 * fourth operand, and U as the mask.
 */
#define _mm512_maskz_inserti32x8(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_inserti32x8_mask( \
      (__v16si)(__m512i)(A), \
      (__v8si)(__m256i)(B), \
      (int)(imm), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)(U)); })
1275*67e74705SXin Li 
/* Expands to __builtin_ia32_inserti64x2_512_mask on A (viewed as __v8di) and
 * B (viewed as __v2di) with imm as the integer immediate, a zero vector as
 * the fourth operand, and an all-ones mask.
 */
#define _mm512_inserti64x2(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_inserti64x2_512_mask( \
      (__v8di)(__m512i)(A), \
      (__v2di)(__m128i)(B), \
      (int)(imm), \
      (__v8di)_mm512_setzero_si512(), \
      (__mmask8)-1); })
1282*67e74705SXin Li 
/* Expands to __builtin_ia32_inserti64x2_512_mask on A (viewed as __v8di) and
 * B (viewed as __v2di) with imm as the integer immediate, W as the fourth
 * operand, and U as the mask.
 */
#define _mm512_mask_inserti64x2(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_inserti64x2_512_mask( \
      (__v8di)(__m512i)(A), \
      (__v2di)(__m128i)(B), \
      (int)(imm), \
      (__v8di)(__m512i)(W), \
      (__mmask8)(U)); })
1289*67e74705SXin Li 
/* Expands to __builtin_ia32_inserti64x2_512_mask on A (viewed as __v8di) and
 * B (viewed as __v2di) with imm as the integer immediate, a zero vector as
 * the fourth operand, and U as the mask.
 */
#define _mm512_maskz_inserti64x2(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_inserti64x2_512_mask( \
      (__v8di)(__m512i)(A), \
      (__v2di)(__m128i)(B), \
      (int)(imm), \
      (__v8di)_mm512_setzero_si512(), \
      (__mmask8)(U)); })
1296*67e74705SXin Li 
/* Expands to __builtin_ia32_fpclassps512_mask on A (viewed as __v16sf) with
 * imm as the integer immediate and U as the mask; yields a __mmask16.
 */
#define _mm512_mask_fpclass_ps_mask(U, A, imm) __extension__ ({ \
  (__mmask16)__builtin_ia32_fpclassps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)(imm), \
      (__mmask16)(U)); })
1300*67e74705SXin Li 
/* Expands to __builtin_ia32_fpclassps512_mask on A (viewed as __v16sf) with
 * imm as the integer immediate and an all-ones mask; yields a __mmask16.
 */
#define _mm512_fpclass_ps_mask(A, imm) __extension__ ({ \
  (__mmask16)__builtin_ia32_fpclassps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)(imm), \
      (__mmask16)-1); })
1304*67e74705SXin Li 
/* Expands to __builtin_ia32_fpclasspd512_mask on A (viewed as __v8df) with
 * imm as the integer immediate and U as the mask; yields a __mmask8.
 */
#define _mm512_mask_fpclass_pd_mask(U, A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclasspd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(imm), \
      (__mmask8)(U)); })
1308*67e74705SXin Li 
/* Expands to __builtin_ia32_fpclasspd512_mask on A (viewed as __v8df) with
 * imm as the integer immediate and an all-ones mask; yields a __mmask8.
 */
#define _mm512_fpclass_pd_mask(A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclasspd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(imm), \
      (__mmask8)-1); })
1312*67e74705SXin Li 
/* Expands to __builtin_ia32_fpclasssd_mask on A (viewed as __v2df) with imm
 * as the integer immediate and an all-ones mask; yields a __mmask8.
 */
#define _mm_fpclass_sd_mask(A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclasssd_mask( \
      (__v2df)(__m128d)(A), \
      (int)(imm), \
      (__mmask8)-1); })
1316*67e74705SXin Li 
/* Expands to __builtin_ia32_fpclasssd_mask on A (viewed as __v2df) with imm
 * as the integer immediate and U as the mask; yields a __mmask8.
 */
#define _mm_mask_fpclass_sd_mask(U, A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclasssd_mask( \
      (__v2df)(__m128d)(A), \
      (int)(imm), \
      (__mmask8)(U)); })
1320*67e74705SXin Li 
/* Expands to __builtin_ia32_fpclassss_mask on A (viewed as __v4sf) with imm
 * as the integer immediate and an all-ones mask; yields a __mmask8.
 */
#define _mm_fpclass_ss_mask(A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclassss_mask( \
      (__v4sf)(__m128)(A), \
      (int)(imm), \
      (__mmask8)-1); })
1324*67e74705SXin Li 
/* Expands to __builtin_ia32_fpclassss_mask on A (viewed as __v4sf) with imm
 * as the integer immediate and U as the mask; yields a __mmask8.
 */
#define _mm_mask_fpclass_ss_mask(U, A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclassss_mask( \
      (__v4sf)(__m128)(A), \
      (int)(imm), \
      (__mmask8)(U)); })
1328*67e74705SXin Li 
1329*67e74705SXin Li #undef __DEFAULT_FN_ATTRS
1330*67e74705SXin Li 
1331*67e74705SXin Li #endif
1332