/*===---- avx512dqintrin.h - AVX512DQ intrinsics ---------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
23*67e74705SXin Li
24*67e74705SXin Li #ifndef __IMMINTRIN_H
25*67e74705SXin Li #error "Never use <avx512dqintrin.h> directly; include <immintrin.h> instead."
26*67e74705SXin Li #endif
27*67e74705SXin Li
28*67e74705SXin Li #ifndef __AVX512DQINTRIN_H
29*67e74705SXin Li #define __AVX512DQINTRIN_H
30*67e74705SXin Li
31*67e74705SXin Li /* Define the default attributes for the functions in this file. */
32*67e74705SXin Li #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512dq")))
33*67e74705SXin Li
34*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mullo_epi64(__m512i __A,__m512i __B)35*67e74705SXin Li _mm512_mullo_epi64 (__m512i __A, __m512i __B) {
36*67e74705SXin Li return (__m512i) ((__v8du) __A * (__v8du) __B);
37*67e74705SXin Li }
38*67e74705SXin Li
39*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_mullo_epi64(__m512i __W,__mmask8 __U,__m512i __A,__m512i __B)40*67e74705SXin Li _mm512_mask_mullo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
41*67e74705SXin Li return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A,
42*67e74705SXin Li (__v8di) __B,
43*67e74705SXin Li (__v8di) __W,
44*67e74705SXin Li (__mmask8) __U);
45*67e74705SXin Li }
46*67e74705SXin Li
47*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_mullo_epi64(__mmask8 __U,__m512i __A,__m512i __B)48*67e74705SXin Li _mm512_maskz_mullo_epi64 (__mmask8 __U, __m512i __A, __m512i __B) {
49*67e74705SXin Li return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A,
50*67e74705SXin Li (__v8di) __B,
51*67e74705SXin Li (__v8di)
52*67e74705SXin Li _mm512_setzero_si512 (),
53*67e74705SXin Li (__mmask8) __U);
54*67e74705SXin Li }
55*67e74705SXin Li
56*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_xor_pd(__m512d __A,__m512d __B)57*67e74705SXin Li _mm512_xor_pd (__m512d __A, __m512d __B) {
58*67e74705SXin Li return (__m512d) ((__v8du) __A ^ (__v8du) __B);
59*67e74705SXin Li }
60*67e74705SXin Li
61*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_xor_pd(__m512d __W,__mmask8 __U,__m512d __A,__m512d __B)62*67e74705SXin Li _mm512_mask_xor_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
63*67e74705SXin Li return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
64*67e74705SXin Li (__v8df) __B,
65*67e74705SXin Li (__v8df) __W,
66*67e74705SXin Li (__mmask8) __U);
67*67e74705SXin Li }
68*67e74705SXin Li
69*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_xor_pd(__mmask8 __U,__m512d __A,__m512d __B)70*67e74705SXin Li _mm512_maskz_xor_pd (__mmask8 __U, __m512d __A, __m512d __B) {
71*67e74705SXin Li return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
72*67e74705SXin Li (__v8df) __B,
73*67e74705SXin Li (__v8df)
74*67e74705SXin Li _mm512_setzero_pd (),
75*67e74705SXin Li (__mmask8) __U);
76*67e74705SXin Li }
77*67e74705SXin Li
78*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_xor_ps(__m512 __A,__m512 __B)79*67e74705SXin Li _mm512_xor_ps (__m512 __A, __m512 __B) {
80*67e74705SXin Li return (__m512) ((__v16su) __A ^ (__v16su) __B);
81*67e74705SXin Li }
82*67e74705SXin Li
83*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_xor_ps(__m512 __W,__mmask16 __U,__m512 __A,__m512 __B)84*67e74705SXin Li _mm512_mask_xor_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
85*67e74705SXin Li return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
86*67e74705SXin Li (__v16sf) __B,
87*67e74705SXin Li (__v16sf) __W,
88*67e74705SXin Li (__mmask16) __U);
89*67e74705SXin Li }
90*67e74705SXin Li
91*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_xor_ps(__mmask16 __U,__m512 __A,__m512 __B)92*67e74705SXin Li _mm512_maskz_xor_ps (__mmask16 __U, __m512 __A, __m512 __B) {
93*67e74705SXin Li return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
94*67e74705SXin Li (__v16sf) __B,
95*67e74705SXin Li (__v16sf)
96*67e74705SXin Li _mm512_setzero_ps (),
97*67e74705SXin Li (__mmask16) __U);
98*67e74705SXin Li }
99*67e74705SXin Li
100*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_or_pd(__m512d __A,__m512d __B)101*67e74705SXin Li _mm512_or_pd (__m512d __A, __m512d __B) {
102*67e74705SXin Li return (__m512d) ((__v8du) __A | (__v8du) __B);
103*67e74705SXin Li }
104*67e74705SXin Li
105*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_or_pd(__m512d __W,__mmask8 __U,__m512d __A,__m512d __B)106*67e74705SXin Li _mm512_mask_or_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
107*67e74705SXin Li return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
108*67e74705SXin Li (__v8df) __B,
109*67e74705SXin Li (__v8df) __W,
110*67e74705SXin Li (__mmask8) __U);
111*67e74705SXin Li }
112*67e74705SXin Li
113*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_or_pd(__mmask8 __U,__m512d __A,__m512d __B)114*67e74705SXin Li _mm512_maskz_or_pd (__mmask8 __U, __m512d __A, __m512d __B) {
115*67e74705SXin Li return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
116*67e74705SXin Li (__v8df) __B,
117*67e74705SXin Li (__v8df)
118*67e74705SXin Li _mm512_setzero_pd (),
119*67e74705SXin Li (__mmask8) __U);
120*67e74705SXin Li }
121*67e74705SXin Li
122*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_or_ps(__m512 __A,__m512 __B)123*67e74705SXin Li _mm512_or_ps (__m512 __A, __m512 __B) {
124*67e74705SXin Li return (__m512) ((__v16su) __A | (__v16su) __B);
125*67e74705SXin Li }
126*67e74705SXin Li
127*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_or_ps(__m512 __W,__mmask16 __U,__m512 __A,__m512 __B)128*67e74705SXin Li _mm512_mask_or_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
129*67e74705SXin Li return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
130*67e74705SXin Li (__v16sf) __B,
131*67e74705SXin Li (__v16sf) __W,
132*67e74705SXin Li (__mmask16) __U);
133*67e74705SXin Li }
134*67e74705SXin Li
135*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_or_ps(__mmask16 __U,__m512 __A,__m512 __B)136*67e74705SXin Li _mm512_maskz_or_ps (__mmask16 __U, __m512 __A, __m512 __B) {
137*67e74705SXin Li return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
138*67e74705SXin Li (__v16sf) __B,
139*67e74705SXin Li (__v16sf)
140*67e74705SXin Li _mm512_setzero_ps (),
141*67e74705SXin Li (__mmask16) __U);
142*67e74705SXin Li }
143*67e74705SXin Li
144*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_and_pd(__m512d __A,__m512d __B)145*67e74705SXin Li _mm512_and_pd (__m512d __A, __m512d __B) {
146*67e74705SXin Li return (__m512d) ((__v8du) __A & (__v8du) __B);
147*67e74705SXin Li }
148*67e74705SXin Li
149*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_and_pd(__m512d __W,__mmask8 __U,__m512d __A,__m512d __B)150*67e74705SXin Li _mm512_mask_and_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
151*67e74705SXin Li return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
152*67e74705SXin Li (__v8df) __B,
153*67e74705SXin Li (__v8df) __W,
154*67e74705SXin Li (__mmask8) __U);
155*67e74705SXin Li }
156*67e74705SXin Li
157*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_and_pd(__mmask8 __U,__m512d __A,__m512d __B)158*67e74705SXin Li _mm512_maskz_and_pd (__mmask8 __U, __m512d __A, __m512d __B) {
159*67e74705SXin Li return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
160*67e74705SXin Li (__v8df) __B,
161*67e74705SXin Li (__v8df)
162*67e74705SXin Li _mm512_setzero_pd (),
163*67e74705SXin Li (__mmask8) __U);
164*67e74705SXin Li }
165*67e74705SXin Li
166*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_and_ps(__m512 __A,__m512 __B)167*67e74705SXin Li _mm512_and_ps (__m512 __A, __m512 __B) {
168*67e74705SXin Li return (__m512) ((__v16su) __A & (__v16su) __B);
169*67e74705SXin Li }
170*67e74705SXin Li
171*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_and_ps(__m512 __W,__mmask16 __U,__m512 __A,__m512 __B)172*67e74705SXin Li _mm512_mask_and_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
173*67e74705SXin Li return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
174*67e74705SXin Li (__v16sf) __B,
175*67e74705SXin Li (__v16sf) __W,
176*67e74705SXin Li (__mmask16) __U);
177*67e74705SXin Li }
178*67e74705SXin Li
179*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_and_ps(__mmask16 __U,__m512 __A,__m512 __B)180*67e74705SXin Li _mm512_maskz_and_ps (__mmask16 __U, __m512 __A, __m512 __B) {
181*67e74705SXin Li return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
182*67e74705SXin Li (__v16sf) __B,
183*67e74705SXin Li (__v16sf)
184*67e74705SXin Li _mm512_setzero_ps (),
185*67e74705SXin Li (__mmask16) __U);
186*67e74705SXin Li }
187*67e74705SXin Li
188*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_andnot_pd(__m512d __A,__m512d __B)189*67e74705SXin Li _mm512_andnot_pd (__m512d __A, __m512d __B) {
190*67e74705SXin Li return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
191*67e74705SXin Li (__v8df) __B,
192*67e74705SXin Li (__v8df)
193*67e74705SXin Li _mm512_setzero_pd (),
194*67e74705SXin Li (__mmask8) -1);
195*67e74705SXin Li }
196*67e74705SXin Li
197*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_andnot_pd(__m512d __W,__mmask8 __U,__m512d __A,__m512d __B)198*67e74705SXin Li _mm512_mask_andnot_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
199*67e74705SXin Li return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
200*67e74705SXin Li (__v8df) __B,
201*67e74705SXin Li (__v8df) __W,
202*67e74705SXin Li (__mmask8) __U);
203*67e74705SXin Li }
204*67e74705SXin Li
205*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_andnot_pd(__mmask8 __U,__m512d __A,__m512d __B)206*67e74705SXin Li _mm512_maskz_andnot_pd (__mmask8 __U, __m512d __A, __m512d __B) {
207*67e74705SXin Li return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
208*67e74705SXin Li (__v8df) __B,
209*67e74705SXin Li (__v8df)
210*67e74705SXin Li _mm512_setzero_pd (),
211*67e74705SXin Li (__mmask8) __U);
212*67e74705SXin Li }
213*67e74705SXin Li
214*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_andnot_ps(__m512 __A,__m512 __B)215*67e74705SXin Li _mm512_andnot_ps (__m512 __A, __m512 __B) {
216*67e74705SXin Li return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
217*67e74705SXin Li (__v16sf) __B,
218*67e74705SXin Li (__v16sf)
219*67e74705SXin Li _mm512_setzero_ps (),
220*67e74705SXin Li (__mmask16) -1);
221*67e74705SXin Li }
222*67e74705SXin Li
223*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_andnot_ps(__m512 __W,__mmask16 __U,__m512 __A,__m512 __B)224*67e74705SXin Li _mm512_mask_andnot_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
225*67e74705SXin Li return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
226*67e74705SXin Li (__v16sf) __B,
227*67e74705SXin Li (__v16sf) __W,
228*67e74705SXin Li (__mmask16) __U);
229*67e74705SXin Li }
230*67e74705SXin Li
231*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_andnot_ps(__mmask16 __U,__m512 __A,__m512 __B)232*67e74705SXin Li _mm512_maskz_andnot_ps (__mmask16 __U, __m512 __A, __m512 __B) {
233*67e74705SXin Li return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
234*67e74705SXin Li (__v16sf) __B,
235*67e74705SXin Li (__v16sf)
236*67e74705SXin Li _mm512_setzero_ps (),
237*67e74705SXin Li (__mmask16) __U);
238*67e74705SXin Li }
239*67e74705SXin Li
240*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtpd_epi64(__m512d __A)241*67e74705SXin Li _mm512_cvtpd_epi64 (__m512d __A) {
242*67e74705SXin Li return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
243*67e74705SXin Li (__v8di) _mm512_setzero_si512(),
244*67e74705SXin Li (__mmask8) -1,
245*67e74705SXin Li _MM_FROUND_CUR_DIRECTION);
246*67e74705SXin Li }
247*67e74705SXin Li
248*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtpd_epi64(__m512i __W,__mmask8 __U,__m512d __A)249*67e74705SXin Li _mm512_mask_cvtpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) {
250*67e74705SXin Li return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
251*67e74705SXin Li (__v8di) __W,
252*67e74705SXin Li (__mmask8) __U,
253*67e74705SXin Li _MM_FROUND_CUR_DIRECTION);
254*67e74705SXin Li }
255*67e74705SXin Li
256*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtpd_epi64(__mmask8 __U,__m512d __A)257*67e74705SXin Li _mm512_maskz_cvtpd_epi64 (__mmask8 __U, __m512d __A) {
258*67e74705SXin Li return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
259*67e74705SXin Li (__v8di) _mm512_setzero_si512(),
260*67e74705SXin Li (__mmask8) __U,
261*67e74705SXin Li _MM_FROUND_CUR_DIRECTION);
262*67e74705SXin Li }
263*67e74705SXin Li
/* Explicit-rounding form of _mm512_cvtpd_epi64.
 * A is cast to __m512d and R to int, then forwarded to
 * __builtin_ia32_cvtpd2qq512_mask with all mask bits set and a zero
 * pass-through vector.  Expands to a single parenthesized expression. */
#define _mm512_cvt_roundpd_epi64(A, R) \
  ((__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)-1, (int)(R)))
268*67e74705SXin Li
/* Masked, explicit-rounding form of _mm512_cvtpd_epi64.
 * W (cast to __m512i) is the pass-through vector, U the lane mask, and R
 * (cast to int) the rounding argument of __builtin_ia32_cvtpd2qq512_mask.
 * Expands to a single parenthesized expression. */
#define _mm512_mask_cvt_roundpd_epi64(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U), (int)(R)))
273*67e74705SXin Li
/* Zero-masked, explicit-rounding form of _mm512_cvtpd_epi64.
 * Uses a zero pass-through vector, U as the lane mask, and R (cast to int)
 * as the rounding argument of __builtin_ia32_cvtpd2qq512_mask.
 * Expands to a single parenthesized expression. */
#define _mm512_maskz_cvt_roundpd_epi64(U, A, R) \
  ((__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)(U), (int)(R)))
278*67e74705SXin Li
279*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtpd_epu64(__m512d __A)280*67e74705SXin Li _mm512_cvtpd_epu64 (__m512d __A) {
281*67e74705SXin Li return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
282*67e74705SXin Li (__v8di) _mm512_setzero_si512(),
283*67e74705SXin Li (__mmask8) -1,
284*67e74705SXin Li _MM_FROUND_CUR_DIRECTION);
285*67e74705SXin Li }
286*67e74705SXin Li
287*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtpd_epu64(__m512i __W,__mmask8 __U,__m512d __A)288*67e74705SXin Li _mm512_mask_cvtpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) {
289*67e74705SXin Li return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
290*67e74705SXin Li (__v8di) __W,
291*67e74705SXin Li (__mmask8) __U,
292*67e74705SXin Li _MM_FROUND_CUR_DIRECTION);
293*67e74705SXin Li }
294*67e74705SXin Li
295*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtpd_epu64(__mmask8 __U,__m512d __A)296*67e74705SXin Li _mm512_maskz_cvtpd_epu64 (__mmask8 __U, __m512d __A) {
297*67e74705SXin Li return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
298*67e74705SXin Li (__v8di) _mm512_setzero_si512(),
299*67e74705SXin Li (__mmask8) __U,
300*67e74705SXin Li _MM_FROUND_CUR_DIRECTION);
301*67e74705SXin Li }
302*67e74705SXin Li
/* Explicit-rounding form of _mm512_cvtpd_epu64.
 * A is cast to __m512d and R to int, then forwarded to
 * __builtin_ia32_cvtpd2uqq512_mask with all mask bits set and a zero
 * pass-through vector.  Expands to a single parenthesized expression. */
#define _mm512_cvt_roundpd_epu64(A, R) \
  ((__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)-1, (int)(R)))
307*67e74705SXin Li
/* Masked, explicit-rounding form of _mm512_cvtpd_epu64.
 * W (cast to __m512i) is the pass-through vector, U the lane mask, and R
 * (cast to int) the rounding argument of __builtin_ia32_cvtpd2uqq512_mask.
 * Expands to a single parenthesized expression. */
#define _mm512_mask_cvt_roundpd_epu64(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)(__m512i)(W), \
                                             (__mmask8)(U), (int)(R)))
312*67e74705SXin Li
/* Zero-masked, explicit-rounding form of _mm512_cvtpd_epu64.
 * Uses a zero pass-through vector, U as the lane mask, and R (cast to int)
 * as the rounding argument of __builtin_ia32_cvtpd2uqq512_mask.
 * Expands to a single parenthesized expression. */
#define _mm512_maskz_cvt_roundpd_epu64(U, A, R) \
  ((__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)(U), (int)(R)))
317*67e74705SXin Li
318*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtps_epi64(__m256 __A)319*67e74705SXin Li _mm512_cvtps_epi64 (__m256 __A) {
320*67e74705SXin Li return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
321*67e74705SXin Li (__v8di) _mm512_setzero_si512(),
322*67e74705SXin Li (__mmask8) -1,
323*67e74705SXin Li _MM_FROUND_CUR_DIRECTION);
324*67e74705SXin Li }
325*67e74705SXin Li
326*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtps_epi64(__m512i __W,__mmask8 __U,__m256 __A)327*67e74705SXin Li _mm512_mask_cvtps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) {
328*67e74705SXin Li return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
329*67e74705SXin Li (__v8di) __W,
330*67e74705SXin Li (__mmask8) __U,
331*67e74705SXin Li _MM_FROUND_CUR_DIRECTION);
332*67e74705SXin Li }
333*67e74705SXin Li
334*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtps_epi64(__mmask8 __U,__m256 __A)335*67e74705SXin Li _mm512_maskz_cvtps_epi64 (__mmask8 __U, __m256 __A) {
336*67e74705SXin Li return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
337*67e74705SXin Li (__v8di) _mm512_setzero_si512(),
338*67e74705SXin Li (__mmask8) __U,
339*67e74705SXin Li _MM_FROUND_CUR_DIRECTION);
340*67e74705SXin Li }
341*67e74705SXin Li
/* Explicit-rounding form of _mm512_cvtps_epi64.
 * A is cast to __m256 and R to int, then forwarded to
 * __builtin_ia32_cvtps2qq512_mask with all mask bits set and a zero
 * pass-through vector.  Expands to a single parenthesized expression. */
#define _mm512_cvt_roundps_epi64(A, R) \
  ((__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)-1, (int)(R)))
346*67e74705SXin Li
/* Masked, explicit-rounding form of _mm512_cvtps_epi64.
 * W (cast to __m512i) is the pass-through vector, U the lane mask, and R
 * (cast to int) the rounding argument of __builtin_ia32_cvtps2qq512_mask.
 * Expands to a single parenthesized expression. */
#define _mm512_mask_cvt_roundps_epi64(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U), (int)(R)))
351*67e74705SXin Li
/* Zero-masked, explicit-rounding form of _mm512_cvtps_epi64.
 * Uses a zero pass-through vector, U as the lane mask, and R (cast to int)
 * as the rounding argument of __builtin_ia32_cvtps2qq512_mask.
 * Expands to a single parenthesized expression. */
#define _mm512_maskz_cvt_roundps_epi64(U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)(U), (int)(R)))
356*67e74705SXin Li
357*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtps_epu64(__m256 __A)358*67e74705SXin Li _mm512_cvtps_epu64 (__m256 __A) {
359*67e74705SXin Li return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
360*67e74705SXin Li (__v8di) _mm512_setzero_si512(),
361*67e74705SXin Li (__mmask8) -1,
362*67e74705SXin Li _MM_FROUND_CUR_DIRECTION);
363*67e74705SXin Li }
364*67e74705SXin Li
365*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtps_epu64(__m512i __W,__mmask8 __U,__m256 __A)366*67e74705SXin Li _mm512_mask_cvtps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) {
367*67e74705SXin Li return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
368*67e74705SXin Li (__v8di) __W,
369*67e74705SXin Li (__mmask8) __U,
370*67e74705SXin Li _MM_FROUND_CUR_DIRECTION);
371*67e74705SXin Li }
372*67e74705SXin Li
373*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtps_epu64(__mmask8 __U,__m256 __A)374*67e74705SXin Li _mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A) {
375*67e74705SXin Li return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
376*67e74705SXin Li (__v8di) _mm512_setzero_si512(),
377*67e74705SXin Li (__mmask8) __U,
378*67e74705SXin Li _MM_FROUND_CUR_DIRECTION);
379*67e74705SXin Li }
380*67e74705SXin Li
/* Explicit-rounding form of _mm512_cvtps_epu64.
 * A is cast to __m256 and R to int, then forwarded to
 * __builtin_ia32_cvtps2uqq512_mask with all mask bits set and a zero
 * pass-through vector.  Expands to a single parenthesized expression. */
#define _mm512_cvt_roundps_epu64(A, R) \
  ((__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)-1, (int)(R)))
385*67e74705SXin Li
/* Masked, explicit-rounding form of _mm512_cvtps_epu64.
 * W (cast to __m512i) is the pass-through vector, U the lane mask, and R
 * (cast to int) the rounding argument of __builtin_ia32_cvtps2uqq512_mask.
 * Expands to a single parenthesized expression. */
#define _mm512_mask_cvt_roundps_epu64(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)(__m512i)(W), \
                                             (__mmask8)(U), (int)(R)))
390*67e74705SXin Li
/* Zero-masked, explicit-rounding form of _mm512_cvtps_epu64.
 * Uses a zero pass-through vector, U as the lane mask, and R (cast to int)
 * as the rounding argument of __builtin_ia32_cvtps2uqq512_mask.
 * Expands to a single parenthesized expression. */
#define _mm512_maskz_cvt_roundps_epu64(U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)(U), (int)(R)))
395*67e74705SXin Li
396*67e74705SXin Li
397*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_cvtepi64_pd(__m512i __A)398*67e74705SXin Li _mm512_cvtepi64_pd (__m512i __A) {
399*67e74705SXin Li return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
400*67e74705SXin Li (__v8df) _mm512_setzero_pd(),
401*67e74705SXin Li (__mmask8) -1,
402*67e74705SXin Li _MM_FROUND_CUR_DIRECTION);
403*67e74705SXin Li }
404*67e74705SXin Li
405*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi64_pd(__m512d __W,__mmask8 __U,__m512i __A)406*67e74705SXin Li _mm512_mask_cvtepi64_pd (__m512d __W, __mmask8 __U, __m512i __A) {
407*67e74705SXin Li return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
408*67e74705SXin Li (__v8df) __W,
409*67e74705SXin Li (__mmask8) __U,
410*67e74705SXin Li _MM_FROUND_CUR_DIRECTION);
411*67e74705SXin Li }
412*67e74705SXin Li
413*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi64_pd(__mmask8 __U,__m512i __A)414*67e74705SXin Li _mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A) {
415*67e74705SXin Li return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
416*67e74705SXin Li (__v8df) _mm512_setzero_pd(),
417*67e74705SXin Li (__mmask8) __U,
418*67e74705SXin Li _MM_FROUND_CUR_DIRECTION);
419*67e74705SXin Li }
420*67e74705SXin Li
/* Explicit-rounding form of _mm512_cvtepi64_pd.
 * A is cast to __m512i and R to int, then forwarded to
 * __builtin_ia32_cvtqq2pd512_mask with all mask bits set and a zero
 * pass-through vector.  Expands to a single parenthesized expression. */
#define _mm512_cvt_roundepi64_pd(A, R) \
  ((__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))
425*67e74705SXin Li
/* Masked, explicit-rounding form of _mm512_cvtepi64_pd.
 * W (cast to __m512d) is the pass-through vector, U the lane mask, and R
 * (cast to int) the rounding argument of __builtin_ia32_cvtqq2pd512_mask.
 * Expands to a single parenthesized expression. */
#define _mm512_mask_cvt_roundepi64_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)))
430*67e74705SXin Li
/* Zero-masked, explicit-rounding form of _mm512_cvtepi64_pd.
 * Uses a zero pass-through vector, U as the lane mask, and R (cast to int)
 * as the rounding argument of __builtin_ia32_cvtqq2pd512_mask.
 * Expands to a single parenthesized expression. */
#define _mm512_maskz_cvt_roundepi64_pd(U, A, R) \
  ((__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
435*67e74705SXin Li
436*67e74705SXin Li static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm512_cvtepi64_ps(__m512i __A)437*67e74705SXin Li _mm512_cvtepi64_ps (__m512i __A) {
438*67e74705SXin Li return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
439*67e74705SXin Li (__v8sf) _mm256_setzero_ps(),
440*67e74705SXin Li (__mmask8) -1,
441*67e74705SXin Li _MM_FROUND_CUR_DIRECTION);
442*67e74705SXin Li }
443*67e74705SXin Li
444*67e74705SXin Li static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi64_ps(__m256 __W,__mmask8 __U,__m512i __A)445*67e74705SXin Li _mm512_mask_cvtepi64_ps (__m256 __W, __mmask8 __U, __m512i __A) {
446*67e74705SXin Li return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
447*67e74705SXin Li (__v8sf) __W,
448*67e74705SXin Li (__mmask8) __U,
449*67e74705SXin Li _MM_FROUND_CUR_DIRECTION);
450*67e74705SXin Li }
451*67e74705SXin Li
452*67e74705SXin Li static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi64_ps(__mmask8 __U,__m512i __A)453*67e74705SXin Li _mm512_maskz_cvtepi64_ps (__mmask8 __U, __m512i __A) {
454*67e74705SXin Li return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
455*67e74705SXin Li (__v8sf) _mm256_setzero_ps(),
456*67e74705SXin Li (__mmask8) __U,
457*67e74705SXin Li _MM_FROUND_CUR_DIRECTION);
458*67e74705SXin Li }
459*67e74705SXin Li
/* Explicit-rounding form of _mm512_cvtepi64_ps.
 * A is cast to __m512i and R to int, then forwarded to
 * __builtin_ia32_cvtqq2ps512_mask with all mask bits set and a zero
 * pass-through vector.  Expands to a single parenthesized expression. */
#define _mm512_cvt_roundepi64_ps(A, R) \
  ((__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))
464*67e74705SXin Li
/* Masked, explicit-rounding form of _mm512_cvtepi64_ps.
 * W (cast to __m256) is the pass-through vector, U the lane mask, and R
 * (cast to int) the rounding argument of __builtin_ia32_cvtqq2ps512_mask.
 * Expands to a single parenthesized expression. */
#define _mm512_mask_cvt_roundepi64_ps(W, U, A, R) \
  ((__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
                                           (__v8sf)(__m256)(W), \
                                           (__mmask8)(U), (int)(R)))
469*67e74705SXin Li
/* Zero-masked, explicit-rounding form of _mm512_cvtepi64_ps.
 * Uses a zero pass-through vector, U as the lane mask, and R (cast to int)
 * as the rounding argument of __builtin_ia32_cvtqq2ps512_mask.
 * Expands to a single parenthesized expression. */
#define _mm512_maskz_cvt_roundepi64_ps(U, A, R) \
  ((__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
474*67e74705SXin Li
475*67e74705SXin Li
476*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvttpd_epi64(__m512d __A)477*67e74705SXin Li _mm512_cvttpd_epi64 (__m512d __A) {
478*67e74705SXin Li return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
479*67e74705SXin Li (__v8di) _mm512_setzero_si512(),
480*67e74705SXin Li (__mmask8) -1,
481*67e74705SXin Li _MM_FROUND_CUR_DIRECTION);
482*67e74705SXin Li }
483*67e74705SXin Li
484*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvttpd_epi64(__m512i __W,__mmask8 __U,__m512d __A)485*67e74705SXin Li _mm512_mask_cvttpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) {
486*67e74705SXin Li return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
487*67e74705SXin Li (__v8di) __W,
488*67e74705SXin Li (__mmask8) __U,
489*67e74705SXin Li _MM_FROUND_CUR_DIRECTION);
490*67e74705SXin Li }
491*67e74705SXin Li
492*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvttpd_epi64(__mmask8 __U,__m512d __A)493*67e74705SXin Li _mm512_maskz_cvttpd_epi64 (__mmask8 __U, __m512d __A) {
494*67e74705SXin Li return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
495*67e74705SXin Li (__v8di) _mm512_setzero_si512(),
496*67e74705SXin Li (__mmask8) __U,
497*67e74705SXin Li _MM_FROUND_CUR_DIRECTION);
498*67e74705SXin Li }
499*67e74705SXin Li
/* Form of _mm512_cvttpd_epi64 that takes an explicit R argument.
 * A is cast to __m512d and R to int, then forwarded to
 * __builtin_ia32_cvttpd2qq512_mask with all mask bits set and a zero
 * pass-through vector.  Expands to a single parenthesized expression. */
#define _mm512_cvtt_roundpd_epi64(A, R) \
  ((__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)-1, (int)(R)))
504*67e74705SXin Li
/* Masked form of _mm512_cvttpd_epi64 that takes an explicit R argument.
 * W (cast to __m512i) is the pass-through vector, U the lane mask, and R
 * (cast to int) the last argument of __builtin_ia32_cvttpd2qq512_mask.
 * Expands to a single parenthesized expression. */
#define _mm512_mask_cvtt_roundpd_epi64(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)(__m512i)(W), \
                                             (__mmask8)(U), (int)(R)))
509*67e74705SXin Li
/* Zero-masked form of _mm512_cvttpd_epi64 that takes an explicit R argument.
 * Uses a zero pass-through vector, U as the lane mask, and R (cast to int)
 * as the last argument of __builtin_ia32_cvttpd2qq512_mask.
 * Expands to a single parenthesized expression. */
#define _mm512_maskz_cvtt_roundpd_epi64(U, A, R) \
  ((__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)(U), (int)(R)))
514*67e74705SXin Li
515*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvttpd_epu64(__m512d __A)516*67e74705SXin Li _mm512_cvttpd_epu64 (__m512d __A) {
517*67e74705SXin Li return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
518*67e74705SXin Li (__v8di) _mm512_setzero_si512(),
519*67e74705SXin Li (__mmask8) -1,
520*67e74705SXin Li _MM_FROUND_CUR_DIRECTION);
521*67e74705SXin Li }
522*67e74705SXin Li
523*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvttpd_epu64(__m512i __W,__mmask8 __U,__m512d __A)524*67e74705SXin Li _mm512_mask_cvttpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) {
525*67e74705SXin Li return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
526*67e74705SXin Li (__v8di) __W,
527*67e74705SXin Li (__mmask8) __U,
528*67e74705SXin Li _MM_FROUND_CUR_DIRECTION);
529*67e74705SXin Li }
530*67e74705SXin Li
531*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvttpd_epu64(__mmask8 __U,__m512d __A)532*67e74705SXin Li _mm512_maskz_cvttpd_epu64 (__mmask8 __U, __m512d __A) {
533*67e74705SXin Li return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
534*67e74705SXin Li (__v8di) _mm512_setzero_si512(),
535*67e74705SXin Li (__mmask8) __U,
536*67e74705SXin Li _MM_FROUND_CUR_DIRECTION);
537*67e74705SXin Li }
538*67e74705SXin Li
/* Form of _mm512_cvttpd_epu64 that takes an explicit R argument.
 * A is cast to __m512d and R to int, then forwarded to
 * __builtin_ia32_cvttpd2uqq512_mask with all mask bits set and a zero
 * pass-through vector.  Expands to a single parenthesized expression. */
#define _mm512_cvtt_roundpd_epu64(A, R) \
  ((__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
                                              (__v8di)_mm512_setzero_si512(), \
                                              (__mmask8)-1, (int)(R)))
543*67e74705SXin Li
/* Masked form of _mm512_cvttpd_epu64 that takes an explicit R argument.
 * W (cast to __m512i) is the pass-through vector, U the lane mask, and R
 * (cast to int) the last argument of __builtin_ia32_cvttpd2uqq512_mask.
 * Expands to a single parenthesized expression. */
#define _mm512_mask_cvtt_roundpd_epu64(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
                                              (__v8di)(__m512i)(W), \
                                              (__mmask8)(U), (int)(R)))
548*67e74705SXin Li
/* Zero-masked form of _mm512_cvttpd_epu64 that takes an explicit R argument.
 * Uses a zero pass-through vector, U as the lane mask, and R (cast to int)
 * as the last argument of __builtin_ia32_cvttpd2uqq512_mask.
 * Expands to a single parenthesized expression. */
#define _mm512_maskz_cvtt_roundpd_epu64(U, A, R) \
  ((__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
                                              (__v8di)_mm512_setzero_si512(), \
                                              (__mmask8)(U), (int)(R)))
553*67e74705SXin Li
554*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvttps_epi64(__m256 __A)555*67e74705SXin Li _mm512_cvttps_epi64 (__m256 __A) {
556*67e74705SXin Li return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
557*67e74705SXin Li (__v8di) _mm512_setzero_si512(),
558*67e74705SXin Li (__mmask8) -1,
559*67e74705SXin Li _MM_FROUND_CUR_DIRECTION);
560*67e74705SXin Li }
561*67e74705SXin Li
562*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvttps_epi64(__m512i __W,__mmask8 __U,__m256 __A)563*67e74705SXin Li _mm512_mask_cvttps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) {
564*67e74705SXin Li return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
565*67e74705SXin Li (__v8di) __W,
566*67e74705SXin Li (__mmask8) __U,
567*67e74705SXin Li _MM_FROUND_CUR_DIRECTION);
568*67e74705SXin Li }
569*67e74705SXin Li
570*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvttps_epi64(__mmask8 __U,__m256 __A)571*67e74705SXin Li _mm512_maskz_cvttps_epi64 (__mmask8 __U, __m256 __A) {
572*67e74705SXin Li return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
573*67e74705SXin Li (__v8di) _mm512_setzero_si512(),
574*67e74705SXin Li (__mmask8) __U,
575*67e74705SXin Li _MM_FROUND_CUR_DIRECTION);
576*67e74705SXin Li }
577*67e74705SXin Li
/* cvtt_roundps_epi64 family.  All three expand to
 * __builtin_ia32_cvttps2qq512_mask with A cast to __v8sf and R forwarded as
 * the trailing int operand.  They differ in pass-through and mask:
 *   plain  - zero pass-through, all-ones mask
 *   _mask_ - W pass-through, mask U
 *   _maskz_- zero pass-through, mask U
 */
#define _mm512_cvtt_roundps_epi64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2qq512_mask( \
      (__v8sf)(__m256)(A), \
      (__v8di)_mm512_setzero_si512(), \
      (__mmask8)-1, \
      (int)(R)); })

#define _mm512_mask_cvtt_roundps_epi64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2qq512_mask( \
      (__v8sf)(__m256)(A), \
      (__v8di)(__m512i)(W), \
      (__mmask8)(U), \
      (int)(R)); })

#define _mm512_maskz_cvtt_roundps_epi64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2qq512_mask( \
      (__v8sf)(__m256)(A), \
      (__v8di)_mm512_setzero_si512(), \
      (__mmask8)(U), \
      (int)(R)); })
592*67e74705SXin Li
593*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvttps_epu64(__m256 __A)594*67e74705SXin Li _mm512_cvttps_epu64 (__m256 __A) {
595*67e74705SXin Li return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
596*67e74705SXin Li (__v8di) _mm512_setzero_si512(),
597*67e74705SXin Li (__mmask8) -1,
598*67e74705SXin Li _MM_FROUND_CUR_DIRECTION);
599*67e74705SXin Li }
600*67e74705SXin Li
601*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvttps_epu64(__m512i __W,__mmask8 __U,__m256 __A)602*67e74705SXin Li _mm512_mask_cvttps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) {
603*67e74705SXin Li return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
604*67e74705SXin Li (__v8di) __W,
605*67e74705SXin Li (__mmask8) __U,
606*67e74705SXin Li _MM_FROUND_CUR_DIRECTION);
607*67e74705SXin Li }
608*67e74705SXin Li
609*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvttps_epu64(__mmask8 __U,__m256 __A)610*67e74705SXin Li _mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A) {
611*67e74705SXin Li return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
612*67e74705SXin Li (__v8di) _mm512_setzero_si512(),
613*67e74705SXin Li (__mmask8) __U,
614*67e74705SXin Li _MM_FROUND_CUR_DIRECTION);
615*67e74705SXin Li }
616*67e74705SXin Li
/* cvtt_roundps_epu64 family.  All three expand to
 * __builtin_ia32_cvttps2uqq512_mask with A cast to __v8sf and R forwarded as
 * the trailing int operand.  They differ in pass-through and mask:
 *   plain  - zero pass-through, all-ones mask
 *   _mask_ - W pass-through, mask U
 *   _maskz_- zero pass-through, mask U
 */
#define _mm512_cvtt_roundps_epu64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2uqq512_mask( \
      (__v8sf)(__m256)(A), \
      (__v8di)_mm512_setzero_si512(), \
      (__mmask8)-1, \
      (int)(R)); })

#define _mm512_mask_cvtt_roundps_epu64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2uqq512_mask( \
      (__v8sf)(__m256)(A), \
      (__v8di)(__m512i)(W), \
      (__mmask8)(U), \
      (int)(R)); })

#define _mm512_maskz_cvtt_roundps_epu64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2uqq512_mask( \
      (__v8sf)(__m256)(A), \
      (__v8di)_mm512_setzero_si512(), \
      (__mmask8)(U), \
      (int)(R)); })
631*67e74705SXin Li
632*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_cvtepu64_pd(__m512i __A)633*67e74705SXin Li _mm512_cvtepu64_pd (__m512i __A) {
634*67e74705SXin Li return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
635*67e74705SXin Li (__v8df) _mm512_setzero_pd(),
636*67e74705SXin Li (__mmask8) -1,
637*67e74705SXin Li _MM_FROUND_CUR_DIRECTION);
638*67e74705SXin Li }
639*67e74705SXin Li
640*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_cvtepu64_pd(__m512d __W,__mmask8 __U,__m512i __A)641*67e74705SXin Li _mm512_mask_cvtepu64_pd (__m512d __W, __mmask8 __U, __m512i __A) {
642*67e74705SXin Li return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
643*67e74705SXin Li (__v8df) __W,
644*67e74705SXin Li (__mmask8) __U,
645*67e74705SXin Li _MM_FROUND_CUR_DIRECTION);
646*67e74705SXin Li }
647*67e74705SXin Li
648*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepu64_pd(__mmask8 __U,__m512i __A)649*67e74705SXin Li _mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A) {
650*67e74705SXin Li return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
651*67e74705SXin Li (__v8df) _mm512_setzero_pd(),
652*67e74705SXin Li (__mmask8) __U,
653*67e74705SXin Li _MM_FROUND_CUR_DIRECTION);
654*67e74705SXin Li }
655*67e74705SXin Li
/* cvt_roundepu64_pd family.  All three expand to
 * __builtin_ia32_cvtuqq2pd512_mask with A cast to __v8di and R forwarded as
 * the trailing int operand.  They differ in pass-through and mask:
 *   plain  - zero pass-through, all-ones mask
 *   _mask_ - W pass-through, mask U
 *   _maskz_- zero pass-through, mask U
 */
#define _mm512_cvt_roundepu64_pd(A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtuqq2pd512_mask( \
      (__v8di)(__m512i)(A), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)-1, \
      (int)(R)); })

#define _mm512_mask_cvt_roundepu64_pd(W, U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtuqq2pd512_mask( \
      (__v8di)(__m512i)(A), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      (int)(R)); })

#define _mm512_maskz_cvt_roundepu64_pd(U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtuqq2pd512_mask( \
      (__v8di)(__m512i)(A), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); })
671*67e74705SXin Li
672*67e74705SXin Li
673*67e74705SXin Li static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm512_cvtepu64_ps(__m512i __A)674*67e74705SXin Li _mm512_cvtepu64_ps (__m512i __A) {
675*67e74705SXin Li return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
676*67e74705SXin Li (__v8sf) _mm256_setzero_ps(),
677*67e74705SXin Li (__mmask8) -1,
678*67e74705SXin Li _MM_FROUND_CUR_DIRECTION);
679*67e74705SXin Li }
680*67e74705SXin Li
681*67e74705SXin Li static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm512_mask_cvtepu64_ps(__m256 __W,__mmask8 __U,__m512i __A)682*67e74705SXin Li _mm512_mask_cvtepu64_ps (__m256 __W, __mmask8 __U, __m512i __A) {
683*67e74705SXin Li return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
684*67e74705SXin Li (__v8sf) __W,
685*67e74705SXin Li (__mmask8) __U,
686*67e74705SXin Li _MM_FROUND_CUR_DIRECTION);
687*67e74705SXin Li }
688*67e74705SXin Li
689*67e74705SXin Li static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepu64_ps(__mmask8 __U,__m512i __A)690*67e74705SXin Li _mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A) {
691*67e74705SXin Li return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
692*67e74705SXin Li (__v8sf) _mm256_setzero_ps(),
693*67e74705SXin Li (__mmask8) __U,
694*67e74705SXin Li _MM_FROUND_CUR_DIRECTION);
695*67e74705SXin Li }
696*67e74705SXin Li
/* cvt_roundepu64_ps family.  All three expand to
 * __builtin_ia32_cvtuqq2ps512_mask with A cast to __v8di and R forwarded as
 * the trailing int operand; the result is a 256-bit __m256.  They differ in
 * pass-through and mask:
 *   plain  - zero pass-through, all-ones mask
 *   _mask_ - W pass-through, mask U
 *   _maskz_- zero pass-through, mask U
 */
#define _mm512_cvt_roundepu64_ps(A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtuqq2ps512_mask( \
      (__v8di)(__m512i)(A), \
      (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)-1, \
      (int)(R)); })

#define _mm512_mask_cvt_roundepu64_ps(W, U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtuqq2ps512_mask( \
      (__v8di)(__m512i)(A), \
      (__v8sf)(__m256)(W), \
      (__mmask8)(U), \
      (int)(R)); })

#define _mm512_maskz_cvt_roundepu64_ps(U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtuqq2ps512_mask( \
      (__v8di)(__m512i)(A), \
      (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)(U), \
      (int)(R)); })
711*67e74705SXin Li
/* range_pd family (current rounding direction).  All three expand to
 * __builtin_ia32_rangepd512_mask(A, B, C, pass-through, mask,
 * _MM_FROUND_CUR_DIRECTION), with C forwarded as an int operand.
 *   plain  - zero pass-through, all-ones mask
 *   _mask_ - W pass-through, mask U
 *   _maskz_- zero pass-through, mask U
 */
#define _mm512_range_pd(A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_rangepd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (int)(C), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_range_pd(W, U, A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_rangepd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (int)(C), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_range_pd(U, A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_rangepd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (int)(C), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION); })
731*67e74705SXin Li
/* range_round_pd family.  Identical to the range_pd macros except that the
 * caller-supplied R replaces _MM_FROUND_CUR_DIRECTION as the last operand of
 * __builtin_ia32_rangepd512_mask.
 *   plain  - zero pass-through, all-ones mask
 *   _mask_ - W pass-through, mask U
 *   _maskz_- zero pass-through, mask U
 */
#define _mm512_range_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rangepd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (int)(C), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)-1, \
      (int)(R)); })

#define _mm512_mask_range_round_pd(W, U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rangepd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (int)(C), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      (int)(R)); })

#define _mm512_maskz_range_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rangepd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (int)(C), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); })
749*67e74705SXin Li
/* range_ps family (current rounding direction).  All three expand to
 * __builtin_ia32_rangeps512_mask(A, B, C, pass-through, mask,
 * _MM_FROUND_CUR_DIRECTION); the mask is 16 bits wide.
 *   plain  - zero pass-through, all-ones mask
 *   _mask_ - W pass-through, mask U
 *   _maskz_- zero pass-through, mask U
 */
#define _mm512_range_ps(A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_rangeps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (int)(C), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)-1, \
      _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_range_ps(W, U, A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_rangeps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (int)(C), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_range_ps(U, A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_rangeps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (int)(C), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION); })
769*67e74705SXin Li
/* range_round_ps family.  Identical to the range_ps macros except that the
 * caller-supplied R replaces _MM_FROUND_CUR_DIRECTION as the last operand of
 * __builtin_ia32_rangeps512_mask.
 *   plain  - zero pass-through, all-ones mask
 *   _mask_ - W pass-through, mask U
 *   _maskz_- zero pass-through, mask U
 */
#define _mm512_range_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_rangeps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (int)(C), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)-1, \
      (int)(R)); })

#define _mm512_mask_range_round_ps(W, U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_rangeps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (int)(C), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), \
      (int)(R)); })

#define _mm512_maskz_range_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_rangeps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (int)(C), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), \
      (int)(R)); })
787*67e74705SXin Li
/* Scalar single-precision range family.  The *_round_* macros expand to
 * __builtin_ia32_rangess128_round_mask(A, B, pass-through, mask, C, R):
 *   plain  - zero pass-through, all-ones mask
 *   _mask_ - W pass-through, mask U
 *   _maskz_- zero pass-through, mask U
 */
#define _mm_range_round_ss(A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_rangess128_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, \
      (int)(C), \
      (int)(R)); })

#define _mm_mask_range_round_ss(W, U, A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_rangess128_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), \
      (int)(C), \
      (int)(R)); })

#define _mm_maskz_range_round_ss(U, A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_rangess128_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), \
      (int)(C), \
      (int)(R)); })

/* Non-rounding forms: the matching *_round_* macro with R fixed to
 * _MM_FROUND_CUR_DIRECTION.
 */
#define _mm_range_ss(A, B, C) \
  _mm_range_round_ss(A, B, C, _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_range_ss(W, U, A, B, C) \
  _mm_mask_range_round_ss(W, U, A, B, C, _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_range_ss(U, A, B, C) \
  _mm_maskz_range_round_ss(U, A, B, C, _MM_FROUND_CUR_DIRECTION)
814*67e74705SXin Li
/* Scalar double-precision range family.  The *_round_* macros expand to
 * __builtin_ia32_rangesd128_round_mask(A, B, pass-through, mask, C, R):
 *   plain  - zero pass-through, all-ones mask
 *   _mask_ - W pass-through, mask U
 *   _maskz_- zero pass-through, mask U
 */
#define _mm_range_round_sd(A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rangesd128_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, \
      (int)(C), \
      (int)(R)); })

#define _mm_mask_range_round_sd(W, U, A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rangesd128_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), \
      (int)(C), \
      (int)(R)); })

#define _mm_maskz_range_round_sd(U, A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rangesd128_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), \
      (int)(C), \
      (int)(R)); })

/* Non-rounding forms: the matching *_round_* macro with R fixed to
 * _MM_FROUND_CUR_DIRECTION.
 */
#define _mm_range_sd(A, B, C) \
  _mm_range_round_sd(A, B, C, _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_range_sd(W, U, A, B, C) \
  _mm_mask_range_round_sd(W, U, A, B, C, _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_range_sd(U, A, B, C) \
  _mm_maskz_range_round_sd(U, A, B, C, _MM_FROUND_CUR_DIRECTION)
841*67e74705SXin Li
/* reduce_pd family (current rounding direction).  All three expand to
 * __builtin_ia32_reducepd512_mask(A, B, pass-through, mask,
 * _MM_FROUND_CUR_DIRECTION), with B forwarded as an int operand.
 *   plain  - zero pass-through, all-ones mask
 *   _mask_ - W pass-through, mask U
 *   _maskz_- zero pass-through, mask U
 */
#define _mm512_reduce_pd(A, B) __extension__ ({ \
  (__m512d)__builtin_ia32_reducepd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(B), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_reduce_pd(W, U, A, B) __extension__ ({ \
  (__m512d)__builtin_ia32_reducepd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(B), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_reduce_pd(U, A, B) __extension__ ({ \
  (__m512d)__builtin_ia32_reducepd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(B), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION); })
859*67e74705SXin Li
/* reduce_ps family (current rounding direction).  All three expand to
 * __builtin_ia32_reduceps512_mask(A, B, pass-through, mask,
 * _MM_FROUND_CUR_DIRECTION); the mask is 16 bits wide.
 *   plain  - zero pass-through, all-ones mask
 *   _mask_ - W pass-through, mask U
 *   _maskz_- zero pass-through, mask U
 */
#define _mm512_reduce_ps(A, B) __extension__ ({ \
  (__m512)__builtin_ia32_reduceps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)(B), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)-1, \
      _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_reduce_ps(W, U, A, B) __extension__ ({ \
  (__m512)__builtin_ia32_reduceps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)(B), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_reduce_ps(U, A, B) __extension__ ({ \
  (__m512)__builtin_ia32_reduceps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)(B), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION); })
877*67e74705SXin Li
/* reduce_round_pd family.  Identical to the reduce_pd macros except that the
 * caller-supplied R replaces _MM_FROUND_CUR_DIRECTION as the last operand of
 * __builtin_ia32_reducepd512_mask.
 *   plain  - zero pass-through, all-ones mask
 *   _mask_ - W pass-through, mask U
 *   _maskz_- zero pass-through, mask U
 */
#define _mm512_reduce_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_reducepd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(B), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)-1, \
      (int)(R)); })

#define _mm512_mask_reduce_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_reducepd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(B), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      (int)(R)); })

#define _mm512_maskz_reduce_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_reducepd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(B), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); })
892*67e74705SXin Li
/* reduce_round_ps family.  Identical to the reduce_ps macros except that the
 * caller-supplied R replaces _MM_FROUND_CUR_DIRECTION as the last operand of
 * __builtin_ia32_reduceps512_mask.
 *   plain  - zero pass-through, all-ones mask
 *   _mask_ - W pass-through, mask U
 *   _maskz_- zero pass-through, mask U
 */
#define _mm512_reduce_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_reduceps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)(B), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)-1, \
      (int)(R)); })

#define _mm512_mask_reduce_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_reduceps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)(B), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), \
      (int)(R)); })

#define _mm512_maskz_reduce_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_reduceps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)(B), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), \
      (int)(R)); })
907*67e74705SXin Li
/* Scalar single-precision reduce family (current rounding direction).  All
 * three expand to __builtin_ia32_reducess_mask(A, B, pass-through, mask, C,
 * _MM_FROUND_CUR_DIRECTION).
 *   plain  - zero pass-through, all-ones mask
 *   _mask_ - W pass-through, mask U
 *   _maskz_- zero pass-through, mask U
 */
#define _mm_reduce_ss(A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, \
      (int)(C), \
      _MM_FROUND_CUR_DIRECTION); })

#define _mm_mask_reduce_ss(W, U, A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), \
      (int)(C), \
      _MM_FROUND_CUR_DIRECTION); })

#define _mm_maskz_reduce_ss(U, A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), \
      (int)(C), \
      _MM_FROUND_CUR_DIRECTION); })
926*67e74705SXin Li
/* Scalar single-precision reduce family with explicit R.  Identical to the
 * _mm_*reduce_ss macros except that R replaces _MM_FROUND_CUR_DIRECTION as
 * the last operand of __builtin_ia32_reducess_mask.
 *   plain  - zero pass-through, all-ones mask
 *   _mask_ - W pass-through, mask U
 *   _maskz_- zero pass-through, mask U
 */
#define _mm_reduce_round_ss(A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, \
      (int)(C), \
      (int)(R)); })

#define _mm_mask_reduce_round_ss(W, U, A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), \
      (int)(C), \
      (int)(R)); })

#define _mm_maskz_reduce_round_ss(U, A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), \
      (int)(C), \
      (int)(R)); })
944*67e74705SXin Li
/* Scalar double-precision reduce family (current rounding direction).  All
 * three expand to __builtin_ia32_reducesd_mask(A, B, pass-through, mask, C,
 * _MM_FROUND_CUR_DIRECTION).
 *   plain  - zero pass-through, all-ones mask
 *   _mask_ - W pass-through, mask U
 *   _maskz_- zero pass-through, mask U
 */
#define _mm_reduce_sd(A, B, C) __extension__ ({ \
  (__m128d)__builtin_ia32_reducesd_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, \
      (int)(C), \
      _MM_FROUND_CUR_DIRECTION); })

#define _mm_mask_reduce_sd(W, U, A, B, C) __extension__ ({ \
  (__m128d)__builtin_ia32_reducesd_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), \
      (int)(C), \
      _MM_FROUND_CUR_DIRECTION); })

#define _mm_maskz_reduce_sd(U, A, B, C) __extension__ ({ \
  (__m128d)__builtin_ia32_reducesd_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), \
      (int)(C), \
      _MM_FROUND_CUR_DIRECTION); })
964*67e74705SXin Li
/* Scalar double-precision reduce family with explicit R.  Identical to the
 * _mm_*reduce_sd macros except that R replaces _MM_FROUND_CUR_DIRECTION as
 * the last operand of __builtin_ia32_reducesd_mask.
 *   plain  - zero pass-through, all-ones mask
 *   _mask_ - W pass-through, mask U
 *   _maskz_- zero pass-through, mask U
 */
#define _mm_reduce_round_sd(A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_reducesd_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, \
      (int)(C), \
      (int)(R)); })

#define _mm_mask_reduce_round_sd(W, U, A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_reducesd_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), \
      (int)(C), \
      (int)(R)); })

#define _mm_maskz_reduce_round_sd(U, A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_reducesd_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), \
      (int)(C), \
      (int)(R)); })
982*67e74705SXin Li
983*67e74705SXin Li static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_movepi32_mask(__m512i __A)984*67e74705SXin Li _mm512_movepi32_mask (__m512i __A)
985*67e74705SXin Li {
986*67e74705SXin Li return (__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A);
987*67e74705SXin Li }
988*67e74705SXin Li
989*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_movm_epi32(__mmask16 __A)990*67e74705SXin Li _mm512_movm_epi32 (__mmask16 __A)
991*67e74705SXin Li {
992*67e74705SXin Li return (__m512i) __builtin_ia32_cvtmask2d512 (__A);
993*67e74705SXin Li }
994*67e74705SXin Li
995*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_movm_epi64(__mmask8 __A)996*67e74705SXin Li _mm512_movm_epi64 (__mmask8 __A)
997*67e74705SXin Li {
998*67e74705SXin Li return (__m512i) __builtin_ia32_cvtmask2q512 (__A);
999*67e74705SXin Li }
1000*67e74705SXin Li
1001*67e74705SXin Li static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_movepi64_mask(__m512i __A)1002*67e74705SXin Li _mm512_movepi64_mask (__m512i __A)
1003*67e74705SXin Li {
1004*67e74705SXin Li return (__mmask8) __builtin_ia32_cvtq2mask512 ((__v8di) __A);
1005*67e74705SXin Li }
1006*67e74705SXin Li
1007*67e74705SXin Li
1008*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_broadcast_f32x2(__m128 __A)1009*67e74705SXin Li _mm512_broadcast_f32x2 (__m128 __A)
1010*67e74705SXin Li {
1011*67e74705SXin Li return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
1012*67e74705SXin Li (__v16sf)_mm512_undefined_ps(),
1013*67e74705SXin Li (__mmask16) -1);
1014*67e74705SXin Li }
1015*67e74705SXin Li
1016*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_f32x2(__m512 __O,__mmask16 __M,__m128 __A)1017*67e74705SXin Li _mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A)
1018*67e74705SXin Li {
1019*67e74705SXin Li return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
1020*67e74705SXin Li (__v16sf)
1021*67e74705SXin Li __O, __M);
1022*67e74705SXin Li }
1023*67e74705SXin Li
1024*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_broadcast_f32x2(__mmask16 __M,__m128 __A)1025*67e74705SXin Li _mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A)
1026*67e74705SXin Li {
1027*67e74705SXin Li return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
1028*67e74705SXin Li (__v16sf)_mm512_setzero_ps (),
1029*67e74705SXin Li __M);
1030*67e74705SXin Li }
1031*67e74705SXin Li
1032*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_broadcast_f32x8(__m256 __A)1033*67e74705SXin Li _mm512_broadcast_f32x8 (__m256 __A)
1034*67e74705SXin Li {
1035*67e74705SXin Li return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
1036*67e74705SXin Li _mm512_undefined_ps(),
1037*67e74705SXin Li (__mmask16) -1);
1038*67e74705SXin Li }
1039*67e74705SXin Li
1040*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_f32x8(__m512 __O,__mmask16 __M,__m256 __A)1041*67e74705SXin Li _mm512_mask_broadcast_f32x8 (__m512 __O, __mmask16 __M, __m256 __A)
1042*67e74705SXin Li {
1043*67e74705SXin Li return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
1044*67e74705SXin Li (__v16sf)__O,
1045*67e74705SXin Li __M);
1046*67e74705SXin Li }
1047*67e74705SXin Li
1048*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_broadcast_f32x8(__mmask16 __M,__m256 __A)1049*67e74705SXin Li _mm512_maskz_broadcast_f32x8 (__mmask16 __M, __m256 __A)
1050*67e74705SXin Li {
1051*67e74705SXin Li return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
1052*67e74705SXin Li (__v16sf)_mm512_setzero_ps (),
1053*67e74705SXin Li __M);
1054*67e74705SXin Li }
1055*67e74705SXin Li
1056*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_broadcast_f64x2(__m128d __A)1057*67e74705SXin Li _mm512_broadcast_f64x2 (__m128d __A)
1058*67e74705SXin Li {
1059*67e74705SXin Li return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df) __A,
1060*67e74705SXin Li (__v8df)_mm512_undefined_pd(),
1061*67e74705SXin Li (__mmask8) -1);
1062*67e74705SXin Li }
1063*67e74705SXin Li
1064*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_f64x2(__m512d __O,__mmask8 __M,__m128d __A)1065*67e74705SXin Li _mm512_mask_broadcast_f64x2 (__m512d __O, __mmask8 __M, __m128d __A)
1066*67e74705SXin Li {
1067*67e74705SXin Li return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df) __A,
1068*67e74705SXin Li (__v8df)
1069*67e74705SXin Li __O, __M);
1070*67e74705SXin Li }
1071*67e74705SXin Li
1072*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_broadcast_f64x2(__mmask8 __M,__m128d __A)1073*67e74705SXin Li _mm512_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
1074*67e74705SXin Li {
1075*67e74705SXin Li return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df) __A,
1076*67e74705SXin Li (__v8df)_mm512_setzero_ps (),
1077*67e74705SXin Li __M);
1078*67e74705SXin Li }
1079*67e74705SXin Li
1080*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_broadcast_i32x2(__m128i __A)1081*67e74705SXin Li _mm512_broadcast_i32x2 (__m128i __A)
1082*67e74705SXin Li {
1083*67e74705SXin Li return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A,
1084*67e74705SXin Li (__v16si)_mm512_setzero_si512(),
1085*67e74705SXin Li (__mmask16) -1);
1086*67e74705SXin Li }
1087*67e74705SXin Li
1088*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_i32x2(__m512i __O,__mmask16 __M,__m128i __A)1089*67e74705SXin Li _mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A)
1090*67e74705SXin Li {
1091*67e74705SXin Li return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A,
1092*67e74705SXin Li (__v16si)
1093*67e74705SXin Li __O, __M);
1094*67e74705SXin Li }
1095*67e74705SXin Li
1096*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_broadcast_i32x2(__mmask16 __M,__m128i __A)1097*67e74705SXin Li _mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A)
1098*67e74705SXin Li {
1099*67e74705SXin Li return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A,
1100*67e74705SXin Li (__v16si)_mm512_setzero_si512 (),
1101*67e74705SXin Li __M);
1102*67e74705SXin Li }
1103*67e74705SXin Li
1104*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_broadcast_i32x8(__m256i __A)1105*67e74705SXin Li _mm512_broadcast_i32x8 (__m256i __A)
1106*67e74705SXin Li {
1107*67e74705SXin Li return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si) __A,
1108*67e74705SXin Li (__v16si)_mm512_setzero_si512(),
1109*67e74705SXin Li (__mmask16) -1);
1110*67e74705SXin Li }
1111*67e74705SXin Li
1112*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_i32x8(__m512i __O,__mmask16 __M,__m256i __A)1113*67e74705SXin Li _mm512_mask_broadcast_i32x8 (__m512i __O, __mmask16 __M, __m256i __A)
1114*67e74705SXin Li {
1115*67e74705SXin Li return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si) __A,
1116*67e74705SXin Li (__v16si)__O,
1117*67e74705SXin Li __M);
1118*67e74705SXin Li }
1119*67e74705SXin Li
1120*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_broadcast_i32x8(__mmask16 __M,__m256i __A)1121*67e74705SXin Li _mm512_maskz_broadcast_i32x8 (__mmask16 __M, __m256i __A)
1122*67e74705SXin Li {
1123*67e74705SXin Li return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si) __A,
1124*67e74705SXin Li (__v16si)
1125*67e74705SXin Li _mm512_setzero_si512 (),
1126*67e74705SXin Li __M);
1127*67e74705SXin Li }
1128*67e74705SXin Li
1129*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_broadcast_i64x2(__m128i __A)1130*67e74705SXin Li _mm512_broadcast_i64x2 (__m128i __A)
1131*67e74705SXin Li {
1132*67e74705SXin Li return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di) __A,
1133*67e74705SXin Li (__v8di)_mm512_setzero_si512(),
1134*67e74705SXin Li (__mmask8) -1);
1135*67e74705SXin Li }
1136*67e74705SXin Li
1137*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_i64x2(__m512i __O,__mmask8 __M,__m128i __A)1138*67e74705SXin Li _mm512_mask_broadcast_i64x2 (__m512i __O, __mmask8 __M, __m128i __A)
1139*67e74705SXin Li {
1140*67e74705SXin Li return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di) __A,
1141*67e74705SXin Li (__v8di)
1142*67e74705SXin Li __O, __M);
1143*67e74705SXin Li }
1144*67e74705SXin Li
1145*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_broadcast_i64x2(__mmask8 __M,__m128i __A)1146*67e74705SXin Li _mm512_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
1147*67e74705SXin Li {
1148*67e74705SXin Li return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di) __A,
1149*67e74705SXin Li (__v8di)_mm512_setzero_si512 (),
1150*67e74705SXin Li __M);
1151*67e74705SXin Li }
1152*67e74705SXin Li
1153*67e74705SXin Li #define _mm512_extractf32x8_ps(A, imm) __extension__ ({ \
1154*67e74705SXin Li (__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
1155*67e74705SXin Li (__v8sf)_mm256_setzero_ps(), \
1156*67e74705SXin Li (__mmask8)-1); })
1157*67e74705SXin Li
/* Write-masked form: expands to __builtin_ia32_extractf32x8_mask on (A, imm)
 * with W as the pass-through vector and U as the mask; yields __m256. */
#define _mm512_mask_extractf32x8_ps(W, U, A, imm) __extension__ ({ \
  (__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
                                           (__v8sf)(__m256)(W), \
                                           (__mmask8)(U)); })

/* Zero-masked form: expands to __builtin_ia32_extractf32x8_mask on (A, imm)
 * with an all-zero pass-through vector and U as the mask; yields __m256. */
#define _mm512_maskz_extractf32x8_ps(U, A, imm) __extension__ ({ \
  (__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)(U)); })

/* Unmasked form: expands to __builtin_ia32_extractf64x2_512_mask on (A, imm)
 * with an all-zero __m128d pass-through and an all-ones mask; yields __m128d.
 * A is converted to __m512d and imm to int before being forwarded. */
#define _mm512_extractf64x2_pd(A, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
                                                (int)(imm), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1); })

/* Write-masked form: expands to __builtin_ia32_extractf64x2_512_mask on
 * (A, imm) with W as the pass-through vector and U as the mask; yields
 * __m128d. */
#define _mm512_mask_extractf64x2_pd(W, U, A, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
                                                (int)(imm), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U)); })

/* Zero-masked form: expands to __builtin_ia32_extractf64x2_512_mask on
 * (A, imm) with an all-zero pass-through vector and U as the mask; yields
 * __m128d. */
#define _mm512_maskz_extractf64x2_pd(U, A, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
                                                (int)(imm), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U)); })

/* Unmasked form: expands to __builtin_ia32_extracti32x8_mask on (A, imm) with
 * an all-zero __m256i pass-through and an all-ones mask; yields __m256i.
 * A is converted to __m512i and imm to int before being forwarded. */
#define _mm512_extracti32x8_epi32(A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)-1); })

/* Write-masked form: expands to __builtin_ia32_extracti32x8_mask on (A, imm)
 * with W as the pass-through vector and U as the mask; yields __m256i. */
#define _mm512_mask_extracti32x8_epi32(W, U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
                                            (__v8si)(__m256i)(W), \
                                            (__mmask8)(U)); })

/* Zero-masked form: expands to __builtin_ia32_extracti32x8_mask on (A, imm)
 * with an all-zero pass-through vector and U as the mask; yields __m256i. */
#define _mm512_maskz_extracti32x8_epi32(U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)(U)); })

/* Unmasked form: expands to __builtin_ia32_extracti64x2_512_mask on (A, imm)
 * with an all-zero __m128i pass-through and an all-ones mask; yields __m128i.
 * The zero vector is built with the SSE2 intrinsic _mm_setzero_si128(). */
#define _mm512_extracti64x2_epi64(A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
                                                (int)(imm), \
                                                (__v2di)_mm_setzero_si128(), \
                                                (__mmask8)-1); })

/* Write-masked form: expands to __builtin_ia32_extracti64x2_512_mask on
 * (A, imm) with W as the pass-through vector and U as the mask; yields
 * __m128i. */
#define _mm512_mask_extracti64x2_epi64(W, U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
                                                (int)(imm), \
                                                (__v2di)(__m128i)(W), \
                                                (__mmask8)(U)); })

/* Zero-masked form: expands to __builtin_ia32_extracti64x2_512_mask on
 * (A, imm) with an all-zero pass-through vector and U as the mask; yields
 * __m128i.  The zero vector is built with the SSE2 intrinsic
 * _mm_setzero_si128(). */
#define _mm512_maskz_extracti64x2_epi64(U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
                                                (int)(imm), \
                                                (__v2di)_mm_setzero_si128(), \
                                                (__mmask8)(U)); })

/* Unmasked form: expands to __builtin_ia32_insertf32x8_mask on (A, B, imm)
 * with an all-zero __m512 pass-through and an all-ones 16-bit mask; yields
 * __m512.  A is converted to __m512, B to __m256 and imm to int. */
#define _mm512_insertf32x8(A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_insertf32x8_mask((__v16sf)(__m512)(A), \
                                          (__v8sf)(__m256)(B), (int)(imm), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)-1); })

/* Write-masked form: expands to __builtin_ia32_insertf32x8_mask on
 * (A, B, imm) with W as the pass-through vector and U as the 16-bit mask;
 * yields __m512. */
#define _mm512_mask_insertf32x8(W, U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_insertf32x8_mask((__v16sf)(__m512)(A), \
                                          (__v8sf)(__m256)(B), (int)(imm), \
                                          (__v16sf)(__m512)(W), \
                                          (__mmask16)(U)); })

/* Zero-masked form: expands to __builtin_ia32_insertf32x8_mask on
 * (A, B, imm) with an all-zero pass-through vector and U as the 16-bit mask;
 * yields __m512. */
#define _mm512_maskz_insertf32x8(U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_insertf32x8_mask((__v16sf)(__m512)(A), \
                                          (__v8sf)(__m256)(B), (int)(imm), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U)); })

/* Unmasked form: expands to __builtin_ia32_insertf64x2_512_mask on
 * (A, B, imm) with an all-zero __m512d pass-through and an all-ones 8-bit
 * mask; yields __m512d.  A is converted to __m512d, B to __m128d and imm to
 * int. */
#define _mm512_insertf64x2(A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_insertf64x2_512_mask((__v8df)(__m512d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(imm), \
                                               (__v8df)_mm512_setzero_pd(), \
                                               (__mmask8)-1); })

/* Write-masked form: expands to __builtin_ia32_insertf64x2_512_mask on
 * (A, B, imm) with W as the pass-through vector and U as the 8-bit mask;
 * yields __m512d. */
#define _mm512_mask_insertf64x2(W, U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_insertf64x2_512_mask((__v8df)(__m512d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(imm), \
                                               (__v8df)(__m512d)(W), \
                                               (__mmask8)(U)); })

/* Zero-masked form: expands to __builtin_ia32_insertf64x2_512_mask on
 * (A, B, imm) with an all-zero pass-through vector and U as the 8-bit mask;
 * yields __m512d. */
#define _mm512_maskz_insertf64x2(U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_insertf64x2_512_mask((__v8df)(__m512d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(imm), \
                                               (__v8df)_mm512_setzero_pd(), \
                                               (__mmask8)(U)); })

/* Unmasked form: expands to __builtin_ia32_inserti32x8_mask on (A, B, imm)
 * with an all-zero __m512i pass-through and an all-ones 16-bit mask; yields
 * __m512i.  A is converted to __m512i, B to __m256i and imm to int. */
#define _mm512_inserti32x8(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_inserti32x8_mask((__v16si)(__m512i)(A), \
                                           (__v8si)(__m256i)(B), (int)(imm), \
                                           (__v16si)_mm512_setzero_si512(), \
                                           (__mmask16)-1); })

/* Write-masked form: expands to __builtin_ia32_inserti32x8_mask on
 * (A, B, imm) with W as the pass-through vector and U as the 16-bit mask;
 * yields __m512i. */
#define _mm512_mask_inserti32x8(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_inserti32x8_mask((__v16si)(__m512i)(A), \
                                           (__v8si)(__m256i)(B), (int)(imm), \
                                           (__v16si)(__m512i)(W), \
                                           (__mmask16)(U)); })

/* Zero-masked form: expands to __builtin_ia32_inserti32x8_mask on
 * (A, B, imm) with an all-zero pass-through vector and U as the 16-bit mask;
 * yields __m512i. */
#define _mm512_maskz_inserti32x8(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_inserti32x8_mask((__v16si)(__m512i)(A), \
                                           (__v8si)(__m256i)(B), (int)(imm), \
                                           (__v16si)_mm512_setzero_si512(), \
                                           (__mmask16)(U)); })

/* Unmasked form: expands to __builtin_ia32_inserti64x2_512_mask on
 * (A, B, imm) with an all-zero __m512i pass-through and an all-ones 8-bit
 * mask; yields __m512i.  A is converted to __m512i, B to __m128i and imm to
 * int. */
#define _mm512_inserti64x2(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_inserti64x2_512_mask((__v8di)(__m512i)(A), \
                                               (__v2di)(__m128i)(B), \
                                               (int)(imm), \
                                               (__v8di)_mm512_setzero_si512(), \
                                               (__mmask8)-1); })

/* Write-masked form: expands to __builtin_ia32_inserti64x2_512_mask on
 * (A, B, imm) with W as the pass-through vector and U as the 8-bit mask;
 * yields __m512i. */
#define _mm512_mask_inserti64x2(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_inserti64x2_512_mask((__v8di)(__m512i)(A), \
                                               (__v2di)(__m128i)(B), \
                                               (int)(imm), \
                                               (__v8di)(__m512i)(W), \
                                               (__mmask8)(U)); })

/* Zero-masked form: expands to __builtin_ia32_inserti64x2_512_mask on
 * (A, B, imm) with an all-zero pass-through vector and U as the 8-bit mask;
 * yields __m512i. */
#define _mm512_maskz_inserti64x2(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_inserti64x2_512_mask((__v8di)(__m512i)(A), \
                                               (__v2di)(__m128i)(B), \
                                               (int)(imm), \
                                               (__v8di)_mm512_setzero_si512(), \
                                               (__mmask8)(U)); })

/* Masked form: expands to __builtin_ia32_fpclassps512_mask on (A, imm) with U
 * as the input mask; yields a __mmask16.  A is converted to __m512 and imm
 * to int before being forwarded. */
#define _mm512_mask_fpclass_ps_mask(U, A, imm) __extension__ ({ \
  (__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \
                                              (int)(imm), (__mmask16)(U)); })

/* Unmasked form: expands to __builtin_ia32_fpclassps512_mask on (A, imm) with
 * an all-ones input mask; yields a __mmask16. */
#define _mm512_fpclass_ps_mask(A, imm) __extension__ ({ \
  (__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \
                                              (int)(imm), (__mmask16)-1); })

/* Masked form: expands to __builtin_ia32_fpclasspd512_mask on (A, imm) with U
 * as the input mask; yields a __mmask8.  A is converted to __m512d and imm
 * to int before being forwarded. */
#define _mm512_mask_fpclass_pd_mask(U, A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \
                                             (__mmask8)(U)); })

/* Unmasked form: expands to __builtin_ia32_fpclasspd512_mask on (A, imm) with
 * an all-ones input mask; yields a __mmask8. */
#define _mm512_fpclass_pd_mask(A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \
                                             (__mmask8)-1); })

/* Unmasked form: expands to __builtin_ia32_fpclasssd_mask on (A, imm) with an
 * all-ones input mask; yields a __mmask8.  A is converted to __m128d and imm
 * to int before being forwarded. */
#define _mm_fpclass_sd_mask(A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \
                                          (__mmask8)-1); })

/* Masked form: expands to __builtin_ia32_fpclasssd_mask on (A, imm) with U as
 * the input mask; yields a __mmask8. */
#define _mm_mask_fpclass_sd_mask(U, A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \
                                          (__mmask8)(U)); })

/* Unmasked form: expands to __builtin_ia32_fpclassss_mask on (A, imm) with an
 * all-ones input mask; yields a __mmask8.  A is converted to __m128 and imm
 * to int before being forwarded. */
#define _mm_fpclass_ss_mask(A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \
                                          (__mmask8)-1); })

/* Masked form: expands to __builtin_ia32_fpclassss_mask on (A, imm) with U as
 * the input mask; yields a __mmask8. */
#define _mm_mask_fpclass_ss_mask(U, A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \
                                          (__mmask8)(U)); })

1329*67e74705SXin Li #undef __DEFAULT_FN_ATTRS
1330*67e74705SXin Li
1331*67e74705SXin Li #endif
1332