/* Source: /aosp_15_r20/external/clang/lib/Headers/avx512fintrin.h
 * (revision 67e74705e28f6214e480b399dd47ea732279e315)
 */
/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
23*67e74705SXin Li #ifndef __IMMINTRIN_H
24*67e74705SXin Li #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
25*67e74705SXin Li #endif
26*67e74705SXin Li 
27*67e74705SXin Li #ifndef __AVX512FINTRIN_H
28*67e74705SXin Li #define __AVX512FINTRIN_H
29*67e74705SXin Li 
/* Internal 64-byte vector views, one per element type (signed elements). */
typedef char __v64qi __attribute__((__vector_size__(64)));
typedef short __v32hi __attribute__((__vector_size__(64)));
typedef double __v8df __attribute__((__vector_size__(64)));
typedef float __v16sf __attribute__((__vector_size__(64)));
typedef long long __v8di __attribute__((__vector_size__(64)));
typedef int __v16si __attribute__((__vector_size__(64)));

/* Unsigned types */
typedef unsigned char __v64qu __attribute__((__vector_size__(64)));
typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
typedef unsigned int __v16su __attribute__((__vector_size__(64)));

/* Public 512-bit vector types: float, double and integer (long long lanes). */
typedef float __m512 __attribute__((__vector_size__(64)));
typedef double __m512d __attribute__((__vector_size__(64)));
typedef long long __m512i __attribute__((__vector_size__(64)));

/* Lane masks: one bit per lane for 8-lane and 16-lane operations. */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
49*67e74705SXin Li 
/* Rounding mode macros.  */
#define _MM_FROUND_TO_NEAREST_INT   0x00
#define _MM_FROUND_TO_NEG_INF       0x01
#define _MM_FROUND_TO_POS_INF       0x02
#define _MM_FROUND_TO_ZERO          0x03
#define _MM_FROUND_CUR_DIRECTION    0x04
56*67e74705SXin Li 
/* 8-bit permute selectors. Each of the four letters names a 2-bit field
 * (A = 0, B = 1, C = 2, D = 3); the leftmost letter occupies bits 7:6 and the
 * rightmost letter bits 1:0. Example: _MM_PERM_DCBA == 0xE4 (11 10 01 00b).
 */
typedef enum
{
  _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
  _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
  _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
  _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
  _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
  _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
  _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
  _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
  _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
  _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
  _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
  _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
  _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
  _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
  _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
  _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
  _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
  _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
  _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
  _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
  _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
  _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
  _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
  _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
  _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
  _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
  _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
  _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
  _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
  _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
  _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
  _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
  _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
  _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
  _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
  _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
  _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
  _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
  _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
  _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
  _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
  _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
  _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
  _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
  _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
  _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
  _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
  _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
  _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
  _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
  _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
  _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
  _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
  _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
  _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
  _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
  _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
  _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
  _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
  _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
  _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
  _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
  _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
  _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
  _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
  _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
  _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
  _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
  _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
  _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
  _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
  _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
  _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
  _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
  _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
  _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
  _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
  _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
  _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
  _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
  _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
  _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
  _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
  _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
  _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
  _MM_PERM_DDDD = 0xFF
} _MM_PERM_ENUM;
146*67e74705SXin Li 
/* Mantissa normalization interval selectors; values are the implicit
 * enumerator ordinals 0..3. */
typedef enum
{
  _MM_MANT_NORM_1_2,      /* interval [1, 2)      */
  _MM_MANT_NORM_p5_2,     /* interval [0.5, 2)    */
  _MM_MANT_NORM_p5_1,     /* interval [0.5, 1)    */
  _MM_MANT_NORM_p75_1p5   /* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;
154*67e74705SXin Li 
/* Mantissa sign-control selectors; values are the implicit enumerator
 * ordinals 0..2. */
typedef enum
{
  _MM_MANT_SIGN_src,    /* sign = sign(SRC)            */
  _MM_MANT_SIGN_zero,   /* sign = 0                    */
  _MM_MANT_SIGN_nan     /* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;
161*67e74705SXin Li 
/* Define the default attributes for the functions in this file:
 * always inlined, no debug info, and compiled for the "avx512f" target. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
164*67e74705SXin Li 
165*67e74705SXin Li /* Create vectors with repeated elements */
166*67e74705SXin Li 
167*67e74705SXin Li static  __inline __m512i __DEFAULT_FN_ATTRS
_mm512_setzero_si512(void)168*67e74705SXin Li _mm512_setzero_si512(void)
169*67e74705SXin Li {
170*67e74705SXin Li   return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
171*67e74705SXin Li }
172*67e74705SXin Li 
173*67e74705SXin Li #define _mm512_setzero_epi32 _mm512_setzero_si512
174*67e74705SXin Li 
175*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_undefined_pd(void)176*67e74705SXin Li _mm512_undefined_pd(void)
177*67e74705SXin Li {
178*67e74705SXin Li   return (__m512d)__builtin_ia32_undef512();
179*67e74705SXin Li }
180*67e74705SXin Li 
181*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_undefined(void)182*67e74705SXin Li _mm512_undefined(void)
183*67e74705SXin Li {
184*67e74705SXin Li   return (__m512)__builtin_ia32_undef512();
185*67e74705SXin Li }
186*67e74705SXin Li 
187*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_undefined_ps(void)188*67e74705SXin Li _mm512_undefined_ps(void)
189*67e74705SXin Li {
190*67e74705SXin Li   return (__m512)__builtin_ia32_undef512();
191*67e74705SXin Li }
192*67e74705SXin Li 
193*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_undefined_epi32(void)194*67e74705SXin Li _mm512_undefined_epi32(void)
195*67e74705SXin Li {
196*67e74705SXin Li   return (__m512i)__builtin_ia32_undef512();
197*67e74705SXin Li }
198*67e74705SXin Li 
199*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_broadcastd_epi32(__m128i __A)200*67e74705SXin Li _mm512_broadcastd_epi32 (__m128i __A)
201*67e74705SXin Li {
202*67e74705SXin Li   return (__m512i)__builtin_shufflevector((__v4si) __A,
203*67e74705SXin Li                                           (__v4si)_mm_undefined_si128(),
204*67e74705SXin Li                                           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
205*67e74705SXin Li }
206*67e74705SXin Li 
207*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_broadcastd_epi32(__m512i __O,__mmask16 __M,__m128i __A)208*67e74705SXin Li _mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
209*67e74705SXin Li {
210*67e74705SXin Li   return (__m512i)__builtin_ia32_selectd_512(__M,
211*67e74705SXin Li                                              (__v16si) _mm512_broadcastd_epi32(__A),
212*67e74705SXin Li                                              (__v16si) __O);
213*67e74705SXin Li }
214*67e74705SXin Li 
215*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_broadcastd_epi32(__mmask16 __M,__m128i __A)216*67e74705SXin Li _mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
217*67e74705SXin Li {
218*67e74705SXin Li   return (__m512i)__builtin_ia32_selectd_512(__M,
219*67e74705SXin Li                                              (__v16si) _mm512_broadcastd_epi32(__A),
220*67e74705SXin Li                                              (__v16si) _mm512_setzero_si512());
221*67e74705SXin Li }
222*67e74705SXin Li 
223*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_broadcastq_epi64(__m128i __A)224*67e74705SXin Li _mm512_broadcastq_epi64 (__m128i __A)
225*67e74705SXin Li {
226*67e74705SXin Li   return (__m512i)__builtin_shufflevector((__v2di) __A,
227*67e74705SXin Li                                           (__v2di) _mm_undefined_si128(),
228*67e74705SXin Li                                           0, 0, 0, 0, 0, 0, 0, 0);
229*67e74705SXin Li }
230*67e74705SXin Li 
231*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_broadcastq_epi64(__m512i __O,__mmask8 __M,__m128i __A)232*67e74705SXin Li _mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
233*67e74705SXin Li {
234*67e74705SXin Li   return (__m512i)__builtin_ia32_selectq_512(__M,
235*67e74705SXin Li                                              (__v8di) _mm512_broadcastq_epi64(__A),
236*67e74705SXin Li                                              (__v8di) __O);
237*67e74705SXin Li 
238*67e74705SXin Li }
239*67e74705SXin Li 
240*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_broadcastq_epi64(__mmask8 __M,__m128i __A)241*67e74705SXin Li _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
242*67e74705SXin Li {
243*67e74705SXin Li   return (__m512i)__builtin_ia32_selectq_512(__M,
244*67e74705SXin Li                                              (__v8di) _mm512_broadcastq_epi64(__A),
245*67e74705SXin Li                                              (__v8di) _mm512_setzero_si512());
246*67e74705SXin Li }
247*67e74705SXin Li 
248*67e74705SXin Li static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_set1_epi32(__mmask16 __M,int __A)249*67e74705SXin Li _mm512_maskz_set1_epi32(__mmask16 __M, int __A)
250*67e74705SXin Li {
251*67e74705SXin Li   return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
252*67e74705SXin Li                  (__v16si)
253*67e74705SXin Li                  _mm512_setzero_si512 (),
254*67e74705SXin Li                  __M);
255*67e74705SXin Li }
256*67e74705SXin Li 
257*67e74705SXin Li static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_set1_epi64(__mmask8 __M,long long __A)258*67e74705SXin Li _mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
259*67e74705SXin Li {
260*67e74705SXin Li #ifdef __x86_64__
261*67e74705SXin Li   return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
262*67e74705SXin Li                  (__v8di)
263*67e74705SXin Li                  _mm512_setzero_si512 (),
264*67e74705SXin Li                  __M);
265*67e74705SXin Li #else
266*67e74705SXin Li   return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A,
267*67e74705SXin Li                  (__v8di)
268*67e74705SXin Li                  _mm512_setzero_si512 (),
269*67e74705SXin Li                  __M);
270*67e74705SXin Li #endif
271*67e74705SXin Li }
272*67e74705SXin Li 
273*67e74705SXin Li static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_setzero_ps(void)274*67e74705SXin Li _mm512_setzero_ps(void)
275*67e74705SXin Li {
276*67e74705SXin Li   return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
277*67e74705SXin Li                    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
278*67e74705SXin Li }
279*67e74705SXin Li 
280*67e74705SXin Li #define _mm512_setzero _mm512_setzero_ps
281*67e74705SXin Li 
282*67e74705SXin Li static  __inline __m512d __DEFAULT_FN_ATTRS
_mm512_setzero_pd(void)283*67e74705SXin Li _mm512_setzero_pd(void)
284*67e74705SXin Li {
285*67e74705SXin Li   return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
286*67e74705SXin Li }
287*67e74705SXin Li 
288*67e74705SXin Li static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_set1_ps(float __w)289*67e74705SXin Li _mm512_set1_ps(float __w)
290*67e74705SXin Li {
291*67e74705SXin Li   return (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
292*67e74705SXin Li                    __w, __w, __w, __w, __w, __w, __w, __w  };
293*67e74705SXin Li }
294*67e74705SXin Li 
295*67e74705SXin Li static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_set1_pd(double __w)296*67e74705SXin Li _mm512_set1_pd(double __w)
297*67e74705SXin Li {
298*67e74705SXin Li   return (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
299*67e74705SXin Li }
300*67e74705SXin Li 
301*67e74705SXin Li static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set1_epi8(char __w)302*67e74705SXin Li _mm512_set1_epi8(char __w)
303*67e74705SXin Li {
304*67e74705SXin Li   return (__m512i)(__v64qi){ __w, __w, __w, __w, __w, __w, __w, __w,
305*67e74705SXin Li                              __w, __w, __w, __w, __w, __w, __w, __w,
306*67e74705SXin Li                              __w, __w, __w, __w, __w, __w, __w, __w,
307*67e74705SXin Li                              __w, __w, __w, __w, __w, __w, __w, __w,
308*67e74705SXin Li                              __w, __w, __w, __w, __w, __w, __w, __w,
309*67e74705SXin Li                              __w, __w, __w, __w, __w, __w, __w, __w,
310*67e74705SXin Li                              __w, __w, __w, __w, __w, __w, __w, __w,
311*67e74705SXin Li                              __w, __w, __w, __w, __w, __w, __w, __w  };
312*67e74705SXin Li }
313*67e74705SXin Li 
314*67e74705SXin Li static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set1_epi16(short __w)315*67e74705SXin Li _mm512_set1_epi16(short __w)
316*67e74705SXin Li {
317*67e74705SXin Li   return (__m512i)(__v32hi){ __w, __w, __w, __w, __w, __w, __w, __w,
318*67e74705SXin Li                              __w, __w, __w, __w, __w, __w, __w, __w,
319*67e74705SXin Li                              __w, __w, __w, __w, __w, __w, __w, __w,
320*67e74705SXin Li                              __w, __w, __w, __w, __w, __w, __w, __w };
321*67e74705SXin Li }
322*67e74705SXin Li 
323*67e74705SXin Li static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set1_epi32(int __s)324*67e74705SXin Li _mm512_set1_epi32(int __s)
325*67e74705SXin Li {
326*67e74705SXin Li   return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s,
327*67e74705SXin Li                              __s, __s, __s, __s, __s, __s, __s, __s };
328*67e74705SXin Li }
329*67e74705SXin Li 
330*67e74705SXin Li static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set1_epi64(long long __d)331*67e74705SXin Li _mm512_set1_epi64(long long __d)
332*67e74705SXin Li {
333*67e74705SXin Li   return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
334*67e74705SXin Li }
335*67e74705SXin Li 
336*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_broadcastss_ps(__m128 __A)337*67e74705SXin Li _mm512_broadcastss_ps(__m128 __A)
338*67e74705SXin Li {
339*67e74705SXin Li   return (__m512)__builtin_shufflevector((__v4sf) __A,
340*67e74705SXin Li                                          (__v4sf)_mm_undefined_ps(),
341*67e74705SXin Li                                          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
342*67e74705SXin Li }
343*67e74705SXin Li 
344*67e74705SXin Li static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set4_epi32(int __A,int __B,int __C,int __D)345*67e74705SXin Li _mm512_set4_epi32 (int __A, int __B, int __C, int __D)
346*67e74705SXin Li {
347*67e74705SXin Li   return  (__m512i)(__v16si)
348*67e74705SXin Li    { __D, __C, __B, __A, __D, __C, __B, __A,
349*67e74705SXin Li      __D, __C, __B, __A, __D, __C, __B, __A };
350*67e74705SXin Li }
351*67e74705SXin Li 
352*67e74705SXin Li static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set4_epi64(long long __A,long long __B,long long __C,long long __D)353*67e74705SXin Li _mm512_set4_epi64 (long long __A, long long __B, long long __C,
354*67e74705SXin Li        long long __D)
355*67e74705SXin Li {
356*67e74705SXin Li   return  (__m512i) (__v8di)
357*67e74705SXin Li    { __D, __C, __B, __A, __D, __C, __B, __A };
358*67e74705SXin Li }
359*67e74705SXin Li 
360*67e74705SXin Li static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_set4_pd(double __A,double __B,double __C,double __D)361*67e74705SXin Li _mm512_set4_pd (double __A, double __B, double __C, double __D)
362*67e74705SXin Li {
363*67e74705SXin Li   return  (__m512d)
364*67e74705SXin Li    { __D, __C, __B, __A, __D, __C, __B, __A };
365*67e74705SXin Li }
366*67e74705SXin Li 
367*67e74705SXin Li static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_set4_ps(float __A,float __B,float __C,float __D)368*67e74705SXin Li _mm512_set4_ps (float __A, float __B, float __C, float __D)
369*67e74705SXin Li {
370*67e74705SXin Li   return  (__m512)
371*67e74705SXin Li    { __D, __C, __B, __A, __D, __C, __B, __A,
372*67e74705SXin Li      __D, __C, __B, __A, __D, __C, __B, __A };
373*67e74705SXin Li }
374*67e74705SXin Li 
/* "Reversed" forms of the _mm512_set4_* functions: the arguments are passed
 * on in the opposite order, so e0 becomes the last set4 parameter and thus
 * lands in lane 0 of each repeated group. */
#define _mm512_setr4_epi32(e0,e1,e2,e3)               \
  _mm512_set4_epi32((e3),(e2),(e1),(e0))

#define _mm512_setr4_epi64(e0,e1,e2,e3)               \
  _mm512_set4_epi64((e3),(e2),(e1),(e0))

#define _mm512_setr4_pd(e0,e1,e2,e3)                \
  _mm512_set4_pd((e3),(e2),(e1),(e0))

#define _mm512_setr4_ps(e0,e1,e2,e3)                \
  _mm512_set4_ps((e3),(e2),(e1),(e0))
386*67e74705SXin Li 
387*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_broadcastsd_pd(__m128d __A)388*67e74705SXin Li _mm512_broadcastsd_pd(__m128d __A)
389*67e74705SXin Li {
390*67e74705SXin Li   return (__m512d)__builtin_shufflevector((__v2df) __A,
391*67e74705SXin Li                                           (__v2df) _mm_undefined_pd(),
392*67e74705SXin Li                                           0, 0, 0, 0, 0, 0, 0, 0);
393*67e74705SXin Li }
394*67e74705SXin Li 
395*67e74705SXin Li /* Cast between vector types */
396*67e74705SXin Li 
397*67e74705SXin Li static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_castpd256_pd512(__m256d __a)398*67e74705SXin Li _mm512_castpd256_pd512(__m256d __a)
399*67e74705SXin Li {
400*67e74705SXin Li   return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
401*67e74705SXin Li }
402*67e74705SXin Li 
403*67e74705SXin Li static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_castps256_ps512(__m256 __a)404*67e74705SXin Li _mm512_castps256_ps512(__m256 __a)
405*67e74705SXin Li {
406*67e74705SXin Li   return __builtin_shufflevector(__a, __a, 0,  1,  2,  3,  4,  5,  6,  7,
407*67e74705SXin Li                                           -1, -1, -1, -1, -1, -1, -1, -1);
408*67e74705SXin Li }
409*67e74705SXin Li 
410*67e74705SXin Li static __inline __m128d __DEFAULT_FN_ATTRS
_mm512_castpd512_pd128(__m512d __a)411*67e74705SXin Li _mm512_castpd512_pd128(__m512d __a)
412*67e74705SXin Li {
413*67e74705SXin Li   return __builtin_shufflevector(__a, __a, 0, 1);
414*67e74705SXin Li }
415*67e74705SXin Li 
416*67e74705SXin Li static __inline __m256d __DEFAULT_FN_ATTRS
_mm512_castpd512_pd256(__m512d __A)417*67e74705SXin Li _mm512_castpd512_pd256 (__m512d __A)
418*67e74705SXin Li {
419*67e74705SXin Li   return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
420*67e74705SXin Li }
421*67e74705SXin Li 
422*67e74705SXin Li static __inline __m128 __DEFAULT_FN_ATTRS
_mm512_castps512_ps128(__m512 __a)423*67e74705SXin Li _mm512_castps512_ps128(__m512 __a)
424*67e74705SXin Li {
425*67e74705SXin Li   return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
426*67e74705SXin Li }
427*67e74705SXin Li 
428*67e74705SXin Li static __inline __m256 __DEFAULT_FN_ATTRS
_mm512_castps512_ps256(__m512 __A)429*67e74705SXin Li _mm512_castps512_ps256 (__m512 __A)
430*67e74705SXin Li {
431*67e74705SXin Li   return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
432*67e74705SXin Li }
433*67e74705SXin Li 
434*67e74705SXin Li static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_castpd_ps(__m512d __A)435*67e74705SXin Li _mm512_castpd_ps (__m512d __A)
436*67e74705SXin Li {
437*67e74705SXin Li   return (__m512) (__A);
438*67e74705SXin Li }
439*67e74705SXin Li 
440*67e74705SXin Li static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_castpd_si512(__m512d __A)441*67e74705SXin Li _mm512_castpd_si512 (__m512d __A)
442*67e74705SXin Li {
443*67e74705SXin Li   return (__m512i) (__A);
444*67e74705SXin Li }
445*67e74705SXin Li 
446*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_castpd128_pd512(__m128d __A)447*67e74705SXin Li _mm512_castpd128_pd512 (__m128d __A)
448*67e74705SXin Li {
449*67e74705SXin Li   return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
450*67e74705SXin Li }
451*67e74705SXin Li 
452*67e74705SXin Li static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_castps_pd(__m512 __A)453*67e74705SXin Li _mm512_castps_pd (__m512 __A)
454*67e74705SXin Li {
455*67e74705SXin Li   return (__m512d) (__A);
456*67e74705SXin Li }
457*67e74705SXin Li 
458*67e74705SXin Li static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_castps_si512(__m512 __A)459*67e74705SXin Li _mm512_castps_si512 (__m512 __A)
460*67e74705SXin Li {
461*67e74705SXin Li   return (__m512i) (__A);
462*67e74705SXin Li }
463*67e74705SXin Li 
464*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_castps128_ps512(__m128 __A)465*67e74705SXin Li _mm512_castps128_ps512 (__m128 __A)
466*67e74705SXin Li {
467*67e74705SXin Li     return  __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
468*67e74705SXin Li }
469*67e74705SXin Li 
470*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_castsi128_si512(__m128i __A)471*67e74705SXin Li _mm512_castsi128_si512 (__m128i __A)
472*67e74705SXin Li {
473*67e74705SXin Li    return  __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
474*67e74705SXin Li }
475*67e74705SXin Li 
476*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_castsi256_si512(__m256i __A)477*67e74705SXin Li _mm512_castsi256_si512 (__m256i __A)
478*67e74705SXin Li {
479*67e74705SXin Li    return  __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1);
480*67e74705SXin Li }
481*67e74705SXin Li 
482*67e74705SXin Li static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_castsi512_ps(__m512i __A)483*67e74705SXin Li _mm512_castsi512_ps (__m512i __A)
484*67e74705SXin Li {
485*67e74705SXin Li   return (__m512) (__A);
486*67e74705SXin Li }
487*67e74705SXin Li 
488*67e74705SXin Li static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_castsi512_pd(__m512i __A)489*67e74705SXin Li _mm512_castsi512_pd (__m512i __A)
490*67e74705SXin Li {
491*67e74705SXin Li   return (__m512d) (__A);
492*67e74705SXin Li }
493*67e74705SXin Li 
494*67e74705SXin Li static __inline __m128i __DEFAULT_FN_ATTRS
_mm512_castsi512_si128(__m512i __A)495*67e74705SXin Li _mm512_castsi512_si128 (__m512i __A)
496*67e74705SXin Li {
497*67e74705SXin Li   return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
498*67e74705SXin Li }
499*67e74705SXin Li 
500*67e74705SXin Li static __inline __m256i __DEFAULT_FN_ATTRS
_mm512_castsi512_si256(__m512i __A)501*67e74705SXin Li _mm512_castsi512_si256 (__m512i __A)
502*67e74705SXin Li {
503*67e74705SXin Li   return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
504*67e74705SXin Li }
505*67e74705SXin Li 
506*67e74705SXin Li /* Bitwise operators */
507*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_and_epi32(__m512i __a,__m512i __b)508*67e74705SXin Li _mm512_and_epi32(__m512i __a, __m512i __b)
509*67e74705SXin Li {
510*67e74705SXin Li   return (__m512i)((__v16su)__a & (__v16su)__b);
511*67e74705SXin Li }
512*67e74705SXin Li 
513*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_and_epi32(__m512i __src,__mmask16 __k,__m512i __a,__m512i __b)514*67e74705SXin Li _mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
515*67e74705SXin Li {
516*67e74705SXin Li   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
517*67e74705SXin Li                 (__v16si) _mm512_and_epi32(__a, __b),
518*67e74705SXin Li                 (__v16si) __src);
519*67e74705SXin Li }
520*67e74705SXin Li 
521*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_and_epi32(__mmask16 __k,__m512i __a,__m512i __b)522*67e74705SXin Li _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
523*67e74705SXin Li {
524*67e74705SXin Li   return (__m512i) _mm512_mask_and_epi32(_mm512_setzero_si512 (),
525*67e74705SXin Li                                          __k, __a, __b);
526*67e74705SXin Li }
527*67e74705SXin Li 
528*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_and_epi64(__m512i __a,__m512i __b)529*67e74705SXin Li _mm512_and_epi64(__m512i __a, __m512i __b)
530*67e74705SXin Li {
531*67e74705SXin Li   return (__m512i)((__v8du)__a & (__v8du)__b);
532*67e74705SXin Li }
533*67e74705SXin Li 
534*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_and_epi64(__m512i __src,__mmask8 __k,__m512i __a,__m512i __b)535*67e74705SXin Li _mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
536*67e74705SXin Li {
537*67e74705SXin Li     return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __k,
538*67e74705SXin Li                 (__v8di) _mm512_and_epi64(__a, __b),
539*67e74705SXin Li                 (__v8di) __src);
540*67e74705SXin Li }
541*67e74705SXin Li 
542*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_and_epi64(__mmask8 __k,__m512i __a,__m512i __b)543*67e74705SXin Li _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
544*67e74705SXin Li {
545*67e74705SXin Li   return (__m512i) _mm512_mask_and_epi64(_mm512_setzero_si512 (),
546*67e74705SXin Li                                          __k, __a, __b);
547*67e74705SXin Li }
548*67e74705SXin Li 
549*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_andnot_si512(__m512i __A,__m512i __B)550*67e74705SXin Li _mm512_andnot_si512 (__m512i __A, __m512i __B)
551*67e74705SXin Li {
552*67e74705SXin Li   return (__m512i)(~(__v8du)(__A) & (__v8du)__B);
553*67e74705SXin Li }
554*67e74705SXin Li 
555*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_andnot_epi32(__m512i __A,__m512i __B)556*67e74705SXin Li _mm512_andnot_epi32 (__m512i __A, __m512i __B)
557*67e74705SXin Li {
558*67e74705SXin Li   return (__m512i)(~(__v16su)(__A) & (__v16su)__B);
559*67e74705SXin Li }
560*67e74705SXin Li 
561*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_andnot_epi32(__m512i __W,__mmask16 __U,__m512i __A,__m512i __B)562*67e74705SXin Li _mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
563*67e74705SXin Li {
564*67e74705SXin Li   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
565*67e74705SXin Li                                          (__v16si)_mm512_andnot_epi32(__A, __B),
566*67e74705SXin Li                                          (__v16si)__W);
567*67e74705SXin Li }
568*67e74705SXin Li 
569*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_andnot_epi32(__mmask16 __U,__m512i __A,__m512i __B)570*67e74705SXin Li _mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
571*67e74705SXin Li {
572*67e74705SXin Li   return (__m512i)_mm512_mask_andnot_epi32(_mm512_setzero_si512(),
573*67e74705SXin Li                                            __U, __A, __B);
574*67e74705SXin Li }
575*67e74705SXin Li 
576*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_andnot_epi64(__m512i __A,__m512i __B)577*67e74705SXin Li _mm512_andnot_epi64(__m512i __A, __m512i __B)
578*67e74705SXin Li {
579*67e74705SXin Li   return (__m512i)(~(__v8du)(__A) & (__v8du)__B);
580*67e74705SXin Li }
581*67e74705SXin Li 
582*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_andnot_epi64(__m512i __W,__mmask8 __U,__m512i __A,__m512i __B)583*67e74705SXin Li _mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
584*67e74705SXin Li {
585*67e74705SXin Li   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
586*67e74705SXin Li                                           (__v8di)_mm512_andnot_epi64(__A, __B),
587*67e74705SXin Li                                           (__v8di)__W);
588*67e74705SXin Li }
589*67e74705SXin Li 
590*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_andnot_epi64(__mmask8 __U,__m512i __A,__m512i __B)591*67e74705SXin Li _mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
592*67e74705SXin Li {
593*67e74705SXin Li   return (__m512i)_mm512_mask_andnot_epi64(_mm512_setzero_si512(),
594*67e74705SXin Li                                            __U, __A, __B);
595*67e74705SXin Li }
596*67e74705SXin Li 
597*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_or_epi32(__m512i __a,__m512i __b)598*67e74705SXin Li _mm512_or_epi32(__m512i __a, __m512i __b)
599*67e74705SXin Li {
600*67e74705SXin Li   return (__m512i)((__v16su)__a | (__v16su)__b);
601*67e74705SXin Li }
602*67e74705SXin Li 
603*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_or_epi32(__m512i __src,__mmask16 __k,__m512i __a,__m512i __b)604*67e74705SXin Li _mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
605*67e74705SXin Li {
606*67e74705SXin Li   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
607*67e74705SXin Li                                              (__v16si)_mm512_or_epi32(__a, __b),
608*67e74705SXin Li                                              (__v16si)__src);
609*67e74705SXin Li }
610*67e74705SXin Li 
611*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_or_epi32(__mmask16 __k,__m512i __a,__m512i __b)612*67e74705SXin Li _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
613*67e74705SXin Li {
614*67e74705SXin Li   return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b);
615*67e74705SXin Li }
616*67e74705SXin Li 
617*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_or_epi64(__m512i __a,__m512i __b)618*67e74705SXin Li _mm512_or_epi64(__m512i __a, __m512i __b)
619*67e74705SXin Li {
620*67e74705SXin Li   return (__m512i)((__v8du)__a | (__v8du)__b);
621*67e74705SXin Li }
622*67e74705SXin Li 
623*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_or_epi64(__m512i __src,__mmask8 __k,__m512i __a,__m512i __b)624*67e74705SXin Li _mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
625*67e74705SXin Li {
626*67e74705SXin Li   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
627*67e74705SXin Li                                              (__v8di)_mm512_or_epi64(__a, __b),
628*67e74705SXin Li                                              (__v8di)__src);
629*67e74705SXin Li }
630*67e74705SXin Li 
631*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_or_epi64(__mmask8 __k,__m512i __a,__m512i __b)632*67e74705SXin Li _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
633*67e74705SXin Li {
634*67e74705SXin Li   return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b);
635*67e74705SXin Li }
636*67e74705SXin Li 
637*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_xor_epi32(__m512i __a,__m512i __b)638*67e74705SXin Li _mm512_xor_epi32(__m512i __a, __m512i __b)
639*67e74705SXin Li {
640*67e74705SXin Li   return (__m512i)((__v16su)__a ^ (__v16su)__b);
641*67e74705SXin Li }
642*67e74705SXin Li 
643*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_xor_epi32(__m512i __src,__mmask16 __k,__m512i __a,__m512i __b)644*67e74705SXin Li _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
645*67e74705SXin Li {
646*67e74705SXin Li   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
647*67e74705SXin Li                                             (__v16si)_mm512_xor_epi32(__a, __b),
648*67e74705SXin Li                                             (__v16si)__src);
649*67e74705SXin Li }
650*67e74705SXin Li 
651*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_xor_epi32(__mmask16 __k,__m512i __a,__m512i __b)652*67e74705SXin Li _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
653*67e74705SXin Li {
654*67e74705SXin Li   return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b);
655*67e74705SXin Li }
656*67e74705SXin Li 
657*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_xor_epi64(__m512i __a,__m512i __b)658*67e74705SXin Li _mm512_xor_epi64(__m512i __a, __m512i __b)
659*67e74705SXin Li {
660*67e74705SXin Li   return (__m512i)((__v8du)__a ^ (__v8du)__b);
661*67e74705SXin Li }
662*67e74705SXin Li 
663*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_xor_epi64(__m512i __src,__mmask8 __k,__m512i __a,__m512i __b)664*67e74705SXin Li _mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
665*67e74705SXin Li {
666*67e74705SXin Li   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
667*67e74705SXin Li                                              (__v8di)_mm512_xor_epi64(__a, __b),
668*67e74705SXin Li                                              (__v8di)__src);
669*67e74705SXin Li }
670*67e74705SXin Li 
671*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_xor_epi64(__mmask8 __k,__m512i __a,__m512i __b)672*67e74705SXin Li _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
673*67e74705SXin Li {
674*67e74705SXin Li   return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b);
675*67e74705SXin Li }
676*67e74705SXin Li 
677*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_and_si512(__m512i __a,__m512i __b)678*67e74705SXin Li _mm512_and_si512(__m512i __a, __m512i __b)
679*67e74705SXin Li {
680*67e74705SXin Li   return (__m512i)((__v8du)__a & (__v8du)__b);
681*67e74705SXin Li }
682*67e74705SXin Li 
683*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_or_si512(__m512i __a,__m512i __b)684*67e74705SXin Li _mm512_or_si512(__m512i __a, __m512i __b)
685*67e74705SXin Li {
686*67e74705SXin Li   return (__m512i)((__v8du)__a | (__v8du)__b);
687*67e74705SXin Li }
688*67e74705SXin Li 
689*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_xor_si512(__m512i __a,__m512i __b)690*67e74705SXin Li _mm512_xor_si512(__m512i __a, __m512i __b)
691*67e74705SXin Li {
692*67e74705SXin Li   return (__m512i)((__v8du)__a ^ (__v8du)__b);
693*67e74705SXin Li }
694*67e74705SXin Li 
/* Arithmetic */
696*67e74705SXin Li 
697*67e74705SXin Li static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_add_pd(__m512d __a,__m512d __b)698*67e74705SXin Li _mm512_add_pd(__m512d __a, __m512d __b)
699*67e74705SXin Li {
700*67e74705SXin Li   return (__m512d)((__v8df)__a + (__v8df)__b);
701*67e74705SXin Li }
702*67e74705SXin Li 
703*67e74705SXin Li static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_add_ps(__m512 __a,__m512 __b)704*67e74705SXin Li _mm512_add_ps(__m512 __a, __m512 __b)
705*67e74705SXin Li {
706*67e74705SXin Li   return (__m512)((__v16sf)__a + (__v16sf)__b);
707*67e74705SXin Li }
708*67e74705SXin Li 
709*67e74705SXin Li static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_mul_pd(__m512d __a,__m512d __b)710*67e74705SXin Li _mm512_mul_pd(__m512d __a, __m512d __b)
711*67e74705SXin Li {
712*67e74705SXin Li   return (__m512d)((__v8df)__a * (__v8df)__b);
713*67e74705SXin Li }
714*67e74705SXin Li 
715*67e74705SXin Li static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_mul_ps(__m512 __a,__m512 __b)716*67e74705SXin Li _mm512_mul_ps(__m512 __a, __m512 __b)
717*67e74705SXin Li {
718*67e74705SXin Li   return (__m512)((__v16sf)__a * (__v16sf)__b);
719*67e74705SXin Li }
720*67e74705SXin Li 
721*67e74705SXin Li static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_sub_pd(__m512d __a,__m512d __b)722*67e74705SXin Li _mm512_sub_pd(__m512d __a, __m512d __b)
723*67e74705SXin Li {
724*67e74705SXin Li   return (__m512d)((__v8df)__a - (__v8df)__b);
725*67e74705SXin Li }
726*67e74705SXin Li 
727*67e74705SXin Li static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_sub_ps(__m512 __a,__m512 __b)728*67e74705SXin Li _mm512_sub_ps(__m512 __a, __m512 __b)
729*67e74705SXin Li {
730*67e74705SXin Li   return (__m512)((__v16sf)__a - (__v16sf)__b);
731*67e74705SXin Li }
732*67e74705SXin Li 
733*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_add_epi64(__m512i __A,__m512i __B)734*67e74705SXin Li _mm512_add_epi64 (__m512i __A, __m512i __B)
735*67e74705SXin Li {
736*67e74705SXin Li   return (__m512i) ((__v8du) __A + (__v8du) __B);
737*67e74705SXin Li }
738*67e74705SXin Li 
739*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_add_epi64(__m512i __W,__mmask8 __U,__m512i __A,__m512i __B)740*67e74705SXin Li _mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
741*67e74705SXin Li {
742*67e74705SXin Li   return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
743*67e74705SXin Li              (__v8di) __B,
744*67e74705SXin Li              (__v8di) __W,
745*67e74705SXin Li              (__mmask8) __U);
746*67e74705SXin Li }
747*67e74705SXin Li 
748*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_add_epi64(__mmask8 __U,__m512i __A,__m512i __B)749*67e74705SXin Li _mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
750*67e74705SXin Li {
751*67e74705SXin Li   return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
752*67e74705SXin Li              (__v8di) __B,
753*67e74705SXin Li              (__v8di)
754*67e74705SXin Li              _mm512_setzero_si512 (),
755*67e74705SXin Li              (__mmask8) __U);
756*67e74705SXin Li }
757*67e74705SXin Li 
758*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sub_epi64(__m512i __A,__m512i __B)759*67e74705SXin Li _mm512_sub_epi64 (__m512i __A, __m512i __B)
760*67e74705SXin Li {
761*67e74705SXin Li   return (__m512i) ((__v8du) __A - (__v8du) __B);
762*67e74705SXin Li }
763*67e74705SXin Li 
764*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sub_epi64(__m512i __W,__mmask8 __U,__m512i __A,__m512i __B)765*67e74705SXin Li _mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
766*67e74705SXin Li {
767*67e74705SXin Li   return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
768*67e74705SXin Li              (__v8di) __B,
769*67e74705SXin Li              (__v8di) __W,
770*67e74705SXin Li              (__mmask8) __U);
771*67e74705SXin Li }
772*67e74705SXin Li 
773*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sub_epi64(__mmask8 __U,__m512i __A,__m512i __B)774*67e74705SXin Li _mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
775*67e74705SXin Li {
776*67e74705SXin Li   return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
777*67e74705SXin Li              (__v8di) __B,
778*67e74705SXin Li              (__v8di)
779*67e74705SXin Li              _mm512_setzero_si512 (),
780*67e74705SXin Li              (__mmask8) __U);
781*67e74705SXin Li }
782*67e74705SXin Li 
783*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_add_epi32(__m512i __A,__m512i __B)784*67e74705SXin Li _mm512_add_epi32 (__m512i __A, __m512i __B)
785*67e74705SXin Li {
786*67e74705SXin Li   return (__m512i) ((__v16su) __A + (__v16su) __B);
787*67e74705SXin Li }
788*67e74705SXin Li 
789*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_add_epi32(__m512i __W,__mmask16 __U,__m512i __A,__m512i __B)790*67e74705SXin Li _mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
791*67e74705SXin Li {
792*67e74705SXin Li   return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
793*67e74705SXin Li              (__v16si) __B,
794*67e74705SXin Li              (__v16si) __W,
795*67e74705SXin Li              (__mmask16) __U);
796*67e74705SXin Li }
797*67e74705SXin Li 
798*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_add_epi32(__mmask16 __U,__m512i __A,__m512i __B)799*67e74705SXin Li _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
800*67e74705SXin Li {
801*67e74705SXin Li   return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
802*67e74705SXin Li              (__v16si) __B,
803*67e74705SXin Li              (__v16si)
804*67e74705SXin Li              _mm512_setzero_si512 (),
805*67e74705SXin Li              (__mmask16) __U);
806*67e74705SXin Li }
807*67e74705SXin Li 
808*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sub_epi32(__m512i __A,__m512i __B)809*67e74705SXin Li _mm512_sub_epi32 (__m512i __A, __m512i __B)
810*67e74705SXin Li {
811*67e74705SXin Li   return (__m512i) ((__v16su) __A - (__v16su) __B);
812*67e74705SXin Li }
813*67e74705SXin Li 
814*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sub_epi32(__m512i __W,__mmask16 __U,__m512i __A,__m512i __B)815*67e74705SXin Li _mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
816*67e74705SXin Li {
817*67e74705SXin Li   return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
818*67e74705SXin Li              (__v16si) __B,
819*67e74705SXin Li              (__v16si) __W,
820*67e74705SXin Li              (__mmask16) __U);
821*67e74705SXin Li }
822*67e74705SXin Li 
823*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sub_epi32(__mmask16 __U,__m512i __A,__m512i __B)824*67e74705SXin Li _mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
825*67e74705SXin Li {
826*67e74705SXin Li   return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
827*67e74705SXin Li              (__v16si) __B,
828*67e74705SXin Li              (__v16si)
829*67e74705SXin Li              _mm512_setzero_si512 (),
830*67e74705SXin Li              (__mmask16) __U);
831*67e74705SXin Li }
832*67e74705SXin Li 
/* Merge-masked packed-double max with an explicit rounding operand: A and B
 * are the inputs, W the pass-through vector, U the mask, and R is forwarded
 * as the builtin's final int operand. Presumably a macro so that R stays a
 * constant expression -- TODO confirm. */
#define _mm512_mask_max_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
                                        (int)(R)); })
838*67e74705SXin Li 
/* Zero-masked packed-double max with an explicit rounding operand R: the
 * pass-through operand is the all-zero vector from _mm512_setzero_pd(). */
#define _mm512_maskz_max_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)(U), (int)(R)); })
844*67e74705SXin Li 
/* Unmasked packed-double max with an explicit rounding operand R: the mask
 * is (__mmask8)-1 (all bits set) and the pass-through operand is
 * _mm512_undefined_pd(). */
#define _mm512_max_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_undefined_pd(), \
                                        (__mmask8)-1, (int)(R)); })
850*67e74705SXin Li 
851*67e74705SXin Li static  __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_max_pd(__m512d __A,__m512d __B)852*67e74705SXin Li _mm512_max_pd(__m512d __A, __m512d __B)
853*67e74705SXin Li {
854*67e74705SXin Li   return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
855*67e74705SXin Li              (__v8df) __B,
856*67e74705SXin Li              (__v8df)
857*67e74705SXin Li              _mm512_setzero_pd (),
858*67e74705SXin Li              (__mmask8) -1,
859*67e74705SXin Li              _MM_FROUND_CUR_DIRECTION);
860*67e74705SXin Li }
861*67e74705SXin Li 
862*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_max_pd(__m512d __W,__mmask8 __U,__m512d __A,__m512d __B)863*67e74705SXin Li _mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
864*67e74705SXin Li {
865*67e74705SXin Li   return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
866*67e74705SXin Li                   (__v8df) __B,
867*67e74705SXin Li                   (__v8df) __W,
868*67e74705SXin Li                   (__mmask8) __U,
869*67e74705SXin Li                   _MM_FROUND_CUR_DIRECTION);
870*67e74705SXin Li }
871*67e74705SXin Li 
872*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_max_pd(__mmask8 __U,__m512d __A,__m512d __B)873*67e74705SXin Li _mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
874*67e74705SXin Li {
875*67e74705SXin Li   return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
876*67e74705SXin Li                   (__v8df) __B,
877*67e74705SXin Li                   (__v8df)
878*67e74705SXin Li                   _mm512_setzero_pd (),
879*67e74705SXin Li                   (__mmask8) __U,
880*67e74705SXin Li                   _MM_FROUND_CUR_DIRECTION);
881*67e74705SXin Li }
882*67e74705SXin Li 
/* Merge-masked packed-float max with an explicit rounding operand: A and B
 * are the inputs, W the pass-through vector, U the mask, and R is forwarded
 * as the builtin's final int operand. Presumably a macro so that R stays a
 * constant expression -- TODO confirm. */
#define _mm512_mask_max_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
                                       (int)(R)); })
888*67e74705SXin Li 
/* Zero-masked packed-float max with an explicit rounding operand R: the
 * pass-through operand is the all-zero vector from _mm512_setzero_ps(). */
#define _mm512_maskz_max_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), (int)(R)); })
894*67e74705SXin Li 
/* Unmasked packed-float max with an explicit rounding operand R: the mask is
 * (__mmask16)-1 (all bits set) and the pass-through operand is
 * _mm512_undefined_ps(). */
#define _mm512_max_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_undefined_ps(), \
                                       (__mmask16)-1, (int)(R)); })
900*67e74705SXin Li 
901*67e74705SXin Li static  __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_max_ps(__m512 __A,__m512 __B)902*67e74705SXin Li _mm512_max_ps(__m512 __A, __m512 __B)
903*67e74705SXin Li {
904*67e74705SXin Li   return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
905*67e74705SXin Li             (__v16sf) __B,
906*67e74705SXin Li             (__v16sf)
907*67e74705SXin Li             _mm512_setzero_ps (),
908*67e74705SXin Li             (__mmask16) -1,
909*67e74705SXin Li             _MM_FROUND_CUR_DIRECTION);
910*67e74705SXin Li }
911*67e74705SXin Li 
912*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_max_ps(__m512 __W,__mmask16 __U,__m512 __A,__m512 __B)913*67e74705SXin Li _mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
914*67e74705SXin Li {
915*67e74705SXin Li   return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
916*67e74705SXin Li                  (__v16sf) __B,
917*67e74705SXin Li                  (__v16sf) __W,
918*67e74705SXin Li                  (__mmask16) __U,
919*67e74705SXin Li                  _MM_FROUND_CUR_DIRECTION);
920*67e74705SXin Li }
921*67e74705SXin Li 
922*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_max_ps(__mmask16 __U,__m512 __A,__m512 __B)923*67e74705SXin Li _mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
924*67e74705SXin Li {
925*67e74705SXin Li   return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
926*67e74705SXin Li                  (__v16sf) __B,
927*67e74705SXin Li                  (__v16sf)
928*67e74705SXin Li                  _mm512_setzero_ps (),
929*67e74705SXin Li                  (__mmask16) __U,
930*67e74705SXin Li                  _MM_FROUND_CUR_DIRECTION);
931*67e74705SXin Li }
932*67e74705SXin Li 
933*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_max_ss(__m128 __W,__mmask8 __U,__m128 __A,__m128 __B)934*67e74705SXin Li _mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
935*67e74705SXin Li   return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
936*67e74705SXin Li                 (__v4sf) __B,
937*67e74705SXin Li                 (__v4sf) __W,
938*67e74705SXin Li                 (__mmask8) __U,
939*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
940*67e74705SXin Li }
941*67e74705SXin Li 
942*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_max_ss(__mmask8 __U,__m128 __A,__m128 __B)943*67e74705SXin Li _mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
944*67e74705SXin Li   return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
945*67e74705SXin Li                 (__v4sf) __B,
946*67e74705SXin Li                 (__v4sf)  _mm_setzero_ps (),
947*67e74705SXin Li                 (__mmask8) __U,
948*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
949*67e74705SXin Li }
950*67e74705SXin Li 
/* Unmasked scalar-float max with an explicit rounding operand R: the mask is
 * (__mmask8)-1 (all bits set) and the pass-through operand is the all-zero
 * vector from _mm_setzero_ps(). */
#define _mm_max_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R)); })
956*67e74705SXin Li 
/* Merge-masked scalar-float max with an explicit rounding operand: A and B
 * are the inputs, W the pass-through vector, U the mask, and R is forwarded
 * as the builtin's final int operand. */
#define _mm_mask_max_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R)); })
962*67e74705SXin Li 
/* Zero-masked scalar-float max with an explicit rounding operand R: the
 * pass-through operand is the all-zero vector from _mm_setzero_ps(). */
#define _mm_maskz_max_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R)); })
968*67e74705SXin Li 
969*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_max_sd(__m128d __W,__mmask8 __U,__m128d __A,__m128d __B)970*67e74705SXin Li _mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
971*67e74705SXin Li   return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
972*67e74705SXin Li                 (__v2df) __B,
973*67e74705SXin Li                 (__v2df) __W,
974*67e74705SXin Li                 (__mmask8) __U,
975*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
976*67e74705SXin Li }
977*67e74705SXin Li 
978*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_max_sd(__mmask8 __U,__m128d __A,__m128d __B)979*67e74705SXin Li _mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
980*67e74705SXin Li   return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
981*67e74705SXin Li                 (__v2df) __B,
982*67e74705SXin Li                 (__v2df)  _mm_setzero_pd (),
983*67e74705SXin Li                 (__mmask8) __U,
984*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
985*67e74705SXin Li }
986*67e74705SXin Li 
/* Unmasked scalar-double max with an explicit rounding operand R: the mask is
 * (__mmask8)-1 (all bits set) and the pass-through operand is the all-zero
 * vector from _mm_setzero_pd(). */
#define _mm_max_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R)); })
992*67e74705SXin Li 
/* Merge-masked scalar-double max with an explicit rounding operand: A and B
 * are the inputs, W the pass-through vector, U the mask, and R is forwarded
 * as the builtin's final int operand. */
#define _mm_mask_max_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R)); })
998*67e74705SXin Li 
/* Zero-masked scalar-double max with an explicit rounding operand R: the
 * pass-through operand is the all-zero vector from _mm_setzero_pd(). */
#define _mm_maskz_max_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })
1004*67e74705SXin Li 
1005*67e74705SXin Li static __inline __m512i
1006*67e74705SXin Li __DEFAULT_FN_ATTRS
_mm512_max_epi32(__m512i __A,__m512i __B)1007*67e74705SXin Li _mm512_max_epi32(__m512i __A, __m512i __B)
1008*67e74705SXin Li {
1009*67e74705SXin Li   return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
1010*67e74705SXin Li               (__v16si) __B,
1011*67e74705SXin Li               (__v16si)
1012*67e74705SXin Li               _mm512_setzero_si512 (),
1013*67e74705SXin Li               (__mmask16) -1);
1014*67e74705SXin Li }
1015*67e74705SXin Li 
1016*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_max_epi32(__m512i __W,__mmask16 __M,__m512i __A,__m512i __B)1017*67e74705SXin Li _mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1018*67e74705SXin Li {
1019*67e74705SXin Li   return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
1020*67e74705SXin Li                    (__v16si) __B,
1021*67e74705SXin Li                    (__v16si) __W, __M);
1022*67e74705SXin Li }
1023*67e74705SXin Li 
1024*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_max_epi32(__mmask16 __M,__m512i __A,__m512i __B)1025*67e74705SXin Li _mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
1026*67e74705SXin Li {
1027*67e74705SXin Li   return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
1028*67e74705SXin Li                    (__v16si) __B,
1029*67e74705SXin Li                    (__v16si)
1030*67e74705SXin Li                    _mm512_setzero_si512 (),
1031*67e74705SXin Li                    __M);
1032*67e74705SXin Li }
1033*67e74705SXin Li 
1034*67e74705SXin Li static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_max_epu32(__m512i __A,__m512i __B)1035*67e74705SXin Li _mm512_max_epu32(__m512i __A, __m512i __B)
1036*67e74705SXin Li {
1037*67e74705SXin Li   return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
1038*67e74705SXin Li               (__v16si) __B,
1039*67e74705SXin Li               (__v16si)
1040*67e74705SXin Li               _mm512_setzero_si512 (),
1041*67e74705SXin Li               (__mmask16) -1);
1042*67e74705SXin Li }
1043*67e74705SXin Li 
1044*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_max_epu32(__m512i __W,__mmask16 __M,__m512i __A,__m512i __B)1045*67e74705SXin Li _mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1046*67e74705SXin Li {
1047*67e74705SXin Li   return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
1048*67e74705SXin Li                    (__v16si) __B,
1049*67e74705SXin Li                    (__v16si) __W, __M);
1050*67e74705SXin Li }
1051*67e74705SXin Li 
1052*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_max_epu32(__mmask16 __M,__m512i __A,__m512i __B)1053*67e74705SXin Li _mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
1054*67e74705SXin Li {
1055*67e74705SXin Li   return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
1056*67e74705SXin Li                    (__v16si) __B,
1057*67e74705SXin Li                    (__v16si)
1058*67e74705SXin Li                    _mm512_setzero_si512 (),
1059*67e74705SXin Li                    __M);
1060*67e74705SXin Li }
1061*67e74705SXin Li 
1062*67e74705SXin Li static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_max_epi64(__m512i __A,__m512i __B)1063*67e74705SXin Li _mm512_max_epi64(__m512i __A, __m512i __B)
1064*67e74705SXin Li {
1065*67e74705SXin Li   return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
1066*67e74705SXin Li               (__v8di) __B,
1067*67e74705SXin Li               (__v8di)
1068*67e74705SXin Li               _mm512_setzero_si512 (),
1069*67e74705SXin Li               (__mmask8) -1);
1070*67e74705SXin Li }
1071*67e74705SXin Li 
1072*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_max_epi64(__m512i __W,__mmask8 __M,__m512i __A,__m512i __B)1073*67e74705SXin Li _mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1074*67e74705SXin Li {
1075*67e74705SXin Li   return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
1076*67e74705SXin Li                    (__v8di) __B,
1077*67e74705SXin Li                    (__v8di) __W, __M);
1078*67e74705SXin Li }
1079*67e74705SXin Li 
1080*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_max_epi64(__mmask8 __M,__m512i __A,__m512i __B)1081*67e74705SXin Li _mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
1082*67e74705SXin Li {
1083*67e74705SXin Li   return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
1084*67e74705SXin Li                    (__v8di) __B,
1085*67e74705SXin Li                    (__v8di)
1086*67e74705SXin Li                    _mm512_setzero_si512 (),
1087*67e74705SXin Li                    __M);
1088*67e74705SXin Li }
1089*67e74705SXin Li 
1090*67e74705SXin Li static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_max_epu64(__m512i __A,__m512i __B)1091*67e74705SXin Li _mm512_max_epu64(__m512i __A, __m512i __B)
1092*67e74705SXin Li {
1093*67e74705SXin Li   return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
1094*67e74705SXin Li               (__v8di) __B,
1095*67e74705SXin Li               (__v8di)
1096*67e74705SXin Li               _mm512_setzero_si512 (),
1097*67e74705SXin Li               (__mmask8) -1);
1098*67e74705SXin Li }
1099*67e74705SXin Li 
1100*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_max_epu64(__m512i __W,__mmask8 __M,__m512i __A,__m512i __B)1101*67e74705SXin Li _mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1102*67e74705SXin Li {
1103*67e74705SXin Li   return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
1104*67e74705SXin Li                    (__v8di) __B,
1105*67e74705SXin Li                    (__v8di) __W, __M);
1106*67e74705SXin Li }
1107*67e74705SXin Li 
1108*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_max_epu64(__mmask8 __M,__m512i __A,__m512i __B)1109*67e74705SXin Li _mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
1110*67e74705SXin Li {
1111*67e74705SXin Li   return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
1112*67e74705SXin Li                    (__v8di) __B,
1113*67e74705SXin Li                    (__v8di)
1114*67e74705SXin Li                    _mm512_setzero_si512 (),
1115*67e74705SXin Li                    __M);
1116*67e74705SXin Li }
1117*67e74705SXin Li 
/* Masked packed-double minimum with an explicit rounding-control argument.
 * W is the pass-through vector, U the 8-bit mask; R is forwarded to the
 * builtin as (int)(R). */
#define _mm512_mask_min_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)(__m512d)(W), \
                                        (__mmask8)(U), \
                                        (int)(R)); })
1123*67e74705SXin Li 
/* Zero-masking packed-double minimum with an explicit rounding-control
 * argument. A zeroed vector is the pass-through operand, U the 8-bit mask;
 * R is forwarded to the builtin as (int)(R). */
#define _mm512_maskz_min_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)(U), \
                                        (int)(R)); })
1129*67e74705SXin Li 
/* Unmasked packed-double minimum with an explicit rounding-control argument.
 * The pass-through operand is _mm512_undefined_pd() and the mask is all-ones;
 * R is forwarded to the builtin as (int)(R). */
#define _mm512_min_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_undefined_pd(), \
                                        (__mmask8)-1, \
                                        (int)(R)); })
1135*67e74705SXin Li 
1136*67e74705SXin Li static  __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_min_pd(__m512d __A,__m512d __B)1137*67e74705SXin Li _mm512_min_pd(__m512d __A, __m512d __B)
1138*67e74705SXin Li {
1139*67e74705SXin Li   return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
1140*67e74705SXin Li              (__v8df) __B,
1141*67e74705SXin Li              (__v8df)
1142*67e74705SXin Li              _mm512_setzero_pd (),
1143*67e74705SXin Li              (__mmask8) -1,
1144*67e74705SXin Li              _MM_FROUND_CUR_DIRECTION);
1145*67e74705SXin Li }
1146*67e74705SXin Li 
1147*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_min_pd(__m512d __W,__mmask8 __U,__m512d __A,__m512d __B)1148*67e74705SXin Li _mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
1149*67e74705SXin Li {
1150*67e74705SXin Li   return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
1151*67e74705SXin Li                   (__v8df) __B,
1152*67e74705SXin Li                   (__v8df) __W,
1153*67e74705SXin Li                   (__mmask8) __U,
1154*67e74705SXin Li                   _MM_FROUND_CUR_DIRECTION);
1155*67e74705SXin Li }
1156*67e74705SXin Li 
/* Masked packed-float minimum with an explicit rounding-control argument.
 * W is the pass-through vector, U the 16-bit mask; R is forwarded to the
 * builtin as (int)(R). */
#define _mm512_mask_min_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), \
                                       (__mmask16)(U), \
                                       (int)(R)); })
1162*67e74705SXin Li 
/* Zero-masking packed-float minimum with an explicit rounding-control
 * argument. A zeroed vector is the pass-through operand, U the 16-bit mask;
 * R is forwarded to the builtin as (int)(R). */
#define _mm512_maskz_min_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), \
                                       (int)(R)); })
1168*67e74705SXin Li 
/* Unmasked packed-float minimum with an explicit rounding-control argument.
 * The pass-through operand is _mm512_undefined_ps() and the mask is all-ones;
 * R is forwarded to the builtin as (int)(R). */
#define _mm512_min_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_undefined_ps(), \
                                       (__mmask16)-1, \
                                       (int)(R)); })
1174*67e74705SXin Li 
1175*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_min_pd(__mmask8 __U,__m512d __A,__m512d __B)1176*67e74705SXin Li _mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
1177*67e74705SXin Li {
1178*67e74705SXin Li   return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
1179*67e74705SXin Li                   (__v8df) __B,
1180*67e74705SXin Li                   (__v8df)
1181*67e74705SXin Li                   _mm512_setzero_pd (),
1182*67e74705SXin Li                   (__mmask8) __U,
1183*67e74705SXin Li                   _MM_FROUND_CUR_DIRECTION);
1184*67e74705SXin Li }
1185*67e74705SXin Li 
1186*67e74705SXin Li static  __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_min_ps(__m512 __A,__m512 __B)1187*67e74705SXin Li _mm512_min_ps(__m512 __A, __m512 __B)
1188*67e74705SXin Li {
1189*67e74705SXin Li   return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
1190*67e74705SXin Li             (__v16sf) __B,
1191*67e74705SXin Li             (__v16sf)
1192*67e74705SXin Li             _mm512_setzero_ps (),
1193*67e74705SXin Li             (__mmask16) -1,
1194*67e74705SXin Li             _MM_FROUND_CUR_DIRECTION);
1195*67e74705SXin Li }
1196*67e74705SXin Li 
1197*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_min_ps(__m512 __W,__mmask16 __U,__m512 __A,__m512 __B)1198*67e74705SXin Li _mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
1199*67e74705SXin Li {
1200*67e74705SXin Li   return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
1201*67e74705SXin Li                  (__v16sf) __B,
1202*67e74705SXin Li                  (__v16sf) __W,
1203*67e74705SXin Li                  (__mmask16) __U,
1204*67e74705SXin Li                  _MM_FROUND_CUR_DIRECTION);
1205*67e74705SXin Li }
1206*67e74705SXin Li 
1207*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_min_ps(__mmask16 __U,__m512 __A,__m512 __B)1208*67e74705SXin Li _mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
1209*67e74705SXin Li {
1210*67e74705SXin Li   return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
1211*67e74705SXin Li                  (__v16sf) __B,
1212*67e74705SXin Li                  (__v16sf)
1213*67e74705SXin Li                  _mm512_setzero_ps (),
1214*67e74705SXin Li                  (__mmask16) __U,
1215*67e74705SXin Li                  _MM_FROUND_CUR_DIRECTION);
1216*67e74705SXin Li }
1217*67e74705SXin Li 
1218*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_min_ss(__m128 __W,__mmask8 __U,__m128 __A,__m128 __B)1219*67e74705SXin Li _mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1220*67e74705SXin Li   return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1221*67e74705SXin Li                 (__v4sf) __B,
1222*67e74705SXin Li                 (__v4sf) __W,
1223*67e74705SXin Li                 (__mmask8) __U,
1224*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
1225*67e74705SXin Li }
1226*67e74705SXin Li 
1227*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_min_ss(__mmask8 __U,__m128 __A,__m128 __B)1228*67e74705SXin Li _mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1229*67e74705SXin Li   return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1230*67e74705SXin Li                 (__v4sf) __B,
1231*67e74705SXin Li                 (__v4sf)  _mm_setzero_ps (),
1232*67e74705SXin Li                 (__mmask8) __U,
1233*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
1234*67e74705SXin Li }
1235*67e74705SXin Li 
/* Unmasked scalar single-precision minimum with an explicit rounding-control
 * argument. A zeroed vector is the pass-through operand and the mask is
 * all-ones; R is forwarded to the builtin as (int)(R). */
#define _mm_min_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, \
                                          (int)(R)); })
1241*67e74705SXin Li 
/* Masked scalar single-precision minimum with an explicit rounding-control
 * argument. W is the pass-through vector, U the mask; R is forwarded to the
 * builtin as (int)(R). */
#define _mm_mask_min_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), \
                                          (__mmask8)(U), \
                                          (int)(R)); })
1247*67e74705SXin Li 
/* Zero-masking scalar single-precision minimum with an explicit
 * rounding-control argument. A zeroed vector is the pass-through operand,
 * U the mask; R is forwarded to the builtin as (int)(R). */
#define _mm_maskz_min_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), \
                                          (int)(R)); })
1253*67e74705SXin Li 
1254*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_min_sd(__m128d __W,__mmask8 __U,__m128d __A,__m128d __B)1255*67e74705SXin Li _mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1256*67e74705SXin Li   return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1257*67e74705SXin Li                 (__v2df) __B,
1258*67e74705SXin Li                 (__v2df) __W,
1259*67e74705SXin Li                 (__mmask8) __U,
1260*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
1261*67e74705SXin Li }
1262*67e74705SXin Li 
1263*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_min_sd(__mmask8 __U,__m128d __A,__m128d __B)1264*67e74705SXin Li _mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1265*67e74705SXin Li   return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1266*67e74705SXin Li                 (__v2df) __B,
1267*67e74705SXin Li                 (__v2df)  _mm_setzero_pd (),
1268*67e74705SXin Li                 (__mmask8) __U,
1269*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
1270*67e74705SXin Li }
1271*67e74705SXin Li 
/* Unmasked scalar double-precision minimum with an explicit rounding-control
 * argument. A zeroed vector is the pass-through operand and the mask is
 * all-ones; R is forwarded to the builtin as (int)(R). */
#define _mm_min_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, \
                                           (int)(R)); })
1277*67e74705SXin Li 
/* Masked scalar double-precision minimum with an explicit rounding-control
 * argument. W is the pass-through vector, U the mask; R is forwarded to the
 * builtin as (int)(R). */
#define _mm_mask_min_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), \
                                           (int)(R)); })
1283*67e74705SXin Li 
/* Zero-masking scalar double-precision minimum with an explicit
 * rounding-control argument. A zeroed vector is the pass-through operand,
 * U the mask; R is forwarded to the builtin as (int)(R). */
#define _mm_maskz_min_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), \
                                           (int)(R)); })
1289*67e74705SXin Li 
1290*67e74705SXin Li static __inline __m512i
1291*67e74705SXin Li __DEFAULT_FN_ATTRS
_mm512_min_epi32(__m512i __A,__m512i __B)1292*67e74705SXin Li _mm512_min_epi32(__m512i __A, __m512i __B)
1293*67e74705SXin Li {
1294*67e74705SXin Li   return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
1295*67e74705SXin Li               (__v16si) __B,
1296*67e74705SXin Li               (__v16si)
1297*67e74705SXin Li               _mm512_setzero_si512 (),
1298*67e74705SXin Li               (__mmask16) -1);
1299*67e74705SXin Li }
1300*67e74705SXin Li 
1301*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_min_epi32(__m512i __W,__mmask16 __M,__m512i __A,__m512i __B)1302*67e74705SXin Li _mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1303*67e74705SXin Li {
1304*67e74705SXin Li   return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
1305*67e74705SXin Li                    (__v16si) __B,
1306*67e74705SXin Li                    (__v16si) __W, __M);
1307*67e74705SXin Li }
1308*67e74705SXin Li 
1309*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_min_epi32(__mmask16 __M,__m512i __A,__m512i __B)1310*67e74705SXin Li _mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
1311*67e74705SXin Li {
1312*67e74705SXin Li   return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
1313*67e74705SXin Li                    (__v16si) __B,
1314*67e74705SXin Li                    (__v16si)
1315*67e74705SXin Li                    _mm512_setzero_si512 (),
1316*67e74705SXin Li                    __M);
1317*67e74705SXin Li }
1318*67e74705SXin Li 
1319*67e74705SXin Li static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_min_epu32(__m512i __A,__m512i __B)1320*67e74705SXin Li _mm512_min_epu32(__m512i __A, __m512i __B)
1321*67e74705SXin Li {
1322*67e74705SXin Li   return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
1323*67e74705SXin Li               (__v16si) __B,
1324*67e74705SXin Li               (__v16si)
1325*67e74705SXin Li               _mm512_setzero_si512 (),
1326*67e74705SXin Li               (__mmask16) -1);
1327*67e74705SXin Li }
1328*67e74705SXin Li 
1329*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_min_epu32(__m512i __W,__mmask16 __M,__m512i __A,__m512i __B)1330*67e74705SXin Li _mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1331*67e74705SXin Li {
1332*67e74705SXin Li   return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
1333*67e74705SXin Li                    (__v16si) __B,
1334*67e74705SXin Li                    (__v16si) __W, __M);
1335*67e74705SXin Li }
1336*67e74705SXin Li 
1337*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_min_epu32(__mmask16 __M,__m512i __A,__m512i __B)1338*67e74705SXin Li _mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
1339*67e74705SXin Li {
1340*67e74705SXin Li   return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
1341*67e74705SXin Li                    (__v16si) __B,
1342*67e74705SXin Li                    (__v16si)
1343*67e74705SXin Li                    _mm512_setzero_si512 (),
1344*67e74705SXin Li                    __M);
1345*67e74705SXin Li }
1346*67e74705SXin Li 
1347*67e74705SXin Li static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_min_epi64(__m512i __A,__m512i __B)1348*67e74705SXin Li _mm512_min_epi64(__m512i __A, __m512i __B)
1349*67e74705SXin Li {
1350*67e74705SXin Li   return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
1351*67e74705SXin Li               (__v8di) __B,
1352*67e74705SXin Li               (__v8di)
1353*67e74705SXin Li               _mm512_setzero_si512 (),
1354*67e74705SXin Li               (__mmask8) -1);
1355*67e74705SXin Li }
1356*67e74705SXin Li 
1357*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_min_epi64(__m512i __W,__mmask8 __M,__m512i __A,__m512i __B)1358*67e74705SXin Li _mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1359*67e74705SXin Li {
1360*67e74705SXin Li   return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
1361*67e74705SXin Li                    (__v8di) __B,
1362*67e74705SXin Li                    (__v8di) __W, __M);
1363*67e74705SXin Li }
1364*67e74705SXin Li 
1365*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_min_epi64(__mmask8 __M,__m512i __A,__m512i __B)1366*67e74705SXin Li _mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
1367*67e74705SXin Li {
1368*67e74705SXin Li   return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
1369*67e74705SXin Li                    (__v8di) __B,
1370*67e74705SXin Li                    (__v8di)
1371*67e74705SXin Li                    _mm512_setzero_si512 (),
1372*67e74705SXin Li                    __M);
1373*67e74705SXin Li }
1374*67e74705SXin Li 
1375*67e74705SXin Li static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_min_epu64(__m512i __A,__m512i __B)1376*67e74705SXin Li _mm512_min_epu64(__m512i __A, __m512i __B)
1377*67e74705SXin Li {
1378*67e74705SXin Li   return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
1379*67e74705SXin Li               (__v8di) __B,
1380*67e74705SXin Li               (__v8di)
1381*67e74705SXin Li               _mm512_setzero_si512 (),
1382*67e74705SXin Li               (__mmask8) -1);
1383*67e74705SXin Li }
1384*67e74705SXin Li 
1385*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_min_epu64(__m512i __W,__mmask8 __M,__m512i __A,__m512i __B)1386*67e74705SXin Li _mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1387*67e74705SXin Li {
1388*67e74705SXin Li   return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
1389*67e74705SXin Li                    (__v8di) __B,
1390*67e74705SXin Li                    (__v8di) __W, __M);
1391*67e74705SXin Li }
1392*67e74705SXin Li 
1393*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_min_epu64(__mmask8 __M,__m512i __A,__m512i __B)1394*67e74705SXin Li _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
1395*67e74705SXin Li {
1396*67e74705SXin Li   return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
1397*67e74705SXin Li                    (__v8di) __B,
1398*67e74705SXin Li                    (__v8di)
1399*67e74705SXin Li                    _mm512_setzero_si512 (),
1400*67e74705SXin Li                    __M);
1401*67e74705SXin Li }
1402*67e74705SXin Li 
1403*67e74705SXin Li static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mul_epi32(__m512i __X,__m512i __Y)1404*67e74705SXin Li _mm512_mul_epi32(__m512i __X, __m512i __Y)
1405*67e74705SXin Li {
1406*67e74705SXin Li   return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
1407*67e74705SXin Li               (__v16si) __Y,
1408*67e74705SXin Li               (__v8di)
1409*67e74705SXin Li               _mm512_setzero_si512 (),
1410*67e74705SXin Li               (__mmask8) -1);
1411*67e74705SXin Li }
1412*67e74705SXin Li 
1413*67e74705SXin Li static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mask_mul_epi32(__m512i __W,__mmask8 __M,__m512i __X,__m512i __Y)1414*67e74705SXin Li _mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
1415*67e74705SXin Li {
1416*67e74705SXin Li   return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
1417*67e74705SXin Li               (__v16si) __Y,
1418*67e74705SXin Li               (__v8di) __W, __M);
1419*67e74705SXin Li }
1420*67e74705SXin Li 
1421*67e74705SXin Li static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_mul_epi32(__mmask8 __M,__m512i __X,__m512i __Y)1422*67e74705SXin Li _mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
1423*67e74705SXin Li {
1424*67e74705SXin Li   return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
1425*67e74705SXin Li               (__v16si) __Y,
1426*67e74705SXin Li               (__v8di)
1427*67e74705SXin Li               _mm512_setzero_si512 (),
1428*67e74705SXin Li               __M);
1429*67e74705SXin Li }
1430*67e74705SXin Li 
1431*67e74705SXin Li static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mul_epu32(__m512i __X,__m512i __Y)1432*67e74705SXin Li _mm512_mul_epu32(__m512i __X, __m512i __Y)
1433*67e74705SXin Li {
1434*67e74705SXin Li   return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
1435*67e74705SXin Li                (__v16si) __Y,
1436*67e74705SXin Li                (__v8di)
1437*67e74705SXin Li                _mm512_setzero_si512 (),
1438*67e74705SXin Li                (__mmask8) -1);
1439*67e74705SXin Li }
1440*67e74705SXin Li 
1441*67e74705SXin Li static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mask_mul_epu32(__m512i __W,__mmask8 __M,__m512i __X,__m512i __Y)1442*67e74705SXin Li _mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
1443*67e74705SXin Li {
1444*67e74705SXin Li   return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
1445*67e74705SXin Li                (__v16si) __Y,
1446*67e74705SXin Li                (__v8di) __W, __M);
1447*67e74705SXin Li }
1448*67e74705SXin Li 
1449*67e74705SXin Li static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_mul_epu32(__mmask8 __M,__m512i __X,__m512i __Y)1450*67e74705SXin Li _mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
1451*67e74705SXin Li {
1452*67e74705SXin Li   return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
1453*67e74705SXin Li                (__v16si) __Y,
1454*67e74705SXin Li                (__v8di)
1455*67e74705SXin Li                _mm512_setzero_si512 (),
1456*67e74705SXin Li                __M);
1457*67e74705SXin Li }
1458*67e74705SXin Li 
1459*67e74705SXin Li static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mullo_epi32(__m512i __A,__m512i __B)1460*67e74705SXin Li _mm512_mullo_epi32 (__m512i __A, __m512i __B)
1461*67e74705SXin Li {
1462*67e74705SXin Li   return (__m512i) ((__v16su) __A * (__v16su) __B);
1463*67e74705SXin Li }
1464*67e74705SXin Li 
1465*67e74705SXin Li static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_mullo_epi32(__mmask16 __M,__m512i __A,__m512i __B)1466*67e74705SXin Li _mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
1467*67e74705SXin Li {
1468*67e74705SXin Li   return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
1469*67e74705SXin Li               (__v16si) __B,
1470*67e74705SXin Li               (__v16si)
1471*67e74705SXin Li               _mm512_setzero_si512 (),
1472*67e74705SXin Li               __M);
1473*67e74705SXin Li }
1474*67e74705SXin Li 
1475*67e74705SXin Li static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mask_mullo_epi32(__m512i __W,__mmask16 __M,__m512i __A,__m512i __B)1476*67e74705SXin Li _mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1477*67e74705SXin Li {
1478*67e74705SXin Li   return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
1479*67e74705SXin Li               (__v16si) __B,
1480*67e74705SXin Li               (__v16si) __W, __M);
1481*67e74705SXin Li }
1482*67e74705SXin Li 
/* Masked packed-double square root with an explicit rounding-control
 * argument. W is the pass-through vector, U the 8-bit mask; R is forwarded
 * to the builtin as (int)(R). */
#define _mm512_mask_sqrt_round_pd(W, U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(W), \
                                         (__mmask8)(U), \
                                         (int)(R)); })
1487*67e74705SXin Li 
/* Zero-masking packed-double square root with an explicit rounding-control
 * argument. A zeroed vector is the pass-through operand, U the 8-bit mask;
 * R is forwarded to the builtin as (int)(R). */
#define _mm512_maskz_sqrt_round_pd(U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)_mm512_setzero_pd(), \
                                         (__mmask8)(U), \
                                         (int)(R)); })
1492*67e74705SXin Li 
/* Unmasked packed-double square root with an explicit rounding-control
 * argument. The pass-through operand is _mm512_undefined_pd() and the mask
 * is all-ones; R is forwarded to the builtin as (int)(R). */
#define _mm512_sqrt_round_pd(A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)_mm512_undefined_pd(), \
                                         (__mmask8)-1, \
                                         (int)(R)); })
1497*67e74705SXin Li 
1498*67e74705SXin Li static  __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_sqrt_pd(__m512d __a)1499*67e74705SXin Li _mm512_sqrt_pd(__m512d __a)
1500*67e74705SXin Li {
1501*67e74705SXin Li   return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)__a,
1502*67e74705SXin Li                                                 (__v8df) _mm512_setzero_pd (),
1503*67e74705SXin Li                                                 (__mmask8) -1,
1504*67e74705SXin Li                                                 _MM_FROUND_CUR_DIRECTION);
1505*67e74705SXin Li }
1506*67e74705SXin Li 
1507*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_sqrt_pd(__m512d __W,__mmask8 __U,__m512d __A)1508*67e74705SXin Li _mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
1509*67e74705SXin Li {
1510*67e74705SXin Li   return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1511*67e74705SXin Li                    (__v8df) __W,
1512*67e74705SXin Li                    (__mmask8) __U,
1513*67e74705SXin Li                    _MM_FROUND_CUR_DIRECTION);
1514*67e74705SXin Li }
1515*67e74705SXin Li 
1516*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_sqrt_pd(__mmask8 __U,__m512d __A)1517*67e74705SXin Li _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
1518*67e74705SXin Li {
1519*67e74705SXin Li   return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1520*67e74705SXin Li                    (__v8df)
1521*67e74705SXin Li                    _mm512_setzero_pd (),
1522*67e74705SXin Li                    (__mmask8) __U,
1523*67e74705SXin Li                    _MM_FROUND_CUR_DIRECTION);
1524*67e74705SXin Li }
1525*67e74705SXin Li 
/* Masked packed-float square root with an explicit rounding-control
 * argument. W is the pass-through vector, U the 16-bit mask; R is forwarded
 * to the builtin as (int)(R). */
#define _mm512_mask_sqrt_round_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)(__m512)(W), \
                                        (__mmask16)(U), \
                                        (int)(R)); })
1530*67e74705SXin Li 
/* Zero-masking packed-float square root with an explicit rounding-control
 * argument. A zeroed vector is the pass-through operand, U the 16-bit mask;
 * R is forwarded to the builtin as (int)(R). */
#define _mm512_maskz_sqrt_round_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)_mm512_setzero_ps(), \
                                        (__mmask16)(U), \
                                        (int)(R)); })
1535*67e74705SXin Li 
/* Unmasked packed-float square root with an explicit rounding-control
 * argument. The pass-through operand is _mm512_undefined_ps() and the mask
 * is all-ones; R is forwarded to the builtin as (int)(R). */
#define _mm512_sqrt_round_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)_mm512_undefined_ps(), \
                                        (__mmask16)-1, \
                                        (int)(R)); })
1540*67e74705SXin Li 
1541*67e74705SXin Li static  __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_sqrt_ps(__m512 __a)1542*67e74705SXin Li _mm512_sqrt_ps(__m512 __a)
1543*67e74705SXin Li {
1544*67e74705SXin Li   return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__a,
1545*67e74705SXin Li                                                (__v16sf) _mm512_setzero_ps (),
1546*67e74705SXin Li                                                (__mmask16) -1,
1547*67e74705SXin Li                                                _MM_FROUND_CUR_DIRECTION);
1548*67e74705SXin Li }
1549*67e74705SXin Li 
1550*67e74705SXin Li static  __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_sqrt_ps(__m512 __W,__mmask16 __U,__m512 __A)1551*67e74705SXin Li _mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
1552*67e74705SXin Li {
1553*67e74705SXin Li   return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A,
1554*67e74705SXin Li                                                (__v16sf) __W,
1555*67e74705SXin Li                                                (__mmask16) __U,
1556*67e74705SXin Li                                                _MM_FROUND_CUR_DIRECTION);
1557*67e74705SXin Li }
1558*67e74705SXin Li 
1559*67e74705SXin Li static  __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_sqrt_ps(__mmask16 __U,__m512 __A)1560*67e74705SXin Li _mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A)
1561*67e74705SXin Li {
1562*67e74705SXin Li   return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A,
1563*67e74705SXin Li                                                (__v16sf) _mm512_setzero_ps (),
1564*67e74705SXin Li                                                (__mmask16) __U,
1565*67e74705SXin Li                                                _MM_FROUND_CUR_DIRECTION);
1566*67e74705SXin Li }
1567*67e74705SXin Li 
1568*67e74705SXin Li static  __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_rsqrt14_pd(__m512d __A)1569*67e74705SXin Li _mm512_rsqrt14_pd(__m512d __A)
1570*67e74705SXin Li {
1571*67e74705SXin Li   return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1572*67e74705SXin Li                  (__v8df)
1573*67e74705SXin Li                  _mm512_setzero_pd (),
1574*67e74705SXin Li                  (__mmask8) -1);}
1575*67e74705SXin Li 
1576*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_rsqrt14_pd(__m512d __W,__mmask8 __U,__m512d __A)1577*67e74705SXin Li _mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1578*67e74705SXin Li {
1579*67e74705SXin Li   return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1580*67e74705SXin Li                   (__v8df) __W,
1581*67e74705SXin Li                   (__mmask8) __U);
1582*67e74705SXin Li }
1583*67e74705SXin Li 
1584*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_rsqrt14_pd(__mmask8 __U,__m512d __A)1585*67e74705SXin Li _mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
1586*67e74705SXin Li {
1587*67e74705SXin Li   return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1588*67e74705SXin Li                   (__v8df)
1589*67e74705SXin Li                   _mm512_setzero_pd (),
1590*67e74705SXin Li                   (__mmask8) __U);
1591*67e74705SXin Li }
1592*67e74705SXin Li 
1593*67e74705SXin Li static  __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_rsqrt14_ps(__m512 __A)1594*67e74705SXin Li _mm512_rsqrt14_ps(__m512 __A)
1595*67e74705SXin Li {
1596*67e74705SXin Li   return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1597*67e74705SXin Li                 (__v16sf)
1598*67e74705SXin Li                 _mm512_setzero_ps (),
1599*67e74705SXin Li                 (__mmask16) -1);
1600*67e74705SXin Li }
1601*67e74705SXin Li 
1602*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_rsqrt14_ps(__m512 __W,__mmask16 __U,__m512 __A)1603*67e74705SXin Li _mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1604*67e74705SXin Li {
1605*67e74705SXin Li   return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1606*67e74705SXin Li                  (__v16sf) __W,
1607*67e74705SXin Li                  (__mmask16) __U);
1608*67e74705SXin Li }
1609*67e74705SXin Li 
1610*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_rsqrt14_ps(__mmask16 __U,__m512 __A)1611*67e74705SXin Li _mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
1612*67e74705SXin Li {
1613*67e74705SXin Li   return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1614*67e74705SXin Li                  (__v16sf)
1615*67e74705SXin Li                  _mm512_setzero_ps (),
1616*67e74705SXin Li                  (__mmask16) __U);
1617*67e74705SXin Li }
1618*67e74705SXin Li 
1619*67e74705SXin Li static  __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_rsqrt14_ss(__m128 __A,__m128 __B)1620*67e74705SXin Li _mm_rsqrt14_ss(__m128 __A, __m128 __B)
1621*67e74705SXin Li {
1622*67e74705SXin Li   return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1623*67e74705SXin Li              (__v4sf) __B,
1624*67e74705SXin Li              (__v4sf)
1625*67e74705SXin Li              _mm_setzero_ps (),
1626*67e74705SXin Li              (__mmask8) -1);
1627*67e74705SXin Li }
1628*67e74705SXin Li 
1629*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_rsqrt14_ss(__m128 __W,__mmask8 __U,__m128 __A,__m128 __B)1630*67e74705SXin Li _mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1631*67e74705SXin Li {
1632*67e74705SXin Li  return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1633*67e74705SXin Li           (__v4sf) __B,
1634*67e74705SXin Li           (__v4sf) __W,
1635*67e74705SXin Li           (__mmask8) __U);
1636*67e74705SXin Li }
1637*67e74705SXin Li 
1638*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_rsqrt14_ss(__mmask8 __U,__m128 __A,__m128 __B)1639*67e74705SXin Li _mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1640*67e74705SXin Li {
1641*67e74705SXin Li  return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1642*67e74705SXin Li           (__v4sf) __B,
1643*67e74705SXin Li           (__v4sf) _mm_setzero_ps (),
1644*67e74705SXin Li           (__mmask8) __U);
1645*67e74705SXin Li }
1646*67e74705SXin Li 
1647*67e74705SXin Li static  __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_rsqrt14_sd(__m128d __A,__m128d __B)1648*67e74705SXin Li _mm_rsqrt14_sd(__m128d __A, __m128d __B)
1649*67e74705SXin Li {
1650*67e74705SXin Li   return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
1651*67e74705SXin Li               (__v2df) __B,
1652*67e74705SXin Li               (__v2df)
1653*67e74705SXin Li               _mm_setzero_pd (),
1654*67e74705SXin Li               (__mmask8) -1);
1655*67e74705SXin Li }
1656*67e74705SXin Li 
1657*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_rsqrt14_sd(__m128d __W,__mmask8 __U,__m128d __A,__m128d __B)1658*67e74705SXin Li _mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1659*67e74705SXin Li {
1660*67e74705SXin Li  return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1661*67e74705SXin Li           (__v2df) __B,
1662*67e74705SXin Li           (__v2df) __W,
1663*67e74705SXin Li           (__mmask8) __U);
1664*67e74705SXin Li }
1665*67e74705SXin Li 
1666*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_rsqrt14_sd(__mmask8 __U,__m128d __A,__m128d __B)1667*67e74705SXin Li _mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1668*67e74705SXin Li {
1669*67e74705SXin Li  return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1670*67e74705SXin Li           (__v2df) __B,
1671*67e74705SXin Li           (__v2df) _mm_setzero_pd (),
1672*67e74705SXin Li           (__mmask8) __U);
1673*67e74705SXin Li }
1674*67e74705SXin Li 
1675*67e74705SXin Li static  __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_rcp14_pd(__m512d __A)1676*67e74705SXin Li _mm512_rcp14_pd(__m512d __A)
1677*67e74705SXin Li {
1678*67e74705SXin Li   return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1679*67e74705SXin Li                (__v8df)
1680*67e74705SXin Li                _mm512_setzero_pd (),
1681*67e74705SXin Li                (__mmask8) -1);
1682*67e74705SXin Li }
1683*67e74705SXin Li 
1684*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_rcp14_pd(__m512d __W,__mmask8 __U,__m512d __A)1685*67e74705SXin Li _mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1686*67e74705SXin Li {
1687*67e74705SXin Li   return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1688*67e74705SXin Li                 (__v8df) __W,
1689*67e74705SXin Li                 (__mmask8) __U);
1690*67e74705SXin Li }
1691*67e74705SXin Li 
1692*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_rcp14_pd(__mmask8 __U,__m512d __A)1693*67e74705SXin Li _mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
1694*67e74705SXin Li {
1695*67e74705SXin Li   return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1696*67e74705SXin Li                 (__v8df)
1697*67e74705SXin Li                 _mm512_setzero_pd (),
1698*67e74705SXin Li                 (__mmask8) __U);
1699*67e74705SXin Li }
1700*67e74705SXin Li 
1701*67e74705SXin Li static  __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_rcp14_ps(__m512 __A)1702*67e74705SXin Li _mm512_rcp14_ps(__m512 __A)
1703*67e74705SXin Li {
1704*67e74705SXin Li   return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1705*67e74705SXin Li               (__v16sf)
1706*67e74705SXin Li               _mm512_setzero_ps (),
1707*67e74705SXin Li               (__mmask16) -1);
1708*67e74705SXin Li }
1709*67e74705SXin Li 
1710*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_rcp14_ps(__m512 __W,__mmask16 __U,__m512 __A)1711*67e74705SXin Li _mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1712*67e74705SXin Li {
1713*67e74705SXin Li   return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1714*67e74705SXin Li                    (__v16sf) __W,
1715*67e74705SXin Li                    (__mmask16) __U);
1716*67e74705SXin Li }
1717*67e74705SXin Li 
1718*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_rcp14_ps(__mmask16 __U,__m512 __A)1719*67e74705SXin Li _mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
1720*67e74705SXin Li {
1721*67e74705SXin Li   return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1722*67e74705SXin Li                    (__v16sf)
1723*67e74705SXin Li                    _mm512_setzero_ps (),
1724*67e74705SXin Li                    (__mmask16) __U);
1725*67e74705SXin Li }
1726*67e74705SXin Li 
1727*67e74705SXin Li static  __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_rcp14_ss(__m128 __A,__m128 __B)1728*67e74705SXin Li _mm_rcp14_ss(__m128 __A, __m128 __B)
1729*67e74705SXin Li {
1730*67e74705SXin Li   return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1731*67e74705SXin Li                  (__v4sf) __B,
1732*67e74705SXin Li                  (__v4sf)
1733*67e74705SXin Li                  _mm_setzero_ps (),
1734*67e74705SXin Li                  (__mmask8) -1);
1735*67e74705SXin Li }
1736*67e74705SXin Li 
1737*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_rcp14_ss(__m128 __W,__mmask8 __U,__m128 __A,__m128 __B)1738*67e74705SXin Li _mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1739*67e74705SXin Li {
1740*67e74705SXin Li  return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1741*67e74705SXin Li           (__v4sf) __B,
1742*67e74705SXin Li           (__v4sf) __W,
1743*67e74705SXin Li           (__mmask8) __U);
1744*67e74705SXin Li }
1745*67e74705SXin Li 
1746*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_rcp14_ss(__mmask8 __U,__m128 __A,__m128 __B)1747*67e74705SXin Li _mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1748*67e74705SXin Li {
1749*67e74705SXin Li  return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1750*67e74705SXin Li           (__v4sf) __B,
1751*67e74705SXin Li           (__v4sf) _mm_setzero_ps (),
1752*67e74705SXin Li           (__mmask8) __U);
1753*67e74705SXin Li }
1754*67e74705SXin Li 
1755*67e74705SXin Li static  __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_rcp14_sd(__m128d __A,__m128d __B)1756*67e74705SXin Li _mm_rcp14_sd(__m128d __A, __m128d __B)
1757*67e74705SXin Li {
1758*67e74705SXin Li   return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
1759*67e74705SXin Li             (__v2df) __B,
1760*67e74705SXin Li             (__v2df)
1761*67e74705SXin Li             _mm_setzero_pd (),
1762*67e74705SXin Li             (__mmask8) -1);
1763*67e74705SXin Li }
1764*67e74705SXin Li 
1765*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_rcp14_sd(__m128d __W,__mmask8 __U,__m128d __A,__m128d __B)1766*67e74705SXin Li _mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1767*67e74705SXin Li {
1768*67e74705SXin Li  return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1769*67e74705SXin Li           (__v2df) __B,
1770*67e74705SXin Li           (__v2df) __W,
1771*67e74705SXin Li           (__mmask8) __U);
1772*67e74705SXin Li }
1773*67e74705SXin Li 
1774*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_rcp14_sd(__mmask8 __U,__m128d __A,__m128d __B)1775*67e74705SXin Li _mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1776*67e74705SXin Li {
1777*67e74705SXin Li  return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1778*67e74705SXin Li           (__v2df) __B,
1779*67e74705SXin Li           (__v2df) _mm_setzero_pd (),
1780*67e74705SXin Li           (__mmask8) __U);
1781*67e74705SXin Li }
1782*67e74705SXin Li 
1783*67e74705SXin Li static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_floor_ps(__m512 __A)1784*67e74705SXin Li _mm512_floor_ps(__m512 __A)
1785*67e74705SXin Li {
1786*67e74705SXin Li   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1787*67e74705SXin Li                                                   _MM_FROUND_FLOOR,
1788*67e74705SXin Li                                                   (__v16sf) __A, -1,
1789*67e74705SXin Li                                                   _MM_FROUND_CUR_DIRECTION);
1790*67e74705SXin Li }
1791*67e74705SXin Li 
1792*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_floor_ps(__m512 __W,__mmask16 __U,__m512 __A)1793*67e74705SXin Li _mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
1794*67e74705SXin Li {
1795*67e74705SXin Li   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1796*67e74705SXin Li                    _MM_FROUND_FLOOR,
1797*67e74705SXin Li                    (__v16sf) __W, __U,
1798*67e74705SXin Li                    _MM_FROUND_CUR_DIRECTION);
1799*67e74705SXin Li }
1800*67e74705SXin Li 
1801*67e74705SXin Li static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_floor_pd(__m512d __A)1802*67e74705SXin Li _mm512_floor_pd(__m512d __A)
1803*67e74705SXin Li {
1804*67e74705SXin Li   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1805*67e74705SXin Li                                                    _MM_FROUND_FLOOR,
1806*67e74705SXin Li                                                    (__v8df) __A, -1,
1807*67e74705SXin Li                                                    _MM_FROUND_CUR_DIRECTION);
1808*67e74705SXin Li }
1809*67e74705SXin Li 
1810*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_floor_pd(__m512d __W,__mmask8 __U,__m512d __A)1811*67e74705SXin Li _mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
1812*67e74705SXin Li {
1813*67e74705SXin Li   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1814*67e74705SXin Li                 _MM_FROUND_FLOOR,
1815*67e74705SXin Li                 (__v8df) __W, __U,
1816*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
1817*67e74705SXin Li }
1818*67e74705SXin Li 
1819*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_ceil_ps(__m512 __W,__mmask16 __U,__m512 __A)1820*67e74705SXin Li _mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
1821*67e74705SXin Li {
1822*67e74705SXin Li   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1823*67e74705SXin Li                    _MM_FROUND_CEIL,
1824*67e74705SXin Li                    (__v16sf) __W, __U,
1825*67e74705SXin Li                    _MM_FROUND_CUR_DIRECTION);
1826*67e74705SXin Li }
1827*67e74705SXin Li 
1828*67e74705SXin Li static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_ceil_ps(__m512 __A)1829*67e74705SXin Li _mm512_ceil_ps(__m512 __A)
1830*67e74705SXin Li {
1831*67e74705SXin Li   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1832*67e74705SXin Li                                                   _MM_FROUND_CEIL,
1833*67e74705SXin Li                                                   (__v16sf) __A, -1,
1834*67e74705SXin Li                                                   _MM_FROUND_CUR_DIRECTION);
1835*67e74705SXin Li }
1836*67e74705SXin Li 
1837*67e74705SXin Li static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_ceil_pd(__m512d __A)1838*67e74705SXin Li _mm512_ceil_pd(__m512d __A)
1839*67e74705SXin Li {
1840*67e74705SXin Li   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1841*67e74705SXin Li                                                    _MM_FROUND_CEIL,
1842*67e74705SXin Li                                                    (__v8df) __A, -1,
1843*67e74705SXin Li                                                    _MM_FROUND_CUR_DIRECTION);
1844*67e74705SXin Li }
1845*67e74705SXin Li 
1846*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_ceil_pd(__m512d __W,__mmask8 __U,__m512d __A)1847*67e74705SXin Li _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
1848*67e74705SXin Li {
1849*67e74705SXin Li   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1850*67e74705SXin Li                 _MM_FROUND_CEIL,
1851*67e74705SXin Li                 (__v8df) __W, __U,
1852*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
1853*67e74705SXin Li }
1854*67e74705SXin Li 
1855*67e74705SXin Li static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_abs_epi64(__m512i __A)1856*67e74705SXin Li _mm512_abs_epi64(__m512i __A)
1857*67e74705SXin Li {
1858*67e74705SXin Li   return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
1859*67e74705SXin Li              (__v8di)
1860*67e74705SXin Li              _mm512_setzero_si512 (),
1861*67e74705SXin Li              (__mmask8) -1);
1862*67e74705SXin Li }
1863*67e74705SXin Li 
1864*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_abs_epi64(__m512i __W,__mmask8 __U,__m512i __A)1865*67e74705SXin Li _mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
1866*67e74705SXin Li {
1867*67e74705SXin Li   return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
1868*67e74705SXin Li                   (__v8di) __W,
1869*67e74705SXin Li                   (__mmask8) __U);
1870*67e74705SXin Li }
1871*67e74705SXin Li 
1872*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_abs_epi64(__mmask8 __U,__m512i __A)1873*67e74705SXin Li _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
1874*67e74705SXin Li {
1875*67e74705SXin Li   return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
1876*67e74705SXin Li                   (__v8di)
1877*67e74705SXin Li                   _mm512_setzero_si512 (),
1878*67e74705SXin Li                   (__mmask8) __U);
1879*67e74705SXin Li }
1880*67e74705SXin Li 
1881*67e74705SXin Li static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_abs_epi32(__m512i __A)1882*67e74705SXin Li _mm512_abs_epi32(__m512i __A)
1883*67e74705SXin Li {
1884*67e74705SXin Li   return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
1885*67e74705SXin Li              (__v16si)
1886*67e74705SXin Li              _mm512_setzero_si512 (),
1887*67e74705SXin Li              (__mmask16) -1);
1888*67e74705SXin Li }
1889*67e74705SXin Li 
1890*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_abs_epi32(__m512i __W,__mmask16 __U,__m512i __A)1891*67e74705SXin Li _mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
1892*67e74705SXin Li {
1893*67e74705SXin Li   return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
1894*67e74705SXin Li                   (__v16si) __W,
1895*67e74705SXin Li                   (__mmask16) __U);
1896*67e74705SXin Li }
1897*67e74705SXin Li 
1898*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_abs_epi32(__mmask16 __U,__m512i __A)1899*67e74705SXin Li _mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
1900*67e74705SXin Li {
1901*67e74705SXin Li   return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
1902*67e74705SXin Li                   (__v16si)
1903*67e74705SXin Li                   _mm512_setzero_si512 (),
1904*67e74705SXin Li                   (__mmask16) __U);
1905*67e74705SXin Li }
1906*67e74705SXin Li 
1907*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_add_ss(__m128 __W,__mmask8 __U,__m128 __A,__m128 __B)1908*67e74705SXin Li _mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1909*67e74705SXin Li   return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A,
1910*67e74705SXin Li                 (__v4sf) __B,
1911*67e74705SXin Li                 (__v4sf) __W,
1912*67e74705SXin Li                 (__mmask8) __U,
1913*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
1914*67e74705SXin Li }
1915*67e74705SXin Li 
1916*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_add_ss(__mmask8 __U,__m128 __A,__m128 __B)1917*67e74705SXin Li _mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1918*67e74705SXin Li   return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A,
1919*67e74705SXin Li                 (__v4sf) __B,
1920*67e74705SXin Li                 (__v4sf)  _mm_setzero_ps (),
1921*67e74705SXin Li                 (__mmask8) __U,
1922*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
1923*67e74705SXin Li }
1924*67e74705SXin Li 
/* Unmasked scalar single-precision add with caller-supplied rounding R:
 * expands to __builtin_ia32_addss_round_mask with a zero third operand and
 * an all-ones mask. */
#define _mm_add_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_addss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1, (int)(R)); })
1930*67e74705SXin Li 
/* Merge-mask scalar single-precision add with caller-supplied rounding R:
 * W is the third vector operand and U the mask. */
#define _mm_mask_add_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_addss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), (__mmask8)(U), (int)(R)); })
1936*67e74705SXin Li 
/* Zeroing-mask scalar single-precision add with caller-supplied rounding R:
 * a zero vector is the third operand and U the mask. */
#define _mm_maskz_add_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_addss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U), (int)(R)); })
1942*67e74705SXin Li 
1943*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_add_sd(__m128d __W,__mmask8 __U,__m128d __A,__m128d __B)1944*67e74705SXin Li _mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1945*67e74705SXin Li   return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A,
1946*67e74705SXin Li                 (__v2df) __B,
1947*67e74705SXin Li                 (__v2df) __W,
1948*67e74705SXin Li                 (__mmask8) __U,
1949*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
1950*67e74705SXin Li }
1951*67e74705SXin Li 
1952*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_add_sd(__mmask8 __U,__m128d __A,__m128d __B)1953*67e74705SXin Li _mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1954*67e74705SXin Li   return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A,
1955*67e74705SXin Li                 (__v2df) __B,
1956*67e74705SXin Li                 (__v2df)  _mm_setzero_pd (),
1957*67e74705SXin Li                 (__mmask8) __U,
1958*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
1959*67e74705SXin Li }
/* Unmasked scalar double-precision add with caller-supplied rounding R:
 * expands to __builtin_ia32_addsd_round_mask with a zero third operand and
 * an all-ones mask. */
#define _mm_add_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_addsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1, (int)(R)); })
1965*67e74705SXin Li 
/* Merge-mask scalar double-precision add with caller-supplied rounding R:
 * W is the third vector operand and U the mask. */
#define _mm_mask_add_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_addsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), (__mmask8)(U), (int)(R)); })
1971*67e74705SXin Li 
/* Zeroing-mask scalar double-precision add with caller-supplied rounding R:
 * a zero vector is the third operand and U the mask. */
#define _mm_maskz_add_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_addsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U), (int)(R)); })
1977*67e74705SXin Li 
1978*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_add_pd(__m512d __W,__mmask8 __U,__m512d __A,__m512d __B)1979*67e74705SXin Li _mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
1980*67e74705SXin Li   return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
1981*67e74705SXin Li              (__v8df) __B,
1982*67e74705SXin Li              (__v8df) __W,
1983*67e74705SXin Li              (__mmask8) __U,
1984*67e74705SXin Li              _MM_FROUND_CUR_DIRECTION);
1985*67e74705SXin Li }
1986*67e74705SXin Li 
1987*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_add_pd(__mmask8 __U,__m512d __A,__m512d __B)1988*67e74705SXin Li _mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
1989*67e74705SXin Li   return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
1990*67e74705SXin Li              (__v8df) __B,
1991*67e74705SXin Li              (__v8df) _mm512_setzero_pd (),
1992*67e74705SXin Li              (__mmask8) __U,
1993*67e74705SXin Li              _MM_FROUND_CUR_DIRECTION);
1994*67e74705SXin Li }
1995*67e74705SXin Li 
1996*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_add_ps(__m512 __W,__mmask16 __U,__m512 __A,__m512 __B)1997*67e74705SXin Li _mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
1998*67e74705SXin Li   return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
1999*67e74705SXin Li             (__v16sf) __B,
2000*67e74705SXin Li             (__v16sf) __W,
2001*67e74705SXin Li             (__mmask16) __U,
2002*67e74705SXin Li             _MM_FROUND_CUR_DIRECTION);
2003*67e74705SXin Li }
2004*67e74705SXin Li 
2005*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_add_ps(__mmask16 __U,__m512 __A,__m512 __B)2006*67e74705SXin Li _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2007*67e74705SXin Li   return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2008*67e74705SXin Li             (__v16sf) __B,
2009*67e74705SXin Li             (__v16sf) _mm512_setzero_ps (),
2010*67e74705SXin Li             (__mmask16) __U,
2011*67e74705SXin Li             _MM_FROUND_CUR_DIRECTION);
2012*67e74705SXin Li }
2013*67e74705SXin Li 
/* Unmasked packed double-precision add with caller-supplied rounding R:
 * expands to __builtin_ia32_addpd512_mask with a zero third operand and an
 * all-ones mask. */
#define _mm512_add_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_addpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)_mm512_setzero_pd(), (__mmask8)-1, (int)(R)); })
2019*67e74705SXin Li 
/* Merge-mask packed double-precision add with caller-supplied rounding R:
 * W is the third vector operand and U the mask. */
#define _mm512_mask_add_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_addpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(W), (__mmask8)(U), (int)(R)); })
2025*67e74705SXin Li 
/* Zeroing-mask packed double-precision add with caller-supplied rounding R:
 * a zero vector is the third operand and U the mask. */
#define _mm512_maskz_add_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_addpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)_mm512_setzero_pd(), (__mmask8)(U), (int)(R)); })
2031*67e74705SXin Li 
/* Unmasked packed single-precision add with caller-supplied rounding R:
 * expands to __builtin_ia32_addps512_mask with a zero third operand and an
 * all-ones mask. */
#define _mm512_add_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_addps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      (__v16sf)_mm512_setzero_ps(), (__mmask16)-1, (int)(R)); })
2037*67e74705SXin Li 
/* Merge-mask packed single-precision add with caller-supplied rounding R:
 * W is the third vector operand and U the mask. */
#define _mm512_mask_add_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_addps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      (__v16sf)(__m512)(W), (__mmask16)(U), (int)(R)); })
2043*67e74705SXin Li 
/* Zeroing-mask packed single-precision add with caller-supplied rounding R:
 * a zero vector is the third operand and U the mask. */
#define _mm512_maskz_add_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_addps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), (int)(R)); })
2049*67e74705SXin Li 
2050*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_sub_ss(__m128 __W,__mmask8 __U,__m128 __A,__m128 __B)2051*67e74705SXin Li _mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2052*67e74705SXin Li   return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A,
2053*67e74705SXin Li                 (__v4sf) __B,
2054*67e74705SXin Li                 (__v4sf) __W,
2055*67e74705SXin Li                 (__mmask8) __U,
2056*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
2057*67e74705SXin Li }
2058*67e74705SXin Li 
2059*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_sub_ss(__mmask8 __U,__m128 __A,__m128 __B)2060*67e74705SXin Li _mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2061*67e74705SXin Li   return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A,
2062*67e74705SXin Li                 (__v4sf) __B,
2063*67e74705SXin Li                 (__v4sf)  _mm_setzero_ps (),
2064*67e74705SXin Li                 (__mmask8) __U,
2065*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
2066*67e74705SXin Li }
/* Unmasked scalar single-precision subtract with caller-supplied rounding R:
 * expands to __builtin_ia32_subss_round_mask with a zero third operand and
 * an all-ones mask. */
#define _mm_sub_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_subss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1, (int)(R)); })
2072*67e74705SXin Li 
/* Merge-mask scalar single-precision subtract with caller-supplied rounding
 * R: W is the third vector operand and U the mask. */
#define _mm_mask_sub_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_subss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), (__mmask8)(U), (int)(R)); })
2078*67e74705SXin Li 
/* Zeroing-mask scalar single-precision subtract with caller-supplied
 * rounding R: a zero vector is the third operand and U the mask. */
#define _mm_maskz_sub_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_subss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U), (int)(R)); })
2084*67e74705SXin Li 
2085*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_sub_sd(__m128d __W,__mmask8 __U,__m128d __A,__m128d __B)2086*67e74705SXin Li _mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2087*67e74705SXin Li   return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A,
2088*67e74705SXin Li                 (__v2df) __B,
2089*67e74705SXin Li                 (__v2df) __W,
2090*67e74705SXin Li                 (__mmask8) __U,
2091*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
2092*67e74705SXin Li }
2093*67e74705SXin Li 
2094*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_sub_sd(__mmask8 __U,__m128d __A,__m128d __B)2095*67e74705SXin Li _mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2096*67e74705SXin Li   return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A,
2097*67e74705SXin Li                 (__v2df) __B,
2098*67e74705SXin Li                 (__v2df)  _mm_setzero_pd (),
2099*67e74705SXin Li                 (__mmask8) __U,
2100*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
2101*67e74705SXin Li }
2102*67e74705SXin Li 
/* Unmasked scalar double-precision subtract with caller-supplied rounding R:
 * expands to __builtin_ia32_subsd_round_mask with a zero third operand and
 * an all-ones mask. */
#define _mm_sub_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_subsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1, (int)(R)); })
2108*67e74705SXin Li 
/* Merge-mask scalar double-precision subtract with caller-supplied rounding
 * R: W is the third vector operand and U the mask. */
#define _mm_mask_sub_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_subsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), (__mmask8)(U), (int)(R)); })
2114*67e74705SXin Li 
/* Expands to __builtin_ia32_subsd_round_mask with operands A and B,
 * _mm_setzero_pd() as the third vector argument, U as the mask and R as the
 * (int) rounding argument. */
#define _mm_maskz_sub_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_subsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); })
2120*67e74705SXin Li 
2121*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_sub_pd(__m512d __W,__mmask8 __U,__m512d __A,__m512d __B)2122*67e74705SXin Li _mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2123*67e74705SXin Li   return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2124*67e74705SXin Li              (__v8df) __B,
2125*67e74705SXin Li              (__v8df) __W,
2126*67e74705SXin Li              (__mmask8) __U,
2127*67e74705SXin Li              _MM_FROUND_CUR_DIRECTION);
2128*67e74705SXin Li }
2129*67e74705SXin Li 
2130*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_sub_pd(__mmask8 __U,__m512d __A,__m512d __B)2131*67e74705SXin Li _mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2132*67e74705SXin Li   return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2133*67e74705SXin Li              (__v8df) __B,
2134*67e74705SXin Li              (__v8df)
2135*67e74705SXin Li              _mm512_setzero_pd (),
2136*67e74705SXin Li              (__mmask8) __U,
2137*67e74705SXin Li              _MM_FROUND_CUR_DIRECTION);
2138*67e74705SXin Li }
2139*67e74705SXin Li 
2140*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_sub_ps(__m512 __W,__mmask16 __U,__m512 __A,__m512 __B)2141*67e74705SXin Li _mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2142*67e74705SXin Li   return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2143*67e74705SXin Li             (__v16sf) __B,
2144*67e74705SXin Li             (__v16sf) __W,
2145*67e74705SXin Li             (__mmask16) __U,
2146*67e74705SXin Li             _MM_FROUND_CUR_DIRECTION);
2147*67e74705SXin Li }
2148*67e74705SXin Li 
2149*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_sub_ps(__mmask16 __U,__m512 __A,__m512 __B)2150*67e74705SXin Li _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2151*67e74705SXin Li   return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2152*67e74705SXin Li             (__v16sf) __B,
2153*67e74705SXin Li             (__v16sf)
2154*67e74705SXin Li             _mm512_setzero_ps (),
2155*67e74705SXin Li             (__mmask16) __U,
2156*67e74705SXin Li             _MM_FROUND_CUR_DIRECTION);
2157*67e74705SXin Li }
2158*67e74705SXin Li 
/* Expands to __builtin_ia32_subpd512_mask with operands A and B,
 * _mm512_setzero_pd() as the third vector argument, an all-ones mask
 * ((__mmask8)-1) and R forwarded as the (int) rounding argument. */
#define _mm512_sub_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_subpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)-1, \
      (int)(R)); })
2164*67e74705SXin Li 
/* Expands to __builtin_ia32_subpd512_mask with operands A and B, W as the
 * third vector argument, U as the mask and R as the (int) rounding argument. */
#define _mm512_mask_sub_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_subpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      (int)(R)); })
2170*67e74705SXin Li 
/* Expands to __builtin_ia32_subpd512_mask with operands A and B,
 * _mm512_setzero_pd() as the third vector argument, U as the mask and R as
 * the (int) rounding argument. */
#define _mm512_maskz_sub_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_subpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); })
2176*67e74705SXin Li 
/* Expands to __builtin_ia32_subps512_mask with operands A and B,
 * _mm512_setzero_ps() as the third vector argument, an all-ones mask
 * ((__mmask16)-1) and R forwarded as the (int) rounding argument. */
#define _mm512_sub_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_subps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)-1, \
      (int)(R)); })
2182*67e74705SXin Li 
/* Expands to __builtin_ia32_subps512_mask with operands A and B, W as the
 * third vector argument, U as the mask and R as the (int) rounding argument.
 * The expansion intentionally carries no trailing ';' so the macro can be
 * used as a sub-expression (function argument, initializer, operand). */
#define _mm512_mask_sub_round_ps(W, U, A, B, R)  __extension__ ({ \
  (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
                                       (int)(R)); })
2188*67e74705SXin Li 
/* Expands to __builtin_ia32_subps512_mask with operands A and B,
 * _mm512_setzero_ps() as the third vector argument, U as the mask and R as
 * the (int) rounding argument.
 * The expansion intentionally carries no trailing ';' so the macro can be
 * used as a sub-expression (function argument, initializer, operand). */
#define _mm512_maskz_sub_round_ps(U, A, B, R)  __extension__ ({ \
  (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), (int)(R)); })
2194*67e74705SXin Li 
2195*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_mul_ss(__m128 __W,__mmask8 __U,__m128 __A,__m128 __B)2196*67e74705SXin Li _mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2197*67e74705SXin Li   return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A,
2198*67e74705SXin Li                 (__v4sf) __B,
2199*67e74705SXin Li                 (__v4sf) __W,
2200*67e74705SXin Li                 (__mmask8) __U,
2201*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
2202*67e74705SXin Li }
2203*67e74705SXin Li 
2204*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_mul_ss(__mmask8 __U,__m128 __A,__m128 __B)2205*67e74705SXin Li _mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2206*67e74705SXin Li   return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A,
2207*67e74705SXin Li                 (__v4sf) __B,
2208*67e74705SXin Li                 (__v4sf)  _mm_setzero_ps (),
2209*67e74705SXin Li                 (__mmask8) __U,
2210*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
2211*67e74705SXin Li }
/* Expands to __builtin_ia32_mulss_round_mask with operands A and B,
 * _mm_setzero_ps() as the third vector argument, an all-ones mask
 * ((__mmask8)-1) and R forwarded as the (int) rounding argument. */
#define _mm_mul_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_mulss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, \
      (int)(R)); })
2217*67e74705SXin Li 
/* Expands to __builtin_ia32_mulss_round_mask with operands A and B, W as the
 * third vector argument, U as the mask and R as the (int) rounding argument. */
#define _mm_mask_mul_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_mulss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), \
      (int)(R)); })
2223*67e74705SXin Li 
/* Expands to __builtin_ia32_mulss_round_mask with operands A and B,
 * _mm_setzero_ps() as the third vector argument, U as the mask and R as the
 * (int) rounding argument. */
#define _mm_maskz_mul_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_mulss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), \
      (int)(R)); })
2229*67e74705SXin Li 
2230*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_mul_sd(__m128d __W,__mmask8 __U,__m128d __A,__m128d __B)2231*67e74705SXin Li _mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2232*67e74705SXin Li   return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A,
2233*67e74705SXin Li                 (__v2df) __B,
2234*67e74705SXin Li                 (__v2df) __W,
2235*67e74705SXin Li                 (__mmask8) __U,
2236*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
2237*67e74705SXin Li }
2238*67e74705SXin Li 
2239*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_mul_sd(__mmask8 __U,__m128d __A,__m128d __B)2240*67e74705SXin Li _mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2241*67e74705SXin Li   return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A,
2242*67e74705SXin Li                 (__v2df) __B,
2243*67e74705SXin Li                 (__v2df)  _mm_setzero_pd (),
2244*67e74705SXin Li                 (__mmask8) __U,
2245*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
2246*67e74705SXin Li }
2247*67e74705SXin Li 
/* Expands to __builtin_ia32_mulsd_round_mask with operands A and B,
 * _mm_setzero_pd() as the third vector argument, an all-ones mask
 * ((__mmask8)-1) and R forwarded as the (int) rounding argument. */
#define _mm_mul_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_mulsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, \
      (int)(R)); })
2253*67e74705SXin Li 
/* Expands to __builtin_ia32_mulsd_round_mask with operands A and B, W as the
 * third vector argument, U as the mask and R as the (int) rounding argument. */
#define _mm_mask_mul_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_mulsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), \
      (int)(R)); })
2259*67e74705SXin Li 
/* Expands to __builtin_ia32_mulsd_round_mask with operands A and B,
 * _mm_setzero_pd() as the third vector argument, U as the mask and R as the
 * (int) rounding argument. */
#define _mm_maskz_mul_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_mulsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); })
2265*67e74705SXin Li 
2266*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_mul_pd(__m512d __W,__mmask8 __U,__m512d __A,__m512d __B)2267*67e74705SXin Li _mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2268*67e74705SXin Li   return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2269*67e74705SXin Li              (__v8df) __B,
2270*67e74705SXin Li              (__v8df) __W,
2271*67e74705SXin Li              (__mmask8) __U,
2272*67e74705SXin Li              _MM_FROUND_CUR_DIRECTION);
2273*67e74705SXin Li }
2274*67e74705SXin Li 
2275*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_mul_pd(__mmask8 __U,__m512d __A,__m512d __B)2276*67e74705SXin Li _mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2277*67e74705SXin Li   return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2278*67e74705SXin Li              (__v8df) __B,
2279*67e74705SXin Li              (__v8df)
2280*67e74705SXin Li              _mm512_setzero_pd (),
2281*67e74705SXin Li              (__mmask8) __U,
2282*67e74705SXin Li              _MM_FROUND_CUR_DIRECTION);
2283*67e74705SXin Li }
2284*67e74705SXin Li 
2285*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_mul_ps(__m512 __W,__mmask16 __U,__m512 __A,__m512 __B)2286*67e74705SXin Li _mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2287*67e74705SXin Li   return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2288*67e74705SXin Li             (__v16sf) __B,
2289*67e74705SXin Li             (__v16sf) __W,
2290*67e74705SXin Li             (__mmask16) __U,
2291*67e74705SXin Li             _MM_FROUND_CUR_DIRECTION);
2292*67e74705SXin Li }
2293*67e74705SXin Li 
2294*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_mul_ps(__mmask16 __U,__m512 __A,__m512 __B)2295*67e74705SXin Li _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2296*67e74705SXin Li   return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2297*67e74705SXin Li             (__v16sf) __B,
2298*67e74705SXin Li             (__v16sf)
2299*67e74705SXin Li             _mm512_setzero_ps (),
2300*67e74705SXin Li             (__mmask16) __U,
2301*67e74705SXin Li             _MM_FROUND_CUR_DIRECTION);
2302*67e74705SXin Li }
2303*67e74705SXin Li 
/* Expands to __builtin_ia32_mulpd512_mask with operands A and B,
 * _mm512_setzero_pd() as the third vector argument, an all-ones mask
 * ((__mmask8)-1) and R forwarded as the (int) rounding argument. */
#define _mm512_mul_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_mulpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)-1, \
      (int)(R)); })
2309*67e74705SXin Li 
/* Expands to __builtin_ia32_mulpd512_mask with operands A and B, W as the
 * third vector argument, U as the mask and R as the (int) rounding argument. */
#define _mm512_mask_mul_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_mulpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      (int)(R)); })
2315*67e74705SXin Li 
/* Expands to __builtin_ia32_mulpd512_mask with operands A and B,
 * _mm512_setzero_pd() as the third vector argument, U as the mask and R as
 * the (int) rounding argument. */
#define _mm512_maskz_mul_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_mulpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); })
2321*67e74705SXin Li 
/* Expands to __builtin_ia32_mulps512_mask with operands A and B,
 * _mm512_setzero_ps() as the third vector argument, an all-ones mask
 * ((__mmask16)-1) and R forwarded as the (int) rounding argument. */
#define _mm512_mul_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_mulps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)-1, \
      (int)(R)); })
2327*67e74705SXin Li 
/* Expands to __builtin_ia32_mulps512_mask with operands A and B, W as the
 * third vector argument, U as the mask and R as the (int) rounding argument.
 * The expansion intentionally carries no trailing ';' so the macro can be
 * used as a sub-expression (function argument, initializer, operand). */
#define _mm512_mask_mul_round_ps(W, U, A, B, R)  __extension__ ({ \
  (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
                                       (int)(R)); })
2333*67e74705SXin Li 
/* Expands to __builtin_ia32_mulps512_mask with operands A and B,
 * _mm512_setzero_ps() as the third vector argument, U as the mask and R as
 * the (int) rounding argument.
 * The expansion intentionally carries no trailing ';' so the macro can be
 * used as a sub-expression (function argument, initializer, operand). */
#define _mm512_maskz_mul_round_ps(U, A, B, R)  __extension__ ({ \
  (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), (int)(R)); })
2339*67e74705SXin Li 
2340*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_div_ss(__m128 __W,__mmask8 __U,__m128 __A,__m128 __B)2341*67e74705SXin Li _mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2342*67e74705SXin Li   return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A,
2343*67e74705SXin Li                 (__v4sf) __B,
2344*67e74705SXin Li                 (__v4sf) __W,
2345*67e74705SXin Li                 (__mmask8) __U,
2346*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
2347*67e74705SXin Li }
2348*67e74705SXin Li 
2349*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_div_ss(__mmask8 __U,__m128 __A,__m128 __B)2350*67e74705SXin Li _mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2351*67e74705SXin Li   return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A,
2352*67e74705SXin Li                 (__v4sf) __B,
2353*67e74705SXin Li                 (__v4sf)  _mm_setzero_ps (),
2354*67e74705SXin Li                 (__mmask8) __U,
2355*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
2356*67e74705SXin Li }
2357*67e74705SXin Li 
/* Expands to __builtin_ia32_divss_round_mask with operands A and B,
 * _mm_setzero_ps() as the third vector argument, an all-ones mask
 * ((__mmask8)-1) and R forwarded as the (int) rounding argument. */
#define _mm_div_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_divss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, \
      (int)(R)); })
2363*67e74705SXin Li 
/* Expands to __builtin_ia32_divss_round_mask with operands A and B, W as the
 * third vector argument, U as the mask and R as the (int) rounding argument. */
#define _mm_mask_div_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_divss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), \
      (int)(R)); })
2369*67e74705SXin Li 
/* Expands to __builtin_ia32_divss_round_mask with operands A and B,
 * _mm_setzero_ps() as the third vector argument, U as the mask and R as the
 * (int) rounding argument. */
#define _mm_maskz_div_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_divss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), \
      (int)(R)); })
2375*67e74705SXin Li 
2376*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_div_sd(__m128d __W,__mmask8 __U,__m128d __A,__m128d __B)2377*67e74705SXin Li _mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2378*67e74705SXin Li   return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A,
2379*67e74705SXin Li                 (__v2df) __B,
2380*67e74705SXin Li                 (__v2df) __W,
2381*67e74705SXin Li                 (__mmask8) __U,
2382*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
2383*67e74705SXin Li }
2384*67e74705SXin Li 
2385*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_div_sd(__mmask8 __U,__m128d __A,__m128d __B)2386*67e74705SXin Li _mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2387*67e74705SXin Li   return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A,
2388*67e74705SXin Li                 (__v2df) __B,
2389*67e74705SXin Li                 (__v2df)  _mm_setzero_pd (),
2390*67e74705SXin Li                 (__mmask8) __U,
2391*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
2392*67e74705SXin Li }
2393*67e74705SXin Li 
/* Expands to __builtin_ia32_divsd_round_mask with operands A and B,
 * _mm_setzero_pd() as the third vector argument, an all-ones mask
 * ((__mmask8)-1) and R forwarded as the (int) rounding argument. */
#define _mm_div_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_divsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, \
      (int)(R)); })
2399*67e74705SXin Li 
/* Expands to __builtin_ia32_divsd_round_mask with operands A and B, W as the
 * third vector argument, U as the mask and R as the (int) rounding argument. */
#define _mm_mask_div_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_divsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), \
      (int)(R)); })
2405*67e74705SXin Li 
/* Expands to __builtin_ia32_divsd_round_mask with operands A and B,
 * _mm_setzero_pd() as the third vector argument, U as the mask and R as the
 * (int) rounding argument. */
#define _mm_maskz_div_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_divsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); })
2411*67e74705SXin Li 
2412*67e74705SXin Li static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_div_pd(__m512d __a,__m512d __b)2413*67e74705SXin Li _mm512_div_pd(__m512d __a, __m512d __b)
2414*67e74705SXin Li {
2415*67e74705SXin Li   return (__m512d)((__v8df)__a/(__v8df)__b);
2416*67e74705SXin Li }
2417*67e74705SXin Li 
2418*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_div_pd(__m512d __W,__mmask8 __U,__m512d __A,__m512d __B)2419*67e74705SXin Li _mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2420*67e74705SXin Li   return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A,
2421*67e74705SXin Li              (__v8df) __B,
2422*67e74705SXin Li              (__v8df) __W,
2423*67e74705SXin Li              (__mmask8) __U,
2424*67e74705SXin Li              _MM_FROUND_CUR_DIRECTION);
2425*67e74705SXin Li }
2426*67e74705SXin Li 
2427*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_div_pd(__mmask8 __U,__m512d __A,__m512d __B)2428*67e74705SXin Li _mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2429*67e74705SXin Li   return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A,
2430*67e74705SXin Li              (__v8df) __B,
2431*67e74705SXin Li              (__v8df)
2432*67e74705SXin Li              _mm512_setzero_pd (),
2433*67e74705SXin Li              (__mmask8) __U,
2434*67e74705SXin Li              _MM_FROUND_CUR_DIRECTION);
2435*67e74705SXin Li }
2436*67e74705SXin Li 
2437*67e74705SXin Li static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_div_ps(__m512 __a,__m512 __b)2438*67e74705SXin Li _mm512_div_ps(__m512 __a, __m512 __b)
2439*67e74705SXin Li {
2440*67e74705SXin Li   return (__m512)((__v16sf)__a/(__v16sf)__b);
2441*67e74705SXin Li }
2442*67e74705SXin Li 
2443*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_div_ps(__m512 __W,__mmask16 __U,__m512 __A,__m512 __B)2444*67e74705SXin Li _mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2445*67e74705SXin Li   return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2446*67e74705SXin Li             (__v16sf) __B,
2447*67e74705SXin Li             (__v16sf) __W,
2448*67e74705SXin Li             (__mmask16) __U,
2449*67e74705SXin Li             _MM_FROUND_CUR_DIRECTION);
2450*67e74705SXin Li }
2451*67e74705SXin Li 
2452*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_div_ps(__mmask16 __U,__m512 __A,__m512 __B)2453*67e74705SXin Li _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2454*67e74705SXin Li   return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2455*67e74705SXin Li             (__v16sf) __B,
2456*67e74705SXin Li             (__v16sf)
2457*67e74705SXin Li             _mm512_setzero_ps (),
2458*67e74705SXin Li             (__mmask16) __U,
2459*67e74705SXin Li             _MM_FROUND_CUR_DIRECTION);
2460*67e74705SXin Li }
2461*67e74705SXin Li 
/* Expands to __builtin_ia32_divpd512_mask with operands A and B,
 * _mm512_setzero_pd() as the third vector argument, an all-ones mask
 * ((__mmask8)-1) and R forwarded as the (int) rounding argument. */
#define _mm512_div_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_divpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)-1, \
      (int)(R)); })
2467*67e74705SXin Li 
/* Expands to __builtin_ia32_divpd512_mask with operands A and B, W as the
 * third vector argument, U as the mask and R as the (int) rounding argument. */
#define _mm512_mask_div_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_divpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      (int)(R)); })
2473*67e74705SXin Li 
/* Expands to __builtin_ia32_divpd512_mask with operands A and B,
 * _mm512_setzero_pd() as the third vector argument, U as the mask and R as
 * the (int) rounding argument. */
#define _mm512_maskz_div_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_divpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); })
2479*67e74705SXin Li 
/* Expands to __builtin_ia32_divps512_mask with operands A and B,
 * _mm512_setzero_ps() as the third vector argument, an all-ones mask
 * ((__mmask16)-1) and R forwarded as the (int) rounding argument. */
#define _mm512_div_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_divps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)-1, \
      (int)(R)); })
2485*67e74705SXin Li 
/* Expands to __builtin_ia32_divps512_mask with operands A and B, W as the
 * third vector argument, U as the mask and R as the (int) rounding argument.
 * The expansion intentionally carries no trailing ';' so the macro can be
 * used as a sub-expression (function argument, initializer, operand). */
#define _mm512_mask_div_round_ps(W, U, A, B, R)  __extension__ ({ \
  (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
                                       (int)(R)); })
2491*67e74705SXin Li 
/* Expands to __builtin_ia32_divps512_mask with operands A and B,
 * _mm512_setzero_ps() as the third vector argument, U as the mask and R as
 * the (int) rounding argument.
 * The expansion intentionally carries no trailing ';' so the macro can be
 * used as a sub-expression (function argument, initializer, operand). */
#define _mm512_maskz_div_round_ps(U, A, B, R)  __extension__ ({ \
  (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), (int)(R)); })
2497*67e74705SXin Li 
/* Expands to __builtin_ia32_rndscaleps_mask with source vector A, immediate
 * B, an all-ones mask ((__mmask16)-1) and _MM_FROUND_CUR_DIRECTION as the
 * rounding argument.
 * The third (passthrough) argument is _mm512_undefined_ps(), matching
 * _mm512_roundscale_round_ps, so A is expanded exactly once and an argument
 * with side effects is not evaluated twice.
 * NOTE(review): relies on the passthrough being unused when every mask bit
 * is set -- confirm against the Intel Intrinsics Guide. */
#define _mm512_roundscale_ps(A, B) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
                                         (__v16sf)_mm512_undefined_ps(), \
                                         (__mmask16)-1, \
                                         _MM_FROUND_CUR_DIRECTION); })
2502*67e74705SXin Li 
/* Expands to __builtin_ia32_rndscaleps_mask. Parameter roles as forwarded:
 * C is the source vector (first argument), imm the (int) immediate, A the
 * third vector argument, B the mask; the rounding argument is fixed to
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm512_mask_roundscale_ps(A, B, C, imm) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(C), \
      (int)(imm), \
      (__v16sf)(__m512)(A), \
      (__mmask16)(B), \
      _MM_FROUND_CUR_DIRECTION); })
2507*67e74705SXin Li 
/* Expands to __builtin_ia32_rndscaleps_mask. Parameter roles as forwarded:
 * B is the source vector (first argument), imm the (int) immediate,
 * _mm512_setzero_ps() the third vector argument, A the mask; the rounding
 * argument is fixed to _MM_FROUND_CUR_DIRECTION. */
#define _mm512_maskz_roundscale_ps(A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(B), \
      (int)(imm), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(A), \
      _MM_FROUND_CUR_DIRECTION); })
2513*67e74705SXin Li 
/* Expands to __builtin_ia32_rndscaleps_mask. Parameter roles as forwarded:
 * C is the source vector (first argument), imm the (int) immediate, A the
 * third vector argument, B the mask and R the (int) rounding argument. */
#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(C), \
      (int)(imm), \
      (__v16sf)(__m512)(A), \
      (__mmask16)(B), \
      (int)(R)); })
2518*67e74705SXin Li 
/* Expands to __builtin_ia32_rndscaleps_mask. Parameter roles as forwarded:
 * B is the source vector (first argument), imm the (int) immediate,
 * _mm512_setzero_ps() the third vector argument, A the mask and R the (int)
 * rounding argument. */
#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(B), \
      (int)(imm), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(A), \
      (int)(R)); })
2523*67e74705SXin Li 
/* Expands to __builtin_ia32_rndscaleps_mask with source vector A, (int)
 * immediate imm, _mm512_undefined_ps() as the third vector argument, an
 * all-ones mask ((__mmask16)-1) and R as the (int) rounding argument. */
#define _mm512_roundscale_round_ps(A, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(A), \
      (int)(imm), \
      (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, \
      (int)(R)); })
2528*67e74705SXin Li 
/* Expands to __builtin_ia32_rndscalepd_mask with source vector A, immediate
 * B, an all-ones mask ((__mmask8)-1) and _MM_FROUND_CUR_DIRECTION as the
 * rounding argument.
 * The third (passthrough) argument is _mm512_undefined_pd(), matching
 * _mm512_roundscale_round_pd, so A is expanded exactly once and an argument
 * with side effects is not evaluated twice.
 * NOTE(review): relies on the passthrough being unused when every mask bit
 * is set -- confirm against the Intel Intrinsics Guide. */
#define _mm512_roundscale_pd(A, B) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \
                                          (__v8df)_mm512_undefined_pd(), \
                                          (__mmask8)-1, \
                                          _MM_FROUND_CUR_DIRECTION); })
2533*67e74705SXin Li 
/* Expands to __builtin_ia32_rndscalepd_mask. Parameter roles as forwarded:
 * C is the source vector (first argument), imm the (int) immediate, A the
 * third vector argument, B the mask; the rounding argument is fixed to
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm512_mask_roundscale_pd(A, B, C, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask( \
      (__v8df)(__m512d)(C), \
      (int)(imm), \
      (__v8df)(__m512d)(A), \
      (__mmask8)(B), \
      _MM_FROUND_CUR_DIRECTION); })
2538*67e74705SXin Li 
/* Expands to __builtin_ia32_rndscalepd_mask. Parameter roles as forwarded:
 * B is the source vector (first argument), imm the (int) immediate,
 * _mm512_setzero_pd() the third vector argument, A the mask; the rounding
 * argument is fixed to _MM_FROUND_CUR_DIRECTION. */
#define _mm512_maskz_roundscale_pd(A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask( \
      (__v8df)(__m512d)(B), \
      (int)(imm), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(A), \
      _MM_FROUND_CUR_DIRECTION); })
2544*67e74705SXin Li 
/* Expands to __builtin_ia32_rndscalepd_mask. Parameter roles as forwarded:
 * C is the source vector (first argument), imm the (int) immediate, A the
 * third vector argument, B the mask and R the (int) rounding argument. */
#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask( \
      (__v8df)(__m512d)(C), \
      (int)(imm), \
      (__v8df)(__m512d)(A), \
      (__mmask8)(B), \
      (int)(R)); })
2549*67e74705SXin Li 
/* Expands to __builtin_ia32_rndscalepd_mask. Parameter roles as forwarded:
 * B is the source vector (first argument), imm the (int) immediate,
 * _mm512_setzero_pd() the third vector argument, A the mask and R the (int)
 * rounding argument. */
#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask( \
      (__v8df)(__m512d)(B), \
      (int)(imm), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(A), \
      (int)(R)); })
2554*67e74705SXin Li 
/* Expands to __builtin_ia32_rndscalepd_mask with source vector A, (int)
 * immediate imm, _mm512_undefined_pd() as the third vector argument, an
 * all-ones mask ((__mmask8)-1) and R as the (int) rounding argument. */
#define _mm512_roundscale_round_pd(A, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask( \
      (__v8df)(__m512d)(A), \
      (int)(imm), \
      (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1, \
      (int)(R)); })
2559*67e74705SXin Li 
/* Expands to __builtin_ia32_vfmaddpd512_mask with A, B and C as the three
 * vector operands, an all-ones mask ((__mmask8)-1) and R as the (int)
 * rounding argument. */
#define _mm512_fmadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(C), \
      (__mmask8)-1, \
      (int)(R)); })
2565*67e74705SXin Li 
2566*67e74705SXin Li 
/* Expands to __builtin_ia32_vfmaddpd512_mask with A, B and C as the three
 * vector operands, U as the mask and R as the (int) rounding argument.
 * NOTE(review): which operand supplies lanes not selected by U is decided by
 * the _mask form of the builtin -- confirm in the Intel Intrinsics Guide. */
#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(C), \
      (__mmask8)(U), \
      (int)(R)); })
2572*67e74705SXin Li 
2573*67e74705SXin Li 
/* Double-precision fused multiply-add through the _mask3 builtin form, with
 * rounding argument R.  Expands to
 * __builtin_ia32_vfmaddpd512_mask3(A, B, C) with U as the 8-bit lane mask. */
#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_vfmaddpd512_mask3( \
        (__v8df)(__m512d)(A), \
        (__v8df)(__m512d)(B), \
        (__v8df)(__m512d)(C), \
        (__mmask8)(U), (int)(R)); \
  })
2579*67e74705SXin Li 
2580*67e74705SXin Li 
/* Double-precision fused multiply-add through the _maskz builtin form, with
 * rounding argument R.  Expands to
 * __builtin_ia32_vfmaddpd512_maskz(A, B, C) with U as the 8-bit lane mask. */
#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_vfmaddpd512_maskz( \
        (__v8df)(__m512d)(A), \
        (__v8df)(__m512d)(B), \
        (__v8df)(__m512d)(C), \
        (__mmask8)(U), (int)(R)); \
  })
2586*67e74705SXin Li 
2587*67e74705SXin Li 
/* Unmasked double-precision fused multiply-subtract with rounding argument R.
 * Implemented as the fmadd builtin with the C operand negated and an
 * all-ones 8-bit mask. */
#define _mm512_fmsub_round_pd(A, B, C, R) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_vfmaddpd512_mask( \
        (__v8df)(__m512d)(A), \
        (__v8df)(__m512d)(B), \
        -(__v8df)(__m512d)(C), \
        (__mmask8)-1, (int)(R)); \
  })
2593*67e74705SXin Li 
2594*67e74705SXin Li 
/* Masked double-precision fused multiply-subtract with rounding argument R.
 * Implemented as __builtin_ia32_vfmaddpd512_mask with the C operand negated
 * and U as the 8-bit lane mask. */
#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_vfmaddpd512_mask( \
        (__v8df)(__m512d)(A), \
        (__v8df)(__m512d)(B), \
        -(__v8df)(__m512d)(C), \
        (__mmask8)(U), (int)(R)); \
  })
2600*67e74705SXin Li 
2601*67e74705SXin Li 
/* Double-precision fused multiply-subtract through the _maskz builtin form,
 * with rounding argument R.  Implemented as
 * __builtin_ia32_vfmaddpd512_maskz with the C operand negated and U as the
 * 8-bit lane mask. */
#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_vfmaddpd512_maskz( \
        (__v8df)(__m512d)(A), \
        (__v8df)(__m512d)(B), \
        -(__v8df)(__m512d)(C), \
        (__mmask8)(U), (int)(R)); \
  })
2607*67e74705SXin Li 
2608*67e74705SXin Li 
/* Unmasked double-precision negated fused multiply-add with rounding
 * argument R.  Implemented as __builtin_ia32_vfmaddpd512_mask with the A
 * operand negated and an all-ones 8-bit mask. */
#define _mm512_fnmadd_round_pd(A, B, C, R) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_vfmaddpd512_mask( \
        -(__v8df)(__m512d)(A), \
        (__v8df)(__m512d)(B), \
        (__v8df)(__m512d)(C), \
        (__mmask8)-1, (int)(R)); \
  })
2614*67e74705SXin Li 
2615*67e74705SXin Li 
/* Double-precision negated fused multiply-add through the _mask3 builtin
 * form, with rounding argument R.  Implemented as
 * __builtin_ia32_vfmaddpd512_mask3 with the A operand negated and U as the
 * 8-bit lane mask. */
#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_vfmaddpd512_mask3( \
        -(__v8df)(__m512d)(A), \
        (__v8df)(__m512d)(B), \
        (__v8df)(__m512d)(C), \
        (__mmask8)(U), (int)(R)); \
  })
2621*67e74705SXin Li 
2622*67e74705SXin Li 
/* Double-precision negated fused multiply-add through the _maskz builtin
 * form, with rounding argument R.  Implemented as
 * __builtin_ia32_vfmaddpd512_maskz with the A operand negated and U as the
 * 8-bit lane mask. */
#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_vfmaddpd512_maskz( \
        -(__v8df)(__m512d)(A), \
        (__v8df)(__m512d)(B), \
        (__v8df)(__m512d)(C), \
        (__mmask8)(U), (int)(R)); \
  })
2628*67e74705SXin Li 
2629*67e74705SXin Li 
/* Unmasked double-precision negated fused multiply-subtract with rounding
 * argument R.  Implemented as __builtin_ia32_vfmaddpd512_mask with both the
 * A and C operands negated and an all-ones 8-bit mask. */
#define _mm512_fnmsub_round_pd(A, B, C, R) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_vfmaddpd512_mask( \
        -(__v8df)(__m512d)(A), \
        (__v8df)(__m512d)(B), \
        -(__v8df)(__m512d)(C), \
        (__mmask8)-1, (int)(R)); \
  })
2635*67e74705SXin Li 
2636*67e74705SXin Li 
/* Double-precision negated fused multiply-subtract through the _maskz
 * builtin form, with rounding argument R.  Implemented as
 * __builtin_ia32_vfmaddpd512_maskz with both the A and C operands negated
 * and U as the 8-bit lane mask. */
#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_vfmaddpd512_maskz( \
        -(__v8df)(__m512d)(A), \
        (__v8df)(__m512d)(B), \
        -(__v8df)(__m512d)(C), \
        (__mmask8)(U), (int)(R)); \
  })
2642*67e74705SXin Li 
2643*67e74705SXin Li 
2644*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fmadd_pd(__m512d __A,__m512d __B,__m512d __C)2645*67e74705SXin Li _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
2646*67e74705SXin Li {
2647*67e74705SXin Li   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2648*67e74705SXin Li                                                     (__v8df) __B,
2649*67e74705SXin Li                                                     (__v8df) __C,
2650*67e74705SXin Li                                                     (__mmask8) -1,
2651*67e74705SXin Li                                                     _MM_FROUND_CUR_DIRECTION);
2652*67e74705SXin Li }
2653*67e74705SXin Li 
2654*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fmadd_pd(__m512d __A,__mmask8 __U,__m512d __B,__m512d __C)2655*67e74705SXin Li _mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2656*67e74705SXin Li {
2657*67e74705SXin Li   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2658*67e74705SXin Li                                                     (__v8df) __B,
2659*67e74705SXin Li                                                     (__v8df) __C,
2660*67e74705SXin Li                                                     (__mmask8) __U,
2661*67e74705SXin Li                                                     _MM_FROUND_CUR_DIRECTION);
2662*67e74705SXin Li }
2663*67e74705SXin Li 
2664*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fmadd_pd(__m512d __A,__m512d __B,__m512d __C,__mmask8 __U)2665*67e74705SXin Li _mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2666*67e74705SXin Li {
2667*67e74705SXin Li   return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
2668*67e74705SXin Li                                                      (__v8df) __B,
2669*67e74705SXin Li                                                      (__v8df) __C,
2670*67e74705SXin Li                                                      (__mmask8) __U,
2671*67e74705SXin Li                                                      _MM_FROUND_CUR_DIRECTION);
2672*67e74705SXin Li }
2673*67e74705SXin Li 
2674*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fmadd_pd(__mmask8 __U,__m512d __A,__m512d __B,__m512d __C)2675*67e74705SXin Li _mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2676*67e74705SXin Li {
2677*67e74705SXin Li   return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2678*67e74705SXin Li                                                      (__v8df) __B,
2679*67e74705SXin Li                                                      (__v8df) __C,
2680*67e74705SXin Li                                                      (__mmask8) __U,
2681*67e74705SXin Li                                                      _MM_FROUND_CUR_DIRECTION);
2682*67e74705SXin Li }
2683*67e74705SXin Li 
2684*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fmsub_pd(__m512d __A,__m512d __B,__m512d __C)2685*67e74705SXin Li _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
2686*67e74705SXin Li {
2687*67e74705SXin Li   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2688*67e74705SXin Li                                                     (__v8df) __B,
2689*67e74705SXin Li                                                     -(__v8df) __C,
2690*67e74705SXin Li                                                     (__mmask8) -1,
2691*67e74705SXin Li                                                     _MM_FROUND_CUR_DIRECTION);
2692*67e74705SXin Li }
2693*67e74705SXin Li 
2694*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fmsub_pd(__m512d __A,__mmask8 __U,__m512d __B,__m512d __C)2695*67e74705SXin Li _mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2696*67e74705SXin Li {
2697*67e74705SXin Li   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2698*67e74705SXin Li                                                     (__v8df) __B,
2699*67e74705SXin Li                                                     -(__v8df) __C,
2700*67e74705SXin Li                                                     (__mmask8) __U,
2701*67e74705SXin Li                                                     _MM_FROUND_CUR_DIRECTION);
2702*67e74705SXin Li }
2703*67e74705SXin Li 
2704*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fmsub_pd(__mmask8 __U,__m512d __A,__m512d __B,__m512d __C)2705*67e74705SXin Li _mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2706*67e74705SXin Li {
2707*67e74705SXin Li   return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2708*67e74705SXin Li                                                      (__v8df) __B,
2709*67e74705SXin Li                                                      -(__v8df) __C,
2710*67e74705SXin Li                                                      (__mmask8) __U,
2711*67e74705SXin Li                                                      _MM_FROUND_CUR_DIRECTION);
2712*67e74705SXin Li }
2713*67e74705SXin Li 
2714*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fnmadd_pd(__m512d __A,__m512d __B,__m512d __C)2715*67e74705SXin Li _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
2716*67e74705SXin Li {
2717*67e74705SXin Li   return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
2718*67e74705SXin Li                                                     (__v8df) __B,
2719*67e74705SXin Li                                                     (__v8df) __C,
2720*67e74705SXin Li                                                     (__mmask8) -1,
2721*67e74705SXin Li                                                     _MM_FROUND_CUR_DIRECTION);
2722*67e74705SXin Li }
2723*67e74705SXin Li 
2724*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fnmadd_pd(__m512d __A,__m512d __B,__m512d __C,__mmask8 __U)2725*67e74705SXin Li _mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2726*67e74705SXin Li {
2727*67e74705SXin Li   return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
2728*67e74705SXin Li                                                      (__v8df) __B,
2729*67e74705SXin Li                                                      (__v8df) __C,
2730*67e74705SXin Li                                                      (__mmask8) __U,
2731*67e74705SXin Li                                                      _MM_FROUND_CUR_DIRECTION);
2732*67e74705SXin Li }
2733*67e74705SXin Li 
2734*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fnmadd_pd(__mmask8 __U,__m512d __A,__m512d __B,__m512d __C)2735*67e74705SXin Li _mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2736*67e74705SXin Li {
2737*67e74705SXin Li   return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
2738*67e74705SXin Li                                                      (__v8df) __B,
2739*67e74705SXin Li                                                      (__v8df) __C,
2740*67e74705SXin Li                                                      (__mmask8) __U,
2741*67e74705SXin Li                                                      _MM_FROUND_CUR_DIRECTION);
2742*67e74705SXin Li }
2743*67e74705SXin Li 
2744*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fnmsub_pd(__m512d __A,__m512d __B,__m512d __C)2745*67e74705SXin Li _mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
2746*67e74705SXin Li {
2747*67e74705SXin Li   return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
2748*67e74705SXin Li                                                     (__v8df) __B,
2749*67e74705SXin Li                                                     -(__v8df) __C,
2750*67e74705SXin Li                                                     (__mmask8) -1,
2751*67e74705SXin Li                                                     _MM_FROUND_CUR_DIRECTION);
2752*67e74705SXin Li }
2753*67e74705SXin Li 
2754*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fnmsub_pd(__mmask8 __U,__m512d __A,__m512d __B,__m512d __C)2755*67e74705SXin Li _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2756*67e74705SXin Li {
2757*67e74705SXin Li   return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
2758*67e74705SXin Li                                                      (__v8df) __B,
2759*67e74705SXin Li                                                      -(__v8df) __C,
2760*67e74705SXin Li                                                      (__mmask8) __U,
2761*67e74705SXin Li                                                      _MM_FROUND_CUR_DIRECTION);
2762*67e74705SXin Li }
2763*67e74705SXin Li 
/* Unmasked single-precision fused multiply-add with rounding argument R.
 * Expands to __builtin_ia32_vfmaddps512_mask(A, B, C) with an all-ones
 * 16-bit mask. */
#define _mm512_fmadd_round_ps(A, B, C, R) \
  __extension__ ({ \
    (__m512)__builtin_ia32_vfmaddps512_mask( \
        (__v16sf)(__m512)(A), \
        (__v16sf)(__m512)(B), \
        (__v16sf)(__m512)(C), \
        (__mmask16)-1, (int)(R)); \
  })
2769*67e74705SXin Li 
2770*67e74705SXin Li 
/* Masked single-precision fused multiply-add with rounding argument R.
 * Expands to __builtin_ia32_vfmaddps512_mask(A, B, C) with U as the 16-bit
 * lane mask. */
#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
  __extension__ ({ \
    (__m512)__builtin_ia32_vfmaddps512_mask( \
        (__v16sf)(__m512)(A), \
        (__v16sf)(__m512)(B), \
        (__v16sf)(__m512)(C), \
        (__mmask16)(U), (int)(R)); \
  })
2776*67e74705SXin Li 
2777*67e74705SXin Li 
/* Single-precision fused multiply-add through the _mask3 builtin form, with
 * rounding argument R.  Expands to
 * __builtin_ia32_vfmaddps512_mask3(A, B, C) with U as the 16-bit lane mask. */
#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
  __extension__ ({ \
    (__m512)__builtin_ia32_vfmaddps512_mask3( \
        (__v16sf)(__m512)(A), \
        (__v16sf)(__m512)(B), \
        (__v16sf)(__m512)(C), \
        (__mmask16)(U), (int)(R)); \
  })
2783*67e74705SXin Li 
2784*67e74705SXin Li 
/* Single-precision fused multiply-add through the _maskz builtin form, with
 * rounding argument R.  Expands to
 * __builtin_ia32_vfmaddps512_maskz(A, B, C) with U as the 16-bit lane mask. */
#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
  __extension__ ({ \
    (__m512)__builtin_ia32_vfmaddps512_maskz( \
        (__v16sf)(__m512)(A), \
        (__v16sf)(__m512)(B), \
        (__v16sf)(__m512)(C), \
        (__mmask16)(U), (int)(R)); \
  })
2790*67e74705SXin Li 
2791*67e74705SXin Li 
/* Unmasked single-precision fused multiply-subtract with rounding argument
 * R.  Implemented as __builtin_ia32_vfmaddps512_mask with the C operand
 * negated and an all-ones 16-bit mask. */
#define _mm512_fmsub_round_ps(A, B, C, R) \
  __extension__ ({ \
    (__m512)__builtin_ia32_vfmaddps512_mask( \
        (__v16sf)(__m512)(A), \
        (__v16sf)(__m512)(B), \
        -(__v16sf)(__m512)(C), \
        (__mmask16)-1, (int)(R)); \
  })
2797*67e74705SXin Li 
2798*67e74705SXin Li 
/* Masked single-precision fused multiply-subtract with rounding argument R.
 * Implemented as __builtin_ia32_vfmaddps512_mask with the C operand negated
 * and U as the 16-bit lane mask. */
#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
  __extension__ ({ \
    (__m512)__builtin_ia32_vfmaddps512_mask( \
        (__v16sf)(__m512)(A), \
        (__v16sf)(__m512)(B), \
        -(__v16sf)(__m512)(C), \
        (__mmask16)(U), (int)(R)); \
  })
2804*67e74705SXin Li 
2805*67e74705SXin Li 
/* Single-precision fused multiply-subtract through the _maskz builtin form,
 * with rounding argument R.  Implemented as
 * __builtin_ia32_vfmaddps512_maskz with the C operand negated and U as the
 * 16-bit lane mask. */
#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
  __extension__ ({ \
    (__m512)__builtin_ia32_vfmaddps512_maskz( \
        (__v16sf)(__m512)(A), \
        (__v16sf)(__m512)(B), \
        -(__v16sf)(__m512)(C), \
        (__mmask16)(U), (int)(R)); \
  })
2811*67e74705SXin Li 
2812*67e74705SXin Li 
/* Unmasked single-precision negated fused multiply-add with rounding
 * argument R.  Implemented as __builtin_ia32_vfmaddps512_mask with the A
 * operand negated and an all-ones 16-bit mask. */
#define _mm512_fnmadd_round_ps(A, B, C, R) \
  __extension__ ({ \
    (__m512)__builtin_ia32_vfmaddps512_mask( \
        -(__v16sf)(__m512)(A), \
        (__v16sf)(__m512)(B), \
        (__v16sf)(__m512)(C), \
        (__mmask16)-1, (int)(R)); \
  })
2818*67e74705SXin Li 
2819*67e74705SXin Li 
/* Single-precision negated fused multiply-add through the _mask3 builtin
 * form, with rounding argument R.  Implemented as
 * __builtin_ia32_vfmaddps512_mask3 with the A operand negated and U as the
 * 16-bit lane mask. */
#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
  __extension__ ({ \
    (__m512)__builtin_ia32_vfmaddps512_mask3( \
        -(__v16sf)(__m512)(A), \
        (__v16sf)(__m512)(B), \
        (__v16sf)(__m512)(C), \
        (__mmask16)(U), (int)(R)); \
  })
2825*67e74705SXin Li 
2826*67e74705SXin Li 
/* Single-precision negated fused multiply-add through the _maskz builtin
 * form, with rounding argument R.  Implemented as
 * __builtin_ia32_vfmaddps512_maskz with the A operand negated and U as the
 * 16-bit lane mask. */
#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
  __extension__ ({ \
    (__m512)__builtin_ia32_vfmaddps512_maskz( \
        -(__v16sf)(__m512)(A), \
        (__v16sf)(__m512)(B), \
        (__v16sf)(__m512)(C), \
        (__mmask16)(U), (int)(R)); \
  })
2832*67e74705SXin Li 
2833*67e74705SXin Li 
/* Unmasked single-precision negated fused multiply-subtract with rounding
 * argument R.  Implemented as __builtin_ia32_vfmaddps512_mask with both the
 * A and C operands negated and an all-ones 16-bit mask. */
#define _mm512_fnmsub_round_ps(A, B, C, R) \
  __extension__ ({ \
    (__m512)__builtin_ia32_vfmaddps512_mask( \
        -(__v16sf)(__m512)(A), \
        (__v16sf)(__m512)(B), \
        -(__v16sf)(__m512)(C), \
        (__mmask16)-1, (int)(R)); \
  })
2839*67e74705SXin Li 
2840*67e74705SXin Li 
/* Single-precision negated fused multiply-subtract through the _maskz
 * builtin form, with rounding argument R.  Implemented as
 * __builtin_ia32_vfmaddps512_maskz with both the A and C operands negated
 * and U as the 16-bit lane mask. */
#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
  __extension__ ({ \
    (__m512)__builtin_ia32_vfmaddps512_maskz( \
        -(__v16sf)(__m512)(A), \
        (__v16sf)(__m512)(B), \
        -(__v16sf)(__m512)(C), \
        (__mmask16)(U), (int)(R)); \
  })
2846*67e74705SXin Li 
2847*67e74705SXin Li 
2848*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fmadd_ps(__m512 __A,__m512 __B,__m512 __C)2849*67e74705SXin Li _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
2850*67e74705SXin Li {
2851*67e74705SXin Li   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2852*67e74705SXin Li                                                    (__v16sf) __B,
2853*67e74705SXin Li                                                    (__v16sf) __C,
2854*67e74705SXin Li                                                    (__mmask16) -1,
2855*67e74705SXin Li                                                    _MM_FROUND_CUR_DIRECTION);
2856*67e74705SXin Li }
2857*67e74705SXin Li 
2858*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fmadd_ps(__m512 __A,__mmask16 __U,__m512 __B,__m512 __C)2859*67e74705SXin Li _mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2860*67e74705SXin Li {
2861*67e74705SXin Li   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2862*67e74705SXin Li                                                    (__v16sf) __B,
2863*67e74705SXin Li                                                    (__v16sf) __C,
2864*67e74705SXin Li                                                    (__mmask16) __U,
2865*67e74705SXin Li                                                    _MM_FROUND_CUR_DIRECTION);
2866*67e74705SXin Li }
2867*67e74705SXin Li 
2868*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fmadd_ps(__m512 __A,__m512 __B,__m512 __C,__mmask16 __U)2869*67e74705SXin Li _mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2870*67e74705SXin Li {
2871*67e74705SXin Li   return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
2872*67e74705SXin Li                                                     (__v16sf) __B,
2873*67e74705SXin Li                                                     (__v16sf) __C,
2874*67e74705SXin Li                                                     (__mmask16) __U,
2875*67e74705SXin Li                                                     _MM_FROUND_CUR_DIRECTION);
2876*67e74705SXin Li }
2877*67e74705SXin Li 
2878*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fmadd_ps(__mmask16 __U,__m512 __A,__m512 __B,__m512 __C)2879*67e74705SXin Li _mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2880*67e74705SXin Li {
2881*67e74705SXin Li   return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2882*67e74705SXin Li                                                     (__v16sf) __B,
2883*67e74705SXin Li                                                     (__v16sf) __C,
2884*67e74705SXin Li                                                     (__mmask16) __U,
2885*67e74705SXin Li                                                     _MM_FROUND_CUR_DIRECTION);
2886*67e74705SXin Li }
2887*67e74705SXin Li 
2888*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fmsub_ps(__m512 __A,__m512 __B,__m512 __C)2889*67e74705SXin Li _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
2890*67e74705SXin Li {
2891*67e74705SXin Li   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2892*67e74705SXin Li                                                    (__v16sf) __B,
2893*67e74705SXin Li                                                    -(__v16sf) __C,
2894*67e74705SXin Li                                                    (__mmask16) -1,
2895*67e74705SXin Li                                                    _MM_FROUND_CUR_DIRECTION);
2896*67e74705SXin Li }
2897*67e74705SXin Li 
2898*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fmsub_ps(__m512 __A,__mmask16 __U,__m512 __B,__m512 __C)2899*67e74705SXin Li _mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2900*67e74705SXin Li {
2901*67e74705SXin Li   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2902*67e74705SXin Li                                                    (__v16sf) __B,
2903*67e74705SXin Li                                                    -(__v16sf) __C,
2904*67e74705SXin Li                                                    (__mmask16) __U,
2905*67e74705SXin Li                                                    _MM_FROUND_CUR_DIRECTION);
2906*67e74705SXin Li }
2907*67e74705SXin Li 
2908*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fmsub_ps(__mmask16 __U,__m512 __A,__m512 __B,__m512 __C)2909*67e74705SXin Li _mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2910*67e74705SXin Li {
2911*67e74705SXin Li   return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2912*67e74705SXin Li                                                     (__v16sf) __B,
2913*67e74705SXin Li                                                     -(__v16sf) __C,
2914*67e74705SXin Li                                                     (__mmask16) __U,
2915*67e74705SXin Li                                                     _MM_FROUND_CUR_DIRECTION);
2916*67e74705SXin Li }
2917*67e74705SXin Li 
2918*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fnmadd_ps(__m512 __A,__m512 __B,__m512 __C)2919*67e74705SXin Li _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
2920*67e74705SXin Li {
2921*67e74705SXin Li   return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
2922*67e74705SXin Li                                                    (__v16sf) __B,
2923*67e74705SXin Li                                                    (__v16sf) __C,
2924*67e74705SXin Li                                                    (__mmask16) -1,
2925*67e74705SXin Li                                                    _MM_FROUND_CUR_DIRECTION);
2926*67e74705SXin Li }
2927*67e74705SXin Li 
2928*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fnmadd_ps(__m512 __A,__m512 __B,__m512 __C,__mmask16 __U)2929*67e74705SXin Li _mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2930*67e74705SXin Li {
2931*67e74705SXin Li   return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
2932*67e74705SXin Li                                                     (__v16sf) __B,
2933*67e74705SXin Li                                                     (__v16sf) __C,
2934*67e74705SXin Li                                                     (__mmask16) __U,
2935*67e74705SXin Li                                                     _MM_FROUND_CUR_DIRECTION);
2936*67e74705SXin Li }
2937*67e74705SXin Li 
2938*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fnmadd_ps(__mmask16 __U,__m512 __A,__m512 __B,__m512 __C)2939*67e74705SXin Li _mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2940*67e74705SXin Li {
2941*67e74705SXin Li   return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
2942*67e74705SXin Li                                                     (__v16sf) __B,
2943*67e74705SXin Li                                                     (__v16sf) __C,
2944*67e74705SXin Li                                                     (__mmask16) __U,
2945*67e74705SXin Li                                                     _MM_FROUND_CUR_DIRECTION);
2946*67e74705SXin Li }
2947*67e74705SXin Li 
2948*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fnmsub_ps(__m512 __A,__m512 __B,__m512 __C)2949*67e74705SXin Li _mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
2950*67e74705SXin Li {
2951*67e74705SXin Li   return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
2952*67e74705SXin Li                                                    (__v16sf) __B,
2953*67e74705SXin Li                                                    -(__v16sf) __C,
2954*67e74705SXin Li                                                    (__mmask16) -1,
2955*67e74705SXin Li                                                    _MM_FROUND_CUR_DIRECTION);
2956*67e74705SXin Li }
2957*67e74705SXin Li 
2958*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fnmsub_ps(__mmask16 __U,__m512 __A,__m512 __B,__m512 __C)2959*67e74705SXin Li _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2960*67e74705SXin Li {
2961*67e74705SXin Li   return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
2962*67e74705SXin Li                                                     (__v16sf) __B,
2963*67e74705SXin Li                                                     -(__v16sf) __C,
2964*67e74705SXin Li                                                     (__mmask16) __U,
2965*67e74705SXin Li                                                     _MM_FROUND_CUR_DIRECTION);
2966*67e74705SXin Li }
2967*67e74705SXin Li 
/* Unmasked double-precision fmaddsub with rounding argument R.  Expands to
 * __builtin_ia32_vfmaddsubpd512_mask(A, B, C) with an all-ones 8-bit mask. */
#define _mm512_fmaddsub_round_pd(A, B, C, R) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_vfmaddsubpd512_mask( \
        (__v8df)(__m512d)(A), \
        (__v8df)(__m512d)(B), \
        (__v8df)(__m512d)(C), \
        (__mmask8)-1, (int)(R)); \
  })
2973*67e74705SXin Li 
2974*67e74705SXin Li 
/* Masked double-precision fmaddsub with rounding argument R.  Expands to
 * __builtin_ia32_vfmaddsubpd512_mask(A, B, C) with U as the 8-bit lane
 * mask. */
#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_vfmaddsubpd512_mask( \
        (__v8df)(__m512d)(A), \
        (__v8df)(__m512d)(B), \
        (__v8df)(__m512d)(C), \
        (__mmask8)(U), (int)(R)); \
  })
2980*67e74705SXin Li 
2981*67e74705SXin Li 
/* Double-precision fmaddsub through the _mask3 builtin form, with rounding
 * argument R.  Expands to __builtin_ia32_vfmaddsubpd512_mask3(A, B, C) with
 * U as the 8-bit lane mask. */
#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_vfmaddsubpd512_mask3( \
        (__v8df)(__m512d)(A), \
        (__v8df)(__m512d)(B), \
        (__v8df)(__m512d)(C), \
        (__mmask8)(U), (int)(R)); \
  })
2987*67e74705SXin Li 
2988*67e74705SXin Li 
/* Double-precision fmaddsub through the _maskz builtin form, with rounding
 * argument R.  Expands to __builtin_ia32_vfmaddsubpd512_maskz(A, B, C) with
 * U as the 8-bit lane mask. */
#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_vfmaddsubpd512_maskz( \
        (__v8df)(__m512d)(A), \
        (__v8df)(__m512d)(B), \
        (__v8df)(__m512d)(C), \
        (__mmask8)(U), (int)(R)); \
  })
2994*67e74705SXin Li 
2995*67e74705SXin Li 
/* Unmasked double-precision fmsubadd with rounding argument R.  Implemented
 * as __builtin_ia32_vfmaddsubpd512_mask with the C operand negated and an
 * all-ones 8-bit mask. */
#define _mm512_fmsubadd_round_pd(A, B, C, R) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_vfmaddsubpd512_mask( \
        (__v8df)(__m512d)(A), \
        (__v8df)(__m512d)(B), \
        -(__v8df)(__m512d)(C), \
        (__mmask8)-1, (int)(R)); \
  })
3001*67e74705SXin Li 
3002*67e74705SXin Li 
/* Masked double-precision fmsubadd with rounding argument R.  Implemented as
 * __builtin_ia32_vfmaddsubpd512_mask with the C operand negated and U as the
 * 8-bit lane mask. */
#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_vfmaddsubpd512_mask( \
        (__v8df)(__m512d)(A), \
        (__v8df)(__m512d)(B), \
        -(__v8df)(__m512d)(C), \
        (__mmask8)(U), (int)(R)); \
  })
3008*67e74705SXin Li 
3009*67e74705SXin Li 
/* Double-precision fmsubadd through the _maskz builtin form, with rounding
 * argument R.  Implemented as __builtin_ia32_vfmaddsubpd512_maskz with the C
 * operand negated and U as the 8-bit lane mask. */
#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_vfmaddsubpd512_maskz( \
        (__v8df)(__m512d)(A), \
        (__v8df)(__m512d)(B), \
        -(__v8df)(__m512d)(C), \
        (__mmask8)(U), (int)(R)); \
  })
3015*67e74705SXin Li 
3016*67e74705SXin Li 
3017*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fmaddsub_pd(__m512d __A,__m512d __B,__m512d __C)3018*67e74705SXin Li _mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
3019*67e74705SXin Li {
3020*67e74705SXin Li   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3021*67e74705SXin Li                                                        (__v8df) __B,
3022*67e74705SXin Li                                                        (__v8df) __C,
3023*67e74705SXin Li                                                        (__mmask8) -1,
3024*67e74705SXin Li                                                        _MM_FROUND_CUR_DIRECTION);
3025*67e74705SXin Li }
3026*67e74705SXin Li 
3027*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fmaddsub_pd(__m512d __A,__mmask8 __U,__m512d __B,__m512d __C)3028*67e74705SXin Li _mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3029*67e74705SXin Li {
3030*67e74705SXin Li   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3031*67e74705SXin Li                                                        (__v8df) __B,
3032*67e74705SXin Li                                                        (__v8df) __C,
3033*67e74705SXin Li                                                        (__mmask8) __U,
3034*67e74705SXin Li                                                        _MM_FROUND_CUR_DIRECTION);
3035*67e74705SXin Li }
3036*67e74705SXin Li 
3037*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fmaddsub_pd(__m512d __A,__m512d __B,__m512d __C,__mmask8 __U)3038*67e74705SXin Li _mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3039*67e74705SXin Li {
3040*67e74705SXin Li   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
3041*67e74705SXin Li                                                         (__v8df) __B,
3042*67e74705SXin Li                                                         (__v8df) __C,
3043*67e74705SXin Li                                                         (__mmask8) __U,
3044*67e74705SXin Li                                                         _MM_FROUND_CUR_DIRECTION);
3045*67e74705SXin Li }
3046*67e74705SXin Li 
3047*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fmaddsub_pd(__mmask8 __U,__m512d __A,__m512d __B,__m512d __C)3048*67e74705SXin Li _mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
3049*67e74705SXin Li {
3050*67e74705SXin Li   return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3051*67e74705SXin Li                                                         (__v8df) __B,
3052*67e74705SXin Li                                                         (__v8df) __C,
3053*67e74705SXin Li                                                         (__mmask8) __U,
3054*67e74705SXin Li                                                         _MM_FROUND_CUR_DIRECTION);
3055*67e74705SXin Li }
3056*67e74705SXin Li 
3057*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fmsubadd_pd(__m512d __A,__m512d __B,__m512d __C)3058*67e74705SXin Li _mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
3059*67e74705SXin Li {
3060*67e74705SXin Li   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3061*67e74705SXin Li                                                        (__v8df) __B,
3062*67e74705SXin Li                                                        -(__v8df) __C,
3063*67e74705SXin Li                                                        (__mmask8) -1,
3064*67e74705SXin Li                                                        _MM_FROUND_CUR_DIRECTION);
3065*67e74705SXin Li }
3066*67e74705SXin Li 
3067*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fmsubadd_pd(__m512d __A,__mmask8 __U,__m512d __B,__m512d __C)3068*67e74705SXin Li _mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3069*67e74705SXin Li {
3070*67e74705SXin Li   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3071*67e74705SXin Li                                                        (__v8df) __B,
3072*67e74705SXin Li                                                        -(__v8df) __C,
3073*67e74705SXin Li                                                        (__mmask8) __U,
3074*67e74705SXin Li                                                        _MM_FROUND_CUR_DIRECTION);
3075*67e74705SXin Li }
3076*67e74705SXin Li 
3077*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fmsubadd_pd(__mmask8 __U,__m512d __A,__m512d __B,__m512d __C)3078*67e74705SXin Li _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
3079*67e74705SXin Li {
3080*67e74705SXin Li   return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3081*67e74705SXin Li                                                         (__v8df) __B,
3082*67e74705SXin Li                                                         -(__v8df) __C,
3083*67e74705SXin Li                                                         (__mmask8) __U,
3084*67e74705SXin Li                                                         _MM_FROUND_CUR_DIRECTION);
3085*67e74705SXin Li }
3086*67e74705SXin Li 
/* Unmasked fmaddsub with explicit rounding argument R: expands to
 * __builtin_ia32_vfmaddsubps512_mask on A, B and C (each cast to __v16sf)
 * with an all-ones __mmask16. The result is cast to __m512. */
#define _mm512_fmaddsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16sf)(__m512)(C), \
                                             (__mmask16)-1, (int)(R)); })
3092*67e74705SXin Li 
3093*67e74705SXin Li 
/* Masked fmaddsub with explicit rounding argument R: expands to
 * __builtin_ia32_vfmaddsubps512_mask on A, B and C (each cast to __v16sf)
 * with write mask U (cast to __mmask16). The result is cast to __m512. */
#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16sf)(__m512)(C), \
                                             (__mmask16)(U), (int)(R)); })
3099*67e74705SXin Li 
3100*67e74705SXin Li 
/* mask3 variant of fmaddsub with explicit rounding argument R: expands to
 * __builtin_ia32_vfmaddsubps512_mask3 on A, B and C (each cast to __v16sf)
 * with write mask U (cast to __mmask16). The result is cast to __m512. */
#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)); })
3106*67e74705SXin Li 
3107*67e74705SXin Li 
/* maskz variant of fmaddsub with explicit rounding argument R: expands to
 * __builtin_ia32_vfmaddsubps512_maskz on A, B and C (each cast to __v16sf)
 * with write mask U (cast to __mmask16). The result is cast to __m512. */
#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)); })
3113*67e74705SXin Li 
3114*67e74705SXin Li 
/* Unmasked fmsubadd with explicit rounding argument R. Implemented by
 * negating C and reusing the fmaddsub builtin
 * (__builtin_ia32_vfmaddsubps512_mask) with an all-ones __mmask16. */
#define _mm512_fmsubadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             -(__v16sf)(__m512)(C), \
                                             (__mmask16)-1, (int)(R)); })
3120*67e74705SXin Li 
3121*67e74705SXin Li 
/* Masked fmsubadd with explicit rounding argument R. Implemented by negating
 * C and calling __builtin_ia32_vfmaddsubps512_mask with write mask U
 * (cast to __mmask16). */
#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             -(__v16sf)(__m512)(C), \
                                             (__mmask16)(U), (int)(R)); })
3127*67e74705SXin Li 
3128*67e74705SXin Li 
/* maskz variant of fmsubadd with explicit rounding argument R. Implemented by
 * negating C and calling __builtin_ia32_vfmaddsubps512_maskz with write mask
 * U (cast to __mmask16). */
#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              -(__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)); })
3134*67e74705SXin Li 
3135*67e74705SXin Li 
3136*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fmaddsub_ps(__m512 __A,__m512 __B,__m512 __C)3137*67e74705SXin Li _mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
3138*67e74705SXin Li {
3139*67e74705SXin Li   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3140*67e74705SXin Li                                                       (__v16sf) __B,
3141*67e74705SXin Li                                                       (__v16sf) __C,
3142*67e74705SXin Li                                                       (__mmask16) -1,
3143*67e74705SXin Li                                                       _MM_FROUND_CUR_DIRECTION);
3144*67e74705SXin Li }
3145*67e74705SXin Li 
3146*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fmaddsub_ps(__m512 __A,__mmask16 __U,__m512 __B,__m512 __C)3147*67e74705SXin Li _mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3148*67e74705SXin Li {
3149*67e74705SXin Li   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3150*67e74705SXin Li                                                       (__v16sf) __B,
3151*67e74705SXin Li                                                       (__v16sf) __C,
3152*67e74705SXin Li                                                       (__mmask16) __U,
3153*67e74705SXin Li                                                       _MM_FROUND_CUR_DIRECTION);
3154*67e74705SXin Li }
3155*67e74705SXin Li 
3156*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fmaddsub_ps(__m512 __A,__m512 __B,__m512 __C,__mmask16 __U)3157*67e74705SXin Li _mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3158*67e74705SXin Li {
3159*67e74705SXin Li   return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
3160*67e74705SXin Li                                                        (__v16sf) __B,
3161*67e74705SXin Li                                                        (__v16sf) __C,
3162*67e74705SXin Li                                                        (__mmask16) __U,
3163*67e74705SXin Li                                                        _MM_FROUND_CUR_DIRECTION);
3164*67e74705SXin Li }
3165*67e74705SXin Li 
3166*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fmaddsub_ps(__mmask16 __U,__m512 __A,__m512 __B,__m512 __C)3167*67e74705SXin Li _mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3168*67e74705SXin Li {
3169*67e74705SXin Li   return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3170*67e74705SXin Li                                                        (__v16sf) __B,
3171*67e74705SXin Li                                                        (__v16sf) __C,
3172*67e74705SXin Li                                                        (__mmask16) __U,
3173*67e74705SXin Li                                                        _MM_FROUND_CUR_DIRECTION);
3174*67e74705SXin Li }
3175*67e74705SXin Li 
3176*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fmsubadd_ps(__m512 __A,__m512 __B,__m512 __C)3177*67e74705SXin Li _mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
3178*67e74705SXin Li {
3179*67e74705SXin Li   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3180*67e74705SXin Li                                                       (__v16sf) __B,
3181*67e74705SXin Li                                                       -(__v16sf) __C,
3182*67e74705SXin Li                                                       (__mmask16) -1,
3183*67e74705SXin Li                                                       _MM_FROUND_CUR_DIRECTION);
3184*67e74705SXin Li }
3185*67e74705SXin Li 
3186*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fmsubadd_ps(__m512 __A,__mmask16 __U,__m512 __B,__m512 __C)3187*67e74705SXin Li _mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3188*67e74705SXin Li {
3189*67e74705SXin Li   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3190*67e74705SXin Li                                                       (__v16sf) __B,
3191*67e74705SXin Li                                                       -(__v16sf) __C,
3192*67e74705SXin Li                                                       (__mmask16) __U,
3193*67e74705SXin Li                                                       _MM_FROUND_CUR_DIRECTION);
3194*67e74705SXin Li }
3195*67e74705SXin Li 
3196*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fmsubadd_ps(__mmask16 __U,__m512 __A,__m512 __B,__m512 __C)3197*67e74705SXin Li _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3198*67e74705SXin Li {
3199*67e74705SXin Li   return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3200*67e74705SXin Li                                                        (__v16sf) __B,
3201*67e74705SXin Li                                                        -(__v16sf) __C,
3202*67e74705SXin Li                                                        (__mmask16) __U,
3203*67e74705SXin Li                                                        _MM_FROUND_CUR_DIRECTION);
3204*67e74705SXin Li }
3205*67e74705SXin Li 
/* mask3 variant of fmsub with explicit rounding argument R: expands to
 * __builtin_ia32_vfmsubpd512_mask3 on A, B and C (each cast to __v8df) with
 * write mask U (cast to __mmask8). The result is cast to __m512d. */
#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)); })
3211*67e74705SXin Li 
3212*67e74705SXin Li 
3213*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fmsub_pd(__m512d __A,__m512d __B,__m512d __C,__mmask8 __U)3214*67e74705SXin Li _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3215*67e74705SXin Li {
3216*67e74705SXin Li   return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
3217*67e74705SXin Li                                                      (__v8df) __B,
3218*67e74705SXin Li                                                      (__v8df) __C,
3219*67e74705SXin Li                                                      (__mmask8) __U,
3220*67e74705SXin Li                                                      _MM_FROUND_CUR_DIRECTION);
3221*67e74705SXin Li }
3222*67e74705SXin Li 
/* mask3 variant of fmsub with explicit rounding argument R: expands to
 * __builtin_ia32_vfmsubps512_mask3 on A, B and C (each cast to __v16sf) with
 * write mask U (cast to __mmask16). The result is cast to __m512. */
#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)); })
3228*67e74705SXin Li 
3229*67e74705SXin Li 
3230*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fmsub_ps(__m512 __A,__m512 __B,__m512 __C,__mmask16 __U)3231*67e74705SXin Li _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3232*67e74705SXin Li {
3233*67e74705SXin Li   return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
3234*67e74705SXin Li                                                     (__v16sf) __B,
3235*67e74705SXin Li                                                     (__v16sf) __C,
3236*67e74705SXin Li                                                     (__mmask16) __U,
3237*67e74705SXin Li                                                     _MM_FROUND_CUR_DIRECTION);
3238*67e74705SXin Li }
3239*67e74705SXin Li 
/* mask3 variant of fmsubadd with explicit rounding argument R. Unlike the
 * mask/maskz fmsubadd forms, this expands to a dedicated builtin,
 * __builtin_ia32_vfmsubaddpd512_mask3, and passes C un-negated. */
#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R)); })
3245*67e74705SXin Li 
3246*67e74705SXin Li 
3247*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fmsubadd_pd(__m512d __A,__m512d __B,__m512d __C,__mmask8 __U)3248*67e74705SXin Li _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3249*67e74705SXin Li {
3250*67e74705SXin Li   return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3251*67e74705SXin Li                                                         (__v8df) __B,
3252*67e74705SXin Li                                                         (__v8df) __C,
3253*67e74705SXin Li                                                         (__mmask8) __U,
3254*67e74705SXin Li                                                         _MM_FROUND_CUR_DIRECTION);
3255*67e74705SXin Li }
3256*67e74705SXin Li 
/* mask3 variant of fmsubadd with explicit rounding argument R. Expands to the
 * dedicated builtin __builtin_ia32_vfmsubaddps512_mask3 on A, B and C (each
 * cast to __v16sf, C un-negated) with write mask U (cast to __mmask16). */
#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)); })
3262*67e74705SXin Li 
3263*67e74705SXin Li 
3264*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fmsubadd_ps(__m512 __A,__m512 __B,__m512 __C,__mmask16 __U)3265*67e74705SXin Li _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3266*67e74705SXin Li {
3267*67e74705SXin Li   return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3268*67e74705SXin Li                                                        (__v16sf) __B,
3269*67e74705SXin Li                                                        (__v16sf) __C,
3270*67e74705SXin Li                                                        (__mmask16) __U,
3271*67e74705SXin Li                                                        _MM_FROUND_CUR_DIRECTION);
3272*67e74705SXin Li }
3273*67e74705SXin Li 
/* Masked fnmadd with explicit rounding argument R: expands to
 * __builtin_ia32_vfnmaddpd512_mask on A, B and C (each cast to __v8df) with
 * write mask U (cast to __mmask8). The result is cast to __m512d. */
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfnmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)); })
3279*67e74705SXin Li 
3280*67e74705SXin Li 
3281*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fnmadd_pd(__m512d __A,__mmask8 __U,__m512d __B,__m512d __C)3282*67e74705SXin Li _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3283*67e74705SXin Li {
3284*67e74705SXin Li   return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
3285*67e74705SXin Li                                                      (__v8df) __B,
3286*67e74705SXin Li                                                      (__v8df) __C,
3287*67e74705SXin Li                                                      (__mmask8) __U,
3288*67e74705SXin Li                                                      _MM_FROUND_CUR_DIRECTION);
3289*67e74705SXin Li }
3290*67e74705SXin Li 
/* Masked fnmadd with explicit rounding argument R: expands to
 * __builtin_ia32_vfnmaddps512_mask on A, B and C (each cast to __v16sf) with
 * write mask U (cast to __mmask16). The result is cast to __m512. */
#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfnmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)); })
3296*67e74705SXin Li 
3297*67e74705SXin Li 
3298*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fnmadd_ps(__m512 __A,__mmask16 __U,__m512 __B,__m512 __C)3299*67e74705SXin Li _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3300*67e74705SXin Li {
3301*67e74705SXin Li   return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
3302*67e74705SXin Li                                                     (__v16sf) __B,
3303*67e74705SXin Li                                                     (__v16sf) __C,
3304*67e74705SXin Li                                                     (__mmask16) __U,
3305*67e74705SXin Li                                                     _MM_FROUND_CUR_DIRECTION);
3306*67e74705SXin Li }
3307*67e74705SXin Li 
/* Masked fnmsub with explicit rounding argument R: expands to
 * __builtin_ia32_vfnmsubpd512_mask on A, B and C (each cast to __v8df) with
 * write mask U (cast to __mmask8). The result is cast to __m512d. */
#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfnmsubpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)); })
3313*67e74705SXin Li 
3314*67e74705SXin Li 
/* mask3 variant of fnmsub with explicit rounding argument R: expands to
 * __builtin_ia32_vfnmsubpd512_mask3 on A, B and C (each cast to __v8df) with
 * write mask U (cast to __mmask8). The result is cast to __m512d. */
#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfnmsubpd512_mask3((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)); })
3320*67e74705SXin Li 
3321*67e74705SXin Li 
3322*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fnmsub_pd(__m512d __A,__mmask8 __U,__m512d __B,__m512d __C)3323*67e74705SXin Li _mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3324*67e74705SXin Li {
3325*67e74705SXin Li   return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
3326*67e74705SXin Li                                                      (__v8df) __B,
3327*67e74705SXin Li                                                      (__v8df) __C,
3328*67e74705SXin Li                                                      (__mmask8) __U,
3329*67e74705SXin Li                                                      _MM_FROUND_CUR_DIRECTION);
3330*67e74705SXin Li }
3331*67e74705SXin Li 
3332*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fnmsub_pd(__m512d __A,__m512d __B,__m512d __C,__mmask8 __U)3333*67e74705SXin Li _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3334*67e74705SXin Li {
3335*67e74705SXin Li   return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
3336*67e74705SXin Li                                                       (__v8df) __B,
3337*67e74705SXin Li                                                       (__v8df) __C,
3338*67e74705SXin Li                                                       (__mmask8) __U,
3339*67e74705SXin Li                                                       _MM_FROUND_CUR_DIRECTION);
3340*67e74705SXin Li }
3341*67e74705SXin Li 
/* Masked fnmsub with explicit rounding argument R: expands to
 * __builtin_ia32_vfnmsubps512_mask on A, B and C (each cast to __v16sf) with
 * write mask U (cast to __mmask16). The result is cast to __m512. */
#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfnmsubps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)); })
3347*67e74705SXin Li 
3348*67e74705SXin Li 
/* mask3 variant of fnmsub with explicit rounding argument R: expands to
 * __builtin_ia32_vfnmsubps512_mask3 on A, B and C (each cast to __v16sf) with
 * write mask U (cast to __mmask16). The result is cast to __m512. */
#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfnmsubps512_mask3((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)); })
3354*67e74705SXin Li 
3355*67e74705SXin Li 
3356*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fnmsub_ps(__m512 __A,__mmask16 __U,__m512 __B,__m512 __C)3357*67e74705SXin Li _mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3358*67e74705SXin Li {
3359*67e74705SXin Li   return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
3360*67e74705SXin Li                                                     (__v16sf) __B,
3361*67e74705SXin Li                                                     (__v16sf) __C,
3362*67e74705SXin Li                                                     (__mmask16) __U,
3363*67e74705SXin Li                                                     _MM_FROUND_CUR_DIRECTION);
3364*67e74705SXin Li }
3365*67e74705SXin Li 
3366*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fnmsub_ps(__m512 __A,__m512 __B,__m512 __C,__mmask16 __U)3367*67e74705SXin Li _mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3368*67e74705SXin Li {
3369*67e74705SXin Li   return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
3370*67e74705SXin Li                                                      (__v16sf) __B,
3371*67e74705SXin Li                                                      (__v16sf) __C,
3372*67e74705SXin Li                                                      (__mmask16) __U,
3373*67e74705SXin Li                                                      _MM_FROUND_CUR_DIRECTION);
3374*67e74705SXin Li }
3375*67e74705SXin Li 
3376*67e74705SXin Li 
3377*67e74705SXin Li 
/* Vector permutations */
3379*67e74705SXin Li 
3380*67e74705SXin Li static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_permutex2var_epi32(__m512i __A,__m512i __I,__m512i __B)3381*67e74705SXin Li _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
3382*67e74705SXin Li {
3383*67e74705SXin Li   return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
3384*67e74705SXin Li                                                        /* idx */ ,
3385*67e74705SXin Li                                                        (__v16si) __A,
3386*67e74705SXin Li                                                        (__v16si) __B,
3387*67e74705SXin Li                                                        (__mmask16) -1);
3388*67e74705SXin Li }
3389*67e74705SXin Li 
3390*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_permutex2var_epi32(__m512i __A,__mmask16 __U,__m512i __I,__m512i __B)3391*67e74705SXin Li _mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
3392*67e74705SXin Li                                 __m512i __I, __m512i __B)
3393*67e74705SXin Li {
3394*67e74705SXin Li   return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
3395*67e74705SXin Li                                                         /* idx */ ,
3396*67e74705SXin Li                                                         (__v16si) __A,
3397*67e74705SXin Li                                                         (__v16si) __B,
3398*67e74705SXin Li                                                         (__mmask16) __U);
3399*67e74705SXin Li }
3400*67e74705SXin Li 
3401*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_permutex2var_epi32(__mmask16 __U,__m512i __A,__m512i __I,__m512i __B)3402*67e74705SXin Li _mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
3403*67e74705SXin Li                                  __m512i __I, __m512i __B)
3404*67e74705SXin Li {
3405*67e74705SXin Li   return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
3406*67e74705SXin Li                                                         /* idx */ ,
3407*67e74705SXin Li                                                         (__v16si) __A,
3408*67e74705SXin Li                                                         (__v16si) __B,
3409*67e74705SXin Li                                                         (__mmask16) __U);
3410*67e74705SXin Li }
3411*67e74705SXin Li 
3412*67e74705SXin Li static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_permutex2var_epi64(__m512i __A,__m512i __I,__m512i __B)3413*67e74705SXin Li _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
3414*67e74705SXin Li {
3415*67e74705SXin Li   return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
3416*67e74705SXin Li                                                        /* idx */ ,
3417*67e74705SXin Li                                                        (__v8di) __A,
3418*67e74705SXin Li                                                        (__v8di) __B,
3419*67e74705SXin Li                                                        (__mmask8) -1);
3420*67e74705SXin Li }
3421*67e74705SXin Li 
3422*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_permutex2var_epi64(__m512i __A,__mmask8 __U,__m512i __I,__m512i __B)3423*67e74705SXin Li _mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
3424*67e74705SXin Li                                 __m512i __B)
3425*67e74705SXin Li {
3426*67e74705SXin Li   return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
3427*67e74705SXin Li                                                        /* idx */ ,
3428*67e74705SXin Li                                                        (__v8di) __A,
3429*67e74705SXin Li                                                        (__v8di) __B,
3430*67e74705SXin Li                                                        (__mmask8) __U);
3431*67e74705SXin Li }
3432*67e74705SXin Li 
3433*67e74705SXin Li 
3434*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_permutex2var_epi64(__mmask8 __U,__m512i __A,__m512i __I,__m512i __B)3435*67e74705SXin Li _mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
3436*67e74705SXin Li          __m512i __I, __m512i __B)
3437*67e74705SXin Li {
3438*67e74705SXin Li   return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
3439*67e74705SXin Li                                                         /* idx */ ,
3440*67e74705SXin Li                                                         (__v8di) __A,
3441*67e74705SXin Li                                                         (__v8di) __B,
3442*67e74705SXin Li                                                         (__mmask8) __U);
3443*67e74705SXin Li }
3444*67e74705SXin Li 
/* _mm512_alignr_epi64 family.
 *
 * All three macros wrap __builtin_ia32_alignq512_mask(A, B, imm, src, mask)
 * in a statement expression and differ only in the last two operands:
 *   plain : src = zero vector, mask = (__mmask8)-1 (every bit set)
 *   mask  : src = W,           mask = U
 *   maskz : src = zero vector, mask = U
 * NOTE(review): these are macros rather than functions presumably because
 * the builtin needs the shift count as a compile-time constant -- confirm. */
#define _mm512_alignr_epi64(A, B, I) __extension__ ({ \
  (__m512i)__builtin_ia32_alignq512_mask((__v8di)(__m512i)(A), \
                                         (__v8di)(__m512i)(B), (int)(I), \
                                         (__v8di)_mm512_setzero_si512(), \
                                         (__mmask8)-1); })

#define _mm512_mask_alignr_epi64(W, U, A, B, imm) __extension__({\
  (__m512i)__builtin_ia32_alignq512_mask((__v8di)(__m512i)(A), \
                                         (__v8di)(__m512i)(B), (int)(imm), \
                                         (__v8di)(__m512i)(W), \
                                         (__mmask8)(U)); })

#define _mm512_maskz_alignr_epi64(U, A, B, imm) __extension__({\
  (__m512i)__builtin_ia32_alignq512_mask((__v8di)(__m512i)(A), \
                                         (__v8di)(__m512i)(B), (int)(imm), \
                                         (__v8di)_mm512_setzero_si512(), \
                                         (__mmask8)(U)); })
3462*67e74705SXin Li 
/* _mm512_alignr_epi32 family.
 *
 * All three macros wrap __builtin_ia32_alignd512_mask(A, B, imm, src, mask)
 * in a statement expression and differ only in the last two operands:
 *   plain : src = zero vector, mask = (__mmask16)-1 (every bit set)
 *   mask  : src = W,           mask = U
 *   maskz : src = zero vector, mask = U
 * NOTE(review): these are macros rather than functions presumably because
 * the builtin needs the shift count as a compile-time constant -- confirm. */
#define _mm512_alignr_epi32(A, B, I) __extension__ ({ \
  (__m512i)__builtin_ia32_alignd512_mask((__v16si)(__m512i)(A), \
                                         (__v16si)(__m512i)(B), (int)(I), \
                                         (__v16si)_mm512_setzero_si512(), \
                                         (__mmask16)-1); })

#define _mm512_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({\
  (__m512i)__builtin_ia32_alignd512_mask((__v16si)(__m512i)(A), \
                                         (__v16si)(__m512i)(B), (int)(imm), \
                                         (__v16si)(__m512i)(W), \
                                         (__mmask16)(U)); })

#define _mm512_maskz_alignr_epi32(U, A, B, imm) __extension__({\
  (__m512i)__builtin_ia32_alignd512_mask((__v16si)(__m512i)(A), \
                                         (__v16si)(__m512i)(B), (int)(imm), \
                                         (__v16si)_mm512_setzero_si512(), \
                                         (__mmask16)(U)); })
3480*67e74705SXin Li /* Vector Extract */
3481*67e74705SXin Li 
/* _mm512_extractf64x4_pd family.
 *
 * All three macros wrap __builtin_ia32_extractf64x4_mask(A, imm, src, mask)
 * in a statement expression and differ only in the last two operands:
 *   plain : src = zero vector, mask = (__mmask8)-1 (every bit set)
 *   mask  : src = W,           mask = U
 *   maskz : src = zero vector, mask = U
 * The zero vector is built with _mm256_setzero_pd() in both the plain and
 * the maskz form so that the operand already has a double-precision type
 * before the (__v4df) cast. */
#define _mm512_extractf64x4_pd(A, I) __extension__ ({                    \
  (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \
                                            (__v4df)_mm256_setzero_pd(), \
                                            (__mmask8)-1); })

#define _mm512_mask_extractf64x4_pd(W, U, A, imm) __extension__ ({\
  (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
                                            (__v4df)(__m256d)(W), \
                                            (__mmask8)(U)); })

#define _mm512_maskz_extractf64x4_pd(U, A, imm) __extension__ ({\
  (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
                                            (__v4df)_mm256_setzero_pd(), \
                                            (__mmask8)(U)); })
3496*67e74705SXin Li 
/* _mm512_extractf32x4_ps family.
 *
 * All three macros wrap __builtin_ia32_extractf32x4_mask(A, imm, src, mask)
 * in a statement expression and differ only in the last two operands:
 *   plain : src = _mm_setzero_ps(), mask = (__mmask8)-1 (every bit set)
 *   mask  : src = W,                mask = U
 *   maskz : src = _mm_setzero_ps(), mask = U */
#define _mm512_extractf32x4_ps(A, I) __extension__ ({                    \
  (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1); })

#define _mm512_mask_extractf32x4_ps(W, U, A, imm) __extension__ ({\
  (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
                                           (__v4sf)(__m128)(W), \
                                           (__mmask8)(U)); })

#define _mm512_maskz_extractf32x4_ps(U, A, imm) __extension__ ({\
  (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U)); })
3511*67e74705SXin Li /* Vector Blend */
3512*67e74705SXin Li 
3513*67e74705SXin Li static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_mask_blend_pd(__mmask8 __U,__m512d __A,__m512d __W)3514*67e74705SXin Li _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
3515*67e74705SXin Li {
3516*67e74705SXin Li   return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
3517*67e74705SXin Li                  (__v8df) __W,
3518*67e74705SXin Li                  (__v8df) __A);
3519*67e74705SXin Li }
3520*67e74705SXin Li 
3521*67e74705SXin Li static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_mask_blend_ps(__mmask16 __U,__m512 __A,__m512 __W)3522*67e74705SXin Li _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
3523*67e74705SXin Li {
3524*67e74705SXin Li   return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
3525*67e74705SXin Li                 (__v16sf) __W,
3526*67e74705SXin Li                 (__v16sf) __A);
3527*67e74705SXin Li }
3528*67e74705SXin Li 
3529*67e74705SXin Li static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mask_blend_epi64(__mmask8 __U,__m512i __A,__m512i __W)3530*67e74705SXin Li _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
3531*67e74705SXin Li {
3532*67e74705SXin Li   return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
3533*67e74705SXin Li                 (__v8di) __W,
3534*67e74705SXin Li                 (__v8di) __A);
3535*67e74705SXin Li }
3536*67e74705SXin Li 
3537*67e74705SXin Li static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mask_blend_epi32(__mmask16 __U,__m512i __A,__m512i __W)3538*67e74705SXin Li _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
3539*67e74705SXin Li {
3540*67e74705SXin Li   return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
3541*67e74705SXin Li                 (__v16si) __W,
3542*67e74705SXin Li                 (__v16si) __A);
3543*67e74705SXin Li }
3544*67e74705SXin Li 
3545*67e74705SXin Li /* Compare */
3546*67e74705SXin Li 
/* Single-precision compare-to-mask family.
 *
 * The two *_round_* macros wrap
 * __builtin_ia32_cmpps512_mask(A, B, P, mask, R) in a statement expression:
 *   _mm512_cmp_round_ps_mask      : mask = (__mmask16)-1 (every bit set)
 *   _mm512_mask_cmp_round_ps_mask : mask = U
 * The two non-round macros expand to the matching *_round_* macro with
 * R = _MM_FROUND_CUR_DIRECTION. */
#define _mm512_cmp_round_ps_mask(A, B, P, R) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(P), \
                                          (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(P), \
                                          (__mmask16)(U), (int)(R)); })

#define _mm512_cmp_ps_mask(A, B, P) \
  _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
  _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)
3562*67e74705SXin Li 
/* Double-precision compare-to-mask family.
 *
 * The two *_round_* macros wrap
 * __builtin_ia32_cmppd512_mask(A, B, P, mask, R) in a statement expression:
 *   _mm512_cmp_round_pd_mask      : mask = (__mmask8)-1 (every bit set)
 *   _mm512_mask_cmp_round_pd_mask : mask = U
 * The two non-round macros expand to the matching *_round_* macro with
 * R = _MM_FROUND_CUR_DIRECTION. */
#define _mm512_cmp_round_pd_mask(A, B, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), (int)(P), \
                                         (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), (int)(P), \
                                         (__mmask8)(U), (int)(R)); })

#define _mm512_cmp_pd_mask(A, B, P) \
  _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
  _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)
3578*67e74705SXin Li 
3579*67e74705SXin Li /* Conversion */
3580*67e74705SXin Li 
/* _mm512_cvtt_roundps_epu32 family.
 *
 * All three macros wrap __builtin_ia32_cvttps2udq512_mask(A, src, mask, R)
 * in a statement expression and differ only in src and mask:
 *   plain : src = _mm512_undefined_epi32(), mask = (__mmask16)-1
 *   mask  : src = W,                        mask = U
 *   maskz : src = _mm512_setzero_si512(),   mask = U
 * NOTE(review): going by the builtin's name this is the truncating
 * float -> unsigned 32-bit conversion -- confirm in the Intel docs. */
#define _mm512_cvtt_roundps_epu32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_undefined_epi32(), \
                                             (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)(__m512i)(W), \
                                             (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvtt_roundps_epu32(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)(U), (int)(R)); })
3595*67e74705SXin Li 
3596*67e74705SXin Li 
3597*67e74705SXin Li static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_cvttps_epu32(__m512 __A)3598*67e74705SXin Li _mm512_cvttps_epu32(__m512 __A)
3599*67e74705SXin Li {
3600*67e74705SXin Li   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3601*67e74705SXin Li                   (__v16si)
3602*67e74705SXin Li                   _mm512_setzero_si512 (),
3603*67e74705SXin Li                   (__mmask16) -1,
3604*67e74705SXin Li                   _MM_FROUND_CUR_DIRECTION);
3605*67e74705SXin Li }
3606*67e74705SXin Li 
3607*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvttps_epu32(__m512i __W,__mmask16 __U,__m512 __A)3608*67e74705SXin Li _mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
3609*67e74705SXin Li {
3610*67e74705SXin Li   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3611*67e74705SXin Li                    (__v16si) __W,
3612*67e74705SXin Li                    (__mmask16) __U,
3613*67e74705SXin Li                    _MM_FROUND_CUR_DIRECTION);
3614*67e74705SXin Li }
3615*67e74705SXin Li 
3616*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvttps_epu32(__mmask16 __U,__m512 __A)3617*67e74705SXin Li _mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
3618*67e74705SXin Li {
3619*67e74705SXin Li   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3620*67e74705SXin Li                    (__v16si) _mm512_setzero_si512 (),
3621*67e74705SXin Li                    (__mmask16) __U,
3622*67e74705SXin Li                    _MM_FROUND_CUR_DIRECTION);
3623*67e74705SXin Li }
3624*67e74705SXin Li 
/* _mm512_cvt_roundepi32_ps family.
 *
 * All three macros wrap __builtin_ia32_cvtdq2ps512_mask(A, src, mask, R)
 * in a statement expression and differ only in src and mask:
 *   plain : src = _mm512_setzero_ps(), mask = (__mmask16)-1 (every bit set)
 *   mask  : src = W,                   mask = U
 *   maskz : src = _mm512_setzero_ps(), mask = U */
#define _mm512_cvt_roundepi32_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                          (__v16sf)(__m512)(W), \
                                          (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), (int)(R)); })
3639*67e74705SXin Li 
/* _mm512_cvt_roundepu32_ps family.
 *
 * All three macros wrap __builtin_ia32_cvtudq2ps512_mask(A, src, mask, R)
 * in a statement expression and differ only in src and mask:
 *   plain : src = _mm512_setzero_ps(), mask = (__mmask16)-1 (every bit set)
 *   mask  : src = W,                   mask = U
 *   maskz : src = _mm512_setzero_ps(), mask = U */
#define _mm512_cvt_roundepu32_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)); })
3654*67e74705SXin Li 
3655*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_cvtepu32_ps(__m512i __A)3656*67e74705SXin Li _mm512_cvtepu32_ps (__m512i __A)
3657*67e74705SXin Li {
3658*67e74705SXin Li   return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
3659*67e74705SXin Li                  (__v16sf) _mm512_undefined_ps (),
3660*67e74705SXin Li                  (__mmask16) -1,
3661*67e74705SXin Li                  _MM_FROUND_CUR_DIRECTION);
3662*67e74705SXin Li }
3663*67e74705SXin Li 
3664*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_cvtepu32_ps(__m512 __W,__mmask16 __U,__m512i __A)3665*67e74705SXin Li _mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
3666*67e74705SXin Li {
3667*67e74705SXin Li   return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
3668*67e74705SXin Li                  (__v16sf) __W,
3669*67e74705SXin Li                  (__mmask16) __U,
3670*67e74705SXin Li                  _MM_FROUND_CUR_DIRECTION);
3671*67e74705SXin Li }
3672*67e74705SXin Li 
3673*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepu32_ps(__mmask16 __U,__m512i __A)3674*67e74705SXin Li _mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
3675*67e74705SXin Li {
3676*67e74705SXin Li   return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
3677*67e74705SXin Li                  (__v16sf) _mm512_setzero_ps (),
3678*67e74705SXin Li                  (__mmask16) __U,
3679*67e74705SXin Li                  _MM_FROUND_CUR_DIRECTION);
3680*67e74705SXin Li }
3681*67e74705SXin Li 
3682*67e74705SXin Li static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_cvtepi32_pd(__m256i __A)3683*67e74705SXin Li _mm512_cvtepi32_pd(__m256i __A)
3684*67e74705SXin Li {
3685*67e74705SXin Li   return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
3686*67e74705SXin Li                 (__v8df)
3687*67e74705SXin Li                 _mm512_setzero_pd (),
3688*67e74705SXin Li                 (__mmask8) -1);
3689*67e74705SXin Li }
3690*67e74705SXin Li 
3691*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi32_pd(__m512d __W,__mmask8 __U,__m256i __A)3692*67e74705SXin Li _mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
3693*67e74705SXin Li {
3694*67e74705SXin Li   return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
3695*67e74705SXin Li                 (__v8df) __W,
3696*67e74705SXin Li                 (__mmask8) __U);
3697*67e74705SXin Li }
3698*67e74705SXin Li 
3699*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi32_pd(__mmask8 __U,__m256i __A)3700*67e74705SXin Li _mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
3701*67e74705SXin Li {
3702*67e74705SXin Li   return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
3703*67e74705SXin Li                 (__v8df) _mm512_setzero_pd (),
3704*67e74705SXin Li                 (__mmask8) __U);
3705*67e74705SXin Li }
3706*67e74705SXin Li 
3707*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_cvtepi32_ps(__m512i __A)3708*67e74705SXin Li _mm512_cvtepi32_ps (__m512i __A)
3709*67e74705SXin Li {
3710*67e74705SXin Li   return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
3711*67e74705SXin Li                 (__v16sf) _mm512_undefined_ps (),
3712*67e74705SXin Li                 (__mmask16) -1,
3713*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
3714*67e74705SXin Li }
3715*67e74705SXin Li 
3716*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi32_ps(__m512 __W,__mmask16 __U,__m512i __A)3717*67e74705SXin Li _mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
3718*67e74705SXin Li {
3719*67e74705SXin Li   return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
3720*67e74705SXin Li                 (__v16sf) __W,
3721*67e74705SXin Li                 (__mmask16) __U,
3722*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
3723*67e74705SXin Li }
3724*67e74705SXin Li 
3725*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi32_ps(__mmask16 __U,__m512i __A)3726*67e74705SXin Li _mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
3727*67e74705SXin Li {
3728*67e74705SXin Li   return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
3729*67e74705SXin Li                 (__v16sf) _mm512_setzero_ps (),
3730*67e74705SXin Li                 (__mmask16) __U,
3731*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
3732*67e74705SXin Li }
3733*67e74705SXin Li 
3734*67e74705SXin Li static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_cvtepu32_pd(__m256i __A)3735*67e74705SXin Li _mm512_cvtepu32_pd(__m256i __A)
3736*67e74705SXin Li {
3737*67e74705SXin Li   return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
3738*67e74705SXin Li                 (__v8df)
3739*67e74705SXin Li                 _mm512_setzero_pd (),
3740*67e74705SXin Li                 (__mmask8) -1);
3741*67e74705SXin Li }
3742*67e74705SXin Li 
3743*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_cvtepu32_pd(__m512d __W,__mmask8 __U,__m256i __A)3744*67e74705SXin Li _mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
3745*67e74705SXin Li {
3746*67e74705SXin Li   return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
3747*67e74705SXin Li                   (__v8df) __W,
3748*67e74705SXin Li                   (__mmask8) __U);
3749*67e74705SXin Li }
3750*67e74705SXin Li 
3751*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepu32_pd(__mmask8 __U,__m256i __A)3752*67e74705SXin Li _mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
3753*67e74705SXin Li {
3754*67e74705SXin Li   return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
3755*67e74705SXin Li                   (__v8df) _mm512_setzero_pd (),
3756*67e74705SXin Li                   (__mmask8) __U);
3757*67e74705SXin Li }
3758*67e74705SXin Li 
/* _mm512_cvt_roundpd_ps family.
 *
 * All three macros wrap __builtin_ia32_cvtpd2ps512_mask(A, src, mask, R)
 * in a statement expression and yield a 256-bit __m256; they differ only in
 * src and mask:
 *   plain : src = _mm256_setzero_ps(), mask = (__mmask8)-1 (every bit set)
 *   mask  : src = W,                   mask = U
 *   maskz : src = _mm256_setzero_ps(), mask = U */
#define _mm512_cvt_roundpd_ps(A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                          (__v8sf)(__m256)(W), (__mmask8)(U), \
                                          (int)(R)); })

#define _mm512_maskz_cvt_roundpd_ps(U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)(U), (int)(R)); })
3773*67e74705SXin Li 
3774*67e74705SXin Li static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm512_cvtpd_ps(__m512d __A)3775*67e74705SXin Li _mm512_cvtpd_ps (__m512d __A)
3776*67e74705SXin Li {
3777*67e74705SXin Li   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3778*67e74705SXin Li                 (__v8sf) _mm256_undefined_ps (),
3779*67e74705SXin Li                 (__mmask8) -1,
3780*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
3781*67e74705SXin Li }
3782*67e74705SXin Li 
3783*67e74705SXin Li static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm512_mask_cvtpd_ps(__m256 __W,__mmask8 __U,__m512d __A)3784*67e74705SXin Li _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
3785*67e74705SXin Li {
3786*67e74705SXin Li   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3787*67e74705SXin Li                 (__v8sf) __W,
3788*67e74705SXin Li                 (__mmask8) __U,
3789*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
3790*67e74705SXin Li }
3791*67e74705SXin Li 
3792*67e74705SXin Li static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm512_maskz_cvtpd_ps(__mmask8 __U,__m512d __A)3793*67e74705SXin Li _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
3794*67e74705SXin Li {
3795*67e74705SXin Li   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3796*67e74705SXin Li                 (__v8sf) _mm256_setzero_ps (),
3797*67e74705SXin Li                 (__mmask8) __U,
3798*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
3799*67e74705SXin Li }
3800*67e74705SXin Li 
/* _mm512_cvt_roundps_ph family.
 *
 * All three macros wrap __builtin_ia32_vcvtps2ph512_mask(A, I, src, mask)
 * in a statement expression and yield a 256-bit __m256i; they differ only
 * in src and mask:
 *   plain : src = _mm256_undefined_si256(), mask = (__mmask16)-1
 *   mask  : src = W,                        mask = U
 *   maskz : src = _mm256_setzero_si256(),   mask = U
 * Macro parameters follow the file-wide convention (W = source vector,
 * U = mask); argument positions are what callers depend on and are
 * unchanged. */
#define _mm512_cvt_roundps_ph(A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_undefined_si256(), \
                                            (__mmask16)-1); })

#define _mm512_mask_cvt_roundps_ph(W, U, A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)(__m256i)(W), \
                                            (__mmask16)(U)); })

#define _mm512_maskz_cvt_roundps_ph(U, A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_setzero_si256(), \
                                            (__mmask16)(U)); })
3815*67e74705SXin Li 
/* _mm512_cvtps_ph family.
 *
 * All three macros wrap __builtin_ia32_vcvtps2ph512_mask(A, I, src, mask)
 * in a statement expression and yield a 256-bit __m256i; they differ only
 * in src and mask:
 *   plain : src = _mm256_setzero_si256(), mask = (__mmask16)-1
 *   mask  : src = W,                      mask = U
 *   maskz : src = _mm256_setzero_si256(), mask = U
 * Macro parameters follow the file-wide convention (W = source vector,
 * U = mask); argument positions are what callers depend on and are
 * unchanged. */
#define _mm512_cvtps_ph(A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_setzero_si256(), \
                                            (__mmask16)-1); })

#define _mm512_mask_cvtps_ph(W, U, A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)(__m256i)(W), \
                                            (__mmask16)(U)); })

#define _mm512_maskz_cvtps_ph(U, A, I) __extension__ ({\
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_setzero_si256(), \
                                            (__mmask16)(U)); })
3830*67e74705SXin Li 
/* _mm512_cvt_roundph_ps family.
 *
 * All three macros wrap __builtin_ia32_vcvtph2ps512_mask(A, src, mask, R)
 * in a statement expression, taking a 256-bit __m256i input; they differ
 * only in src and mask:
 *   plain : src = _mm512_undefined_ps(), mask = (__mmask16)-1
 *   mask  : src = W,                     mask = U
 *   maskz : src = _mm512_setzero_ps(),   mask = U */
#define _mm512_cvt_roundph_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvt_roundph_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundph_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)); })
3845*67e74705SXin Li 
3846*67e74705SXin Li 
3847*67e74705SXin Li static  __inline __m512 __DEFAULT_FN_ATTRS
_mm512_cvtph_ps(__m256i __A)3848*67e74705SXin Li _mm512_cvtph_ps(__m256i __A)
3849*67e74705SXin Li {
3850*67e74705SXin Li   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3851*67e74705SXin Li                 (__v16sf)
3852*67e74705SXin Li                 _mm512_setzero_ps (),
3853*67e74705SXin Li                 (__mmask16) -1,
3854*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
3855*67e74705SXin Li }
3856*67e74705SXin Li 
3857*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_cvtph_ps(__m512 __W,__mmask16 __U,__m256i __A)3858*67e74705SXin Li _mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
3859*67e74705SXin Li {
3860*67e74705SXin Li   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3861*67e74705SXin Li                  (__v16sf) __W,
3862*67e74705SXin Li                  (__mmask16) __U,
3863*67e74705SXin Li                  _MM_FROUND_CUR_DIRECTION);
3864*67e74705SXin Li }
3865*67e74705SXin Li 
3866*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_cvtph_ps(__mmask16 __U,__m256i __A)3867*67e74705SXin Li _mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
3868*67e74705SXin Li {
3869*67e74705SXin Li   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3870*67e74705SXin Li                  (__v16sf) _mm512_setzero_ps (),
3871*67e74705SXin Li                  (__mmask16) __U,
3872*67e74705SXin Li                  _MM_FROUND_CUR_DIRECTION);
3873*67e74705SXin Li }
3874*67e74705SXin Li 
/* _mm512_cvtt_roundpd_epi32 family.
 *
 * All three macros wrap __builtin_ia32_cvttpd2dq512_mask(A, src, mask, R)
 * in a statement expression and yield a 256-bit __m256i; they differ only
 * in src and mask:
 *   plain : src = _mm256_setzero_si256(), mask = (__mmask8)-1
 *   mask  : src = W,                      mask = U
 *   maskz : src = _mm256_setzero_si256(), mask = U */
#define _mm512_cvtt_roundpd_epi32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)(__m256i)(W), \
                                            (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)(U), (int)(R)); })
3889*67e74705SXin Li 
3890*67e74705SXin Li static __inline __m256i __DEFAULT_FN_ATTRS
_mm512_cvttpd_epi32(__m512d __a)3891*67e74705SXin Li _mm512_cvttpd_epi32(__m512d __a)
3892*67e74705SXin Li {
3893*67e74705SXin Li   return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
3894*67e74705SXin Li                                                    (__v8si)_mm256_setzero_si256(),
3895*67e74705SXin Li                                                    (__mmask8) -1,
3896*67e74705SXin Li                                                     _MM_FROUND_CUR_DIRECTION);
3897*67e74705SXin Li }
3898*67e74705SXin Li 
3899*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvttpd_epi32(__m256i __W,__mmask8 __U,__m512d __A)3900*67e74705SXin Li _mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
3901*67e74705SXin Li {
3902*67e74705SXin Li   return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3903*67e74705SXin Li                   (__v8si) __W,
3904*67e74705SXin Li                   (__mmask8) __U,
3905*67e74705SXin Li                   _MM_FROUND_CUR_DIRECTION);
3906*67e74705SXin Li }
3907*67e74705SXin Li 
3908*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvttpd_epi32(__mmask8 __U,__m512d __A)3909*67e74705SXin Li _mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
3910*67e74705SXin Li {
3911*67e74705SXin Li   return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3912*67e74705SXin Li                   (__v8si) _mm256_setzero_si256 (),
3913*67e74705SXin Li                   (__mmask8) __U,
3914*67e74705SXin Li                   _MM_FROUND_CUR_DIRECTION);
3915*67e74705SXin Li }
3916*67e74705SXin Li 
/* _mm512_cvtt_roundps_epi32 family.
 *
 * All three macros wrap __builtin_ia32_cvttps2dq512_mask(A, src, mask, R)
 * in a statement expression and differ only in src and mask:
 *   plain : src = _mm512_setzero_si512(), mask = (__mmask16)-1
 *   mask  : src = W,                      mask = U
 *   maskz : src = _mm512_setzero_si512(), mask = U */
#define _mm512_cvtt_roundps_epi32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)(__m512i)(W), \
                                            (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)(U), (int)(R)); })
3931*67e74705SXin Li 
3932*67e74705SXin Li static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_cvttps_epi32(__m512 __a)3933*67e74705SXin Li _mm512_cvttps_epi32(__m512 __a)
3934*67e74705SXin Li {
3935*67e74705SXin Li   return (__m512i)
3936*67e74705SXin Li     __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
3937*67e74705SXin Li                                      (__v16si) _mm512_setzero_si512 (),
3938*67e74705SXin Li                                      (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
3939*67e74705SXin Li }
3940*67e74705SXin Li 
3941*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvttps_epi32(__m512i __W,__mmask16 __U,__m512 __A)3942*67e74705SXin Li _mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
3943*67e74705SXin Li {
3944*67e74705SXin Li   return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3945*67e74705SXin Li                   (__v16si) __W,
3946*67e74705SXin Li                   (__mmask16) __U,
3947*67e74705SXin Li                   _MM_FROUND_CUR_DIRECTION);
3948*67e74705SXin Li }
3949*67e74705SXin Li 
3950*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvttps_epi32(__mmask16 __U,__m512 __A)3951*67e74705SXin Li _mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
3952*67e74705SXin Li {
3953*67e74705SXin Li   return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3954*67e74705SXin Li                   (__v16si) _mm512_setzero_si512 (),
3955*67e74705SXin Li                   (__mmask16) __U,
3956*67e74705SXin Li                   _MM_FROUND_CUR_DIRECTION);
3957*67e74705SXin Li }
3958*67e74705SXin Li 
/* Expands to __builtin_ia32_cvtps2dq512_mask on A with a
 * _mm512_setzero_si512() vector operand, (__mmask16)-1 as the mask and R
 * as the final int argument. */
#define _mm512_cvt_roundps_epi32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2dq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512(), \
      (__mmask16)-1, (int)(R)); })
3963*67e74705SXin Li 
/* Masked form: expands to __builtin_ia32_cvtps2dq512_mask on A with W as
 * the vector operand, U as the mask and R as the final int argument. */
#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2dq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)(__m512i)(W), \
      (__mmask16)(U), (int)(R)); })
3968*67e74705SXin Li 
/* Zero-masked form: expands to __builtin_ia32_cvtps2dq512_mask on A with a
 * _mm512_setzero_si512() vector operand, U as the mask and R as the final
 * int argument. */
#define _mm512_maskz_cvt_roundps_epi32(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2dq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512(), \
      (__mmask16)(U), (int)(R)); })
3973*67e74705SXin Li 
3974*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtps_epi32(__m512 __A)3975*67e74705SXin Li _mm512_cvtps_epi32 (__m512 __A)
3976*67e74705SXin Li {
3977*67e74705SXin Li   return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3978*67e74705SXin Li                  (__v16si) _mm512_undefined_epi32 (),
3979*67e74705SXin Li                  (__mmask16) -1,
3980*67e74705SXin Li                  _MM_FROUND_CUR_DIRECTION);
3981*67e74705SXin Li }
3982*67e74705SXin Li 
3983*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtps_epi32(__m512i __W,__mmask16 __U,__m512 __A)3984*67e74705SXin Li _mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
3985*67e74705SXin Li {
3986*67e74705SXin Li   return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3987*67e74705SXin Li                  (__v16si) __W,
3988*67e74705SXin Li                  (__mmask16) __U,
3989*67e74705SXin Li                  _MM_FROUND_CUR_DIRECTION);
3990*67e74705SXin Li }
3991*67e74705SXin Li 
3992*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtps_epi32(__mmask16 __U,__m512 __A)3993*67e74705SXin Li _mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
3994*67e74705SXin Li {
3995*67e74705SXin Li   return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3996*67e74705SXin Li                  (__v16si)
3997*67e74705SXin Li                  _mm512_setzero_si512 (),
3998*67e74705SXin Li                  (__mmask16) __U,
3999*67e74705SXin Li                  _MM_FROUND_CUR_DIRECTION);
4000*67e74705SXin Li }
4001*67e74705SXin Li 
/* Expands to __builtin_ia32_cvtpd2dq512_mask on A with a
 * _mm256_setzero_si256() vector operand, (__mmask8)-1 as the mask and R as
 * the final int argument. */
#define _mm512_cvt_roundpd_epi32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2dq512_mask( \
      (__v8df)(__m512d)(A), (__v8si)_mm256_setzero_si256(), \
      (__mmask8)-1, (int)(R)); })
4006*67e74705SXin Li 
/* Masked form: expands to __builtin_ia32_cvtpd2dq512_mask on A with W as
 * the vector operand, U as the mask and R as the final int argument. */
#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2dq512_mask( \
      (__v8df)(__m512d)(A), (__v8si)(__m256i)(W), \
      (__mmask8)(U), (int)(R)); })
4011*67e74705SXin Li 
/* Zero-masked form: expands to __builtin_ia32_cvtpd2dq512_mask on A with a
 * _mm256_setzero_si256() vector operand, U as the mask and R as the final
 * int argument. */
#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2dq512_mask( \
      (__v8df)(__m512d)(A), (__v8si)_mm256_setzero_si256(), \
      (__mmask8)(U), (int)(R)); })
4016*67e74705SXin Li 
4017*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_cvtpd_epi32(__m512d __A)4018*67e74705SXin Li _mm512_cvtpd_epi32 (__m512d __A)
4019*67e74705SXin Li {
4020*67e74705SXin Li   return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4021*67e74705SXin Li                  (__v8si)
4022*67e74705SXin Li                  _mm256_undefined_si256 (),
4023*67e74705SXin Li                  (__mmask8) -1,
4024*67e74705SXin Li                  _MM_FROUND_CUR_DIRECTION);
4025*67e74705SXin Li }
4026*67e74705SXin Li 
4027*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvtpd_epi32(__m256i __W,__mmask8 __U,__m512d __A)4028*67e74705SXin Li _mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
4029*67e74705SXin Li {
4030*67e74705SXin Li   return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4031*67e74705SXin Li                  (__v8si) __W,
4032*67e74705SXin Li                  (__mmask8) __U,
4033*67e74705SXin Li                  _MM_FROUND_CUR_DIRECTION);
4034*67e74705SXin Li }
4035*67e74705SXin Li 
4036*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtpd_epi32(__mmask8 __U,__m512d __A)4037*67e74705SXin Li _mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
4038*67e74705SXin Li {
4039*67e74705SXin Li   return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4040*67e74705SXin Li                  (__v8si)
4041*67e74705SXin Li                  _mm256_setzero_si256 (),
4042*67e74705SXin Li                  (__mmask8) __U,
4043*67e74705SXin Li                  _MM_FROUND_CUR_DIRECTION);
4044*67e74705SXin Li }
4045*67e74705SXin Li 
/* Expands to __builtin_ia32_cvtps2udq512_mask on A with a
 * _mm512_setzero_si512() vector operand, (__mmask16)-1 as the mask and R
 * as the final int argument. */
#define _mm512_cvt_roundps_epu32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2udq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512(), \
      (__mmask16)-1, (int)(R)); })
4050*67e74705SXin Li 
/* Masked form: expands to __builtin_ia32_cvtps2udq512_mask on A with W as
 * the vector operand, U as the mask and R as the final int argument. */
#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2udq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)(__m512i)(W), \
      (__mmask16)(U), (int)(R)); })
4055*67e74705SXin Li 
/* Zero-masked form: expands to __builtin_ia32_cvtps2udq512_mask on A with a
 * _mm512_setzero_si512() vector operand, U as the mask and R as the final
 * int argument. */
#define _mm512_maskz_cvt_roundps_epu32(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2udq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512(), \
      (__mmask16)(U), (int)(R)); })
4060*67e74705SXin Li 
4061*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtps_epu32(__m512 __A)4062*67e74705SXin Li _mm512_cvtps_epu32 ( __m512 __A)
4063*67e74705SXin Li {
4064*67e74705SXin Li   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\
4065*67e74705SXin Li                   (__v16si)\
4066*67e74705SXin Li                   _mm512_undefined_epi32 (),\
4067*67e74705SXin Li                   (__mmask16) -1,\
4068*67e74705SXin Li                   _MM_FROUND_CUR_DIRECTION);\
4069*67e74705SXin Li }
4070*67e74705SXin Li 
4071*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtps_epu32(__m512i __W,__mmask16 __U,__m512 __A)4072*67e74705SXin Li _mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
4073*67e74705SXin Li {
4074*67e74705SXin Li   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4075*67e74705SXin Li                   (__v16si) __W,
4076*67e74705SXin Li                   (__mmask16) __U,
4077*67e74705SXin Li                   _MM_FROUND_CUR_DIRECTION);
4078*67e74705SXin Li }
4079*67e74705SXin Li 
4080*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtps_epu32(__mmask16 __U,__m512 __A)4081*67e74705SXin Li _mm512_maskz_cvtps_epu32 ( __mmask16 __U, __m512 __A)
4082*67e74705SXin Li {
4083*67e74705SXin Li   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4084*67e74705SXin Li                   (__v16si)
4085*67e74705SXin Li                   _mm512_setzero_si512 (),
4086*67e74705SXin Li                   (__mmask16) __U ,
4087*67e74705SXin Li                   _MM_FROUND_CUR_DIRECTION);
4088*67e74705SXin Li }
4089*67e74705SXin Li 
/* Expands to __builtin_ia32_cvtpd2udq512_mask on A with a
 * _mm256_setzero_si256() vector operand, (__mmask8)-1 as the mask and R as
 * the final int argument. */
#define _mm512_cvt_roundpd_epu32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2udq512_mask( \
      (__v8df)(__m512d)(A), (__v8si)_mm256_setzero_si256(), \
      (__mmask8)-1, (int)(R)); })
4094*67e74705SXin Li 
/* Masked form: expands to __builtin_ia32_cvtpd2udq512_mask on A with W as
 * the vector operand, U as the mask and R as the final int argument.
 * W goes through an explicit (__m256i) cast before (__v8si), matching
 * _mm512_mask_cvt_roundpd_epi32 and the other masked round macros. */
#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)(__m256i)(W), \
                                            (__mmask8)(U), (int)(R)); })
4099*67e74705SXin Li 
/* Zero-masked form: expands to __builtin_ia32_cvtpd2udq512_mask on A with a
 * _mm256_setzero_si256() vector operand, U as the mask and R as the final
 * int argument. */
#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2udq512_mask( \
      (__v8df)(__m512d)(A), (__v8si)_mm256_setzero_si256(), \
      (__mmask8)(U), (int)(R)); })
4104*67e74705SXin Li 
4105*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_cvtpd_epu32(__m512d __A)4106*67e74705SXin Li _mm512_cvtpd_epu32 (__m512d __A)
4107*67e74705SXin Li {
4108*67e74705SXin Li   return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4109*67e74705SXin Li                   (__v8si)
4110*67e74705SXin Li                   _mm256_undefined_si256 (),
4111*67e74705SXin Li                   (__mmask8) -1,
4112*67e74705SXin Li                   _MM_FROUND_CUR_DIRECTION);
4113*67e74705SXin Li }
4114*67e74705SXin Li 
4115*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvtpd_epu32(__m256i __W,__mmask8 __U,__m512d __A)4116*67e74705SXin Li _mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
4117*67e74705SXin Li {
4118*67e74705SXin Li   return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4119*67e74705SXin Li                   (__v8si) __W,
4120*67e74705SXin Li                   (__mmask8) __U,
4121*67e74705SXin Li                   _MM_FROUND_CUR_DIRECTION);
4122*67e74705SXin Li }
4123*67e74705SXin Li 
4124*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtpd_epu32(__mmask8 __U,__m512d __A)4125*67e74705SXin Li _mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
4126*67e74705SXin Li {
4127*67e74705SXin Li   return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4128*67e74705SXin Li                   (__v8si)
4129*67e74705SXin Li                   _mm256_setzero_si256 (),
4130*67e74705SXin Li                   (__mmask8) __U,
4131*67e74705SXin Li                   _MM_FROUND_CUR_DIRECTION);
4132*67e74705SXin Li }
4133*67e74705SXin Li 
4134*67e74705SXin Li /* Unpack and Interleave */
4135*67e74705SXin Li 
4136*67e74705SXin Li static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_unpackhi_pd(__m512d __a,__m512d __b)4137*67e74705SXin Li _mm512_unpackhi_pd(__m512d __a, __m512d __b)
4138*67e74705SXin Li {
4139*67e74705SXin Li   return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
4140*67e74705SXin Li                                           1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4141*67e74705SXin Li }
4142*67e74705SXin Li 
4143*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_unpackhi_pd(__m512d __W,__mmask8 __U,__m512d __A,__m512d __B)4144*67e74705SXin Li _mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
4145*67e74705SXin Li {
4146*67e74705SXin Li   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4147*67e74705SXin Li                                            (__v8df)_mm512_unpackhi_pd(__A, __B),
4148*67e74705SXin Li                                            (__v8df)__W);
4149*67e74705SXin Li }
4150*67e74705SXin Li 
4151*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_unpackhi_pd(__mmask8 __U,__m512d __A,__m512d __B)4152*67e74705SXin Li _mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
4153*67e74705SXin Li {
4154*67e74705SXin Li   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4155*67e74705SXin Li                                            (__v8df)_mm512_unpackhi_pd(__A, __B),
4156*67e74705SXin Li                                            (__v8df)_mm512_setzero_pd());
4157*67e74705SXin Li }
4158*67e74705SXin Li 
4159*67e74705SXin Li static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_unpacklo_pd(__m512d __a,__m512d __b)4160*67e74705SXin Li _mm512_unpacklo_pd(__m512d __a, __m512d __b)
4161*67e74705SXin Li {
4162*67e74705SXin Li   return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
4163*67e74705SXin Li                                           0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4164*67e74705SXin Li }
4165*67e74705SXin Li 
4166*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_unpacklo_pd(__m512d __W,__mmask8 __U,__m512d __A,__m512d __B)4167*67e74705SXin Li _mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
4168*67e74705SXin Li {
4169*67e74705SXin Li   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4170*67e74705SXin Li                                            (__v8df)_mm512_unpacklo_pd(__A, __B),
4171*67e74705SXin Li                                            (__v8df)__W);
4172*67e74705SXin Li }
4173*67e74705SXin Li 
4174*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_unpacklo_pd(__mmask8 __U,__m512d __A,__m512d __B)4175*67e74705SXin Li _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
4176*67e74705SXin Li {
4177*67e74705SXin Li   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4178*67e74705SXin Li                                            (__v8df)_mm512_unpacklo_pd(__A, __B),
4179*67e74705SXin Li                                            (__v8df)_mm512_setzero_pd());
4180*67e74705SXin Li }
4181*67e74705SXin Li 
4182*67e74705SXin Li static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_unpackhi_ps(__m512 __a,__m512 __b)4183*67e74705SXin Li _mm512_unpackhi_ps(__m512 __a, __m512 __b)
4184*67e74705SXin Li {
4185*67e74705SXin Li   return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4186*67e74705SXin Li                                          2,    18,    3,    19,
4187*67e74705SXin Li                                          2+4,  18+4,  3+4,  19+4,
4188*67e74705SXin Li                                          2+8,  18+8,  3+8,  19+8,
4189*67e74705SXin Li                                          2+12, 18+12, 3+12, 19+12);
4190*67e74705SXin Li }
4191*67e74705SXin Li 
4192*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_unpackhi_ps(__m512 __W,__mmask16 __U,__m512 __A,__m512 __B)4193*67e74705SXin Li _mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
4194*67e74705SXin Li {
4195*67e74705SXin Li   return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4196*67e74705SXin Li                                           (__v16sf)_mm512_unpackhi_ps(__A, __B),
4197*67e74705SXin Li                                           (__v16sf)__W);
4198*67e74705SXin Li }
4199*67e74705SXin Li 
4200*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_unpackhi_ps(__mmask16 __U,__m512 __A,__m512 __B)4201*67e74705SXin Li _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
4202*67e74705SXin Li {
4203*67e74705SXin Li   return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4204*67e74705SXin Li                                           (__v16sf)_mm512_unpackhi_ps(__A, __B),
4205*67e74705SXin Li                                           (__v16sf)_mm512_setzero_ps());
4206*67e74705SXin Li }
4207*67e74705SXin Li 
4208*67e74705SXin Li static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_unpacklo_ps(__m512 __a,__m512 __b)4209*67e74705SXin Li _mm512_unpacklo_ps(__m512 __a, __m512 __b)
4210*67e74705SXin Li {
4211*67e74705SXin Li   return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4212*67e74705SXin Li                                          0,    16,    1,    17,
4213*67e74705SXin Li                                          0+4,  16+4,  1+4,  17+4,
4214*67e74705SXin Li                                          0+8,  16+8,  1+8,  17+8,
4215*67e74705SXin Li                                          0+12, 16+12, 1+12, 17+12);
4216*67e74705SXin Li }
4217*67e74705SXin Li 
4218*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_unpacklo_ps(__m512 __W,__mmask16 __U,__m512 __A,__m512 __B)4219*67e74705SXin Li _mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
4220*67e74705SXin Li {
4221*67e74705SXin Li   return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4222*67e74705SXin Li                                           (__v16sf)_mm512_unpacklo_ps(__A, __B),
4223*67e74705SXin Li                                           (__v16sf)__W);
4224*67e74705SXin Li }
4225*67e74705SXin Li 
4226*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_unpacklo_ps(__mmask16 __U,__m512 __A,__m512 __B)4227*67e74705SXin Li _mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
4228*67e74705SXin Li {
4229*67e74705SXin Li   return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4230*67e74705SXin Li                                           (__v16sf)_mm512_unpacklo_ps(__A, __B),
4231*67e74705SXin Li                                           (__v16sf)_mm512_setzero_ps());
4232*67e74705SXin Li }
4233*67e74705SXin Li 
4234*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_unpackhi_epi32(__m512i __A,__m512i __B)4235*67e74705SXin Li _mm512_unpackhi_epi32(__m512i __A, __m512i __B)
4236*67e74705SXin Li {
4237*67e74705SXin Li   return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4238*67e74705SXin Li                                           2,    18,    3,    19,
4239*67e74705SXin Li                                           2+4,  18+4,  3+4,  19+4,
4240*67e74705SXin Li                                           2+8,  18+8,  3+8,  19+8,
4241*67e74705SXin Li                                           2+12, 18+12, 3+12, 19+12);
4242*67e74705SXin Li }
4243*67e74705SXin Li 
4244*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_unpackhi_epi32(__m512i __W,__mmask16 __U,__m512i __A,__m512i __B)4245*67e74705SXin Li _mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4246*67e74705SXin Li {
4247*67e74705SXin Li   return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4248*67e74705SXin Li                                        (__v16si)_mm512_unpackhi_epi32(__A, __B),
4249*67e74705SXin Li                                        (__v16si)__W);
4250*67e74705SXin Li }
4251*67e74705SXin Li 
4252*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_unpackhi_epi32(__mmask16 __U,__m512i __A,__m512i __B)4253*67e74705SXin Li _mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
4254*67e74705SXin Li {
4255*67e74705SXin Li   return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4256*67e74705SXin Li                                        (__v16si)_mm512_unpackhi_epi32(__A, __B),
4257*67e74705SXin Li                                        (__v16si)_mm512_setzero_si512());
4258*67e74705SXin Li }
4259*67e74705SXin Li 
4260*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_unpacklo_epi32(__m512i __A,__m512i __B)4261*67e74705SXin Li _mm512_unpacklo_epi32(__m512i __A, __m512i __B)
4262*67e74705SXin Li {
4263*67e74705SXin Li   return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4264*67e74705SXin Li                                           0,    16,    1,    17,
4265*67e74705SXin Li                                           0+4,  16+4,  1+4,  17+4,
4266*67e74705SXin Li                                           0+8,  16+8,  1+8,  17+8,
4267*67e74705SXin Li                                           0+12, 16+12, 1+12, 17+12);
4268*67e74705SXin Li }
4269*67e74705SXin Li 
4270*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_unpacklo_epi32(__m512i __W,__mmask16 __U,__m512i __A,__m512i __B)4271*67e74705SXin Li _mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4272*67e74705SXin Li {
4273*67e74705SXin Li   return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4274*67e74705SXin Li                                        (__v16si)_mm512_unpacklo_epi32(__A, __B),
4275*67e74705SXin Li                                        (__v16si)__W);
4276*67e74705SXin Li }
4277*67e74705SXin Li 
4278*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_unpacklo_epi32(__mmask16 __U,__m512i __A,__m512i __B)4279*67e74705SXin Li _mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
4280*67e74705SXin Li {
4281*67e74705SXin Li   return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4282*67e74705SXin Li                                        (__v16si)_mm512_unpacklo_epi32(__A, __B),
4283*67e74705SXin Li                                        (__v16si)_mm512_setzero_si512());
4284*67e74705SXin Li }
4285*67e74705SXin Li 
4286*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_unpackhi_epi64(__m512i __A,__m512i __B)4287*67e74705SXin Li _mm512_unpackhi_epi64(__m512i __A, __m512i __B)
4288*67e74705SXin Li {
4289*67e74705SXin Li   return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4290*67e74705SXin Li                                           1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4291*67e74705SXin Li }
4292*67e74705SXin Li 
4293*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_unpackhi_epi64(__m512i __W,__mmask8 __U,__m512i __A,__m512i __B)4294*67e74705SXin Li _mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4295*67e74705SXin Li {
4296*67e74705SXin Li   return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4297*67e74705SXin Li                                         (__v8di)_mm512_unpackhi_epi64(__A, __B),
4298*67e74705SXin Li                                         (__v8di)__W);
4299*67e74705SXin Li }
4300*67e74705SXin Li 
4301*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_unpackhi_epi64(__mmask8 __U,__m512i __A,__m512i __B)4302*67e74705SXin Li _mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
4303*67e74705SXin Li {
4304*67e74705SXin Li   return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4305*67e74705SXin Li                                         (__v8di)_mm512_unpackhi_epi64(__A, __B),
4306*67e74705SXin Li                                         (__v8di)_mm512_setzero_si512());
4307*67e74705SXin Li }
4308*67e74705SXin Li 
4309*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_unpacklo_epi64(__m512i __A,__m512i __B)4310*67e74705SXin Li _mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
4311*67e74705SXin Li {
4312*67e74705SXin Li   return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4313*67e74705SXin Li                                           0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4314*67e74705SXin Li }
4315*67e74705SXin Li 
4316*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_unpacklo_epi64(__m512i __W,__mmask8 __U,__m512i __A,__m512i __B)4317*67e74705SXin Li _mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4318*67e74705SXin Li {
4319*67e74705SXin Li   return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4320*67e74705SXin Li                                         (__v8di)_mm512_unpacklo_epi64(__A, __B),
4321*67e74705SXin Li                                         (__v8di)__W);
4322*67e74705SXin Li }
4323*67e74705SXin Li 
4324*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_unpacklo_epi64(__mmask8 __U,__m512i __A,__m512i __B)4325*67e74705SXin Li _mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4326*67e74705SXin Li {
4327*67e74705SXin Li   return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4328*67e74705SXin Li                                         (__v8di)_mm512_unpacklo_epi64(__A, __B),
4329*67e74705SXin Li                                         (__v8di)_mm512_setzero_si512());
4330*67e74705SXin Li }
4331*67e74705SXin Li 
4332*67e74705SXin Li /* Bit Test */
4333*67e74705SXin Li 
4334*67e74705SXin Li static __inline __mmask16 __DEFAULT_FN_ATTRS
_mm512_test_epi32_mask(__m512i __A,__m512i __B)4335*67e74705SXin Li _mm512_test_epi32_mask(__m512i __A, __m512i __B)
4336*67e74705SXin Li {
4337*67e74705SXin Li   return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
4338*67e74705SXin Li             (__v16si) __B,
4339*67e74705SXin Li             (__mmask16) -1);
4340*67e74705SXin Li }
4341*67e74705SXin Li 
4342*67e74705SXin Li static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_test_epi32_mask(__mmask16 __U,__m512i __A,__m512i __B)4343*67e74705SXin Li _mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
4344*67e74705SXin Li {
4345*67e74705SXin Li   return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
4346*67e74705SXin Li                  (__v16si) __B, __U);
4347*67e74705SXin Li }
4348*67e74705SXin Li 
4349*67e74705SXin Li static __inline __mmask8 __DEFAULT_FN_ATTRS
_mm512_test_epi64_mask(__m512i __A,__m512i __B)4350*67e74705SXin Li _mm512_test_epi64_mask(__m512i __A, __m512i __B)
4351*67e74705SXin Li {
4352*67e74705SXin Li   return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
4353*67e74705SXin Li                  (__v8di) __B,
4354*67e74705SXin Li                  (__mmask8) -1);
4355*67e74705SXin Li }
4356*67e74705SXin Li 
4357*67e74705SXin Li static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_test_epi64_mask(__mmask8 __U,__m512i __A,__m512i __B)4358*67e74705SXin Li _mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
4359*67e74705SXin Li {
4360*67e74705SXin Li   return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
4361*67e74705SXin Li }
4362*67e74705SXin Li 
4363*67e74705SXin Li 
4364*67e74705SXin Li /* SIMD load ops */
4365*67e74705SXin Li 
4366*67e74705SXin Li static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_loadu_si512(void const * __P)4367*67e74705SXin Li _mm512_loadu_si512 (void const *__P)
4368*67e74705SXin Li {
4369*67e74705SXin Li   return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
4370*67e74705SXin Li                   (__v16si)
4371*67e74705SXin Li                   _mm512_setzero_si512 (),
4372*67e74705SXin Li                   (__mmask16) -1);
4373*67e74705SXin Li }
4374*67e74705SXin Li 
4375*67e74705SXin Li static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mask_loadu_epi32(__m512i __W,__mmask16 __U,void const * __P)4376*67e74705SXin Li _mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
4377*67e74705SXin Li {
4378*67e74705SXin Li   return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
4379*67e74705SXin Li                   (__v16si) __W,
4380*67e74705SXin Li                   (__mmask16) __U);
4381*67e74705SXin Li }
4382*67e74705SXin Li 
4383*67e74705SXin Li 
4384*67e74705SXin Li static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_loadu_epi32(__mmask16 __U,void const * __P)4385*67e74705SXin Li _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
4386*67e74705SXin Li {
4387*67e74705SXin Li   return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P,
4388*67e74705SXin Li                                                      (__v16si)
4389*67e74705SXin Li                                                      _mm512_setzero_si512 (),
4390*67e74705SXin Li                                                      (__mmask16) __U);
4391*67e74705SXin Li }
4392*67e74705SXin Li 
4393*67e74705SXin Li static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mask_loadu_epi64(__m512i __W,__mmask8 __U,void const * __P)4394*67e74705SXin Li _mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
4395*67e74705SXin Li {
4396*67e74705SXin Li   return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
4397*67e74705SXin Li                   (__v8di) __W,
4398*67e74705SXin Li                   (__mmask8) __U);
4399*67e74705SXin Li }
4400*67e74705SXin Li 
4401*67e74705SXin Li static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_loadu_epi64(__mmask8 __U,void const * __P)4402*67e74705SXin Li _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
4403*67e74705SXin Li {
4404*67e74705SXin Li   return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P,
4405*67e74705SXin Li                                                      (__v8di)
4406*67e74705SXin Li                                                      _mm512_setzero_si512 (),
4407*67e74705SXin Li                                                      (__mmask8) __U);
4408*67e74705SXin Li }
4409*67e74705SXin Li 
4410*67e74705SXin Li static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_mask_loadu_ps(__m512 __W,__mmask16 __U,void const * __P)4411*67e74705SXin Li _mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
4412*67e74705SXin Li {
4413*67e74705SXin Li   return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
4414*67e74705SXin Li                    (__v16sf) __W,
4415*67e74705SXin Li                    (__mmask16) __U);
4416*67e74705SXin Li }
4417*67e74705SXin Li 
4418*67e74705SXin Li static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_loadu_ps(__mmask16 __U,void const * __P)4419*67e74705SXin Li _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
4420*67e74705SXin Li {
4421*67e74705SXin Li   return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P,
4422*67e74705SXin Li                                                   (__v16sf)
4423*67e74705SXin Li                                                   _mm512_setzero_ps (),
4424*67e74705SXin Li                                                   (__mmask16) __U);
4425*67e74705SXin Li }
4426*67e74705SXin Li 
4427*67e74705SXin Li static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_mask_loadu_pd(__m512d __W,__mmask8 __U,void const * __P)4428*67e74705SXin Li _mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
4429*67e74705SXin Li {
4430*67e74705SXin Li   return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
4431*67e74705SXin Li                 (__v8df) __W,
4432*67e74705SXin Li                 (__mmask8) __U);
4433*67e74705SXin Li }
4434*67e74705SXin Li 
4435*67e74705SXin Li static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_loadu_pd(__mmask8 __U,void const * __P)4436*67e74705SXin Li _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
4437*67e74705SXin Li {
4438*67e74705SXin Li   return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P,
4439*67e74705SXin Li                                                    (__v8df)
4440*67e74705SXin Li                                                    _mm512_setzero_pd (),
4441*67e74705SXin Li                                                    (__mmask8) __U);
4442*67e74705SXin Li }
4443*67e74705SXin Li 
4444*67e74705SXin Li static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_loadu_pd(double const * __p)4445*67e74705SXin Li _mm512_loadu_pd(double const *__p)
4446*67e74705SXin Li {
4447*67e74705SXin Li   struct __loadu_pd {
4448*67e74705SXin Li     __m512d __v;
4449*67e74705SXin Li   } __attribute__((__packed__, __may_alias__));
4450*67e74705SXin Li   return ((struct __loadu_pd*)__p)->__v;
4451*67e74705SXin Li }
4452*67e74705SXin Li 
4453*67e74705SXin Li static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_loadu_ps(float const * __p)4454*67e74705SXin Li _mm512_loadu_ps(float const *__p)
4455*67e74705SXin Li {
4456*67e74705SXin Li   struct __loadu_ps {
4457*67e74705SXin Li     __m512 __v;
4458*67e74705SXin Li   } __attribute__((__packed__, __may_alias__));
4459*67e74705SXin Li   return ((struct __loadu_ps*)__p)->__v;
4460*67e74705SXin Li }
4461*67e74705SXin Li 
4462*67e74705SXin Li static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_load_ps(float const * __p)4463*67e74705SXin Li _mm512_load_ps(float const *__p)
4464*67e74705SXin Li {
4465*67e74705SXin Li   return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__p,
4466*67e74705SXin Li                                                   (__v16sf)
4467*67e74705SXin Li                                                   _mm512_setzero_ps (),
4468*67e74705SXin Li                                                   (__mmask16) -1);
4469*67e74705SXin Li }
4470*67e74705SXin Li 
4471*67e74705SXin Li static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_mask_load_ps(__m512 __W,__mmask16 __U,void const * __P)4472*67e74705SXin Li _mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
4473*67e74705SXin Li {
4474*67e74705SXin Li   return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
4475*67e74705SXin Li                    (__v16sf) __W,
4476*67e74705SXin Li                    (__mmask16) __U);
4477*67e74705SXin Li }
4478*67e74705SXin Li 
4479*67e74705SXin Li static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_load_ps(__mmask16 __U,void const * __P)4480*67e74705SXin Li _mm512_maskz_load_ps(__mmask16 __U, void const *__P)
4481*67e74705SXin Li {
4482*67e74705SXin Li   return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
4483*67e74705SXin Li                                                   (__v16sf)
4484*67e74705SXin Li                                                   _mm512_setzero_ps (),
4485*67e74705SXin Li                                                   (__mmask16) __U);
4486*67e74705SXin Li }
4487*67e74705SXin Li 
4488*67e74705SXin Li static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_load_pd(double const * __p)4489*67e74705SXin Li _mm512_load_pd(double const *__p)
4490*67e74705SXin Li {
4491*67e74705SXin Li   return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__p,
4492*67e74705SXin Li                                                    (__v8df)
4493*67e74705SXin Li                                                    _mm512_setzero_pd (),
4494*67e74705SXin Li                                                    (__mmask8) -1);
4495*67e74705SXin Li }
4496*67e74705SXin Li 
4497*67e74705SXin Li static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_mask_load_pd(__m512d __W,__mmask8 __U,void const * __P)4498*67e74705SXin Li _mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
4499*67e74705SXin Li {
4500*67e74705SXin Li   return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
4501*67e74705SXin Li                           (__v8df) __W,
4502*67e74705SXin Li                           (__mmask8) __U);
4503*67e74705SXin Li }
4504*67e74705SXin Li 
4505*67e74705SXin Li static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_load_pd(__mmask8 __U,void const * __P)4506*67e74705SXin Li _mm512_maskz_load_pd(__mmask8 __U, void const *__P)
4507*67e74705SXin Li {
4508*67e74705SXin Li   return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
4509*67e74705SXin Li                                                    (__v8df)
4510*67e74705SXin Li                                                    _mm512_setzero_pd (),
4511*67e74705SXin Li                                                    (__mmask8) __U);
4512*67e74705SXin Li }
4513*67e74705SXin Li 
4514*67e74705SXin Li static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_load_si512(void const * __P)4515*67e74705SXin Li _mm512_load_si512 (void const *__P)
4516*67e74705SXin Li {
4517*67e74705SXin Li   return *(__m512i *) __P;
4518*67e74705SXin Li }
4519*67e74705SXin Li 
4520*67e74705SXin Li static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_load_epi32(void const * __P)4521*67e74705SXin Li _mm512_load_epi32 (void const *__P)
4522*67e74705SXin Li {
4523*67e74705SXin Li   return *(__m512i *) __P;
4524*67e74705SXin Li }
4525*67e74705SXin Li 
4526*67e74705SXin Li static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_load_epi64(void const * __P)4527*67e74705SXin Li _mm512_load_epi64 (void const *__P)
4528*67e74705SXin Li {
4529*67e74705SXin Li   return *(__m512i *) __P;
4530*67e74705SXin Li }
4531*67e74705SXin Li 
/* SIMD store ops */
4533*67e74705SXin Li 
4534*67e74705SXin Li static __inline void __DEFAULT_FN_ATTRS
_mm512_mask_storeu_epi64(void * __P,__mmask8 __U,__m512i __A)4535*67e74705SXin Li _mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
4536*67e74705SXin Li {
4537*67e74705SXin Li   __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A,
4538*67e74705SXin Li                                      (__mmask8) __U);
4539*67e74705SXin Li }
4540*67e74705SXin Li 
4541*67e74705SXin Li static __inline void __DEFAULT_FN_ATTRS
_mm512_storeu_si512(void * __P,__m512i __A)4542*67e74705SXin Li _mm512_storeu_si512 (void *__P, __m512i __A)
4543*67e74705SXin Li {
4544*67e74705SXin Li   __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A,
4545*67e74705SXin Li             (__mmask16) -1);
4546*67e74705SXin Li }
4547*67e74705SXin Li 
4548*67e74705SXin Li static __inline void __DEFAULT_FN_ATTRS
_mm512_mask_storeu_epi32(void * __P,__mmask16 __U,__m512i __A)4549*67e74705SXin Li _mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
4550*67e74705SXin Li {
4551*67e74705SXin Li   __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A,
4552*67e74705SXin Li                                      (__mmask16) __U);
4553*67e74705SXin Li }
4554*67e74705SXin Li 
4555*67e74705SXin Li static __inline void __DEFAULT_FN_ATTRS
_mm512_mask_storeu_pd(void * __P,__mmask8 __U,__m512d __A)4556*67e74705SXin Li _mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
4557*67e74705SXin Li {
4558*67e74705SXin Li   __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U);
4559*67e74705SXin Li }
4560*67e74705SXin Li 
4561*67e74705SXin Li static __inline void __DEFAULT_FN_ATTRS
_mm512_storeu_pd(void * __P,__m512d __A)4562*67e74705SXin Li _mm512_storeu_pd(void *__P, __m512d __A)
4563*67e74705SXin Li {
4564*67e74705SXin Li   __builtin_ia32_storeupd512_mask((double *)__P, (__v8df)__A, (__mmask8)-1);
4565*67e74705SXin Li }
4566*67e74705SXin Li 
4567*67e74705SXin Li static __inline void __DEFAULT_FN_ATTRS
_mm512_mask_storeu_ps(void * __P,__mmask16 __U,__m512 __A)4568*67e74705SXin Li _mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
4569*67e74705SXin Li {
4570*67e74705SXin Li   __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A,
4571*67e74705SXin Li                                    (__mmask16) __U);
4572*67e74705SXin Li }
4573*67e74705SXin Li 
4574*67e74705SXin Li static __inline void __DEFAULT_FN_ATTRS
_mm512_storeu_ps(void * __P,__m512 __A)4575*67e74705SXin Li _mm512_storeu_ps(void *__P, __m512 __A)
4576*67e74705SXin Li {
4577*67e74705SXin Li   __builtin_ia32_storeups512_mask((float *)__P, (__v16sf)__A, (__mmask16)-1);
4578*67e74705SXin Li }
4579*67e74705SXin Li 
4580*67e74705SXin Li static __inline void __DEFAULT_FN_ATTRS
_mm512_mask_store_pd(void * __P,__mmask8 __U,__m512d __A)4581*67e74705SXin Li _mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
4582*67e74705SXin Li {
4583*67e74705SXin Li   __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
4584*67e74705SXin Li }
4585*67e74705SXin Li 
4586*67e74705SXin Li static __inline void __DEFAULT_FN_ATTRS
_mm512_store_pd(void * __P,__m512d __A)4587*67e74705SXin Li _mm512_store_pd(void *__P, __m512d __A)
4588*67e74705SXin Li {
4589*67e74705SXin Li   *(__m512d*)__P = __A;
4590*67e74705SXin Li }
4591*67e74705SXin Li 
4592*67e74705SXin Li static __inline void __DEFAULT_FN_ATTRS
_mm512_mask_store_ps(void * __P,__mmask16 __U,__m512 __A)4593*67e74705SXin Li _mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
4594*67e74705SXin Li {
4595*67e74705SXin Li   __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
4596*67e74705SXin Li                                    (__mmask16) __U);
4597*67e74705SXin Li }
4598*67e74705SXin Li 
4599*67e74705SXin Li static __inline void __DEFAULT_FN_ATTRS
_mm512_store_ps(void * __P,__m512 __A)4600*67e74705SXin Li _mm512_store_ps(void *__P, __m512 __A)
4601*67e74705SXin Li {
4602*67e74705SXin Li   *(__m512*)__P = __A;
4603*67e74705SXin Li }
4604*67e74705SXin Li 
4605*67e74705SXin Li static __inline void __DEFAULT_FN_ATTRS
_mm512_store_si512(void * __P,__m512i __A)4606*67e74705SXin Li _mm512_store_si512 (void *__P, __m512i __A)
4607*67e74705SXin Li {
4608*67e74705SXin Li   *(__m512i *) __P = __A;
4609*67e74705SXin Li }
4610*67e74705SXin Li 
4611*67e74705SXin Li static __inline void __DEFAULT_FN_ATTRS
_mm512_store_epi32(void * __P,__m512i __A)4612*67e74705SXin Li _mm512_store_epi32 (void *__P, __m512i __A)
4613*67e74705SXin Li {
4614*67e74705SXin Li   *(__m512i *) __P = __A;
4615*67e74705SXin Li }
4616*67e74705SXin Li 
4617*67e74705SXin Li static __inline void __DEFAULT_FN_ATTRS
_mm512_store_epi64(void * __P,__m512i __A)4618*67e74705SXin Li _mm512_store_epi64 (void *__P, __m512i __A)
4619*67e74705SXin Li {
4620*67e74705SXin Li   *(__m512i *) __P = __A;
4621*67e74705SXin Li }
4622*67e74705SXin Li 
/* Mask ops */
4624*67e74705SXin Li 
4625*67e74705SXin Li static __inline __mmask16 __DEFAULT_FN_ATTRS
_mm512_knot(__mmask16 __M)4626*67e74705SXin Li _mm512_knot(__mmask16 __M)
4627*67e74705SXin Li {
4628*67e74705SXin Li   return __builtin_ia32_knothi(__M);
4629*67e74705SXin Li }
4630*67e74705SXin Li 
/* Integer compare */
4632*67e74705SXin Li 
4633*67e74705SXin Li static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmpeq_epi32_mask(__m512i __a,__m512i __b)4634*67e74705SXin Li _mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) {
4635*67e74705SXin Li   return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
4636*67e74705SXin Li                                                    (__mmask16)-1);
4637*67e74705SXin Li }
4638*67e74705SXin Li 
4639*67e74705SXin Li static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmpeq_epi32_mask(__mmask16 __u,__m512i __a,__m512i __b)4640*67e74705SXin Li _mm512_mask_cmpeq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
4641*67e74705SXin Li   return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
4642*67e74705SXin Li                                                    __u);
4643*67e74705SXin Li }
4644*67e74705SXin Li 
4645*67e74705SXin Li static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmpeq_epu32_mask(__m512i __a,__m512i __b)4646*67e74705SXin Li _mm512_cmpeq_epu32_mask(__m512i __a, __m512i __b) {
4647*67e74705SXin Li   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
4648*67e74705SXin Li                                                  (__mmask16)-1);
4649*67e74705SXin Li }
4650*67e74705SXin Li 
4651*67e74705SXin Li static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmpeq_epu32_mask(__mmask16 __u,__m512i __a,__m512i __b)4652*67e74705SXin Li _mm512_mask_cmpeq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
4653*67e74705SXin Li   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
4654*67e74705SXin Li                                                  __u);
4655*67e74705SXin Li }
4656*67e74705SXin Li 
4657*67e74705SXin Li static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmpeq_epi64_mask(__mmask8 __u,__m512i __a,__m512i __b)4658*67e74705SXin Li _mm512_mask_cmpeq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
4659*67e74705SXin Li   return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
4660*67e74705SXin Li                                                   __u);
4661*67e74705SXin Li }
4662*67e74705SXin Li 
4663*67e74705SXin Li static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmpeq_epi64_mask(__m512i __a,__m512i __b)4664*67e74705SXin Li _mm512_cmpeq_epi64_mask(__m512i __a, __m512i __b) {
4665*67e74705SXin Li   return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
4666*67e74705SXin Li                                                   (__mmask8)-1);
4667*67e74705SXin Li }
4668*67e74705SXin Li 
4669*67e74705SXin Li static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmpeq_epu64_mask(__m512i __a,__m512i __b)4670*67e74705SXin Li _mm512_cmpeq_epu64_mask(__m512i __a, __m512i __b) {
4671*67e74705SXin Li   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
4672*67e74705SXin Li                                                 (__mmask8)-1);
4673*67e74705SXin Li }
4674*67e74705SXin Li 
4675*67e74705SXin Li static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmpeq_epu64_mask(__mmask8 __u,__m512i __a,__m512i __b)4676*67e74705SXin Li _mm512_mask_cmpeq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
4677*67e74705SXin Li   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
4678*67e74705SXin Li                                                 __u);
4679*67e74705SXin Li }
4680*67e74705SXin Li 
4681*67e74705SXin Li static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmpge_epi32_mask(__m512i __a,__m512i __b)4682*67e74705SXin Li _mm512_cmpge_epi32_mask(__m512i __a, __m512i __b) {
4683*67e74705SXin Li   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
4684*67e74705SXin Li                                                 (__mmask16)-1);
4685*67e74705SXin Li }
4686*67e74705SXin Li 
4687*67e74705SXin Li static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmpge_epi32_mask(__mmask16 __u,__m512i __a,__m512i __b)4688*67e74705SXin Li _mm512_mask_cmpge_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
4689*67e74705SXin Li   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
4690*67e74705SXin Li                                                 __u);
4691*67e74705SXin Li }
4692*67e74705SXin Li 
4693*67e74705SXin Li static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmpge_epu32_mask(__m512i __a,__m512i __b)4694*67e74705SXin Li _mm512_cmpge_epu32_mask(__m512i __a, __m512i __b) {
4695*67e74705SXin Li   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
4696*67e74705SXin Li                                                  (__mmask16)-1);
4697*67e74705SXin Li }
4698*67e74705SXin Li 
4699*67e74705SXin Li static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmpge_epu32_mask(__mmask16 __u,__m512i __a,__m512i __b)4700*67e74705SXin Li _mm512_mask_cmpge_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
4701*67e74705SXin Li   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
4702*67e74705SXin Li                                                  __u);
4703*67e74705SXin Li }
4704*67e74705SXin Li 
4705*67e74705SXin Li static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmpge_epi64_mask(__m512i __a,__m512i __b)4706*67e74705SXin Li _mm512_cmpge_epi64_mask(__m512i __a, __m512i __b) {
4707*67e74705SXin Li   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
4708*67e74705SXin Li                                                (__mmask8)-1);
4709*67e74705SXin Li }
4710*67e74705SXin Li 
4711*67e74705SXin Li static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmpge_epi64_mask(__mmask8 __u,__m512i __a,__m512i __b)4712*67e74705SXin Li _mm512_mask_cmpge_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
4713*67e74705SXin Li   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
4714*67e74705SXin Li                                                __u);
4715*67e74705SXin Li }
4716*67e74705SXin Li 
4717*67e74705SXin Li static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmpge_epu64_mask(__m512i __a,__m512i __b)4718*67e74705SXin Li _mm512_cmpge_epu64_mask(__m512i __a, __m512i __b) {
4719*67e74705SXin Li   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
4720*67e74705SXin Li                                                 (__mmask8)-1);
4721*67e74705SXin Li }
4722*67e74705SXin Li 
4723*67e74705SXin Li static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmpge_epu64_mask(__mmask8 __u,__m512i __a,__m512i __b)4724*67e74705SXin Li _mm512_mask_cmpge_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
4725*67e74705SXin Li   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
4726*67e74705SXin Li                                                 __u);
4727*67e74705SXin Li }
4728*67e74705SXin Li 
4729*67e74705SXin Li static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmpgt_epi32_mask(__m512i __a,__m512i __b)4730*67e74705SXin Li _mm512_cmpgt_epi32_mask(__m512i __a, __m512i __b) {
4731*67e74705SXin Li   return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
4732*67e74705SXin Li                                                    (__mmask16)-1);
4733*67e74705SXin Li }
4734*67e74705SXin Li 
4735*67e74705SXin Li static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmpgt_epi32_mask(__mmask16 __u,__m512i __a,__m512i __b)4736*67e74705SXin Li _mm512_mask_cmpgt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
4737*67e74705SXin Li   return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
4738*67e74705SXin Li                                                    __u);
4739*67e74705SXin Li }
4740*67e74705SXin Li 
4741*67e74705SXin Li static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmpgt_epu32_mask(__m512i __a,__m512i __b)4742*67e74705SXin Li _mm512_cmpgt_epu32_mask(__m512i __a, __m512i __b) {
4743*67e74705SXin Li   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
4744*67e74705SXin Li                                                  (__mmask16)-1);
4745*67e74705SXin Li }
4746*67e74705SXin Li 
4747*67e74705SXin Li static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmpgt_epu32_mask(__mmask16 __u,__m512i __a,__m512i __b)4748*67e74705SXin Li _mm512_mask_cmpgt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
4749*67e74705SXin Li   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
4750*67e74705SXin Li                                                  __u);
4751*67e74705SXin Li }
4752*67e74705SXin Li 
4753*67e74705SXin Li static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmpgt_epi64_mask(__mmask8 __u,__m512i __a,__m512i __b)4754*67e74705SXin Li _mm512_mask_cmpgt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
4755*67e74705SXin Li   return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
4756*67e74705SXin Li                                                   __u);
4757*67e74705SXin Li }
4758*67e74705SXin Li 
4759*67e74705SXin Li static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmpgt_epi64_mask(__m512i __a,__m512i __b)4760*67e74705SXin Li _mm512_cmpgt_epi64_mask(__m512i __a, __m512i __b) {
4761*67e74705SXin Li   return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
4762*67e74705SXin Li                                                   (__mmask8)-1);
4763*67e74705SXin Li }
4764*67e74705SXin Li 
4765*67e74705SXin Li static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmpgt_epu64_mask(__m512i __a,__m512i __b)4766*67e74705SXin Li _mm512_cmpgt_epu64_mask(__m512i __a, __m512i __b) {
4767*67e74705SXin Li   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
4768*67e74705SXin Li                                                 (__mmask8)-1);
4769*67e74705SXin Li }
4770*67e74705SXin Li 
4771*67e74705SXin Li static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmpgt_epu64_mask(__mmask8 __u,__m512i __a,__m512i __b)4772*67e74705SXin Li _mm512_mask_cmpgt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
4773*67e74705SXin Li   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
4774*67e74705SXin Li                                                 __u);
4775*67e74705SXin Li }
4776*67e74705SXin Li 
4777*67e74705SXin Li static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmple_epi32_mask(__m512i __a,__m512i __b)4778*67e74705SXin Li _mm512_cmple_epi32_mask(__m512i __a, __m512i __b) {
4779*67e74705SXin Li   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
4780*67e74705SXin Li                                                 (__mmask16)-1);
4781*67e74705SXin Li }
4782*67e74705SXin Li 
4783*67e74705SXin Li static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmple_epi32_mask(__mmask16 __u,__m512i __a,__m512i __b)4784*67e74705SXin Li _mm512_mask_cmple_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
4785*67e74705SXin Li   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
4786*67e74705SXin Li                                                 __u);
4787*67e74705SXin Li }
4788*67e74705SXin Li 
4789*67e74705SXin Li static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmple_epu32_mask(__m512i __a,__m512i __b)4790*67e74705SXin Li _mm512_cmple_epu32_mask(__m512i __a, __m512i __b) {
4791*67e74705SXin Li   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
4792*67e74705SXin Li                                                  (__mmask16)-1);
4793*67e74705SXin Li }
4794*67e74705SXin Li 
4795*67e74705SXin Li static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmple_epu32_mask(__mmask16 __u,__m512i __a,__m512i __b)4796*67e74705SXin Li _mm512_mask_cmple_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
4797*67e74705SXin Li   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
4798*67e74705SXin Li                                                  __u);
4799*67e74705SXin Li }
4800*67e74705SXin Li 
4801*67e74705SXin Li static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmple_epi64_mask(__m512i __a,__m512i __b)4802*67e74705SXin Li _mm512_cmple_epi64_mask(__m512i __a, __m512i __b) {
4803*67e74705SXin Li   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
4804*67e74705SXin Li                                                (__mmask8)-1);
4805*67e74705SXin Li }
4806*67e74705SXin Li 
4807*67e74705SXin Li static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmple_epi64_mask(__mmask8 __u,__m512i __a,__m512i __b)4808*67e74705SXin Li _mm512_mask_cmple_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
4809*67e74705SXin Li   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
4810*67e74705SXin Li                                                __u);
4811*67e74705SXin Li }
4812*67e74705SXin Li 
4813*67e74705SXin Li static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmple_epu64_mask(__m512i __a,__m512i __b)4814*67e74705SXin Li _mm512_cmple_epu64_mask(__m512i __a, __m512i __b) {
4815*67e74705SXin Li   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
4816*67e74705SXin Li                                                 (__mmask8)-1);
4817*67e74705SXin Li }
4818*67e74705SXin Li 
4819*67e74705SXin Li static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmple_epu64_mask(__mmask8 __u,__m512i __a,__m512i __b)4820*67e74705SXin Li _mm512_mask_cmple_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
4821*67e74705SXin Li   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
4822*67e74705SXin Li                                                 __u);
4823*67e74705SXin Li }
4824*67e74705SXin Li 
4825*67e74705SXin Li static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmplt_epi32_mask(__m512i __a,__m512i __b)4826*67e74705SXin Li _mm512_cmplt_epi32_mask(__m512i __a, __m512i __b) {
4827*67e74705SXin Li   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
4828*67e74705SXin Li                                                 (__mmask16)-1);
4829*67e74705SXin Li }
4830*67e74705SXin Li 
4831*67e74705SXin Li static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmplt_epi32_mask(__mmask16 __u,__m512i __a,__m512i __b)4832*67e74705SXin Li _mm512_mask_cmplt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
4833*67e74705SXin Li   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
4834*67e74705SXin Li                                                 __u);
4835*67e74705SXin Li }
4836*67e74705SXin Li 
4837*67e74705SXin Li static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmplt_epu32_mask(__m512i __a,__m512i __b)4838*67e74705SXin Li _mm512_cmplt_epu32_mask(__m512i __a, __m512i __b) {
4839*67e74705SXin Li   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
4840*67e74705SXin Li                                                  (__mmask16)-1);
4841*67e74705SXin Li }
4842*67e74705SXin Li 
4843*67e74705SXin Li static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmplt_epu32_mask(__mmask16 __u,__m512i __a,__m512i __b)4844*67e74705SXin Li _mm512_mask_cmplt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
4845*67e74705SXin Li   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
4846*67e74705SXin Li                                                  __u);
4847*67e74705SXin Li }
4848*67e74705SXin Li 
4849*67e74705SXin Li static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmplt_epi64_mask(__m512i __a,__m512i __b)4850*67e74705SXin Li _mm512_cmplt_epi64_mask(__m512i __a, __m512i __b) {
4851*67e74705SXin Li   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
4852*67e74705SXin Li                                                (__mmask8)-1);
4853*67e74705SXin Li }
4854*67e74705SXin Li 
4855*67e74705SXin Li static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmplt_epi64_mask(__mmask8 __u,__m512i __a,__m512i __b)4856*67e74705SXin Li _mm512_mask_cmplt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
4857*67e74705SXin Li   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
4858*67e74705SXin Li                                                __u);
4859*67e74705SXin Li }
4860*67e74705SXin Li 
4861*67e74705SXin Li static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmplt_epu64_mask(__m512i __a,__m512i __b)4862*67e74705SXin Li _mm512_cmplt_epu64_mask(__m512i __a, __m512i __b) {
4863*67e74705SXin Li   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
4864*67e74705SXin Li                                                 (__mmask8)-1);
4865*67e74705SXin Li }
4866*67e74705SXin Li 
4867*67e74705SXin Li static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmplt_epu64_mask(__mmask8 __u,__m512i __a,__m512i __b)4868*67e74705SXin Li _mm512_mask_cmplt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
4869*67e74705SXin Li   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
4870*67e74705SXin Li                                                 __u);
4871*67e74705SXin Li }
4872*67e74705SXin Li 
4873*67e74705SXin Li static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmpneq_epi32_mask(__m512i __a,__m512i __b)4874*67e74705SXin Li _mm512_cmpneq_epi32_mask(__m512i __a, __m512i __b) {
4875*67e74705SXin Li   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
4876*67e74705SXin Li                                                 (__mmask16)-1);
4877*67e74705SXin Li }
4878*67e74705SXin Li 
4879*67e74705SXin Li static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmpneq_epi32_mask(__mmask16 __u,__m512i __a,__m512i __b)4880*67e74705SXin Li _mm512_mask_cmpneq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
4881*67e74705SXin Li   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
4882*67e74705SXin Li                                                 __u);
4883*67e74705SXin Li }
4884*67e74705SXin Li 
4885*67e74705SXin Li static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmpneq_epu32_mask(__m512i __a,__m512i __b)4886*67e74705SXin Li _mm512_cmpneq_epu32_mask(__m512i __a, __m512i __b) {
4887*67e74705SXin Li   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
4888*67e74705SXin Li                                                  (__mmask16)-1);
4889*67e74705SXin Li }
4890*67e74705SXin Li 
4891*67e74705SXin Li static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmpneq_epu32_mask(__mmask16 __u,__m512i __a,__m512i __b)4892*67e74705SXin Li _mm512_mask_cmpneq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
4893*67e74705SXin Li   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
4894*67e74705SXin Li                                                  __u);
4895*67e74705SXin Li }
4896*67e74705SXin Li 
4897*67e74705SXin Li static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmpneq_epi64_mask(__m512i __a,__m512i __b)4898*67e74705SXin Li _mm512_cmpneq_epi64_mask(__m512i __a, __m512i __b) {
4899*67e74705SXin Li   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
4900*67e74705SXin Li                                                (__mmask8)-1);
4901*67e74705SXin Li }
4902*67e74705SXin Li 
4903*67e74705SXin Li static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmpneq_epi64_mask(__mmask8 __u,__m512i __a,__m512i __b)4904*67e74705SXin Li _mm512_mask_cmpneq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
4905*67e74705SXin Li   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
4906*67e74705SXin Li                                                __u);
4907*67e74705SXin Li }
4908*67e74705SXin Li 
4909*67e74705SXin Li static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmpneq_epu64_mask(__m512i __a,__m512i __b)4910*67e74705SXin Li _mm512_cmpneq_epu64_mask(__m512i __a, __m512i __b) {
4911*67e74705SXin Li   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
4912*67e74705SXin Li                                                 (__mmask8)-1);
4913*67e74705SXin Li }
4914*67e74705SXin Li 
4915*67e74705SXin Li static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmpneq_epu64_mask(__mmask8 __u,__m512i __a,__m512i __b)4916*67e74705SXin Li _mm512_mask_cmpneq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
4917*67e74705SXin Li   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
4918*67e74705SXin Li                                                 __u);
4919*67e74705SXin Li }
4920*67e74705SXin Li 
4921*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtepi8_epi32(__m128i __A)4922*67e74705SXin Li _mm512_cvtepi8_epi32 (__m128i __A)
4923*67e74705SXin Li {
4924*67e74705SXin Li   return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
4925*67e74705SXin Li                 (__v16si)
4926*67e74705SXin Li                 _mm512_setzero_si512 (),
4927*67e74705SXin Li                 (__mmask16) -1);
4928*67e74705SXin Li }
4929*67e74705SXin Li 
4930*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi8_epi32(__m512i __W,__mmask16 __U,__m128i __A)4931*67e74705SXin Li _mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
4932*67e74705SXin Li {
4933*67e74705SXin Li   return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
4934*67e74705SXin Li                 (__v16si) __W,
4935*67e74705SXin Li                 (__mmask16) __U);
4936*67e74705SXin Li }
4937*67e74705SXin Li 
4938*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi8_epi32(__mmask16 __U,__m128i __A)4939*67e74705SXin Li _mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A)
4940*67e74705SXin Li {
4941*67e74705SXin Li   return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
4942*67e74705SXin Li                 (__v16si)
4943*67e74705SXin Li                 _mm512_setzero_si512 (),
4944*67e74705SXin Li                 (__mmask16) __U);
4945*67e74705SXin Li }
4946*67e74705SXin Li 
4947*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtepi8_epi64(__m128i __A)4948*67e74705SXin Li _mm512_cvtepi8_epi64 (__m128i __A)
4949*67e74705SXin Li {
4950*67e74705SXin Li   return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
4951*67e74705SXin Li                 (__v8di)
4952*67e74705SXin Li                 _mm512_setzero_si512 (),
4953*67e74705SXin Li                 (__mmask8) -1);
4954*67e74705SXin Li }
4955*67e74705SXin Li 
4956*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi8_epi64(__m512i __W,__mmask8 __U,__m128i __A)4957*67e74705SXin Li _mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
4958*67e74705SXin Li {
4959*67e74705SXin Li   return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
4960*67e74705SXin Li                 (__v8di) __W,
4961*67e74705SXin Li                 (__mmask8) __U);
4962*67e74705SXin Li }
4963*67e74705SXin Li 
4964*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi8_epi64(__mmask8 __U,__m128i __A)4965*67e74705SXin Li _mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
4966*67e74705SXin Li {
4967*67e74705SXin Li   return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
4968*67e74705SXin Li                 (__v8di)
4969*67e74705SXin Li                 _mm512_setzero_si512 (),
4970*67e74705SXin Li                 (__mmask8) __U);
4971*67e74705SXin Li }
4972*67e74705SXin Li 
4973*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtepi32_epi64(__m256i __X)4974*67e74705SXin Li _mm512_cvtepi32_epi64 (__m256i __X)
4975*67e74705SXin Li {
4976*67e74705SXin Li   return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
4977*67e74705SXin Li                 (__v8di)
4978*67e74705SXin Li                 _mm512_setzero_si512 (),
4979*67e74705SXin Li                 (__mmask8) -1);
4980*67e74705SXin Li }
4981*67e74705SXin Li 
4982*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi32_epi64(__m512i __W,__mmask8 __U,__m256i __X)4983*67e74705SXin Li _mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
4984*67e74705SXin Li {
4985*67e74705SXin Li   return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
4986*67e74705SXin Li                 (__v8di) __W,
4987*67e74705SXin Li                 (__mmask8) __U);
4988*67e74705SXin Li }
4989*67e74705SXin Li 
4990*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi32_epi64(__mmask8 __U,__m256i __X)4991*67e74705SXin Li _mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X)
4992*67e74705SXin Li {
4993*67e74705SXin Li   return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
4994*67e74705SXin Li                 (__v8di)
4995*67e74705SXin Li                 _mm512_setzero_si512 (),
4996*67e74705SXin Li                 (__mmask8) __U);
4997*67e74705SXin Li }
4998*67e74705SXin Li 
4999*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtepi16_epi32(__m256i __A)5000*67e74705SXin Li _mm512_cvtepi16_epi32 (__m256i __A)
5001*67e74705SXin Li {
5002*67e74705SXin Li   return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
5003*67e74705SXin Li                 (__v16si)
5004*67e74705SXin Li                 _mm512_setzero_si512 (),
5005*67e74705SXin Li                 (__mmask16) -1);
5006*67e74705SXin Li }
5007*67e74705SXin Li 
5008*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi16_epi32(__m512i __W,__mmask16 __U,__m256i __A)5009*67e74705SXin Li _mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
5010*67e74705SXin Li {
5011*67e74705SXin Li   return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
5012*67e74705SXin Li                 (__v16si) __W,
5013*67e74705SXin Li                 (__mmask16) __U);
5014*67e74705SXin Li }
5015*67e74705SXin Li 
5016*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi16_epi32(__mmask16 __U,__m256i __A)5017*67e74705SXin Li _mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A)
5018*67e74705SXin Li {
5019*67e74705SXin Li   return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
5020*67e74705SXin Li                 (__v16si)
5021*67e74705SXin Li                 _mm512_setzero_si512 (),
5022*67e74705SXin Li                 (__mmask16) __U);
5023*67e74705SXin Li }
5024*67e74705SXin Li 
5025*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtepi16_epi64(__m128i __A)5026*67e74705SXin Li _mm512_cvtepi16_epi64 (__m128i __A)
5027*67e74705SXin Li {
5028*67e74705SXin Li   return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
5029*67e74705SXin Li                 (__v8di)
5030*67e74705SXin Li                 _mm512_setzero_si512 (),
5031*67e74705SXin Li                 (__mmask8) -1);
5032*67e74705SXin Li }
5033*67e74705SXin Li 
5034*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi16_epi64(__m512i __W,__mmask8 __U,__m128i __A)5035*67e74705SXin Li _mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
5036*67e74705SXin Li {
5037*67e74705SXin Li   return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
5038*67e74705SXin Li                 (__v8di) __W,
5039*67e74705SXin Li                 (__mmask8) __U);
5040*67e74705SXin Li }
5041*67e74705SXin Li 
5042*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi16_epi64(__mmask8 __U,__m128i __A)5043*67e74705SXin Li _mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
5044*67e74705SXin Li {
5045*67e74705SXin Li   return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
5046*67e74705SXin Li                 (__v8di)
5047*67e74705SXin Li                 _mm512_setzero_si512 (),
5048*67e74705SXin Li                 (__mmask8) __U);
5049*67e74705SXin Li }
5050*67e74705SXin Li 
5051*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtepu8_epi32(__m128i __A)5052*67e74705SXin Li _mm512_cvtepu8_epi32 (__m128i __A)
5053*67e74705SXin Li {
5054*67e74705SXin Li   return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
5055*67e74705SXin Li                 (__v16si)
5056*67e74705SXin Li                 _mm512_setzero_si512 (),
5057*67e74705SXin Li                 (__mmask16) -1);
5058*67e74705SXin Li }
5059*67e74705SXin Li 
5060*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepu8_epi32(__m512i __W,__mmask16 __U,__m128i __A)5061*67e74705SXin Li _mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
5062*67e74705SXin Li {
5063*67e74705SXin Li   return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
5064*67e74705SXin Li                 (__v16si) __W,
5065*67e74705SXin Li                 (__mmask16) __U);
5066*67e74705SXin Li }
5067*67e74705SXin Li 
5068*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepu8_epi32(__mmask16 __U,__m128i __A)5069*67e74705SXin Li _mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A)
5070*67e74705SXin Li {
5071*67e74705SXin Li   return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
5072*67e74705SXin Li                 (__v16si)
5073*67e74705SXin Li                 _mm512_setzero_si512 (),
5074*67e74705SXin Li                 (__mmask16) __U);
5075*67e74705SXin Li }
5076*67e74705SXin Li 
5077*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtepu8_epi64(__m128i __A)5078*67e74705SXin Li _mm512_cvtepu8_epi64 (__m128i __A)
5079*67e74705SXin Li {
5080*67e74705SXin Li   return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
5081*67e74705SXin Li                 (__v8di)
5082*67e74705SXin Li                 _mm512_setzero_si512 (),
5083*67e74705SXin Li                 (__mmask8) -1);
5084*67e74705SXin Li }
5085*67e74705SXin Li 
5086*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepu8_epi64(__m512i __W,__mmask8 __U,__m128i __A)5087*67e74705SXin Li _mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
5088*67e74705SXin Li {
5089*67e74705SXin Li   return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
5090*67e74705SXin Li                 (__v8di) __W,
5091*67e74705SXin Li                 (__mmask8) __U);
5092*67e74705SXin Li }
5093*67e74705SXin Li 
5094*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepu8_epi64(__mmask8 __U,__m128i __A)5095*67e74705SXin Li _mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
5096*67e74705SXin Li {
5097*67e74705SXin Li   return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
5098*67e74705SXin Li                 (__v8di)
5099*67e74705SXin Li                 _mm512_setzero_si512 (),
5100*67e74705SXin Li                 (__mmask8) __U);
5101*67e74705SXin Li }
5102*67e74705SXin Li 
5103*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtepu32_epi64(__m256i __X)5104*67e74705SXin Li _mm512_cvtepu32_epi64 (__m256i __X)
5105*67e74705SXin Li {
5106*67e74705SXin Li   return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
5107*67e74705SXin Li                 (__v8di)
5108*67e74705SXin Li                 _mm512_setzero_si512 (),
5109*67e74705SXin Li                 (__mmask8) -1);
5110*67e74705SXin Li }
5111*67e74705SXin Li 
5112*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepu32_epi64(__m512i __W,__mmask8 __U,__m256i __X)5113*67e74705SXin Li _mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
5114*67e74705SXin Li {
5115*67e74705SXin Li   return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
5116*67e74705SXin Li                 (__v8di) __W,
5117*67e74705SXin Li                 (__mmask8) __U);
5118*67e74705SXin Li }
5119*67e74705SXin Li 
5120*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepu32_epi64(__mmask8 __U,__m256i __X)5121*67e74705SXin Li _mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X)
5122*67e74705SXin Li {
5123*67e74705SXin Li   return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
5124*67e74705SXin Li                 (__v8di)
5125*67e74705SXin Li                 _mm512_setzero_si512 (),
5126*67e74705SXin Li                 (__mmask8) __U);
5127*67e74705SXin Li }
5128*67e74705SXin Li 
5129*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtepu16_epi32(__m256i __A)5130*67e74705SXin Li _mm512_cvtepu16_epi32 (__m256i __A)
5131*67e74705SXin Li {
5132*67e74705SXin Li   return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
5133*67e74705SXin Li                 (__v16si)
5134*67e74705SXin Li                 _mm512_setzero_si512 (),
5135*67e74705SXin Li                 (__mmask16) -1);
5136*67e74705SXin Li }
5137*67e74705SXin Li 
5138*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepu16_epi32(__m512i __W,__mmask16 __U,__m256i __A)5139*67e74705SXin Li _mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
5140*67e74705SXin Li {
5141*67e74705SXin Li   return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
5142*67e74705SXin Li                 (__v16si) __W,
5143*67e74705SXin Li                 (__mmask16) __U);
5144*67e74705SXin Li }
5145*67e74705SXin Li 
5146*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepu16_epi32(__mmask16 __U,__m256i __A)5147*67e74705SXin Li _mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A)
5148*67e74705SXin Li {
5149*67e74705SXin Li   return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
5150*67e74705SXin Li                 (__v16si)
5151*67e74705SXin Li                 _mm512_setzero_si512 (),
5152*67e74705SXin Li                 (__mmask16) __U);
5153*67e74705SXin Li }
5154*67e74705SXin Li 
5155*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtepu16_epi64(__m128i __A)5156*67e74705SXin Li _mm512_cvtepu16_epi64 (__m128i __A)
5157*67e74705SXin Li {
5158*67e74705SXin Li   return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
5159*67e74705SXin Li                 (__v8di)
5160*67e74705SXin Li                 _mm512_setzero_si512 (),
5161*67e74705SXin Li                 (__mmask8) -1);
5162*67e74705SXin Li }
5163*67e74705SXin Li 
5164*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepu16_epi64(__m512i __W,__mmask8 __U,__m128i __A)5165*67e74705SXin Li _mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
5166*67e74705SXin Li {
5167*67e74705SXin Li   return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
5168*67e74705SXin Li                 (__v8di) __W,
5169*67e74705SXin Li                 (__mmask8) __U);
5170*67e74705SXin Li }
5171*67e74705SXin Li 
5172*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepu16_epi64(__mmask8 __U,__m128i __A)5173*67e74705SXin Li _mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
5174*67e74705SXin Li {
5175*67e74705SXin Li   return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
5176*67e74705SXin Li                 (__v8di)
5177*67e74705SXin Li                 _mm512_setzero_si512 (),
5178*67e74705SXin Li                 (__mmask8) __U);
5179*67e74705SXin Li }
5180*67e74705SXin Li 
5181*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_rorv_epi32(__m512i __A,__m512i __B)5182*67e74705SXin Li _mm512_rorv_epi32 (__m512i __A, __m512i __B)
5183*67e74705SXin Li {
5184*67e74705SXin Li   return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
5185*67e74705SXin Li               (__v16si) __B,
5186*67e74705SXin Li               (__v16si)
5187*67e74705SXin Li               _mm512_setzero_si512 (),
5188*67e74705SXin Li               (__mmask16) -1);
5189*67e74705SXin Li }
5190*67e74705SXin Li 
5191*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_rorv_epi32(__m512i __W,__mmask16 __U,__m512i __A,__m512i __B)5192*67e74705SXin Li _mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
5193*67e74705SXin Li {
5194*67e74705SXin Li   return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
5195*67e74705SXin Li               (__v16si) __B,
5196*67e74705SXin Li               (__v16si) __W,
5197*67e74705SXin Li               (__mmask16) __U);
5198*67e74705SXin Li }
5199*67e74705SXin Li 
5200*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_rorv_epi32(__mmask16 __U,__m512i __A,__m512i __B)5201*67e74705SXin Li _mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
5202*67e74705SXin Li {
5203*67e74705SXin Li   return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
5204*67e74705SXin Li               (__v16si) __B,
5205*67e74705SXin Li               (__v16si)
5206*67e74705SXin Li               _mm512_setzero_si512 (),
5207*67e74705SXin Li               (__mmask16) __U);
5208*67e74705SXin Li }
5209*67e74705SXin Li 
5210*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_rorv_epi64(__m512i __A,__m512i __B)5211*67e74705SXin Li _mm512_rorv_epi64 (__m512i __A, __m512i __B)
5212*67e74705SXin Li {
5213*67e74705SXin Li   return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
5214*67e74705SXin Li               (__v8di) __B,
5215*67e74705SXin Li               (__v8di)
5216*67e74705SXin Li               _mm512_setzero_si512 (),
5217*67e74705SXin Li               (__mmask8) -1);
5218*67e74705SXin Li }
5219*67e74705SXin Li 
5220*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_rorv_epi64(__m512i __W,__mmask8 __U,__m512i __A,__m512i __B)5221*67e74705SXin Li _mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
5222*67e74705SXin Li {
5223*67e74705SXin Li   return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
5224*67e74705SXin Li               (__v8di) __B,
5225*67e74705SXin Li               (__v8di) __W,
5226*67e74705SXin Li               (__mmask8) __U);
5227*67e74705SXin Li }
5228*67e74705SXin Li 
5229*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_rorv_epi64(__mmask8 __U,__m512i __A,__m512i __B)5230*67e74705SXin Li _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
5231*67e74705SXin Li {
5232*67e74705SXin Li   return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
5233*67e74705SXin Li               (__v8di) __B,
5234*67e74705SXin Li               (__v8di)
5235*67e74705SXin Li               _mm512_setzero_si512 (),
5236*67e74705SXin Li               (__mmask8) __U);
5237*67e74705SXin Li }
5238*67e74705SXin Li 
5239*67e74705SXin Li 
5240*67e74705SXin Li 
/* Signed 32-bit compare of a and b under predicate immediate p (cmpd512
 * builtin) with an all-ones mask; evaluates to a __mmask16. */
#define _mm512_cmp_epi32_mask(a, b, p) \
  __extension__({ \
    (__mmask16)__builtin_ia32_cmpd512_mask( \
        (__v16si)(__m512i)(a), (__v16si)(__m512i)(b), (int)(p), \
        (__mmask16)-1); \
  })
5245*67e74705SXin Li 
/* Unsigned 32-bit compare of a and b under predicate immediate p (ucmpd512
 * builtin) with an all-ones mask; evaluates to a __mmask16. */
#define _mm512_cmp_epu32_mask(a, b, p) \
  __extension__({ \
    (__mmask16)__builtin_ia32_ucmpd512_mask( \
        (__v16si)(__m512i)(a), (__v16si)(__m512i)(b), (int)(p), \
        (__mmask16)-1); \
  })
5250*67e74705SXin Li 
/* Signed 64-bit compare of a and b under predicate immediate p (cmpq512
 * builtin) with an all-ones mask; evaluates to a __mmask8. */
#define _mm512_cmp_epi64_mask(a, b, p) \
  __extension__({ \
    (__mmask8)__builtin_ia32_cmpq512_mask( \
        (__v8di)(__m512i)(a), (__v8di)(__m512i)(b), (int)(p), \
        (__mmask8)-1); \
  })
5255*67e74705SXin Li 
/* Unsigned 64-bit compare of a and b under predicate immediate p (ucmpq512
 * builtin) with an all-ones mask; evaluates to a __mmask8. */
#define _mm512_cmp_epu64_mask(a, b, p) \
  __extension__({ \
    (__mmask8)__builtin_ia32_ucmpq512_mask( \
        (__v8di)(__m512i)(a), (__v8di)(__m512i)(b), (int)(p), \
        (__mmask8)-1); \
  })
5260*67e74705SXin Li 
/* Masked signed 32-bit compare of a and b under predicate immediate p
 * (cmpd512 builtin); m is passed as the mask operand. Evaluates to a
 * __mmask16. */
#define _mm512_mask_cmp_epi32_mask(m, a, b, p) \
  __extension__({ \
    (__mmask16)__builtin_ia32_cmpd512_mask( \
        (__v16si)(__m512i)(a), (__v16si)(__m512i)(b), (int)(p), \
        (__mmask16)(m)); \
  })
5265*67e74705SXin Li 
/* Masked unsigned 32-bit compare of a and b under predicate immediate p
 * (ucmpd512 builtin); m is passed as the mask operand. Evaluates to a
 * __mmask16. */
#define _mm512_mask_cmp_epu32_mask(m, a, b, p) \
  __extension__({ \
    (__mmask16)__builtin_ia32_ucmpd512_mask( \
        (__v16si)(__m512i)(a), (__v16si)(__m512i)(b), (int)(p), \
        (__mmask16)(m)); \
  })
5270*67e74705SXin Li 
/* Masked signed 64-bit compare of a and b under predicate immediate p
 * (cmpq512 builtin); m is passed as the mask operand. Evaluates to a
 * __mmask8. */
#define _mm512_mask_cmp_epi64_mask(m, a, b, p) \
  __extension__({ \
    (__mmask8)__builtin_ia32_cmpq512_mask( \
        (__v8di)(__m512i)(a), (__v8di)(__m512i)(b), (int)(p), \
        (__mmask8)(m)); \
  })
5275*67e74705SXin Li 
/* Masked unsigned 64-bit compare of a and b under predicate immediate p
 * (ucmpq512 builtin); m is passed as the mask operand. Evaluates to a
 * __mmask8. */
#define _mm512_mask_cmp_epu64_mask(m, a, b, p) \
  __extension__({ \
    (__mmask8)__builtin_ia32_ucmpq512_mask( \
        (__v8di)(__m512i)(a), (__v8di)(__m512i)(b), (int)(p), \
        (__mmask8)(m)); \
  })
5280*67e74705SXin Li 
/* Left-rotate of the 32-bit elements of a by immediate b (prold512 builtin).
 * Unmasked form: all-ones mask with a zero pass-through vector. */
#define _mm512_rol_epi32(a, b) \
  __extension__({ \
    (__m512i)__builtin_ia32_prold512_mask( \
        (__v16si)(__m512i)(a), (int)(b), \
        (__v16si)_mm512_setzero_si512(), (__mmask16)-1); \
  })
5285*67e74705SXin Li 
/* Merge-masked left-rotate of 32-bit elements by immediate b (prold512
 * builtin): W is the pass-through vector and U the 16-bit write mask. */
#define _mm512_mask_rol_epi32(W, U, a, b) \
  __extension__({ \
    (__m512i)__builtin_ia32_prold512_mask( \
        (__v16si)(__m512i)(a), (int)(b), \
        (__v16si)(__m512i)(W), (__mmask16)(U)); \
  })
5290*67e74705SXin Li 
/* Zero-masked left-rotate of 32-bit elements by immediate b (prold512
 * builtin): a zero vector is the pass-through and U the 16-bit write mask. */
#define _mm512_maskz_rol_epi32(U, a, b) \
  __extension__({ \
    (__m512i)__builtin_ia32_prold512_mask( \
        (__v16si)(__m512i)(a), (int)(b), \
        (__v16si)_mm512_setzero_si512(), (__mmask16)(U)); \
  })
5295*67e74705SXin Li 
/* Left-rotate of the 64-bit elements of a by immediate b (prolq512 builtin).
 * Unmasked form: all-ones mask with a zero pass-through vector. */
#define _mm512_rol_epi64(a, b) \
  __extension__({ \
    (__m512i)__builtin_ia32_prolq512_mask( \
        (__v8di)(__m512i)(a), (int)(b), \
        (__v8di)_mm512_setzero_si512(), (__mmask8)-1); \
  })
5300*67e74705SXin Li 
/* Merge-masked left-rotate of 64-bit elements by immediate b (prolq512
 * builtin): W is the pass-through vector and U the 8-bit write mask. */
#define _mm512_mask_rol_epi64(W, U, a, b) \
  __extension__({ \
    (__m512i)__builtin_ia32_prolq512_mask( \
        (__v8di)(__m512i)(a), (int)(b), \
        (__v8di)(__m512i)(W), (__mmask8)(U)); \
  })
5304*67e74705SXin Li 
/* Zero-masked left-rotate of 64-bit elements by immediate b (prolq512
 * builtin): a zero vector is the pass-through and U the 8-bit write mask. */
#define _mm512_maskz_rol_epi64(U, a, b) \
  __extension__({ \
    (__m512i)__builtin_ia32_prolq512_mask( \
        (__v8di)(__m512i)(a), (int)(b), \
        (__v8di)_mm512_setzero_si512(), (__mmask8)(U)); \
  })
5309*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_rolv_epi32(__m512i __A,__m512i __B)5310*67e74705SXin Li _mm512_rolv_epi32 (__m512i __A, __m512i __B)
5311*67e74705SXin Li {
5312*67e74705SXin Li   return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
5313*67e74705SXin Li               (__v16si) __B,
5314*67e74705SXin Li               (__v16si)
5315*67e74705SXin Li               _mm512_setzero_si512 (),
5316*67e74705SXin Li               (__mmask16) -1);
5317*67e74705SXin Li }
5318*67e74705SXin Li 
5319*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_rolv_epi32(__m512i __W,__mmask16 __U,__m512i __A,__m512i __B)5320*67e74705SXin Li _mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
5321*67e74705SXin Li {
5322*67e74705SXin Li   return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
5323*67e74705SXin Li               (__v16si) __B,
5324*67e74705SXin Li               (__v16si) __W,
5325*67e74705SXin Li               (__mmask16) __U);
5326*67e74705SXin Li }
5327*67e74705SXin Li 
5328*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_rolv_epi32(__mmask16 __U,__m512i __A,__m512i __B)5329*67e74705SXin Li _mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
5330*67e74705SXin Li {
5331*67e74705SXin Li   return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
5332*67e74705SXin Li               (__v16si) __B,
5333*67e74705SXin Li               (__v16si)
5334*67e74705SXin Li               _mm512_setzero_si512 (),
5335*67e74705SXin Li               (__mmask16) __U);
5336*67e74705SXin Li }
5337*67e74705SXin Li 
5338*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_rolv_epi64(__m512i __A,__m512i __B)5339*67e74705SXin Li _mm512_rolv_epi64 (__m512i __A, __m512i __B)
5340*67e74705SXin Li {
5341*67e74705SXin Li   return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
5342*67e74705SXin Li               (__v8di) __B,
5343*67e74705SXin Li               (__v8di)
5344*67e74705SXin Li               _mm512_setzero_si512 (),
5345*67e74705SXin Li               (__mmask8) -1);
5346*67e74705SXin Li }
5347*67e74705SXin Li 
5348*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_rolv_epi64(__m512i __W,__mmask8 __U,__m512i __A,__m512i __B)5349*67e74705SXin Li _mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
5350*67e74705SXin Li {
5351*67e74705SXin Li   return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
5352*67e74705SXin Li               (__v8di) __B,
5353*67e74705SXin Li               (__v8di) __W,
5354*67e74705SXin Li               (__mmask8) __U);
5355*67e74705SXin Li }
5356*67e74705SXin Li 
5357*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_rolv_epi64(__mmask8 __U,__m512i __A,__m512i __B)5358*67e74705SXin Li _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
5359*67e74705SXin Li {
5360*67e74705SXin Li   return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
5361*67e74705SXin Li               (__v8di) __B,
5362*67e74705SXin Li               (__v8di)
5363*67e74705SXin Li               _mm512_setzero_si512 (),
5364*67e74705SXin Li               (__mmask8) __U);
5365*67e74705SXin Li }
5366*67e74705SXin Li 
/* Right-rotate of the 32-bit elements of A by immediate B (prord512 builtin).
 * Unmasked form: all-ones mask with a zero pass-through vector. */
#define _mm512_ror_epi32(A, B) \
  __extension__({ \
    (__m512i)__builtin_ia32_prord512_mask( \
        (__v16si)(__m512i)(A), (int)(B), \
        (__v16si)_mm512_setzero_si512(), (__mmask16)-1); \
  })
5371*67e74705SXin Li 
/* Merge-masked right-rotate of 32-bit elements by immediate B (prord512
 * builtin): W is the pass-through vector and U the 16-bit write mask. */
#define _mm512_mask_ror_epi32(W, U, A, B) \
  __extension__({ \
    (__m512i)__builtin_ia32_prord512_mask( \
        (__v16si)(__m512i)(A), (int)(B), \
        (__v16si)(__m512i)(W), (__mmask16)(U)); \
  })
5376*67e74705SXin Li 
/* Zero-masked right-rotate of 32-bit elements by immediate B (prord512
 * builtin): a zero vector is the pass-through and U the 16-bit write mask. */
#define _mm512_maskz_ror_epi32(U, A, B) \
  __extension__({ \
    (__m512i)__builtin_ia32_prord512_mask( \
        (__v16si)(__m512i)(A), (int)(B), \
        (__v16si)_mm512_setzero_si512(), (__mmask16)(U)); \
  })
5381*67e74705SXin Li 
/* Right-rotate of the 64-bit elements of A by immediate B (prorq512 builtin).
 * Unmasked form: all-ones mask with a zero pass-through vector. */
#define _mm512_ror_epi64(A, B) \
  __extension__({ \
    (__m512i)__builtin_ia32_prorq512_mask( \
        (__v8di)(__m512i)(A), (int)(B), \
        (__v8di)_mm512_setzero_si512(), (__mmask8)-1); \
  })
5386*67e74705SXin Li 
/* Merge-masked right-rotate of 64-bit elements by immediate B (prorq512
 * builtin): W is the pass-through vector and U the 8-bit write mask. */
#define _mm512_mask_ror_epi64(W, U, A, B) \
  __extension__({ \
    (__m512i)__builtin_ia32_prorq512_mask( \
        (__v8di)(__m512i)(A), (int)(B), \
        (__v8di)(__m512i)(W), (__mmask8)(U)); \
  })
5390*67e74705SXin Li 
/* Zero-masked right-rotate of 64-bit elements by immediate B (prorq512
 * builtin): a zero vector is the pass-through and U the 8-bit write mask. */
#define _mm512_maskz_ror_epi64(U, A, B) \
  __extension__({ \
    (__m512i)__builtin_ia32_prorq512_mask( \
        (__v8di)(__m512i)(A), (int)(B), \
        (__v8di)_mm512_setzero_si512(), (__mmask8)(U)); \
  })
5395*67e74705SXin Li 
/* Logical left shift of the 32-bit elements of A by immediate B (pslldi512
 * builtin). Unmasked form: all-ones mask with a zero pass-through vector. */
#define _mm512_slli_epi32(A, B) \
  __extension__({ \
    (__m512i)__builtin_ia32_pslldi512_mask( \
        (__v16si)(__m512i)(A), (int)(B), \
        (__v16si)_mm512_setzero_si512(), (__mmask16)-1); \
  })
5400*67e74705SXin Li 
/* Merge-masked logical left shift of 32-bit elements by immediate B
 * (pslldi512 builtin): W is the pass-through vector and U the 16-bit write
 * mask. */
#define _mm512_mask_slli_epi32(W, U, A, B) \
  __extension__({ \
    (__m512i)__builtin_ia32_pslldi512_mask( \
        (__v16si)(__m512i)(A), (int)(B), \
        (__v16si)(__m512i)(W), (__mmask16)(U)); \
  })
5405*67e74705SXin Li 
/* Zero-masked logical left shift of 32-bit elements by immediate B
 * (pslldi512 builtin): a zero vector is the pass-through and U the 16-bit
 * write mask. */
#define _mm512_maskz_slli_epi32(U, A, B) \
  __extension__({ \
    (__m512i)__builtin_ia32_pslldi512_mask( \
        (__v16si)(__m512i)(A), (int)(B), \
        (__v16si)_mm512_setzero_si512(), (__mmask16)(U)); \
  })
5410*67e74705SXin Li 
/* Logical left shift of the 64-bit elements of A by immediate B (psllqi512
 * builtin). Unmasked form: all-ones mask with a zero pass-through vector. */
#define _mm512_slli_epi64(A, B) \
  __extension__({ \
    (__m512i)__builtin_ia32_psllqi512_mask( \
        (__v8di)(__m512i)(A), (int)(B), \
        (__v8di)_mm512_setzero_si512(), (__mmask8)-1); \
  })
5415*67e74705SXin Li 
/* Merge-masked logical left shift of 64-bit elements by immediate B
 * (psllqi512 builtin): W is the pass-through vector and U the 8-bit write
 * mask. */
#define _mm512_mask_slli_epi64(W, U, A, B) \
  __extension__({ \
    (__m512i)__builtin_ia32_psllqi512_mask( \
        (__v8di)(__m512i)(A), (int)(B), \
        (__v8di)(__m512i)(W), (__mmask8)(U)); \
  })
5420*67e74705SXin Li 
/* Zero-masked logical left shift of 64-bit elements by immediate B
 * (psllqi512 builtin): a zero vector is the pass-through and U the 8-bit
 * write mask. */
#define _mm512_maskz_slli_epi64(U, A, B) \
  __extension__({ \
    (__m512i)__builtin_ia32_psllqi512_mask( \
        (__v8di)(__m512i)(A), (int)(B), \
        (__v8di)_mm512_setzero_si512(), (__mmask8)(U)); \
  })
5425*67e74705SXin Li 
5426*67e74705SXin Li 
5427*67e74705SXin Li 
/* Logical right shift of the 32-bit elements of A by immediate B (psrldi512
 * builtin). Unmasked form: all-ones mask with a zero pass-through vector. */
#define _mm512_srli_epi32(A, B) \
  __extension__({ \
    (__m512i)__builtin_ia32_psrldi512_mask( \
        (__v16si)(__m512i)(A), (int)(B), \
        (__v16si)_mm512_setzero_si512(), (__mmask16)-1); \
  })
5432*67e74705SXin Li 
/* Merge-masked logical right shift of 32-bit elements by immediate B
 * (psrldi512 builtin): W is the pass-through vector and U the 16-bit write
 * mask. */
#define _mm512_mask_srli_epi32(W, U, A, B) \
  __extension__({ \
    (__m512i)__builtin_ia32_psrldi512_mask( \
        (__v16si)(__m512i)(A), (int)(B), \
        (__v16si)(__m512i)(W), (__mmask16)(U)); \
  })
5437*67e74705SXin Li 
/* Zero-masked variant of the 32-bit right shift: zero pass-through vector,
 * U as the 16-bit mask. */
#define _mm512_maskz_srli_epi32(U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_psrldi512_mask( \
      (__v16si)(__m512i)(A), \
      (int)(B), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)(U)); \
})
5442*67e74705SXin Li 
/* Unmasked logical right shift of the 64-bit lanes of A by the immediate
 * count B. The psrlqi512 builtin receives a zero pass-through vector and an
 * all-ones 8-bit mask. */
#define _mm512_srli_epi64(A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_psrlqi512_mask( \
      (__v8di)(__m512i)(A), \
      (int)(B), \
      (__v8di)_mm512_setzero_si512(), \
      (__mmask8)-1); \
})
5447*67e74705SXin Li 
/* Merge-masked variant of the 64-bit right shift: W is the pass-through
 * operand of the psrlqi512 builtin and U its 8-bit mask. */
#define _mm512_mask_srli_epi64(W, U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_psrlqi512_mask( \
      (__v8di)(__m512i)(A), \
      (int)(B), \
      (__v8di)(__m512i)(W), \
      (__mmask8)(U)); \
})
5452*67e74705SXin Li 
/* Zero-masked variant of the 64-bit right shift: zero pass-through vector,
 * U as the 8-bit mask. */
#define _mm512_maskz_srli_epi64(U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_psrlqi512_mask( \
      (__v8di)(__m512i)(A), \
      (int)(B), \
      (__v8di)_mm512_setzero_si512(), \
      (__mmask8)(U)); \
})
5457*67e74705SXin Li 
5458*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_load_epi32(__m512i __W,__mmask16 __U,void const * __P)5459*67e74705SXin Li _mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
5460*67e74705SXin Li {
5461*67e74705SXin Li   return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
5462*67e74705SXin Li               (__v16si) __W,
5463*67e74705SXin Li               (__mmask16) __U);
5464*67e74705SXin Li }
5465*67e74705SXin Li 
5466*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_load_epi32(__mmask16 __U,void const * __P)5467*67e74705SXin Li _mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
5468*67e74705SXin Li {
5469*67e74705SXin Li   return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
5470*67e74705SXin Li               (__v16si)
5471*67e74705SXin Li               _mm512_setzero_si512 (),
5472*67e74705SXin Li               (__mmask16) __U);
5473*67e74705SXin Li }
5474*67e74705SXin Li 
5475*67e74705SXin Li static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_store_epi32(void * __P,__mmask16 __U,__m512i __A)5476*67e74705SXin Li _mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
5477*67e74705SXin Li {
5478*67e74705SXin Li   __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
5479*67e74705SXin Li           (__mmask16) __U);
5480*67e74705SXin Li }
5481*67e74705SXin Li 
5482*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_mov_epi32(__m512i __W,__mmask16 __U,__m512i __A)5483*67e74705SXin Li _mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
5484*67e74705SXin Li {
5485*67e74705SXin Li   return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
5486*67e74705SXin Li                  (__v16si) __A,
5487*67e74705SXin Li                  (__v16si) __W);
5488*67e74705SXin Li }
5489*67e74705SXin Li 
5490*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_mov_epi32(__mmask16 __U,__m512i __A)5491*67e74705SXin Li _mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
5492*67e74705SXin Li {
5493*67e74705SXin Li   return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
5494*67e74705SXin Li                  (__v16si) __A,
5495*67e74705SXin Li                  (__v16si) _mm512_setzero_si512 ());
5496*67e74705SXin Li }
5497*67e74705SXin Li 
5498*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_mov_epi64(__m512i __W,__mmask8 __U,__m512i __A)5499*67e74705SXin Li _mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
5500*67e74705SXin Li {
5501*67e74705SXin Li   return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
5502*67e74705SXin Li                  (__v8di) __A,
5503*67e74705SXin Li                  (__v8di) __W);
5504*67e74705SXin Li }
5505*67e74705SXin Li 
5506*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_mov_epi64(__mmask8 __U,__m512i __A)5507*67e74705SXin Li _mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
5508*67e74705SXin Li {
5509*67e74705SXin Li   return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
5510*67e74705SXin Li                  (__v8di) __A,
5511*67e74705SXin Li                  (__v8di) _mm512_setzero_si512 ());
5512*67e74705SXin Li }
5513*67e74705SXin Li 
5514*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_load_epi64(__m512i __W,__mmask8 __U,void const * __P)5515*67e74705SXin Li _mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
5516*67e74705SXin Li {
5517*67e74705SXin Li   return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
5518*67e74705SXin Li               (__v8di) __W,
5519*67e74705SXin Li               (__mmask8) __U);
5520*67e74705SXin Li }
5521*67e74705SXin Li 
5522*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_load_epi64(__mmask8 __U,void const * __P)5523*67e74705SXin Li _mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
5524*67e74705SXin Li {
5525*67e74705SXin Li   return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
5526*67e74705SXin Li               (__v8di)
5527*67e74705SXin Li               _mm512_setzero_si512 (),
5528*67e74705SXin Li               (__mmask8) __U);
5529*67e74705SXin Li }
5530*67e74705SXin Li 
5531*67e74705SXin Li static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_store_epi64(void * __P,__mmask8 __U,__m512i __A)5532*67e74705SXin Li _mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
5533*67e74705SXin Li {
5534*67e74705SXin Li   __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
5535*67e74705SXin Li           (__mmask8) __U);
5536*67e74705SXin Li }
5537*67e74705SXin Li 
5538*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_movedup_pd(__m512d __A)5539*67e74705SXin Li _mm512_movedup_pd (__m512d __A)
5540*67e74705SXin Li {
5541*67e74705SXin Li   return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
5542*67e74705SXin Li                                           0, 0, 2, 2, 4, 4, 6, 6);
5543*67e74705SXin Li }
5544*67e74705SXin Li 
5545*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_movedup_pd(__m512d __W,__mmask8 __U,__m512d __A)5546*67e74705SXin Li _mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
5547*67e74705SXin Li {
5548*67e74705SXin Li   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5549*67e74705SXin Li                                               (__v8df)_mm512_movedup_pd(__A),
5550*67e74705SXin Li                                               (__v8df)__W);
5551*67e74705SXin Li }
5552*67e74705SXin Li 
5553*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_movedup_pd(__mmask8 __U,__m512d __A)5554*67e74705SXin Li _mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
5555*67e74705SXin Li {
5556*67e74705SXin Li   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5557*67e74705SXin Li                                               (__v8df)_mm512_movedup_pd(__A),
5558*67e74705SXin Li                                               (__v8df)_mm512_setzero_pd());
5559*67e74705SXin Li }
5560*67e74705SXin Li 
/* Unmasked packed-double fixupimm with explicit control argument R:
 * forwards A, B, the integer vector C and the immediate imm to the `_mask`
 * builtin with an all-ones mask. */
#define _mm512_fixupimm_round_pd(A, B, C, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8di)(__m512i)(C), \
      (int)(imm), \
      (__mmask8)-1, \
      (int)(R)); \
})
5566*67e74705SXin Li 
/* Masked packed-double fixupimm with explicit control argument R: same
 * `_mask` builtin as the unmasked form, with U as the 8-bit mask. */
#define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8di)(__m512i)(C), \
      (int)(imm), \
      (__mmask8)(U), \
      (int)(R)); \
})
5572*67e74705SXin Li 
/* Unmasked packed-double fixupimm: all-ones mask, and
 * _MM_FROUND_CUR_DIRECTION as the trailing control argument. */
#define _mm512_fixupimm_pd(A, B, C, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8di)(__m512i)(C), \
      (int)(imm), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION); \
})
5579*67e74705SXin Li 
/* Masked packed-double fixupimm: U is the 8-bit mask, and
 * _MM_FROUND_CUR_DIRECTION is the trailing control argument. */
#define _mm512_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8di)(__m512i)(C), \
      (int)(imm), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION); \
})
5586*67e74705SXin Li 
/* Zero-masked packed-double fixupimm with explicit control argument R:
 * uses the `_maskz` builtin with U as the 8-bit mask. */
#define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_maskz( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8di)(__m512i)(C), \
      (int)(imm), \
      (__mmask8)(U), \
      (int)(R)); \
})
5593*67e74705SXin Li 
/* Zero-masked packed-double fixupimm: `_maskz` builtin, U as the 8-bit
 * mask, _MM_FROUND_CUR_DIRECTION as the trailing control argument. */
#define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_maskz( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8di)(__m512i)(C), \
      (int)(imm), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION); \
})
5600*67e74705SXin Li 
/* Unmasked packed-float fixupimm with explicit control argument R:
 * forwards A, B, the integer vector C and the immediate imm to the `_mask`
 * builtin with an all-ones 16-bit mask. */
#define _mm512_fixupimm_round_ps(A, B, C, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16si)(__m512i)(C), \
      (int)(imm), \
      (__mmask16)-1, \
      (int)(R)); \
})
5606*67e74705SXin Li 
/* Masked packed-float fixupimm with explicit control argument R: same
 * `_mask` builtin as the unmasked form, with U as the 16-bit mask. */
#define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16si)(__m512i)(C), \
      (int)(imm), \
      (__mmask16)(U), \
      (int)(R)); \
})
5612*67e74705SXin Li 
/* Unmasked packed-float fixupimm: all-ones 16-bit mask, and
 * _MM_FROUND_CUR_DIRECTION as the trailing control argument. */
#define _mm512_fixupimm_ps(A, B, C, imm) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16si)(__m512i)(C), \
      (int)(imm), \
      (__mmask16)-1, \
      _MM_FROUND_CUR_DIRECTION); \
})
5619*67e74705SXin Li 
/* Masked packed-float fixupimm: U is the 16-bit mask, and
 * _MM_FROUND_CUR_DIRECTION is the trailing control argument. */
#define _mm512_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16si)(__m512i)(C), \
      (int)(imm), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION); \
})
5626*67e74705SXin Li 
/* Zero-masked packed-float fixupimm with explicit control argument R:
 * uses the `_maskz` builtin with U as the 16-bit mask. */
#define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_maskz( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16si)(__m512i)(C), \
      (int)(imm), \
      (__mmask16)(U), \
      (int)(R)); \
})
5633*67e74705SXin Li 
/* Zero-masked packed-float fixupimm: `_maskz` builtin, U as the 16-bit
 * mask, _MM_FROUND_CUR_DIRECTION as the trailing control argument. */
#define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_maskz( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16si)(__m512i)(C), \
      (int)(imm), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION); \
})
5640*67e74705SXin Li 
/* Unmasked scalar-double fixupimm with explicit control argument R:
 * forwards A, B, the integer vector C and the immediate imm to the `_mask`
 * builtin with an all-ones mask. */
#define _mm_fixupimm_round_sd(A, B, C, imm, R) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2di)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)-1, \
      (int)(R)); \
})
5646*67e74705SXin Li 
/* Masked scalar-double fixupimm with explicit control argument R: same
 * `_mask` builtin as the unmasked form, with U as the mask. */
#define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2di)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)(U), \
      (int)(R)); \
})
5652*67e74705SXin Li 
/* Unmasked scalar-double fixupimm: all-ones mask, and
 * _MM_FROUND_CUR_DIRECTION as the trailing control argument. */
#define _mm_fixupimm_sd(A, B, C, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2di)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION); \
})
5659*67e74705SXin Li 
/* Masked scalar-double fixupimm: U is the mask, and
 * _MM_FROUND_CUR_DIRECTION is the trailing control argument. */
#define _mm_mask_fixupimm_sd(A, U, B, C, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2di)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION); \
})
5666*67e74705SXin Li 
/* Zero-masked scalar-double fixupimm with explicit control argument R:
 * uses the `_maskz` builtin with U as the mask. */
#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_maskz( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2di)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)(U), \
      (int)(R)); \
})
5672*67e74705SXin Li 
/* Zero-masked scalar-double fixupimm: `_maskz` builtin, U as the mask,
 * _MM_FROUND_CUR_DIRECTION as the trailing control argument. */
#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_maskz( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2di)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION); \
})
5679*67e74705SXin Li 
/* Unmasked scalar-float fixupimm with explicit control argument R:
 * forwards A, B, the integer vector C and the immediate imm to the `_mask`
 * builtin with an all-ones mask. */
#define _mm_fixupimm_round_ss(A, B, C, imm, R) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4si)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)-1, \
      (int)(R)); \
})
5685*67e74705SXin Li 
/* Masked scalar-float fixupimm with explicit control argument R: same
 * `_mask` builtin as the unmasked form, with U as the mask. */
#define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4si)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)(U), \
      (int)(R)); \
})
5691*67e74705SXin Li 
/* Unmasked scalar-float fixupimm: all-ones mask, and
 * _MM_FROUND_CUR_DIRECTION as the trailing control argument. */
#define _mm_fixupimm_ss(A, B, C, imm) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4si)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION); \
})
5698*67e74705SXin Li 
/* Masked scalar-float fixupimm: U is the mask, and
 * _MM_FROUND_CUR_DIRECTION is the trailing control argument. */
#define _mm_mask_fixupimm_ss(A, U, B, C, imm) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4si)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION); \
})
5705*67e74705SXin Li 
/* Zero-masked scalar-float fixupimm with explicit control argument R:
 * uses the `_maskz` builtin with U as the mask. */
#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_maskz( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4si)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)(U), \
      (int)(R)); \
})
5711*67e74705SXin Li 
/* Zero-masked scalar-float fixupimm: `_maskz` builtin, U as the mask,
 * _MM_FROUND_CUR_DIRECTION as the trailing control argument. */
#define _mm_maskz_fixupimm_ss(U, A, B, C, imm) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_maskz( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4si)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION); \
})
5718*67e74705SXin Li 
/* Unmasked scalar-double getexp with explicit control argument R: zero
 * pass-through vector and an all-ones mask. */
#define _mm_getexp_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_getexpsd128_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, \
      (int)(R)); \
})
5724*67e74705SXin Li 
5725*67e74705SXin Li 
5726*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_getexp_sd(__m128d __A,__m128d __B)5727*67e74705SXin Li _mm_getexp_sd (__m128d __A, __m128d __B)
5728*67e74705SXin Li {
5729*67e74705SXin Li   return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
5730*67e74705SXin Li                  (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5731*67e74705SXin Li }
5732*67e74705SXin Li 
5733*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_getexp_sd(__m128d __W,__mmask8 __U,__m128d __A,__m128d __B)5734*67e74705SXin Li _mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5735*67e74705SXin Li {
5736*67e74705SXin Li  return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5737*67e74705SXin Li           (__v2df) __B,
5738*67e74705SXin Li           (__v2df) __W,
5739*67e74705SXin Li           (__mmask8) __U,
5740*67e74705SXin Li           _MM_FROUND_CUR_DIRECTION);
5741*67e74705SXin Li }
5742*67e74705SXin Li 
/* Merge-masked scalar-double getexp with explicit control argument R: W is
 * the pass-through vector and U the mask. */
#define _mm_mask_getexp_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_getexpsd128_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), \
      (int)(R)); \
})
5748*67e74705SXin Li 
5749*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_getexp_sd(__mmask8 __U,__m128d __A,__m128d __B)5750*67e74705SXin Li _mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
5751*67e74705SXin Li {
5752*67e74705SXin Li  return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5753*67e74705SXin Li           (__v2df) __B,
5754*67e74705SXin Li           (__v2df) _mm_setzero_pd (),
5755*67e74705SXin Li           (__mmask8) __U,
5756*67e74705SXin Li           _MM_FROUND_CUR_DIRECTION);
5757*67e74705SXin Li }
5758*67e74705SXin Li 
/* Zero-masked scalar-double getexp with explicit control argument R: zero
 * pass-through vector and U as the mask. */
#define _mm_maskz_getexp_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_getexpsd128_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); \
})
5764*67e74705SXin Li 
/* Unmasked scalar-float getexp with explicit control argument R: zero
 * pass-through vector and an all-ones mask. */
#define _mm_getexp_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_getexpss128_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, \
      (int)(R)); \
})
5770*67e74705SXin Li 
5771*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_getexp_ss(__m128 __A,__m128 __B)5772*67e74705SXin Li _mm_getexp_ss (__m128 __A, __m128 __B)
5773*67e74705SXin Li {
5774*67e74705SXin Li   return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5775*67e74705SXin Li                 (__v4sf) __B, (__v4sf)  _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5776*67e74705SXin Li }
5777*67e74705SXin Li 
5778*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_getexp_ss(__m128 __W,__mmask8 __U,__m128 __A,__m128 __B)5779*67e74705SXin Li _mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5780*67e74705SXin Li {
5781*67e74705SXin Li  return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5782*67e74705SXin Li           (__v4sf) __B,
5783*67e74705SXin Li           (__v4sf) __W,
5784*67e74705SXin Li           (__mmask8) __U,
5785*67e74705SXin Li           _MM_FROUND_CUR_DIRECTION);
5786*67e74705SXin Li }
5787*67e74705SXin Li 
/* Merge-masked scalar-float getexp with explicit control argument R: W is
 * the pass-through vector and U the mask. */
#define _mm_mask_getexp_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_getexpss128_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), \
      (int)(R)); \
})
5793*67e74705SXin Li 
5794*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_getexp_ss(__mmask8 __U,__m128 __A,__m128 __B)5795*67e74705SXin Li _mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
5796*67e74705SXin Li {
5797*67e74705SXin Li  return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5798*67e74705SXin Li           (__v4sf) __B,
5799*67e74705SXin Li           (__v4sf) _mm_setzero_pd (),
5800*67e74705SXin Li           (__mmask8) __U,
5801*67e74705SXin Li           _MM_FROUND_CUR_DIRECTION);
5802*67e74705SXin Li }
5803*67e74705SXin Li 
/* Zero-masked scalar-float getexp with explicit control argument R: zero
 * pass-through vector and U as the mask. */
#define _mm_maskz_getexp_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_getexpss128_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), \
      (int)(R)); \
})
5809*67e74705SXin Li 
/* Unmasked scalar-double getmant with explicit control argument R. The two
 * selector arguments are packed into one immediate as (D << 2) | C; the
 * pass-through vector is zero and the mask is all ones. */
#define _mm_getmant_round_sd(A, B, C, D, R) __extension__ ({ \
  (__m128d)__builtin_ia32_getmantsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (int)(((D)<<2) | (C)), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, \
      (int)(R)); \
})
5816*67e74705SXin Li 
/* Unmasked scalar-double getmant. C and D are packed into one immediate as
 * (D << 2) | C; zero pass-through vector, all-ones mask, and
 * _MM_FROUND_CUR_DIRECTION as the control argument. */
#define _mm_getmant_sd(A, B, C, D) __extension__ ({ \
  (__m128d)__builtin_ia32_getmantsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (int)(((D)<<2) | (C)), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION); \
})
5824*67e74705SXin Li 
/* Merge-masked scalar-double getmant. C and D are packed into one immediate
 * as (D << 2) | C; W is the pass-through vector, U the mask, and
 * _MM_FROUND_CUR_DIRECTION the control argument. */
#define _mm_mask_getmant_sd(W, U, A, B, C, D) __extension__ ({ \
  (__m128d)__builtin_ia32_getmantsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (int)(((D)<<2) | (C)), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION); \
})
5832*67e74705SXin Li 
/* Merge-masked scalar-double getmant with explicit control argument R. C and
 * D are packed into one immediate as (D << 2) | C; W is the pass-through
 * vector and U the mask.
 *
 * The statement expression carries the __extension__ marker, like every
 * other macro in this family, so it is flagged as a deliberate GNU
 * extension wherever the macro is expanded. */
#define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) __extension__ ({\
  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v2df)(__m128d)(W), \
                                               (__mmask8)(U), (int)(R)); })
5839*67e74705SXin Li 
/* Zero-masked scalar-double getmant. C and D are packed into one immediate
 * as (D << 2) | C; zero pass-through vector, U as the mask, and
 * _MM_FROUND_CUR_DIRECTION as the control argument. */
#define _mm_maskz_getmant_sd(U, A, B, C, D) __extension__ ({ \
  (__m128d)__builtin_ia32_getmantsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (int)(((D)<<2) | (C)), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION); \
})
5847*67e74705SXin Li 
/* Zero-masked scalar-double getmant with explicit control argument R. C and
 * D are packed into one immediate as (D << 2) | C; zero pass-through vector
 * and U as the mask. */
#define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) __extension__ ({ \
  (__m128d)__builtin_ia32_getmantsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (int)(((D)<<2) | (C)), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); \
})
5854*67e74705SXin Li 
/* Unmasked scalar-float getmant with explicit control argument R. C and D
 * are packed into one immediate as (D << 2) | C; the pass-through vector is
 * zero and the mask is all ones. */
#define _mm_getmant_round_ss(A, B, C, D, R) __extension__ ({ \
  (__m128)__builtin_ia32_getmantss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (int)(((D)<<2) | (C)), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, \
      (int)(R)); \
})
5861*67e74705SXin Li 
/* Unmasked scalar-float getmant. C and D are packed into one immediate as
 * (D << 2) | C; zero pass-through vector, all-ones mask, and
 * _MM_FROUND_CUR_DIRECTION as the control argument. */
#define _mm_getmant_ss(A, B, C, D) __extension__ ({ \
  (__m128)__builtin_ia32_getmantss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (int)(((D)<<2) | (C)), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION); \
})
5869*67e74705SXin Li 
/* Merge-masked scalar-float getmant. C and D are packed into one immediate
 * as (D << 2) | C; W is the pass-through vector, U the mask, and
 * _MM_FROUND_CUR_DIRECTION the control argument. */
#define _mm_mask_getmant_ss(W, U, A, B, C, D) __extension__ ({ \
  (__m128)__builtin_ia32_getmantss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (int)(((D)<<2) | (C)), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION); \
})
5877*67e74705SXin Li 
/* Masked form with explicit rounding argument R: W is forwarded as the
   fourth operand and U as the 8-bit mask of
   __builtin_ia32_getmantss_round_mask.  The immediate is ((D) << 2) | (C).
   The statement expression is marked __extension__, like every sibling
   getmant macro, so the GNU ({ ... }) construct does not raise pedantic
   diagnostics when the macro is expanded in user code. */
#define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) __extension__ ({\
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)(__m128)(W), \
                                              (__mmask8)(U), (int)(R)); })
5884*67e74705SXin Li 
/* Zero-masked form: a zero vector is forwarded as the fourth operand and U
   as the 8-bit mask of __builtin_ia32_getmantss_round_mask.  The immediate
   is ((D) << 2) | (C) and the rounding argument is _MM_FROUND_CUR_DIRECTION.
   The zero vector is built with _mm_setzero_ps() so that it has the
   single-precision type used by the other operands and by the sibling
   _mm_maskz_getmant_round_ss macro (the bits are all zero either way). */
#define _mm_maskz_getmant_ss(U, A, B, C, D) __extension__ ({\
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION); })
5892*67e74705SXin Li 
/* Zero-masked form with explicit rounding argument R: a zero vector is the
   fourth operand and U the 8-bit mask of
   __builtin_ia32_getmantss_round_mask.  The immediate is ((D) << 2) | (C). */
#define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) \
  __extension__ ({ \
    (__m128)__builtin_ia32_getmantss_round_mask( \
        (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
        (int)(((D)<<2) | (C)), \
        (__v4sf)_mm_setzero_ps(), \
        (__mmask8)(U), (int)(R)); \
  })
5899*67e74705SXin Li 
5900*67e74705SXin Li static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kmov(__mmask16 __A)5901*67e74705SXin Li _mm512_kmov (__mmask16 __A)
5902*67e74705SXin Li {
5903*67e74705SXin Li   return  __A;
5904*67e74705SXin Li }
5905*67e74705SXin Li 
/* Expands to __builtin_ia32_vcomisd on A and B (both cast to __v2df) with
   P and R passed as int; the result is cast to int. */
#define _mm_comi_round_sd(A, B, P, R) \
  __extension__ ({ \
    (int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), \
                                (__v2df)(__m128d)(B), \
                                (int)(P), (int)(R)); \
  })
5909*67e74705SXin Li 
/* Expands to __builtin_ia32_vcomiss on A and B (both cast to __v4sf) with
   P and R passed as int; the result is cast to int. */
#define _mm_comi_round_ss(A, B, P, R) \
  __extension__ ({ \
    (int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), \
                                (__v4sf)(__m128)(B), \
                                (int)(P), (int)(R)); \
  })
5913*67e74705SXin Li 
/* Expands to __builtin_ia32_vcvtsd2si64 on A (cast to __v2df) with R passed
   as int; the result is cast to long long. */
#define _mm_cvt_roundsd_si64(A, R) \
  __extension__ ({ \
    (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)); \
  })
5916*67e74705SXin Li 
5917*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask2_permutex2var_epi32(__m512i __A,__m512i __I,__mmask16 __U,__m512i __B)5918*67e74705SXin Li _mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
5919*67e74705SXin Li          __mmask16 __U, __m512i __B)
5920*67e74705SXin Li {
5921*67e74705SXin Li   return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
5922*67e74705SXin Li                    (__v16si) __I
5923*67e74705SXin Li                    /* idx */ ,
5924*67e74705SXin Li                    (__v16si) __B,
5925*67e74705SXin Li                    (__mmask16) __U);
5926*67e74705SXin Li }
5927*67e74705SXin Li 
5928*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sll_epi32(__m512i __A,__m128i __B)5929*67e74705SXin Li _mm512_sll_epi32 (__m512i __A, __m128i __B)
5930*67e74705SXin Li {
5931*67e74705SXin Li   return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
5932*67e74705SXin Li              (__v4si) __B,
5933*67e74705SXin Li              (__v16si)
5934*67e74705SXin Li              _mm512_setzero_si512 (),
5935*67e74705SXin Li              (__mmask16) -1);
5936*67e74705SXin Li }
5937*67e74705SXin Li 
5938*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sll_epi32(__m512i __W,__mmask16 __U,__m512i __A,__m128i __B)5939*67e74705SXin Li _mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
5940*67e74705SXin Li {
5941*67e74705SXin Li   return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
5942*67e74705SXin Li              (__v4si) __B,
5943*67e74705SXin Li              (__v16si) __W,
5944*67e74705SXin Li              (__mmask16) __U);
5945*67e74705SXin Li }
5946*67e74705SXin Li 
5947*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sll_epi32(__mmask16 __U,__m512i __A,__m128i __B)5948*67e74705SXin Li _mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
5949*67e74705SXin Li {
5950*67e74705SXin Li   return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
5951*67e74705SXin Li              (__v4si) __B,
5952*67e74705SXin Li              (__v16si)
5953*67e74705SXin Li              _mm512_setzero_si512 (),
5954*67e74705SXin Li              (__mmask16) __U);
5955*67e74705SXin Li }
5956*67e74705SXin Li 
5957*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sll_epi64(__m512i __A,__m128i __B)5958*67e74705SXin Li _mm512_sll_epi64 (__m512i __A, __m128i __B)
5959*67e74705SXin Li {
5960*67e74705SXin Li   return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
5961*67e74705SXin Li              (__v2di) __B,
5962*67e74705SXin Li              (__v8di)
5963*67e74705SXin Li              _mm512_setzero_si512 (),
5964*67e74705SXin Li              (__mmask8) -1);
5965*67e74705SXin Li }
5966*67e74705SXin Li 
5967*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sll_epi64(__m512i __W,__mmask8 __U,__m512i __A,__m128i __B)5968*67e74705SXin Li _mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
5969*67e74705SXin Li {
5970*67e74705SXin Li   return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
5971*67e74705SXin Li              (__v2di) __B,
5972*67e74705SXin Li              (__v8di) __W,
5973*67e74705SXin Li              (__mmask8) __U);
5974*67e74705SXin Li }
5975*67e74705SXin Li 
5976*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sll_epi64(__mmask8 __U,__m512i __A,__m128i __B)5977*67e74705SXin Li _mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
5978*67e74705SXin Li {
5979*67e74705SXin Li   return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
5980*67e74705SXin Li              (__v2di) __B,
5981*67e74705SXin Li              (__v8di)
5982*67e74705SXin Li              _mm512_setzero_si512 (),
5983*67e74705SXin Li              (__mmask8) __U);
5984*67e74705SXin Li }
5985*67e74705SXin Li 
5986*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sllv_epi32(__m512i __X,__m512i __Y)5987*67e74705SXin Li _mm512_sllv_epi32 (__m512i __X, __m512i __Y)
5988*67e74705SXin Li {
5989*67e74705SXin Li   return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
5990*67e74705SXin Li               (__v16si) __Y,
5991*67e74705SXin Li               (__v16si)
5992*67e74705SXin Li               _mm512_setzero_si512 (),
5993*67e74705SXin Li               (__mmask16) -1);
5994*67e74705SXin Li }
5995*67e74705SXin Li 
5996*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sllv_epi32(__m512i __W,__mmask16 __U,__m512i __X,__m512i __Y)5997*67e74705SXin Li _mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
5998*67e74705SXin Li {
5999*67e74705SXin Li   return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
6000*67e74705SXin Li               (__v16si) __Y,
6001*67e74705SXin Li               (__v16si) __W,
6002*67e74705SXin Li               (__mmask16) __U);
6003*67e74705SXin Li }
6004*67e74705SXin Li 
6005*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sllv_epi32(__mmask16 __U,__m512i __X,__m512i __Y)6006*67e74705SXin Li _mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
6007*67e74705SXin Li {
6008*67e74705SXin Li   return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
6009*67e74705SXin Li               (__v16si) __Y,
6010*67e74705SXin Li               (__v16si)
6011*67e74705SXin Li               _mm512_setzero_si512 (),
6012*67e74705SXin Li               (__mmask16) __U);
6013*67e74705SXin Li }
6014*67e74705SXin Li 
6015*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sllv_epi64(__m512i __X,__m512i __Y)6016*67e74705SXin Li _mm512_sllv_epi64 (__m512i __X, __m512i __Y)
6017*67e74705SXin Li {
6018*67e74705SXin Li   return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
6019*67e74705SXin Li              (__v8di) __Y,
6020*67e74705SXin Li              (__v8di)
6021*67e74705SXin Li              _mm512_undefined_pd (),
6022*67e74705SXin Li              (__mmask8) -1);
6023*67e74705SXin Li }
6024*67e74705SXin Li 
6025*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sllv_epi64(__m512i __W,__mmask8 __U,__m512i __X,__m512i __Y)6026*67e74705SXin Li _mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
6027*67e74705SXin Li {
6028*67e74705SXin Li   return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
6029*67e74705SXin Li              (__v8di) __Y,
6030*67e74705SXin Li              (__v8di) __W,
6031*67e74705SXin Li              (__mmask8) __U);
6032*67e74705SXin Li }
6033*67e74705SXin Li 
6034*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sllv_epi64(__mmask8 __U,__m512i __X,__m512i __Y)6035*67e74705SXin Li _mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
6036*67e74705SXin Li {
6037*67e74705SXin Li   return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
6038*67e74705SXin Li              (__v8di) __Y,
6039*67e74705SXin Li              (__v8di)
6040*67e74705SXin Li              _mm512_setzero_si512 (),
6041*67e74705SXin Li              (__mmask8) __U);
6042*67e74705SXin Li }
6043*67e74705SXin Li 
6044*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sra_epi32(__m512i __A,__m128i __B)6045*67e74705SXin Li _mm512_sra_epi32 (__m512i __A, __m128i __B)
6046*67e74705SXin Li {
6047*67e74705SXin Li   return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
6048*67e74705SXin Li              (__v4si) __B,
6049*67e74705SXin Li              (__v16si)
6050*67e74705SXin Li              _mm512_setzero_si512 (),
6051*67e74705SXin Li              (__mmask16) -1);
6052*67e74705SXin Li }
6053*67e74705SXin Li 
6054*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sra_epi32(__m512i __W,__mmask16 __U,__m512i __A,__m128i __B)6055*67e74705SXin Li _mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
6056*67e74705SXin Li {
6057*67e74705SXin Li   return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
6058*67e74705SXin Li              (__v4si) __B,
6059*67e74705SXin Li              (__v16si) __W,
6060*67e74705SXin Li              (__mmask16) __U);
6061*67e74705SXin Li }
6062*67e74705SXin Li 
6063*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sra_epi32(__mmask16 __U,__m512i __A,__m128i __B)6064*67e74705SXin Li _mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
6065*67e74705SXin Li {
6066*67e74705SXin Li   return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
6067*67e74705SXin Li              (__v4si) __B,
6068*67e74705SXin Li              (__v16si)
6069*67e74705SXin Li              _mm512_setzero_si512 (),
6070*67e74705SXin Li              (__mmask16) __U);
6071*67e74705SXin Li }
6072*67e74705SXin Li 
6073*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sra_epi64(__m512i __A,__m128i __B)6074*67e74705SXin Li _mm512_sra_epi64 (__m512i __A, __m128i __B)
6075*67e74705SXin Li {
6076*67e74705SXin Li   return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
6077*67e74705SXin Li              (__v2di) __B,
6078*67e74705SXin Li              (__v8di)
6079*67e74705SXin Li              _mm512_setzero_si512 (),
6080*67e74705SXin Li              (__mmask8) -1);
6081*67e74705SXin Li }
6082*67e74705SXin Li 
6083*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sra_epi64(__m512i __W,__mmask8 __U,__m512i __A,__m128i __B)6084*67e74705SXin Li _mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
6085*67e74705SXin Li {
6086*67e74705SXin Li   return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
6087*67e74705SXin Li              (__v2di) __B,
6088*67e74705SXin Li              (__v8di) __W,
6089*67e74705SXin Li              (__mmask8) __U);
6090*67e74705SXin Li }
6091*67e74705SXin Li 
6092*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sra_epi64(__mmask8 __U,__m512i __A,__m128i __B)6093*67e74705SXin Li _mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
6094*67e74705SXin Li {
6095*67e74705SXin Li   return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
6096*67e74705SXin Li              (__v2di) __B,
6097*67e74705SXin Li              (__v8di)
6098*67e74705SXin Li              _mm512_setzero_si512 (),
6099*67e74705SXin Li              (__mmask8) __U);
6100*67e74705SXin Li }
6101*67e74705SXin Li 
6102*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_srav_epi32(__m512i __X,__m512i __Y)6103*67e74705SXin Li _mm512_srav_epi32 (__m512i __X, __m512i __Y)
6104*67e74705SXin Li {
6105*67e74705SXin Li   return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
6106*67e74705SXin Li               (__v16si) __Y,
6107*67e74705SXin Li               (__v16si)
6108*67e74705SXin Li               _mm512_setzero_si512 (),
6109*67e74705SXin Li               (__mmask16) -1);
6110*67e74705SXin Li }
6111*67e74705SXin Li 
6112*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_srav_epi32(__m512i __W,__mmask16 __U,__m512i __X,__m512i __Y)6113*67e74705SXin Li _mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
6114*67e74705SXin Li {
6115*67e74705SXin Li   return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
6116*67e74705SXin Li               (__v16si) __Y,
6117*67e74705SXin Li               (__v16si) __W,
6118*67e74705SXin Li               (__mmask16) __U);
6119*67e74705SXin Li }
6120*67e74705SXin Li 
6121*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_srav_epi32(__mmask16 __U,__m512i __X,__m512i __Y)6122*67e74705SXin Li _mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
6123*67e74705SXin Li {
6124*67e74705SXin Li   return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
6125*67e74705SXin Li               (__v16si) __Y,
6126*67e74705SXin Li               (__v16si)
6127*67e74705SXin Li               _mm512_setzero_si512 (),
6128*67e74705SXin Li               (__mmask16) __U);
6129*67e74705SXin Li }
6130*67e74705SXin Li 
6131*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_srav_epi64(__m512i __X,__m512i __Y)6132*67e74705SXin Li _mm512_srav_epi64 (__m512i __X, __m512i __Y)
6133*67e74705SXin Li {
6134*67e74705SXin Li   return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
6135*67e74705SXin Li              (__v8di) __Y,
6136*67e74705SXin Li              (__v8di)
6137*67e74705SXin Li              _mm512_setzero_si512 (),
6138*67e74705SXin Li              (__mmask8) -1);
6139*67e74705SXin Li }
6140*67e74705SXin Li 
6141*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_srav_epi64(__m512i __W,__mmask8 __U,__m512i __X,__m512i __Y)6142*67e74705SXin Li _mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
6143*67e74705SXin Li {
6144*67e74705SXin Li   return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
6145*67e74705SXin Li              (__v8di) __Y,
6146*67e74705SXin Li              (__v8di) __W,
6147*67e74705SXin Li              (__mmask8) __U);
6148*67e74705SXin Li }
6149*67e74705SXin Li 
6150*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_srav_epi64(__mmask8 __U,__m512i __X,__m512i __Y)6151*67e74705SXin Li _mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
6152*67e74705SXin Li {
6153*67e74705SXin Li   return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
6154*67e74705SXin Li              (__v8di) __Y,
6155*67e74705SXin Li              (__v8di)
6156*67e74705SXin Li              _mm512_setzero_si512 (),
6157*67e74705SXin Li              (__mmask8) __U);
6158*67e74705SXin Li }
6159*67e74705SXin Li 
6160*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_srl_epi32(__m512i __A,__m128i __B)6161*67e74705SXin Li _mm512_srl_epi32 (__m512i __A, __m128i __B)
6162*67e74705SXin Li {
6163*67e74705SXin Li   return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
6164*67e74705SXin Li              (__v4si) __B,
6165*67e74705SXin Li              (__v16si)
6166*67e74705SXin Li              _mm512_setzero_si512 (),
6167*67e74705SXin Li              (__mmask16) -1);
6168*67e74705SXin Li }
6169*67e74705SXin Li 
6170*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_srl_epi32(__m512i __W,__mmask16 __U,__m512i __A,__m128i __B)6171*67e74705SXin Li _mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
6172*67e74705SXin Li {
6173*67e74705SXin Li   return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
6174*67e74705SXin Li              (__v4si) __B,
6175*67e74705SXin Li              (__v16si) __W,
6176*67e74705SXin Li              (__mmask16) __U);
6177*67e74705SXin Li }
6178*67e74705SXin Li 
6179*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_srl_epi32(__mmask16 __U,__m512i __A,__m128i __B)6180*67e74705SXin Li _mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
6181*67e74705SXin Li {
6182*67e74705SXin Li   return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
6183*67e74705SXin Li              (__v4si) __B,
6184*67e74705SXin Li              (__v16si)
6185*67e74705SXin Li              _mm512_setzero_si512 (),
6186*67e74705SXin Li              (__mmask16) __U);
6187*67e74705SXin Li }
6188*67e74705SXin Li 
6189*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_srl_epi64(__m512i __A,__m128i __B)6190*67e74705SXin Li _mm512_srl_epi64 (__m512i __A, __m128i __B)
6191*67e74705SXin Li {
6192*67e74705SXin Li   return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
6193*67e74705SXin Li              (__v2di) __B,
6194*67e74705SXin Li              (__v8di)
6195*67e74705SXin Li              _mm512_setzero_si512 (),
6196*67e74705SXin Li              (__mmask8) -1);
6197*67e74705SXin Li }
6198*67e74705SXin Li 
6199*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_srl_epi64(__m512i __W,__mmask8 __U,__m512i __A,__m128i __B)6200*67e74705SXin Li _mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
6201*67e74705SXin Li {
6202*67e74705SXin Li   return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
6203*67e74705SXin Li              (__v2di) __B,
6204*67e74705SXin Li              (__v8di) __W,
6205*67e74705SXin Li              (__mmask8) __U);
6206*67e74705SXin Li }
6207*67e74705SXin Li 
6208*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_srl_epi64(__mmask8 __U,__m512i __A,__m128i __B)6209*67e74705SXin Li _mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
6210*67e74705SXin Li {
6211*67e74705SXin Li   return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
6212*67e74705SXin Li              (__v2di) __B,
6213*67e74705SXin Li              (__v8di)
6214*67e74705SXin Li              _mm512_setzero_si512 (),
6215*67e74705SXin Li              (__mmask8) __U);
6216*67e74705SXin Li }
6217*67e74705SXin Li 
6218*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_srlv_epi32(__m512i __X,__m512i __Y)6219*67e74705SXin Li _mm512_srlv_epi32 (__m512i __X, __m512i __Y)
6220*67e74705SXin Li {
6221*67e74705SXin Li   return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
6222*67e74705SXin Li               (__v16si) __Y,
6223*67e74705SXin Li               (__v16si)
6224*67e74705SXin Li               _mm512_setzero_si512 (),
6225*67e74705SXin Li               (__mmask16) -1);
6226*67e74705SXin Li }
6227*67e74705SXin Li 
6228*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_srlv_epi32(__m512i __W,__mmask16 __U,__m512i __X,__m512i __Y)6229*67e74705SXin Li _mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
6230*67e74705SXin Li {
6231*67e74705SXin Li   return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
6232*67e74705SXin Li               (__v16si) __Y,
6233*67e74705SXin Li               (__v16si) __W,
6234*67e74705SXin Li               (__mmask16) __U);
6235*67e74705SXin Li }
6236*67e74705SXin Li 
6237*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_srlv_epi32(__mmask16 __U,__m512i __X,__m512i __Y)6238*67e74705SXin Li _mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
6239*67e74705SXin Li {
6240*67e74705SXin Li   return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
6241*67e74705SXin Li               (__v16si) __Y,
6242*67e74705SXin Li               (__v16si)
6243*67e74705SXin Li               _mm512_setzero_si512 (),
6244*67e74705SXin Li               (__mmask16) __U);
6245*67e74705SXin Li }
6246*67e74705SXin Li 
6247*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_srlv_epi64(__m512i __X,__m512i __Y)6248*67e74705SXin Li _mm512_srlv_epi64 (__m512i __X, __m512i __Y)
6249*67e74705SXin Li {
6250*67e74705SXin Li   return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
6251*67e74705SXin Li              (__v8di) __Y,
6252*67e74705SXin Li              (__v8di)
6253*67e74705SXin Li              _mm512_setzero_si512 (),
6254*67e74705SXin Li              (__mmask8) -1);
6255*67e74705SXin Li }
6256*67e74705SXin Li 
6257*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_srlv_epi64(__m512i __W,__mmask8 __U,__m512i __X,__m512i __Y)6258*67e74705SXin Li _mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
6259*67e74705SXin Li {
6260*67e74705SXin Li   return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
6261*67e74705SXin Li              (__v8di) __Y,
6262*67e74705SXin Li              (__v8di) __W,
6263*67e74705SXin Li              (__mmask8) __U);
6264*67e74705SXin Li }
6265*67e74705SXin Li 
6266*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_srlv_epi64(__mmask8 __U,__m512i __X,__m512i __Y)6267*67e74705SXin Li _mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
6268*67e74705SXin Li {
6269*67e74705SXin Li   return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
6270*67e74705SXin Li              (__v8di) __Y,
6271*67e74705SXin Li              (__v8di)
6272*67e74705SXin Li              _mm512_setzero_si512 (),
6273*67e74705SXin Li              (__mmask8) __U);
6274*67e74705SXin Li }
6275*67e74705SXin Li 
/* Unmasked form: expands to __builtin_ia32_pternlogd512_mask on A, B and C
   (each cast to __v16si) with imm passed as int and an all-ones 16-bit
   mask. */
#define _mm512_ternarylogic_epi32(A, B, C, imm) \
  __extension__ ({ \
    (__m512i)__builtin_ia32_pternlogd512_mask( \
        (__v16si)(__m512i)(A), \
        (__v16si)(__m512i)(B), \
        (__v16si)(__m512i)(C), \
        (int)(imm), (__mmask16)-1); \
  })
6281*67e74705SXin Li 
/* Masked form: expands to __builtin_ia32_pternlogd512_mask on A, B and C
   (each cast to __v16si) with imm passed as int and U as the 16-bit mask. */
#define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) \
  __extension__ ({ \
    (__m512i)__builtin_ia32_pternlogd512_mask( \
        (__v16si)(__m512i)(A), \
        (__v16si)(__m512i)(B), \
        (__v16si)(__m512i)(C), \
        (int)(imm), (__mmask16)(U)); \
  })
6287*67e74705SXin Li 
/* Zero-masked form: expands to __builtin_ia32_pternlogd512_maskz on A, B and
   C (each cast to __v16si) with imm passed as int and U as the 16-bit
   mask. */
#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) \
  __extension__ ({ \
    (__m512i)__builtin_ia32_pternlogd512_maskz( \
        (__v16si)(__m512i)(A), \
        (__v16si)(__m512i)(B), \
        (__v16si)(__m512i)(C), \
        (int)(imm), (__mmask16)(U)); \
  })
6293*67e74705SXin Li 
/* Unmasked form: expands to __builtin_ia32_pternlogq512_mask on A, B and C
   (each cast to __v8di) with imm passed as int and an all-ones 8-bit mask. */
#define _mm512_ternarylogic_epi64(A, B, C, imm) \
  __extension__ ({ \
    (__m512i)__builtin_ia32_pternlogq512_mask( \
        (__v8di)(__m512i)(A), \
        (__v8di)(__m512i)(B), \
        (__v8di)(__m512i)(C), \
        (int)(imm), (__mmask8)-1); \
  })
6299*67e74705SXin Li 
/* Masked form: expands to __builtin_ia32_pternlogq512_mask on A, B and C
   (each cast to __v8di) with imm passed as int and U as the 8-bit mask. */
#define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) \
  __extension__ ({ \
    (__m512i)__builtin_ia32_pternlogq512_mask( \
        (__v8di)(__m512i)(A), \
        (__v8di)(__m512i)(B), \
        (__v8di)(__m512i)(C), \
        (int)(imm), (__mmask8)(U)); \
  })
6305*67e74705SXin Li 
/* Zero-masked form: expands to __builtin_ia32_pternlogq512_maskz on A, B and
   C (each cast to __v8di) with imm passed as int and U as the 8-bit mask. */
#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) \
  __extension__ ({ \
    (__m512i)__builtin_ia32_pternlogq512_maskz( \
        (__v8di)(__m512i)(A), \
        (__v8di)(__m512i)(B), \
        (__v8di)(__m512i)(C), \
        (int)(imm), (__mmask8)(U)); \
  })
6311*67e74705SXin Li 
/* Expands to __builtin_ia32_vcvtsd2si64 on A (cast to __v2df) with R passed
   as int; the result is cast to long long. */
#define _mm_cvt_roundsd_i64(A, R) \
  __extension__ ({ \
    (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)); \
  })
6314*67e74705SXin Li 
/* Expands to __builtin_ia32_vcvtsd2si32 on A (cast to __v2df) with R passed
   as int; the result is cast to int. */
#define _mm_cvt_roundsd_si32(A, R) \
  __extension__ ({ \
    (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)); \
  })
6317*67e74705SXin Li 
/* Expands to __builtin_ia32_vcvtsd2si32 on A (cast to __v2df) with R passed
   as int; the result is cast to int. */
#define _mm_cvt_roundsd_i32(A, R) \
  __extension__ ({ \
    (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)); \
  })
6320*67e74705SXin Li 
/* Expands to __builtin_ia32_vcvtsd2usi32 on A (cast to __v2df) with R passed
   as int; the result is cast to unsigned int. */
#define _mm_cvt_roundsd_u32(A, R) \
  __extension__ ({ \
    (unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), \
                                              (int)(R)); \
  })
6323*67e74705SXin Li 
6324*67e74705SXin Li static __inline__ unsigned __DEFAULT_FN_ATTRS
_mm_cvtsd_u32(__m128d __A)6325*67e74705SXin Li _mm_cvtsd_u32 (__m128d __A)
6326*67e74705SXin Li {
6327*67e74705SXin Li   return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
6328*67e74705SXin Li              _MM_FROUND_CUR_DIRECTION);
6329*67e74705SXin Li }
6330*67e74705SXin Li 
/* Expands to __builtin_ia32_vcvtsd2usi64 on A (cast to __v2df) with R passed
   as int; the result is cast to unsigned long long. */
#define _mm_cvt_roundsd_u64(A, R) \
  __extension__ ({ \
    (unsigned long long)__builtin_ia32_vcvtsd2usi64( \
        (__v2df)(__m128d)(A), (int)(R)); \
  })
6334*67e74705SXin Li 
6335*67e74705SXin Li static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm_cvtsd_u64(__m128d __A)6336*67e74705SXin Li _mm_cvtsd_u64 (__m128d __A)
6337*67e74705SXin Li {
6338*67e74705SXin Li   return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
6339*67e74705SXin Li                  __A,
6340*67e74705SXin Li                  _MM_FROUND_CUR_DIRECTION);
6341*67e74705SXin Li }
6342*67e74705SXin Li 
/* Expands to __builtin_ia32_vcvtss2si32 on A (cast to __v4sf) with R passed
   as int; the result is cast to int. */
#define _mm_cvt_roundss_si32(A, R) \
  __extension__ ({ \
    (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)); \
  })
6345*67e74705SXin Li 
/* Expands to __builtin_ia32_vcvtss2si32 on A (cast to __v4sf) with R passed
   as int; the result is cast to int. */
#define _mm_cvt_roundss_i32(A, R) \
  __extension__ ({ \
    (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)); \
  })
6348*67e74705SXin Li 
/* Expands to __builtin_ia32_vcvtss2si64 on A (cast to __v4sf) with R passed
   as int; the result is cast to long long. */
#define _mm_cvt_roundss_si64(A, R) \
  __extension__ ({ \
    (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)); \
  })
6351*67e74705SXin Li 
/* Expands to __builtin_ia32_vcvtss2si64 on A (cast to __v4sf) with R passed
   as int; the result is cast to long long. */
#define _mm_cvt_roundss_i64(A, R) \
  __extension__ ({ \
    (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)); \
  })
6354*67e74705SXin Li 
/* Expands to __builtin_ia32_vcvtss2usi32 on A (cast to __v4sf) with R passed
   as int; the result is cast to unsigned int. */
#define _mm_cvt_roundss_u32(A, R) \
  __extension__ ({ \
    (unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), \
                                              (int)(R)); \
  })
6357*67e74705SXin Li 
6358*67e74705SXin Li static __inline__ unsigned __DEFAULT_FN_ATTRS
_mm_cvtss_u32(__m128 __A)6359*67e74705SXin Li _mm_cvtss_u32 (__m128 __A)
6360*67e74705SXin Li {
6361*67e74705SXin Li   return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
6362*67e74705SXin Li              _MM_FROUND_CUR_DIRECTION);
6363*67e74705SXin Li }
6364*67e74705SXin Li 
/* Expands to __builtin_ia32_vcvtss2usi64 on A (cast to __v4sf) with R passed
   as int; the result is cast to unsigned long long. */
#define _mm_cvt_roundss_u64(A, R) \
  __extension__ ({ \
    (unsigned long long)__builtin_ia32_vcvtss2usi64( \
        (__v4sf)(__m128)(A), (int)(R)); \
  })
6368*67e74705SXin Li 
6369*67e74705SXin Li static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm_cvtss_u64(__m128 __A)6370*67e74705SXin Li _mm_cvtss_u64 (__m128 __A)
6371*67e74705SXin Li {
6372*67e74705SXin Li   return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
6373*67e74705SXin Li                  __A,
6374*67e74705SXin Li                  _MM_FROUND_CUR_DIRECTION);
6375*67e74705SXin Li }
6376*67e74705SXin Li 
/* Expands to __builtin_ia32_vcvttsd2si32 on A (cast to __v2df) with R passed
   as int; the result is cast to int. */
#define _mm_cvtt_roundsd_i32(A, R) \
  __extension__ ({ \
    (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)); \
  })
6379*67e74705SXin Li 
/* Expands to __builtin_ia32_vcvttsd2si32 on A (cast to __v2df) with R passed
   as int; the result is cast to int. */
#define _mm_cvtt_roundsd_si32(A, R) \
  __extension__ ({ \
    (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)); \
  })
6382*67e74705SXin Li 
6383*67e74705SXin Li static __inline__ int __DEFAULT_FN_ATTRS
_mm_cvttsd_i32(__m128d __A)6384*67e74705SXin Li _mm_cvttsd_i32 (__m128d __A)
6385*67e74705SXin Li {
6386*67e74705SXin Li   return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
6387*67e74705SXin Li               _MM_FROUND_CUR_DIRECTION);
6388*67e74705SXin Li }
6389*67e74705SXin Li 
/* Expands to __builtin_ia32_vcvttsd2si64 on A (cast to __v2df) with R passed
   as int; the result is cast to long long. */
#define _mm_cvtt_roundsd_si64(A, R) \
  __extension__ ({ \
    (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)); \
  })
6392*67e74705SXin Li 
/* Expands to __builtin_ia32_vcvttsd2si64 on A (cast to __v2df) with R passed
   as int; the result is cast to long long. */
#define _mm_cvtt_roundsd_i64(A, R) \
  __extension__ ({ \
    (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)); \
  })
6395*67e74705SXin Li 
6396*67e74705SXin Li static __inline__ long long __DEFAULT_FN_ATTRS
_mm_cvttsd_i64(__m128d __A)6397*67e74705SXin Li _mm_cvttsd_i64 (__m128d __A)
6398*67e74705SXin Li {
6399*67e74705SXin Li   return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
6400*67e74705SXin Li               _MM_FROUND_CUR_DIRECTION);
6401*67e74705SXin Li }
6402*67e74705SXin Li 
/* Expands to __builtin_ia32_vcvttsd2usi32 on A (cast to __v2df) with R
   passed as int; the result is cast to unsigned int. */
#define _mm_cvtt_roundsd_u32(A, R) \
  __extension__ ({ \
    (unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), \
                                               (int)(R)); \
  })
6405*67e74705SXin Li 
6406*67e74705SXin Li static __inline__ unsigned __DEFAULT_FN_ATTRS
_mm_cvttsd_u32(__m128d __A)6407*67e74705SXin Li _mm_cvttsd_u32 (__m128d __A)
6408*67e74705SXin Li {
6409*67e74705SXin Li   return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
6410*67e74705SXin Li               _MM_FROUND_CUR_DIRECTION);
6411*67e74705SXin Li }
6412*67e74705SXin Li 
/* Expands to __builtin_ia32_vcvttsd2usi64 on A (cast to __v2df) with R
   passed as int; the result is cast to unsigned long long. */
#define _mm_cvtt_roundsd_u64(A, R) \
  __extension__ ({ \
    (unsigned long long)__builtin_ia32_vcvttsd2usi64( \
        (__v2df)(__m128d)(A), (int)(R)); \
  })
6416*67e74705SXin Li 
6417*67e74705SXin Li static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm_cvttsd_u64(__m128d __A)6418*67e74705SXin Li _mm_cvttsd_u64 (__m128d __A)
6419*67e74705SXin Li {
6420*67e74705SXin Li   return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
6421*67e74705SXin Li                   __A,
6422*67e74705SXin Li                   _MM_FROUND_CUR_DIRECTION);
6423*67e74705SXin Li }
6424*67e74705SXin Li 
/* Statement-expression macro: casts src to __v4sf and rnd to int, calls
 * __builtin_ia32_vcvttss2si32, and yields the result as int.
 * Expands to the same builtin call as _mm_cvtt_roundss_si32. */
#define _mm_cvtt_roundss_i32(src, rnd) __extension__ ({ \
  (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(src), \
                                   (int)(rnd)); })
6427*67e74705SXin Li 
/* Statement-expression macro: casts src to __v4sf and rnd to int, calls
 * __builtin_ia32_vcvttss2si32, and yields the result as int.
 * Expands to the same builtin call as _mm_cvtt_roundss_i32. */
#define _mm_cvtt_roundss_si32(src, rnd) __extension__ ({ \
  (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(src), \
                                   (int)(rnd)); })
6430*67e74705SXin Li 
6431*67e74705SXin Li static __inline__ int __DEFAULT_FN_ATTRS
_mm_cvttss_i32(__m128 __A)6432*67e74705SXin Li _mm_cvttss_i32 (__m128 __A)
6433*67e74705SXin Li {
6434*67e74705SXin Li   return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
6435*67e74705SXin Li               _MM_FROUND_CUR_DIRECTION);
6436*67e74705SXin Li }
6437*67e74705SXin Li 
/* Statement-expression macro: casts src to __v4sf and rnd to int, calls
 * __builtin_ia32_vcvttss2si64, and yields the result as long long.
 * Expands to the same builtin call as _mm_cvtt_roundss_si64. */
#define _mm_cvtt_roundss_i64(src, rnd) __extension__ ({ \
  (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(src), \
                                         (int)(rnd)); })
6440*67e74705SXin Li 
/* Statement-expression macro: casts src to __v4sf and rnd to int, calls
 * __builtin_ia32_vcvttss2si64, and yields the result as long long.
 * Expands to the same builtin call as _mm_cvtt_roundss_i64. */
#define _mm_cvtt_roundss_si64(src, rnd) __extension__ ({ \
  (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(src), \
                                         (int)(rnd)); })
6443*67e74705SXin Li 
6444*67e74705SXin Li static __inline__ long long __DEFAULT_FN_ATTRS
_mm_cvttss_i64(__m128 __A)6445*67e74705SXin Li _mm_cvttss_i64 (__m128 __A)
6446*67e74705SXin Li {
6447*67e74705SXin Li   return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
6448*67e74705SXin Li               _MM_FROUND_CUR_DIRECTION);
6449*67e74705SXin Li }
6450*67e74705SXin Li 
/* Statement-expression macro: casts src to __v4sf and rnd to int, calls
 * __builtin_ia32_vcvttss2usi32, and yields the result as unsigned int. */
#define _mm_cvtt_roundss_u32(src, rnd) __extension__ ({ \
  (unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(src), \
                                             (int)(rnd)); })
6453*67e74705SXin Li 
6454*67e74705SXin Li static __inline__ unsigned __DEFAULT_FN_ATTRS
_mm_cvttss_u32(__m128 __A)6455*67e74705SXin Li _mm_cvttss_u32 (__m128 __A)
6456*67e74705SXin Li {
6457*67e74705SXin Li   return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
6458*67e74705SXin Li               _MM_FROUND_CUR_DIRECTION);
6459*67e74705SXin Li }
6460*67e74705SXin Li 
/* Statement-expression macro: casts src to __v4sf and rnd to int, calls
 * __builtin_ia32_vcvttss2usi64, and yields the result as
 * unsigned long long. */
#define _mm_cvtt_roundss_u64(src, rnd) __extension__ ({ \
  (unsigned long long)__builtin_ia32_vcvttss2usi64( \
      (__v4sf)(__m128)(src), (int)(rnd)); })
6464*67e74705SXin Li 
6465*67e74705SXin Li static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm_cvttss_u64(__m128 __A)6466*67e74705SXin Li _mm_cvttss_u64 (__m128 __A)
6467*67e74705SXin Li {
6468*67e74705SXin Li   return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
6469*67e74705SXin Li                   __A,
6470*67e74705SXin Li                   _MM_FROUND_CUR_DIRECTION);
6471*67e74705SXin Li }
6472*67e74705SXin Li 
6473*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask2_permutex2var_pd(__m512d __A,__m512i __I,__mmask8 __U,__m512d __B)6474*67e74705SXin Li _mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
6475*67e74705SXin Li             __m512d __B)
6476*67e74705SXin Li {
6477*67e74705SXin Li   return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
6478*67e74705SXin Li               (__v8di) __I
6479*67e74705SXin Li               /* idx */ ,
6480*67e74705SXin Li               (__v8df) __B,
6481*67e74705SXin Li               (__mmask8) __U);
6482*67e74705SXin Li }
6483*67e74705SXin Li 
6484*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask2_permutex2var_ps(__m512 __A,__m512i __I,__mmask16 __U,__m512 __B)6485*67e74705SXin Li _mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
6486*67e74705SXin Li             __m512 __B)
6487*67e74705SXin Li {
6488*67e74705SXin Li   return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
6489*67e74705SXin Li                    (__v16si) __I
6490*67e74705SXin Li                    /* idx */ ,
6491*67e74705SXin Li                    (__v16sf) __B,
6492*67e74705SXin Li                    (__mmask16) __U);
6493*67e74705SXin Li }
6494*67e74705SXin Li 
6495*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask2_permutex2var_epi64(__m512i __A,__m512i __I,__mmask8 __U,__m512i __B)6496*67e74705SXin Li _mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
6497*67e74705SXin Li          __mmask8 __U, __m512i __B)
6498*67e74705SXin Li {
6499*67e74705SXin Li   return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
6500*67e74705SXin Li                    (__v8di) __I
6501*67e74705SXin Li                    /* idx */ ,
6502*67e74705SXin Li                    (__v8di) __B,
6503*67e74705SXin Li                    (__mmask8) __U);
6504*67e74705SXin Li }
6505*67e74705SXin Li 
/* Builds the result with __builtin_shufflevector. The eight doubles are
 * handled as four pairs (bases 0, 2, 4, 6); result element i takes the
 * element of vec at (pair base + bit i of imm). The second shuffle operand
 * is _mm512_undefined_pd(); the largest index produced is 7. */
#define _mm512_permute_pd(vec, imm) __extension__ ({ \
  (__m512d)__builtin_shufflevector( \
      (__v8df)(__m512d)(vec), (__v8df)_mm512_undefined_pd(), \
      0 + (((imm) >> 0) & 0x1), 0 + (((imm) >> 1) & 0x1), \
      2 + (((imm) >> 2) & 0x1), 2 + (((imm) >> 3) & 0x1), \
      4 + (((imm) >> 4) & 0x1), 4 + (((imm) >> 5) & 0x1), \
      6 + (((imm) >> 6) & 0x1), 6 + (((imm) >> 7) & 0x1)); })
6517*67e74705SXin Li 
/* Passes mask, _mm512_permute_pd(vec, imm) and passthru (in that order) to
 * __builtin_ia32_selectpd_512.
 * NOTE(review): presumably a set mask bit picks the permuted lane and a
 * clear bit keeps passthru -- confirm against the builtin's definition. */
#define _mm512_mask_permute_pd(passthru, mask, vec, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(mask), \
      (__v8df)_mm512_permute_pd((vec), (imm)), \
      (__v8df)(__m512d)(passthru)); })
6522*67e74705SXin Li 
/* Passes mask, _mm512_permute_pd(vec, imm) and _mm512_setzero_pd() (in that
 * order) to __builtin_ia32_selectpd_512.
 * NOTE(review): presumably a set mask bit picks the permuted lane and a
 * clear bit yields zero -- confirm against the builtin's definition. */
#define _mm512_maskz_permute_pd(mask, vec, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(mask), \
      (__v8df)_mm512_permute_pd((vec), (imm)), \
      (__v8df)_mm512_setzero_pd()); })
6527*67e74705SXin Li 
/* Builds the result with __builtin_shufflevector. The sixteen floats are
 * handled as four groups of four (bases 0, 4, 8, 12); within every group,
 * result element j takes the element of vec at
 * (group base + ((imm >> 2*j) & 3)), so the same four 2-bit selectors are
 * reused for each group. The second shuffle operand is
 * _mm512_undefined_ps(); the largest index produced is 15. */
#define _mm512_permute_ps(vec, imm) __extension__ ({ \
  (__m512)__builtin_shufflevector( \
      (__v16sf)(__m512)(vec), (__v16sf)_mm512_undefined_ps(), \
      0  + (((imm) >> 0) & 0x3), 0  + (((imm) >> 2) & 0x3), \
      0  + (((imm) >> 4) & 0x3), 0  + (((imm) >> 6) & 0x3), \
      4  + (((imm) >> 0) & 0x3), 4  + (((imm) >> 2) & 0x3), \
      4  + (((imm) >> 4) & 0x3), 4  + (((imm) >> 6) & 0x3), \
      8  + (((imm) >> 0) & 0x3), 8  + (((imm) >> 2) & 0x3), \
      8  + (((imm) >> 4) & 0x3), 8  + (((imm) >> 6) & 0x3), \
      12 + (((imm) >> 0) & 0x3), 12 + (((imm) >> 2) & 0x3), \
      12 + (((imm) >> 4) & 0x3), 12 + (((imm) >> 6) & 0x3)); })
6547*67e74705SXin Li 
/* Passes mask, _mm512_permute_ps(vec, imm) and passthru (in that order) to
 * __builtin_ia32_selectps_512.
 * NOTE(review): presumably a set mask bit picks the permuted lane and a
 * clear bit keeps passthru -- confirm against the builtin's definition. */
#define _mm512_mask_permute_ps(passthru, mask, vec, imm) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(mask), \
      (__v16sf)_mm512_permute_ps((vec), (imm)), \
      (__v16sf)(__m512)(passthru)); })
6552*67e74705SXin Li 
/* Passes mask, _mm512_permute_ps(vec, imm) and _mm512_setzero_ps() (in that
 * order) to __builtin_ia32_selectps_512.
 * NOTE(review): presumably a set mask bit picks the permuted lane and a
 * clear bit yields zero -- confirm against the builtin's definition. */
#define _mm512_maskz_permute_ps(mask, vec, imm) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(mask), \
      (__v16sf)_mm512_permute_ps((vec), (imm)), \
      (__v16sf)_mm512_setzero_ps()); })
6557*67e74705SXin Li 
6558*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_permutevar_pd(__m512d __A,__m512i __C)6559*67e74705SXin Li _mm512_permutevar_pd (__m512d __A, __m512i __C)
6560*67e74705SXin Li {
6561*67e74705SXin Li   return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
6562*67e74705SXin Li               (__v8di) __C,
6563*67e74705SXin Li               (__v8df)
6564*67e74705SXin Li               _mm512_undefined_pd (),
6565*67e74705SXin Li               (__mmask8) -1);
6566*67e74705SXin Li }
6567*67e74705SXin Li 
6568*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_permutevar_pd(__m512d __W,__mmask8 __U,__m512d __A,__m512i __C)6569*67e74705SXin Li _mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
6570*67e74705SXin Li {
6571*67e74705SXin Li   return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
6572*67e74705SXin Li               (__v8di) __C,
6573*67e74705SXin Li               (__v8df) __W,
6574*67e74705SXin Li               (__mmask8) __U);
6575*67e74705SXin Li }
6576*67e74705SXin Li 
6577*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_permutevar_pd(__mmask8 __U,__m512d __A,__m512i __C)6578*67e74705SXin Li _mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C)
6579*67e74705SXin Li {
6580*67e74705SXin Li   return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
6581*67e74705SXin Li               (__v8di) __C,
6582*67e74705SXin Li               (__v8df)
6583*67e74705SXin Li               _mm512_setzero_pd (),
6584*67e74705SXin Li               (__mmask8) __U);
6585*67e74705SXin Li }
6586*67e74705SXin Li 
6587*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_permutevar_ps(__m512 __A,__m512i __C)6588*67e74705SXin Li _mm512_permutevar_ps (__m512 __A, __m512i __C)
6589*67e74705SXin Li {
6590*67e74705SXin Li   return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
6591*67e74705SXin Li                    (__v16si) __C,
6592*67e74705SXin Li                    (__v16sf)
6593*67e74705SXin Li                    _mm512_undefined_ps (),
6594*67e74705SXin Li                    (__mmask16) -1);
6595*67e74705SXin Li }
6596*67e74705SXin Li 
6597*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_permutevar_ps(__m512 __W,__mmask16 __U,__m512 __A,__m512i __C)6598*67e74705SXin Li _mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
6599*67e74705SXin Li {
6600*67e74705SXin Li   return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
6601*67e74705SXin Li                    (__v16si) __C,
6602*67e74705SXin Li                    (__v16sf) __W,
6603*67e74705SXin Li                    (__mmask16) __U);
6604*67e74705SXin Li }
6605*67e74705SXin Li 
6606*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_permutevar_ps(__mmask16 __U,__m512 __A,__m512i __C)6607*67e74705SXin Li _mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C)
6608*67e74705SXin Li {
6609*67e74705SXin Li   return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
6610*67e74705SXin Li                    (__v16si) __C,
6611*67e74705SXin Li                    (__v16sf)
6612*67e74705SXin Li                    _mm512_setzero_ps (),
6613*67e74705SXin Li                    (__mmask16) __U);
6614*67e74705SXin Li }
6615*67e74705SXin Li 
6616*67e74705SXin Li static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_permutex2var_pd(__m512d __A,__m512i __I,__m512d __B)6617*67e74705SXin Li _mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
6618*67e74705SXin Li {
6619*67e74705SXin Li   return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6620*67e74705SXin Li                     /* idx */ ,
6621*67e74705SXin Li                     (__v8df) __A,
6622*67e74705SXin Li                     (__v8df) __B,
6623*67e74705SXin Li                     (__mmask8) -1);
6624*67e74705SXin Li }
6625*67e74705SXin Li 
6626*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_permutex2var_pd(__m512d __A,__mmask8 __U,__m512i __I,__m512d __B)6627*67e74705SXin Li _mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
6628*67e74705SXin Li {
6629*67e74705SXin Li   return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6630*67e74705SXin Li                     /* idx */ ,
6631*67e74705SXin Li                     (__v8df) __A,
6632*67e74705SXin Li                     (__v8df) __B,
6633*67e74705SXin Li                     (__mmask8) __U);
6634*67e74705SXin Li }
6635*67e74705SXin Li 
6636*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_permutex2var_pd(__mmask8 __U,__m512d __A,__m512i __I,__m512d __B)6637*67e74705SXin Li _mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
6638*67e74705SXin Li             __m512d __B)
6639*67e74705SXin Li {
6640*67e74705SXin Li   return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
6641*67e74705SXin Li                                                          /* idx */ ,
6642*67e74705SXin Li                                                          (__v8df) __A,
6643*67e74705SXin Li                                                          (__v8df) __B,
6644*67e74705SXin Li                                                          (__mmask8) __U);
6645*67e74705SXin Li }
6646*67e74705SXin Li 
6647*67e74705SXin Li static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_permutex2var_ps(__m512 __A,__m512i __I,__m512 __B)6648*67e74705SXin Li _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
6649*67e74705SXin Li {
6650*67e74705SXin Li   return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6651*67e74705SXin Li                                                          /* idx */ ,
6652*67e74705SXin Li                                                          (__v16sf) __A,
6653*67e74705SXin Li                                                          (__v16sf) __B,
6654*67e74705SXin Li                                                          (__mmask16) -1);
6655*67e74705SXin Li }
6656*67e74705SXin Li 
6657*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_permutex2var_ps(__m512 __A,__mmask16 __U,__m512i __I,__m512 __B)6658*67e74705SXin Li _mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
6659*67e74705SXin Li {
6660*67e74705SXin Li   return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6661*67e74705SXin Li                                                          /* idx */ ,
6662*67e74705SXin Li                                                          (__v16sf) __A,
6663*67e74705SXin Li                                                          (__v16sf) __B,
6664*67e74705SXin Li                                                          (__mmask16) __U);
6665*67e74705SXin Li }
6666*67e74705SXin Li 
6667*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_permutex2var_ps(__mmask16 __U,__m512 __A,__m512i __I,__m512 __B)6668*67e74705SXin Li _mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
6669*67e74705SXin Li             __m512 __B)
6670*67e74705SXin Li {
6671*67e74705SXin Li   return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
6672*67e74705SXin Li                                                         /* idx */ ,
6673*67e74705SXin Li                                                         (__v16sf) __A,
6674*67e74705SXin Li                                                         (__v16sf) __B,
6675*67e74705SXin Li                                                         (__mmask16) __U);
6676*67e74705SXin Li }
6677*67e74705SXin Li 
6678*67e74705SXin Li static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_testn_epi32_mask(__m512i __A,__m512i __B)6679*67e74705SXin Li _mm512_testn_epi32_mask (__m512i __A, __m512i __B)
6680*67e74705SXin Li {
6681*67e74705SXin Li   return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
6682*67e74705SXin Li              (__v16si) __B,
6683*67e74705SXin Li              (__mmask16) -1);
6684*67e74705SXin Li }
6685*67e74705SXin Li 
6686*67e74705SXin Li static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_testn_epi32_mask(__mmask16 __U,__m512i __A,__m512i __B)6687*67e74705SXin Li _mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
6688*67e74705SXin Li {
6689*67e74705SXin Li   return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
6690*67e74705SXin Li              (__v16si) __B, __U);
6691*67e74705SXin Li }
6692*67e74705SXin Li 
6693*67e74705SXin Li static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_testn_epi64_mask(__m512i __A,__m512i __B)6694*67e74705SXin Li _mm512_testn_epi64_mask (__m512i __A, __m512i __B)
6695*67e74705SXin Li {
6696*67e74705SXin Li   return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
6697*67e74705SXin Li             (__v8di) __B,
6698*67e74705SXin Li             (__mmask8) -1);
6699*67e74705SXin Li }
6700*67e74705SXin Li 
6701*67e74705SXin Li static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_testn_epi64_mask(__mmask8 __U,__m512i __A,__m512i __B)6702*67e74705SXin Li _mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
6703*67e74705SXin Li {
6704*67e74705SXin Li   return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
6705*67e74705SXin Li             (__v8di) __B, __U);
6706*67e74705SXin Li }
6707*67e74705SXin Li 
/* Unmasked form with explicit rnd: calls __builtin_ia32_cvttpd2udq512_mask
 * with src (as __v8df), _mm256_undefined_si256() as the second operand, an
 * all-ones __mmask8, and rnd cast to int; yields a __m256i. */
#define _mm512_cvtt_roundpd_epu32(src, rnd) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2udq512_mask( \
      (__v8df)(__m512d)(src), \
      (__v8si)_mm256_undefined_si256(), \
      (__mmask8)-1, (int)(rnd)); })
6712*67e74705SXin Li 
/* Merge-masked form with explicit rnd: calls
 * __builtin_ia32_cvttpd2udq512_mask with src (as __v8df), passthru (as
 * __v8si), mask, and rnd cast to int; yields a __m256i.
 * NOTE(review): presumably lanes with a clear mask bit keep passthru --
 * confirm. */
#define _mm512_mask_cvtt_roundpd_epu32(passthru, mask, src, rnd) \
  __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2udq512_mask( \
      (__v8df)(__m512d)(src), \
      (__v8si)(__m256i)(passthru), \
      (__mmask8)(mask), (int)(rnd)); })
6717*67e74705SXin Li 
/* Zero-masked form with explicit rnd: calls
 * __builtin_ia32_cvttpd2udq512_mask with src (as __v8df),
 * _mm256_setzero_si256() as the second operand, mask, and rnd cast to int;
 * yields a __m256i.
 * NOTE(review): presumably lanes with a clear mask bit become zero --
 * confirm. */
#define _mm512_maskz_cvtt_roundpd_epu32(mask, src, rnd) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2udq512_mask( \
      (__v8df)(__m512d)(src), \
      (__v8si)_mm256_setzero_si256(), \
      (__mmask8)(mask), (int)(rnd)); })
6722*67e74705SXin Li 
6723*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_cvttpd_epu32(__m512d __A)6724*67e74705SXin Li _mm512_cvttpd_epu32 (__m512d __A)
6725*67e74705SXin Li {
6726*67e74705SXin Li   return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6727*67e74705SXin Li                   (__v8si)
6728*67e74705SXin Li                   _mm256_undefined_si256 (),
6729*67e74705SXin Li                   (__mmask8) -1,
6730*67e74705SXin Li                   _MM_FROUND_CUR_DIRECTION);
6731*67e74705SXin Li }
6732*67e74705SXin Li 
6733*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvttpd_epu32(__m256i __W,__mmask8 __U,__m512d __A)6734*67e74705SXin Li _mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
6735*67e74705SXin Li {
6736*67e74705SXin Li   return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6737*67e74705SXin Li                   (__v8si) __W,
6738*67e74705SXin Li                   (__mmask8) __U,
6739*67e74705SXin Li                   _MM_FROUND_CUR_DIRECTION);
6740*67e74705SXin Li }
6741*67e74705SXin Li 
6742*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvttpd_epu32(__mmask8 __U,__m512d __A)6743*67e74705SXin Li _mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
6744*67e74705SXin Li {
6745*67e74705SXin Li   return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6746*67e74705SXin Li                   (__v8si)
6747*67e74705SXin Li                   _mm256_setzero_si256 (),
6748*67e74705SXin Li                   (__mmask8) __U,
6749*67e74705SXin Li                   _MM_FROUND_CUR_DIRECTION);
6750*67e74705SXin Li }
6751*67e74705SXin Li 
/* Unmasked form with explicit rnd: calls
 * __builtin_ia32_rndscalesd_round_mask with va and vb (as __v2df),
 * _mm_setzero_pd() as the third operand, an all-ones __mmask8, ctl cast to
 * int, and rnd cast to int. */
#define _mm_roundscale_round_sd(va, vb, ctl, rnd) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalesd_round_mask( \
      (__v2df)(__m128d)(va), (__v2df)(__m128d)(vb), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, (int)(ctl), (int)(rnd)); })
6758*67e74705SXin Li 
/* Unmasked form: calls __builtin_ia32_rndscalesd_round_mask with va and vb
 * (as __v2df), _mm_setzero_pd() as the third operand, an all-ones __mmask8,
 * ctl cast to int, and _MM_FROUND_CUR_DIRECTION as the last argument. */
#define _mm_roundscale_sd(va, vb, ctl) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalesd_round_mask( \
      (__v2df)(__m128d)(va), (__v2df)(__m128d)(vb), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, (int)(ctl), _MM_FROUND_CUR_DIRECTION); })
6765*67e74705SXin Li 
/* Merge-masked form: calls __builtin_ia32_rndscalesd_round_mask with va and
 * vb (as __v2df), passthru as the third operand, mask, ctl cast to int, and
 * _MM_FROUND_CUR_DIRECTION as the last argument.
 * NOTE(review): presumably a clear mask bit keeps passthru's low element --
 * confirm. */
#define _mm_mask_roundscale_sd(passthru, mask, va, vb, ctl) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalesd_round_mask( \
      (__v2df)(__m128d)(va), (__v2df)(__m128d)(vb), \
      (__v2df)(__m128d)(passthru), \
      (__mmask8)(mask), (int)(ctl), _MM_FROUND_CUR_DIRECTION); })
6772*67e74705SXin Li 
/* Merge-masked form with explicit rnd: calls
 * __builtin_ia32_rndscalesd_round_mask with va and vb (as __v2df), passthru
 * as the third operand, mask, ctl cast to int, and rnd cast to int.
 * NOTE(review): presumably a clear mask bit keeps passthru's low element --
 * confirm. */
#define _mm_mask_roundscale_round_sd(passthru, mask, va, vb, ctl, rnd) \
  __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalesd_round_mask( \
      (__v2df)(__m128d)(va), (__v2df)(__m128d)(vb), \
      (__v2df)(__m128d)(passthru), \
      (__mmask8)(mask), (int)(ctl), (int)(rnd)); })
6779*67e74705SXin Li 
/* Zero-masked form: calls __builtin_ia32_rndscalesd_round_mask with va and
 * vb (as __v2df), _mm_setzero_pd() as the third operand, mask, ctl cast to
 * int, and _MM_FROUND_CUR_DIRECTION as the last argument.
 * NOTE(review): presumably a clear mask bit zeroes the low element --
 * confirm. */
#define _mm_maskz_roundscale_sd(mask, va, vb, ctl) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalesd_round_mask( \
      (__v2df)(__m128d)(va), (__v2df)(__m128d)(vb), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(mask), (int)(ctl), _MM_FROUND_CUR_DIRECTION); })
6786*67e74705SXin Li 
/* Zero-masked form with explicit rnd: calls
 * __builtin_ia32_rndscalesd_round_mask with va and vb (as __v2df),
 * _mm_setzero_pd() as the third operand, mask, ctl cast to int, and rnd
 * cast to int.
 * NOTE(review): presumably a clear mask bit zeroes the low element --
 * confirm. */
#define _mm_maskz_roundscale_round_sd(mask, va, vb, ctl, rnd) \
  __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalesd_round_mask( \
      (__v2df)(__m128d)(va), (__v2df)(__m128d)(vb), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(mask), (int)(ctl), (int)(rnd)); })
6793*67e74705SXin Li 
/* Unmasked form with explicit rnd: calls
 * __builtin_ia32_rndscaless_round_mask with va and vb (as __v4sf),
 * _mm_setzero_ps() as the third operand, an all-ones __mmask8, ctl cast to
 * int, and rnd cast to int. */
#define _mm_roundscale_round_ss(va, vb, ctl, rnd) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaless_round_mask( \
      (__v4sf)(__m128)(va), (__v4sf)(__m128)(vb), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, (int)(ctl), (int)(rnd)); })
6800*67e74705SXin Li 
/* Unmasked form: calls __builtin_ia32_rndscaless_round_mask with va and vb
 * (as __v4sf), _mm_setzero_ps() as the third operand, an all-ones __mmask8,
 * ctl cast to int, and _MM_FROUND_CUR_DIRECTION as the last argument. */
#define _mm_roundscale_ss(va, vb, ctl) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaless_round_mask( \
      (__v4sf)(__m128)(va), (__v4sf)(__m128)(vb), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, (int)(ctl), _MM_FROUND_CUR_DIRECTION); })
6807*67e74705SXin Li 
/* Merge-masked form: calls __builtin_ia32_rndscaless_round_mask with va and
 * vb (as __v4sf), passthru as the third operand, mask, ctl cast to int, and
 * _MM_FROUND_CUR_DIRECTION as the last argument.
 * NOTE(review): presumably a clear mask bit keeps passthru's low element --
 * confirm. */
#define _mm_mask_roundscale_ss(passthru, mask, va, vb, ctl) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaless_round_mask( \
      (__v4sf)(__m128)(va), (__v4sf)(__m128)(vb), \
      (__v4sf)(__m128)(passthru), \
      (__mmask8)(mask), (int)(ctl), _MM_FROUND_CUR_DIRECTION); })
6814*67e74705SXin Li 
/* Merge-masked form with explicit rnd: calls
 * __builtin_ia32_rndscaless_round_mask with va and vb (as __v4sf), passthru
 * as the third operand, mask, ctl cast to int, and rnd cast to int.
 * NOTE(review): presumably a clear mask bit keeps passthru's low element --
 * confirm. */
#define _mm_mask_roundscale_round_ss(passthru, mask, va, vb, ctl, rnd) \
  __extension__ ({ \
  (__m128)__builtin_ia32_rndscaless_round_mask( \
      (__v4sf)(__m128)(va), (__v4sf)(__m128)(vb), \
      (__v4sf)(__m128)(passthru), \
      (__mmask8)(mask), (int)(ctl), (int)(rnd)); })
6821*67e74705SXin Li 
/* Zero-masked form: calls __builtin_ia32_rndscaless_round_mask with va and
 * vb (as __v4sf), _mm_setzero_ps() as the third operand, mask, ctl cast to
 * int, and _MM_FROUND_CUR_DIRECTION as the last argument.
 * NOTE(review): presumably a clear mask bit zeroes the low element --
 * confirm. */
#define _mm_maskz_roundscale_ss(mask, va, vb, ctl) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaless_round_mask( \
      (__v4sf)(__m128)(va), (__v4sf)(__m128)(vb), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(mask), (int)(ctl), _MM_FROUND_CUR_DIRECTION); })
6828*67e74705SXin Li 
/* Zero-masked form with explicit rnd: calls
 * __builtin_ia32_rndscaless_round_mask with va and vb (as __v4sf),
 * _mm_setzero_ps() as the third operand, mask, ctl cast to int, and rnd
 * cast to int.
 * NOTE(review): presumably a clear mask bit zeroes the low element --
 * confirm. */
#define _mm_maskz_roundscale_round_ss(mask, va, vb, ctl, rnd) \
  __extension__ ({ \
  (__m128)__builtin_ia32_rndscaless_round_mask( \
      (__v4sf)(__m128)(va), (__v4sf)(__m128)(vb), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(mask), (int)(ctl), (int)(rnd)); })
6835*67e74705SXin Li 
/* Unmasked form with explicit rnd: calls __builtin_ia32_scalefpd512_mask
 * with va and vb (as __v8df), _mm512_undefined_pd() as the third operand,
 * an all-ones __mmask8, and rnd cast to int. */
#define _mm512_scalef_round_pd(va, vb, rnd) __extension__ ({ \
  (__m512d)__builtin_ia32_scalefpd512_mask( \
      (__v8df)(__m512d)(va), (__v8df)(__m512d)(vb), \
      (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1, (int)(rnd)); })
6841*67e74705SXin Li 
/* Merge-masked form with explicit rnd: calls
 * __builtin_ia32_scalefpd512_mask with va and vb (as __v8df), passthru as
 * the third operand, mask, and rnd cast to int.
 * NOTE(review): presumably lanes with a clear mask bit keep passthru --
 * confirm. */
#define _mm512_mask_scalef_round_pd(passthru, mask, va, vb, rnd) \
  __extension__ ({ \
  (__m512d)__builtin_ia32_scalefpd512_mask( \
      (__v8df)(__m512d)(va), (__v8df)(__m512d)(vb), \
      (__v8df)(__m512d)(passthru), \
      (__mmask8)(mask), (int)(rnd)); })
6847*67e74705SXin Li 
/* Zero-masked form with explicit rnd: calls
 * __builtin_ia32_scalefpd512_mask with va and vb (as __v8df),
 * _mm512_setzero_pd() as the third operand, mask, and rnd cast to int.
 * NOTE(review): presumably lanes with a clear mask bit become zero --
 * confirm. */
#define _mm512_maskz_scalef_round_pd(mask, va, vb, rnd) __extension__ ({ \
  (__m512d)__builtin_ia32_scalefpd512_mask( \
      (__v8df)(__m512d)(va), (__v8df)(__m512d)(vb), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(mask), (int)(rnd)); })
6853*67e74705SXin Li 
6854*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_scalef_pd(__m512d __A,__m512d __B)6855*67e74705SXin Li _mm512_scalef_pd (__m512d __A, __m512d __B)
6856*67e74705SXin Li {
6857*67e74705SXin Li   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6858*67e74705SXin Li                 (__v8df) __B,
6859*67e74705SXin Li                 (__v8df)
6860*67e74705SXin Li                 _mm512_undefined_pd (),
6861*67e74705SXin Li                 (__mmask8) -1,
6862*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
6863*67e74705SXin Li }
6864*67e74705SXin Li 
6865*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_scalef_pd(__m512d __W,__mmask8 __U,__m512d __A,__m512d __B)6866*67e74705SXin Li _mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
6867*67e74705SXin Li {
6868*67e74705SXin Li   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6869*67e74705SXin Li                 (__v8df) __B,
6870*67e74705SXin Li                 (__v8df) __W,
6871*67e74705SXin Li                 (__mmask8) __U,
6872*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
6873*67e74705SXin Li }
6874*67e74705SXin Li 
6875*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_scalef_pd(__mmask8 __U,__m512d __A,__m512d __B)6876*67e74705SXin Li _mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
6877*67e74705SXin Li {
6878*67e74705SXin Li   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6879*67e74705SXin Li                 (__v8df) __B,
6880*67e74705SXin Li                 (__v8df)
6881*67e74705SXin Li                 _mm512_setzero_pd (),
6882*67e74705SXin Li                 (__mmask8) __U,
6883*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
6884*67e74705SXin Li }
6885*67e74705SXin Li 
/* Unmasked form with explicit rnd: calls __builtin_ia32_scalefps512_mask
 * with va and vb (as __v16sf), _mm512_undefined_ps() as the third operand,
 * an all-ones __mmask16, and rnd cast to int. */
#define _mm512_scalef_round_ps(va, vb, rnd) __extension__ ({ \
  (__m512)__builtin_ia32_scalefps512_mask( \
      (__v16sf)(__m512)(va), (__v16sf)(__m512)(vb), \
      (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, (int)(rnd)); })
6891*67e74705SXin Li 
6892*67e74705SXin Li #define _mm512_mask_scalef_round_ps(W, U, A, B, R) __extension__ ({ \
6893*67e74705SXin Li   (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
6894*67e74705SXin Li                                           (__v16sf)(__m512)(B), \
6895*67e74705SXin Li                                           (__v16sf)(__m512)(W), \
6896*67e74705SXin Li                                           (__mmask16)(U), (int)(R)); })
6897*67e74705SXin Li 
6898*67e74705SXin Li #define _mm512_maskz_scalef_round_ps(U, A, B, R) __extension__ ({ \
6899*67e74705SXin Li   (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
6900*67e74705SXin Li                                           (__v16sf)(__m512)(B), \
6901*67e74705SXin Li                                           (__v16sf)_mm512_setzero_ps(), \
6902*67e74705SXin Li                                           (__mmask16)(U), (int)(R)); })
6903*67e74705SXin Li 
6904*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_scalef_ps(__m512 __A,__m512 __B)6905*67e74705SXin Li _mm512_scalef_ps (__m512 __A, __m512 __B)
6906*67e74705SXin Li {
6907*67e74705SXin Li   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6908*67e74705SXin Li                (__v16sf) __B,
6909*67e74705SXin Li                (__v16sf)
6910*67e74705SXin Li                _mm512_undefined_ps (),
6911*67e74705SXin Li                (__mmask16) -1,
6912*67e74705SXin Li                _MM_FROUND_CUR_DIRECTION);
6913*67e74705SXin Li }
6914*67e74705SXin Li 
6915*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_scalef_ps(__m512 __W,__mmask16 __U,__m512 __A,__m512 __B)6916*67e74705SXin Li _mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
6917*67e74705SXin Li {
6918*67e74705SXin Li   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6919*67e74705SXin Li                (__v16sf) __B,
6920*67e74705SXin Li                (__v16sf) __W,
6921*67e74705SXin Li                (__mmask16) __U,
6922*67e74705SXin Li                _MM_FROUND_CUR_DIRECTION);
6923*67e74705SXin Li }
6924*67e74705SXin Li 
6925*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_scalef_ps(__mmask16 __U,__m512 __A,__m512 __B)6926*67e74705SXin Li _mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
6927*67e74705SXin Li {
6928*67e74705SXin Li   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6929*67e74705SXin Li                (__v16sf) __B,
6930*67e74705SXin Li                (__v16sf)
6931*67e74705SXin Li                _mm512_setzero_ps (),
6932*67e74705SXin Li                (__mmask16) __U,
6933*67e74705SXin Li                _MM_FROUND_CUR_DIRECTION);
6934*67e74705SXin Li }
6935*67e74705SXin Li 
/* Scalar-double scalef with explicit rounding operand R (cast to int).
   Unmasked: third operand is _mm_setzero_pd(), mask is all ones. */
#define _mm_scalef_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_scalefsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, (int)(R)); })
6941*67e74705SXin Li 
6942*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_scalef_sd(__m128d __A,__m128d __B)6943*67e74705SXin Li _mm_scalef_sd (__m128d __A, __m128d __B)
6944*67e74705SXin Li {
6945*67e74705SXin Li   return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A,
6946*67e74705SXin Li               (__v2df)( __B), (__v2df) _mm_setzero_pd(),
6947*67e74705SXin Li               (__mmask8) -1,
6948*67e74705SXin Li               _MM_FROUND_CUR_DIRECTION);
6949*67e74705SXin Li }
6950*67e74705SXin Li 
6951*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_scalef_sd(__m128d __W,__mmask8 __U,__m128d __A,__m128d __B)6952*67e74705SXin Li _mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
6953*67e74705SXin Li {
6954*67e74705SXin Li  return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6955*67e74705SXin Li                  (__v2df) __B,
6956*67e74705SXin Li                 (__v2df) __W,
6957*67e74705SXin Li                 (__mmask8) __U,
6958*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
6959*67e74705SXin Li }
6960*67e74705SXin Li 
/* Masked scalar-double scalef with explicit rounding operand R.
   W is the builtin's third vector operand, U the 8-bit mask. */
#define _mm_mask_scalef_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_scalefsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), (int)(R)); })
6966*67e74705SXin Li 
6967*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_scalef_sd(__mmask8 __U,__m128d __A,__m128d __B)6968*67e74705SXin Li _mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B)
6969*67e74705SXin Li {
6970*67e74705SXin Li  return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6971*67e74705SXin Li                  (__v2df) __B,
6972*67e74705SXin Li                 (__v2df) _mm_setzero_pd (),
6973*67e74705SXin Li                 (__mmask8) __U,
6974*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
6975*67e74705SXin Li }
6976*67e74705SXin Li 
/* Zero-masked scalar-double scalef with explicit rounding operand R.
   Third operand is _mm_setzero_pd(); U is the 8-bit mask. */
#define _mm_maskz_scalef_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_scalefsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), (int)(R)); })
6982*67e74705SXin Li 
/* Scalar-float scalef with explicit rounding operand R (cast to int).
   Unmasked: third operand is _mm_setzero_ps(), mask is all ones. */
#define _mm_scalef_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_scalefss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, (int)(R)); })
6988*67e74705SXin Li 
6989*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_scalef_ss(__m128 __A,__m128 __B)6990*67e74705SXin Li _mm_scalef_ss (__m128 __A, __m128 __B)
6991*67e74705SXin Li {
6992*67e74705SXin Li   return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A,
6993*67e74705SXin Li              (__v4sf)( __B), (__v4sf) _mm_setzero_ps(),
6994*67e74705SXin Li              (__mmask8) -1,
6995*67e74705SXin Li              _MM_FROUND_CUR_DIRECTION);
6996*67e74705SXin Li }
6997*67e74705SXin Li 
6998*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_scalef_ss(__m128 __W,__mmask8 __U,__m128 __A,__m128 __B)6999*67e74705SXin Li _mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7000*67e74705SXin Li {
7001*67e74705SXin Li  return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
7002*67e74705SXin Li                 (__v4sf) __B,
7003*67e74705SXin Li                 (__v4sf) __W,
7004*67e74705SXin Li                 (__mmask8) __U,
7005*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
7006*67e74705SXin Li }
7007*67e74705SXin Li 
/* Masked scalar-float scalef with explicit rounding operand R.
   W is the builtin's third vector operand, U the 8-bit mask. */
#define _mm_mask_scalef_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_scalefss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), (int)(R)); })
7013*67e74705SXin Li 
7014*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_scalef_ss(__mmask8 __U,__m128 __A,__m128 __B)7015*67e74705SXin Li _mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B)
7016*67e74705SXin Li {
7017*67e74705SXin Li  return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
7018*67e74705SXin Li                  (__v4sf) __B,
7019*67e74705SXin Li                 (__v4sf) _mm_setzero_ps (),
7020*67e74705SXin Li                 (__mmask8) __U,
7021*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
7022*67e74705SXin Li }
7023*67e74705SXin Li 
/* Zero-masked scalar-float scalef with explicit rounding operand R.
   Third operand is _mm_setzero_ps(); U is the 8-bit mask.  R is forwarded
   (cast to int) as the builtin's rounding argument, matching every other
   *_round_* macro in this family; the caller-supplied rounding mode must not
   be replaced by _MM_FROUND_CUR_DIRECTION. */
#define _mm_maskz_scalef_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4sf)_mm_setzero_ps(), \
                                             (__mmask8)(U), (int)(R)); })
7030*67e74705SXin Li 
/* 32-bit element srai via __builtin_ia32_psradi512_mask; B is cast to int
   and passed as the builtin's second (count) operand.
   Unmasked: third operand is a zero vector, mask is all ones. */
#define _mm512_srai_epi32(A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_psradi512_mask( \
      (__v16si)(__m512i)(A), (int)(B), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)-1); })

/* Masked form: W is the builtin's third vector operand, U the 16-bit mask. */
#define _mm512_mask_srai_epi32(W, U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_psradi512_mask( \
      (__v16si)(__m512i)(A), (int)(B), \
      (__v16si)(__m512i)(W), \
      (__mmask16)(U)); })

/* Zero-masked form: third operand is a zero vector, U the 16-bit mask. */
#define _mm512_maskz_srai_epi32(U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_psradi512_mask( \
      (__v16si)(__m512i)(A), (int)(B), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)(U)); })
7045*67e74705SXin Li 
/* 64-bit element srai via __builtin_ia32_psraqi512_mask; B is cast to int
   and passed as the builtin's second (count) operand.
   Unmasked: third operand is a zero vector, mask is all ones. */
#define _mm512_srai_epi64(A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_psraqi512_mask( \
      (__v8di)(__m512i)(A), (int)(B), \
      (__v8di)_mm512_setzero_si512(), \
      (__mmask8)-1); })

/* Masked form: W is the builtin's third vector operand, U the 8-bit mask. */
#define _mm512_mask_srai_epi64(W, U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_psraqi512_mask( \
      (__v8di)(__m512i)(A), (int)(B), \
      (__v8di)(__m512i)(W), \
      (__mmask8)(U)); })

/* Zero-masked form: third operand is a zero vector, U the 8-bit mask. */
#define _mm512_maskz_srai_epi64(U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_psraqi512_mask( \
      (__v8di)(__m512i)(A), (int)(B), \
      (__v8di)_mm512_setzero_si512(), \
      (__mmask8)(U)); })
7060*67e74705SXin Li 
/* shuffle_f32x4 family: wraps __builtin_ia32_shuf_f32x4_mask with sources A
   and B and the selector imm (cast to int).
   Unmasked: fourth operand is _mm512_undefined_ps(), mask is all ones. */
#define _mm512_shuffle_f32x4(A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_shuf_f32x4_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(imm), \
      (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1); })

/* Masked form: W is the builtin's fourth operand, U the 16-bit mask. */
#define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_shuf_f32x4_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(imm), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U)); })

/* Zero-masked form: fourth operand is _mm512_setzero_ps(), U the mask. */
#define _mm512_maskz_shuffle_f32x4(U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_shuf_f32x4_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(imm), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U)); })
7078*67e74705SXin Li 
/* shuffle_f64x2 family: wraps __builtin_ia32_shuf_f64x2_mask with sources A
   and B and the selector imm (cast to int).
   Unmasked: fourth operand is _mm512_undefined_pd(), mask is all ones. */
#define _mm512_shuffle_f64x2(A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_shuf_f64x2_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(imm), \
      (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1); })

/* Masked form: W is the builtin's fourth operand, U the 8-bit mask. */
#define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_shuf_f64x2_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(imm), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U)); })

/* Zero-masked form: fourth operand is _mm512_setzero_pd(), U the mask. */
#define _mm512_maskz_shuffle_f64x2(U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_shuf_f64x2_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(imm), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U)); })
7096*67e74705SXin Li 
/* shuffle_i32x4 family: wraps __builtin_ia32_shuf_i32x4_mask with sources A
   and B and the selector imm (cast to int).
   Unmasked: fourth operand is a zero vector, mask is all ones. */
#define _mm512_shuffle_i32x4(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_shuf_i32x4_mask( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (int)(imm), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)-1); })

/* Masked form: W is the builtin's fourth operand, U the 16-bit mask. */
#define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_shuf_i32x4_mask( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (int)(imm), \
      (__v16si)(__m512i)(W), \
      (__mmask16)(U)); })

/* Zero-masked form: fourth operand is a zero vector, U the 16-bit mask. */
#define _mm512_maskz_shuffle_i32x4(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_shuf_i32x4_mask( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (int)(imm), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)(U)); })
7114*67e74705SXin Li 
/* shuffle_i64x2 family: wraps __builtin_ia32_shuf_i64x2_mask with sources A
   and B and the selector imm (cast to int).
   Unmasked: fourth operand is a zero vector, mask is all ones. */
#define _mm512_shuffle_i64x2(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_shuf_i64x2_mask( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (int)(imm), \
      (__v8di)_mm512_setzero_si512(), \
      (__mmask8)-1); })

/* Masked form: W is the builtin's fourth operand, U the 8-bit mask. */
#define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_shuf_i64x2_mask( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (int)(imm), \
      (__v8di)(__m512i)(W), \
      (__mmask8)(U)); })

/* Zero-masked form: fourth operand is a zero vector, U the 8-bit mask. */
#define _mm512_maskz_shuffle_i64x2(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_shuf_i64x2_mask( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (int)(imm), \
      (__v8di)_mm512_setzero_si512(), \
      (__mmask8)(U)); })
7132*67e74705SXin Li 
/* Builds an 8-double result with __builtin_shufflevector over A and B.
   Shuffle indices 0..7 name elements of A and 8..15 elements of B, so even
   result lanes come from A and odd result lanes from B.  Bit k of M adds 0
   or 1 to the base index of result lane k, i.e. it picks the lower or upper
   element of the corresponding pair. */
#define _mm512_shuffle_pd(A, B, M) __extension__ ({ \
  (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \
                                   (__v8df)(__m512d)(B), \
                                   (((M) >> 0) & 0x1) + 0, \
                                   (((M) >> 1) & 0x1) + 8, \
                                   (((M) >> 2) & 0x1) + 2, \
                                   (((M) >> 3) & 0x1) + 10, \
                                   (((M) >> 4) & 0x1) + 4, \
                                   (((M) >> 5) & 0x1) + 12, \
                                   (((M) >> 6) & 0x1) + 6, \
                                   (((M) >> 7) & 0x1) + 14); })
7144*67e74705SXin Li 
/* Masked shuffle_pd: computes _mm512_shuffle_pd(A, B, M) and hands it to
   __builtin_ia32_selectpd_512 together with mask U and W as the alternative
   operand. */
#define _mm512_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
      (__v8df)(__m512d)(W)); })

/* Zero-masked shuffle_pd: same select, with _mm512_setzero_pd() as the
   alternative operand. */
#define _mm512_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
      (__v8df)_mm512_setzero_pd()); })
7154*67e74705SXin Li 
/* Builds a 16-float result with __builtin_shufflevector over A and B.
   Shuffle indices 0..15 name elements of A and 16..31 elements of B.  The
   same 8-bit selector M is applied to each group of four floats (base
   offsets 0, 4, 8, 12): result lanes 0 and 1 of a group are taken from A
   using M[1:0] and M[3:2]; lanes 2 and 3 are taken from B using M[5:4] and
   M[7:6].
   The result is a vector of floats, so it is cast to __m512 (not __m512d);
   with the wrong cast the value cannot be assigned to an __m512 unless lax
   vector conversions are enabled. */
#define _mm512_shuffle_ps(A, B, M) __extension__ ({ \
  (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \
                                  (__v16sf)(__m512)(B), \
                                  0  + (((M) >> 0) & 0x3), \
                                  0  + (((M) >> 2) & 0x3), \
                                  16 + (((M) >> 4) & 0x3), \
                                  16 + (((M) >> 6) & 0x3), \
                                  4  + (((M) >> 0) & 0x3), \
                                  4  + (((M) >> 2) & 0x3), \
                                  20 + (((M) >> 4) & 0x3), \
                                  20 + (((M) >> 6) & 0x3), \
                                  8  + (((M) >> 0) & 0x3), \
                                  8  + (((M) >> 2) & 0x3), \
                                  24 + (((M) >> 4) & 0x3), \
                                  24 + (((M) >> 6) & 0x3), \
                                  12 + (((M) >> 0) & 0x3), \
                                  12 + (((M) >> 2) & 0x3), \
                                  28 + (((M) >> 4) & 0x3), \
                                  28 + (((M) >> 6) & 0x3)); })
7174*67e74705SXin Li 
/* Masked shuffle_ps: computes _mm512_shuffle_ps(A, B, M) and hands it to
   __builtin_ia32_selectps_512 together with mask U and W as the alternative
   operand. */
#define _mm512_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
      (__v16sf)(__m512)(W)); })

/* Zero-masked shuffle_ps: same select, with _mm512_setzero_ps() as the
   alternative operand. */
#define _mm512_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
      (__v16sf)_mm512_setzero_ps()); })
7184*67e74705SXin Li 
/* Scalar-double sqrt with explicit rounding operand R (cast to int).
   Unmasked: third operand is _mm_setzero_pd(), mask is all ones. */
#define _mm_sqrt_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_sqrtsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, (int)(R)); })
7190*67e74705SXin Li 
7191*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_sqrt_sd(__m128d __W,__mmask8 __U,__m128d __A,__m128d __B)7192*67e74705SXin Li _mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7193*67e74705SXin Li {
7194*67e74705SXin Li  return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
7195*67e74705SXin Li                  (__v2df) __B,
7196*67e74705SXin Li                 (__v2df) __W,
7197*67e74705SXin Li                 (__mmask8) __U,
7198*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
7199*67e74705SXin Li }
7200*67e74705SXin Li 
/* Masked scalar-double sqrt with explicit rounding operand R.
   W is the builtin's third vector operand, U the 8-bit mask. */
#define _mm_mask_sqrt_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_sqrtsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), (int)(R)); })
7206*67e74705SXin Li 
7207*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_sqrt_sd(__mmask8 __U,__m128d __A,__m128d __B)7208*67e74705SXin Li _mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B)
7209*67e74705SXin Li {
7210*67e74705SXin Li  return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
7211*67e74705SXin Li                  (__v2df) __B,
7212*67e74705SXin Li                 (__v2df) _mm_setzero_pd (),
7213*67e74705SXin Li                 (__mmask8) __U,
7214*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
7215*67e74705SXin Li }
7216*67e74705SXin Li 
/* Zero-masked scalar-double sqrt with explicit rounding operand R.
   Third operand is _mm_setzero_pd(); U is the 8-bit mask. */
#define _mm_maskz_sqrt_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_sqrtsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), (int)(R)); })
7222*67e74705SXin Li 
/* Scalar-float sqrt with explicit rounding operand R (cast to int).
   Unmasked: third operand is _mm_setzero_ps(), mask is all ones. */
#define _mm_sqrt_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_sqrtss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, (int)(R)); })
7228*67e74705SXin Li 
7229*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_sqrt_ss(__m128 __W,__mmask8 __U,__m128 __A,__m128 __B)7230*67e74705SXin Li _mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7231*67e74705SXin Li {
7232*67e74705SXin Li  return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
7233*67e74705SXin Li                  (__v4sf) __B,
7234*67e74705SXin Li                 (__v4sf) __W,
7235*67e74705SXin Li                 (__mmask8) __U,
7236*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
7237*67e74705SXin Li }
7238*67e74705SXin Li 
/* Masked scalar-float sqrt with explicit rounding operand R.
   W is the builtin's third vector operand, U the 8-bit mask. */
#define _mm_mask_sqrt_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_sqrtss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), (int)(R)); })
7244*67e74705SXin Li 
7245*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_sqrt_ss(__mmask8 __U,__m128 __A,__m128 __B)7246*67e74705SXin Li _mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B)
7247*67e74705SXin Li {
7248*67e74705SXin Li  return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
7249*67e74705SXin Li                  (__v4sf) __B,
7250*67e74705SXin Li                 (__v4sf) _mm_setzero_ps (),
7251*67e74705SXin Li                 (__mmask8) __U,
7252*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
7253*67e74705SXin Li }
7254*67e74705SXin Li 
/* Zero-masked scalar-float sqrt with explicit rounding operand R.
   Third operand is _mm_setzero_ps(); U is the 8-bit mask. */
#define _mm_maskz_sqrt_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_sqrtss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), (int)(R)); })
7260*67e74705SXin Li 
7261*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_broadcast_f32x4(__m128 __A)7262*67e74705SXin Li _mm512_broadcast_f32x4 (__m128 __A)
7263*67e74705SXin Li {
7264*67e74705SXin Li   return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
7265*67e74705SXin Li                  (__v16sf)
7266*67e74705SXin Li                  _mm512_undefined_ps (),
7267*67e74705SXin Li                  (__mmask16) -1);
7268*67e74705SXin Li }
7269*67e74705SXin Li 
7270*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_f32x4(__m512 __O,__mmask16 __M,__m128 __A)7271*67e74705SXin Li _mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A)
7272*67e74705SXin Li {
7273*67e74705SXin Li   return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
7274*67e74705SXin Li                  (__v16sf) __O,
7275*67e74705SXin Li                  __M);
7276*67e74705SXin Li }
7277*67e74705SXin Li 
7278*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_broadcast_f32x4(__mmask16 __M,__m128 __A)7279*67e74705SXin Li _mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A)
7280*67e74705SXin Li {
7281*67e74705SXin Li   return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
7282*67e74705SXin Li                  (__v16sf)
7283*67e74705SXin Li                  _mm512_setzero_ps (),
7284*67e74705SXin Li                  __M);
7285*67e74705SXin Li }
7286*67e74705SXin Li 
7287*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_broadcast_f64x4(__m256d __A)7288*67e74705SXin Li _mm512_broadcast_f64x4 (__m256d __A)
7289*67e74705SXin Li {
7290*67e74705SXin Li   return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
7291*67e74705SXin Li                   (__v8df)
7292*67e74705SXin Li                   _mm512_undefined_pd (),
7293*67e74705SXin Li                   (__mmask8) -1);
7294*67e74705SXin Li }
7295*67e74705SXin Li 
7296*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_f64x4(__m512d __O,__mmask8 __M,__m256d __A)7297*67e74705SXin Li _mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A)
7298*67e74705SXin Li {
7299*67e74705SXin Li   return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
7300*67e74705SXin Li                   (__v8df) __O,
7301*67e74705SXin Li                   __M);
7302*67e74705SXin Li }
7303*67e74705SXin Li 
7304*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_broadcast_f64x4(__mmask8 __M,__m256d __A)7305*67e74705SXin Li _mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A)
7306*67e74705SXin Li {
7307*67e74705SXin Li   return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
7308*67e74705SXin Li                   (__v8df)
7309*67e74705SXin Li                   _mm512_setzero_pd (),
7310*67e74705SXin Li                   __M);
7311*67e74705SXin Li }
7312*67e74705SXin Li 
7313*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_broadcast_i32x4(__m128i __A)7314*67e74705SXin Li _mm512_broadcast_i32x4 (__m128i __A)
7315*67e74705SXin Li {
7316*67e74705SXin Li   return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
7317*67e74705SXin Li                   (__v16si)
7318*67e74705SXin Li                   _mm512_undefined_epi32 (),
7319*67e74705SXin Li                   (__mmask16) -1);
7320*67e74705SXin Li }
7321*67e74705SXin Li 
7322*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_i32x4(__m512i __O,__mmask16 __M,__m128i __A)7323*67e74705SXin Li _mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A)
7324*67e74705SXin Li {
7325*67e74705SXin Li   return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
7326*67e74705SXin Li                   (__v16si) __O,
7327*67e74705SXin Li                   __M);
7328*67e74705SXin Li }
7329*67e74705SXin Li 
7330*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_broadcast_i32x4(__mmask16 __M,__m128i __A)7331*67e74705SXin Li _mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A)
7332*67e74705SXin Li {
7333*67e74705SXin Li   return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
7334*67e74705SXin Li                   (__v16si)
7335*67e74705SXin Li                   _mm512_setzero_si512 (),
7336*67e74705SXin Li                   __M);
7337*67e74705SXin Li }
7338*67e74705SXin Li 
7339*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_broadcast_i64x4(__m256i __A)7340*67e74705SXin Li _mm512_broadcast_i64x4 (__m256i __A)
7341*67e74705SXin Li {
7342*67e74705SXin Li   return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
7343*67e74705SXin Li                   (__v8di)
7344*67e74705SXin Li                   _mm512_undefined_epi32 (),
7345*67e74705SXin Li                   (__mmask8) -1);
7346*67e74705SXin Li }
7347*67e74705SXin Li 
7348*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_i64x4(__m512i __O,__mmask8 __M,__m256i __A)7349*67e74705SXin Li _mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A)
7350*67e74705SXin Li {
7351*67e74705SXin Li   return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
7352*67e74705SXin Li                   (__v8di) __O,
7353*67e74705SXin Li                   __M);
7354*67e74705SXin Li }
7355*67e74705SXin Li 
7356*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_broadcast_i64x4(__mmask8 __M,__m256i __A)7357*67e74705SXin Li _mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A)
7358*67e74705SXin Li {
7359*67e74705SXin Li   return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
7360*67e74705SXin Li                   (__v8di)
7361*67e74705SXin Li                   _mm512_setzero_si512 (),
7362*67e74705SXin Li                   __M);
7363*67e74705SXin Li }
7364*67e74705SXin Li 
7365*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_broadcastsd_pd(__m512d __O,__mmask8 __M,__m128d __A)7366*67e74705SXin Li _mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
7367*67e74705SXin Li {
7368*67e74705SXin Li   return (__m512d)__builtin_ia32_selectpd_512(__M,
7369*67e74705SXin Li                                               (__v8df) _mm512_broadcastsd_pd(__A),
7370*67e74705SXin Li                                               (__v8df) __O);
7371*67e74705SXin Li }
7372*67e74705SXin Li 
7373*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_broadcastsd_pd(__mmask8 __M,__m128d __A)7374*67e74705SXin Li _mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
7375*67e74705SXin Li {
7376*67e74705SXin Li   return (__m512d)__builtin_ia32_selectpd_512(__M,
7377*67e74705SXin Li                                               (__v8df) _mm512_broadcastsd_pd(__A),
7378*67e74705SXin Li                                               (__v8df) _mm512_setzero_pd());
7379*67e74705SXin Li }
7380*67e74705SXin Li 
7381*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_broadcastss_ps(__m512 __O,__mmask16 __M,__m128 __A)7382*67e74705SXin Li _mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
7383*67e74705SXin Li {
7384*67e74705SXin Li   return (__m512)__builtin_ia32_selectps_512(__M,
7385*67e74705SXin Li                                              (__v16sf) _mm512_broadcastss_ps(__A),
7386*67e74705SXin Li                                              (__v16sf) __O);
7387*67e74705SXin Li }
7388*67e74705SXin Li 
7389*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_broadcastss_ps(__mmask16 __M,__m128 __A)7390*67e74705SXin Li _mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
7391*67e74705SXin Li {
7392*67e74705SXin Li   return (__m512)__builtin_ia32_selectps_512(__M,
7393*67e74705SXin Li                                              (__v16sf) _mm512_broadcastss_ps(__A),
7394*67e74705SXin Li                                              (__v16sf) _mm512_setzero_ps());
7395*67e74705SXin Li }
7396*67e74705SXin Li 
7397*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_cvtsepi32_epi8(__m512i __A)7398*67e74705SXin Li _mm512_cvtsepi32_epi8 (__m512i __A)
7399*67e74705SXin Li {
7400*67e74705SXin Li   return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
7401*67e74705SXin Li                (__v16qi) _mm_undefined_si128 (),
7402*67e74705SXin Li                (__mmask16) -1);
7403*67e74705SXin Li }
7404*67e74705SXin Li 
7405*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi32_epi8(__m128i __O,__mmask16 __M,__m512i __A)7406*67e74705SXin Li _mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
7407*67e74705SXin Li {
7408*67e74705SXin Li   return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
7409*67e74705SXin Li                (__v16qi) __O, __M);
7410*67e74705SXin Li }
7411*67e74705SXin Li 
7412*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtsepi32_epi8(__mmask16 __M,__m512i __A)7413*67e74705SXin Li _mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
7414*67e74705SXin Li {
7415*67e74705SXin Li   return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
7416*67e74705SXin Li                (__v16qi) _mm_setzero_si128 (),
7417*67e74705SXin Li                __M);
7418*67e74705SXin Li }
7419*67e74705SXin Li 
7420*67e74705SXin Li static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi32_storeu_epi8(void * __P,__mmask16 __M,__m512i __A)7421*67e74705SXin Li _mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
7422*67e74705SXin Li {
7423*67e74705SXin Li   __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7424*67e74705SXin Li }
7425*67e74705SXin Li 
7426*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_cvtsepi32_epi16(__m512i __A)7427*67e74705SXin Li _mm512_cvtsepi32_epi16 (__m512i __A)
7428*67e74705SXin Li {
7429*67e74705SXin Li   return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
7430*67e74705SXin Li                (__v16hi) _mm256_undefined_si256 (),
7431*67e74705SXin Li                (__mmask16) -1);
7432*67e74705SXin Li }
7433*67e74705SXin Li 
7434*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi32_epi16(__m256i __O,__mmask16 __M,__m512i __A)7435*67e74705SXin Li _mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
7436*67e74705SXin Li {
7437*67e74705SXin Li   return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
7438*67e74705SXin Li                (__v16hi) __O, __M);
7439*67e74705SXin Li }
7440*67e74705SXin Li 
7441*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtsepi32_epi16(__mmask16 __M,__m512i __A)7442*67e74705SXin Li _mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
7443*67e74705SXin Li {
7444*67e74705SXin Li   return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
7445*67e74705SXin Li                (__v16hi) _mm256_setzero_si256 (),
7446*67e74705SXin Li                __M);
7447*67e74705SXin Li }
7448*67e74705SXin Li 
7449*67e74705SXin Li static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi32_storeu_epi16(void * __P,__mmask16 __M,__m512i __A)7450*67e74705SXin Li _mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
7451*67e74705SXin Li {
7452*67e74705SXin Li   __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
7453*67e74705SXin Li }
7454*67e74705SXin Li 
7455*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_cvtsepi64_epi8(__m512i __A)7456*67e74705SXin Li _mm512_cvtsepi64_epi8 (__m512i __A)
7457*67e74705SXin Li {
7458*67e74705SXin Li   return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7459*67e74705SXin Li                (__v16qi) _mm_undefined_si128 (),
7460*67e74705SXin Li                (__mmask8) -1);
7461*67e74705SXin Li }
7462*67e74705SXin Li 
7463*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi64_epi8(__m128i __O,__mmask8 __M,__m512i __A)7464*67e74705SXin Li _mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
7465*67e74705SXin Li {
7466*67e74705SXin Li   return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7467*67e74705SXin Li                (__v16qi) __O, __M);
7468*67e74705SXin Li }
7469*67e74705SXin Li 
7470*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtsepi64_epi8(__mmask8 __M,__m512i __A)7471*67e74705SXin Li _mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
7472*67e74705SXin Li {
7473*67e74705SXin Li   return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7474*67e74705SXin Li                (__v16qi) _mm_setzero_si128 (),
7475*67e74705SXin Li                __M);
7476*67e74705SXin Li }
7477*67e74705SXin Li 
7478*67e74705SXin Li static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi64_storeu_epi8(void * __P,__mmask8 __M,__m512i __A)7479*67e74705SXin Li _mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
7480*67e74705SXin Li {
7481*67e74705SXin Li   __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7482*67e74705SXin Li }
7483*67e74705SXin Li 
7484*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_cvtsepi64_epi32(__m512i __A)7485*67e74705SXin Li _mm512_cvtsepi64_epi32 (__m512i __A)
7486*67e74705SXin Li {
7487*67e74705SXin Li   __v8si __O;
7488*67e74705SXin Li   return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7489*67e74705SXin Li                (__v8si) _mm256_undefined_si256 (),
7490*67e74705SXin Li                (__mmask8) -1);
7491*67e74705SXin Li }
7492*67e74705SXin Li 
7493*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi64_epi32(__m256i __O,__mmask8 __M,__m512i __A)7494*67e74705SXin Li _mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7495*67e74705SXin Li {
7496*67e74705SXin Li   return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7497*67e74705SXin Li                (__v8si) __O, __M);
7498*67e74705SXin Li }
7499*67e74705SXin Li 
7500*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtsepi64_epi32(__mmask8 __M,__m512i __A)7501*67e74705SXin Li _mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
7502*67e74705SXin Li {
7503*67e74705SXin Li   return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7504*67e74705SXin Li                (__v8si) _mm256_setzero_si256 (),
7505*67e74705SXin Li                __M);
7506*67e74705SXin Li }
7507*67e74705SXin Li 
7508*67e74705SXin Li static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi64_storeu_epi32(void * __P,__mmask8 __M,__m512i __A)7509*67e74705SXin Li _mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
7510*67e74705SXin Li {
7511*67e74705SXin Li   __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
7512*67e74705SXin Li }
7513*67e74705SXin Li 
7514*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_cvtsepi64_epi16(__m512i __A)7515*67e74705SXin Li _mm512_cvtsepi64_epi16 (__m512i __A)
7516*67e74705SXin Li {
7517*67e74705SXin Li   return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7518*67e74705SXin Li                (__v8hi) _mm_undefined_si128 (),
7519*67e74705SXin Li                (__mmask8) -1);
7520*67e74705SXin Li }
7521*67e74705SXin Li 
7522*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi64_epi16(__m128i __O,__mmask8 __M,__m512i __A)7523*67e74705SXin Li _mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7524*67e74705SXin Li {
7525*67e74705SXin Li   return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7526*67e74705SXin Li                (__v8hi) __O, __M);
7527*67e74705SXin Li }
7528*67e74705SXin Li 
7529*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtsepi64_epi16(__mmask8 __M,__m512i __A)7530*67e74705SXin Li _mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
7531*67e74705SXin Li {
7532*67e74705SXin Li   return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7533*67e74705SXin Li                (__v8hi) _mm_setzero_si128 (),
7534*67e74705SXin Li                __M);
7535*67e74705SXin Li }
7536*67e74705SXin Li 
7537*67e74705SXin Li static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi64_storeu_epi16(void * __P,__mmask8 __M,__m512i __A)7538*67e74705SXin Li _mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
7539*67e74705SXin Li {
7540*67e74705SXin Li   __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
7541*67e74705SXin Li }
7542*67e74705SXin Li 
7543*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_cvtusepi32_epi8(__m512i __A)7544*67e74705SXin Li _mm512_cvtusepi32_epi8 (__m512i __A)
7545*67e74705SXin Li {
7546*67e74705SXin Li   return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7547*67e74705SXin Li                 (__v16qi) _mm_undefined_si128 (),
7548*67e74705SXin Li                 (__mmask16) -1);
7549*67e74705SXin Li }
7550*67e74705SXin Li 
7551*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi32_epi8(__m128i __O,__mmask16 __M,__m512i __A)7552*67e74705SXin Li _mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
7553*67e74705SXin Li {
7554*67e74705SXin Li   return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7555*67e74705SXin Li                 (__v16qi) __O,
7556*67e74705SXin Li                 __M);
7557*67e74705SXin Li }
7558*67e74705SXin Li 
7559*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtusepi32_epi8(__mmask16 __M,__m512i __A)7560*67e74705SXin Li _mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
7561*67e74705SXin Li {
7562*67e74705SXin Li   return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7563*67e74705SXin Li                 (__v16qi) _mm_setzero_si128 (),
7564*67e74705SXin Li                 __M);
7565*67e74705SXin Li }
7566*67e74705SXin Li 
7567*67e74705SXin Li static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi32_storeu_epi8(void * __P,__mmask16 __M,__m512i __A)7568*67e74705SXin Li _mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
7569*67e74705SXin Li {
7570*67e74705SXin Li   __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7571*67e74705SXin Li }
7572*67e74705SXin Li 
7573*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_cvtusepi32_epi16(__m512i __A)7574*67e74705SXin Li _mm512_cvtusepi32_epi16 (__m512i __A)
7575*67e74705SXin Li {
7576*67e74705SXin Li   return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7577*67e74705SXin Li                 (__v16hi) _mm256_undefined_si256 (),
7578*67e74705SXin Li                 (__mmask16) -1);
7579*67e74705SXin Li }
7580*67e74705SXin Li 
7581*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi32_epi16(__m256i __O,__mmask16 __M,__m512i __A)7582*67e74705SXin Li _mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
7583*67e74705SXin Li {
7584*67e74705SXin Li   return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7585*67e74705SXin Li                 (__v16hi) __O,
7586*67e74705SXin Li                 __M);
7587*67e74705SXin Li }
7588*67e74705SXin Li 
7589*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtusepi32_epi16(__mmask16 __M,__m512i __A)7590*67e74705SXin Li _mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
7591*67e74705SXin Li {
7592*67e74705SXin Li   return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7593*67e74705SXin Li                 (__v16hi) _mm256_setzero_si256 (),
7594*67e74705SXin Li                 __M);
7595*67e74705SXin Li }
7596*67e74705SXin Li 
7597*67e74705SXin Li static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi32_storeu_epi16(void * __P,__mmask16 __M,__m512i __A)7598*67e74705SXin Li _mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
7599*67e74705SXin Li {
7600*67e74705SXin Li   __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
7601*67e74705SXin Li }
7602*67e74705SXin Li 
7603*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_cvtusepi64_epi8(__m512i __A)7604*67e74705SXin Li _mm512_cvtusepi64_epi8 (__m512i __A)
7605*67e74705SXin Li {
7606*67e74705SXin Li   return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7607*67e74705SXin Li                 (__v16qi) _mm_undefined_si128 (),
7608*67e74705SXin Li                 (__mmask8) -1);
7609*67e74705SXin Li }
7610*67e74705SXin Li 
7611*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi64_epi8(__m128i __O,__mmask8 __M,__m512i __A)7612*67e74705SXin Li _mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
7613*67e74705SXin Li {
7614*67e74705SXin Li   return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7615*67e74705SXin Li                 (__v16qi) __O,
7616*67e74705SXin Li                 __M);
7617*67e74705SXin Li }
7618*67e74705SXin Li 
7619*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtusepi64_epi8(__mmask8 __M,__m512i __A)7620*67e74705SXin Li _mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
7621*67e74705SXin Li {
7622*67e74705SXin Li   return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7623*67e74705SXin Li                 (__v16qi) _mm_setzero_si128 (),
7624*67e74705SXin Li                 __M);
7625*67e74705SXin Li }
7626*67e74705SXin Li 
7627*67e74705SXin Li static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi64_storeu_epi8(void * __P,__mmask8 __M,__m512i __A)7628*67e74705SXin Li _mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
7629*67e74705SXin Li {
7630*67e74705SXin Li   __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7631*67e74705SXin Li }
7632*67e74705SXin Li 
7633*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_cvtusepi64_epi32(__m512i __A)7634*67e74705SXin Li _mm512_cvtusepi64_epi32 (__m512i __A)
7635*67e74705SXin Li {
7636*67e74705SXin Li   return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7637*67e74705SXin Li                 (__v8si) _mm256_undefined_si256 (),
7638*67e74705SXin Li                 (__mmask8) -1);
7639*67e74705SXin Li }
7640*67e74705SXin Li 
7641*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi64_epi32(__m256i __O,__mmask8 __M,__m512i __A)7642*67e74705SXin Li _mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7643*67e74705SXin Li {
7644*67e74705SXin Li   return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7645*67e74705SXin Li                 (__v8si) __O, __M);
7646*67e74705SXin Li }
7647*67e74705SXin Li 
7648*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtusepi64_epi32(__mmask8 __M,__m512i __A)7649*67e74705SXin Li _mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
7650*67e74705SXin Li {
7651*67e74705SXin Li   return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7652*67e74705SXin Li                 (__v8si) _mm256_setzero_si256 (),
7653*67e74705SXin Li                 __M);
7654*67e74705SXin Li }
7655*67e74705SXin Li 
7656*67e74705SXin Li static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi64_storeu_epi32(void * __P,__mmask8 __M,__m512i __A)7657*67e74705SXin Li _mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
7658*67e74705SXin Li {
7659*67e74705SXin Li   __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
7660*67e74705SXin Li }
7661*67e74705SXin Li 
7662*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_cvtusepi64_epi16(__m512i __A)7663*67e74705SXin Li _mm512_cvtusepi64_epi16 (__m512i __A)
7664*67e74705SXin Li {
7665*67e74705SXin Li   return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7666*67e74705SXin Li                 (__v8hi) _mm_undefined_si128 (),
7667*67e74705SXin Li                 (__mmask8) -1);
7668*67e74705SXin Li }
7669*67e74705SXin Li 
7670*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi64_epi16(__m128i __O,__mmask8 __M,__m512i __A)7671*67e74705SXin Li _mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7672*67e74705SXin Li {
7673*67e74705SXin Li   return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7674*67e74705SXin Li                 (__v8hi) __O, __M);
7675*67e74705SXin Li }
7676*67e74705SXin Li 
7677*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtusepi64_epi16(__mmask8 __M,__m512i __A)7678*67e74705SXin Li _mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
7679*67e74705SXin Li {
7680*67e74705SXin Li   return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7681*67e74705SXin Li                 (__v8hi) _mm_setzero_si128 (),
7682*67e74705SXin Li                 __M);
7683*67e74705SXin Li }
7684*67e74705SXin Li 
7685*67e74705SXin Li static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi64_storeu_epi16(void * __P,__mmask8 __M,__m512i __A)7686*67e74705SXin Li _mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
7687*67e74705SXin Li {
7688*67e74705SXin Li   __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
7689*67e74705SXin Li }
7690*67e74705SXin Li 
7691*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_cvtepi32_epi8(__m512i __A)7692*67e74705SXin Li _mm512_cvtepi32_epi8 (__m512i __A)
7693*67e74705SXin Li {
7694*67e74705SXin Li   return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7695*67e74705SXin Li               (__v16qi) _mm_undefined_si128 (),
7696*67e74705SXin Li               (__mmask16) -1);
7697*67e74705SXin Li }
7698*67e74705SXin Li 
7699*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi32_epi8(__m128i __O,__mmask16 __M,__m512i __A)7700*67e74705SXin Li _mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
7701*67e74705SXin Li {
7702*67e74705SXin Li   return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7703*67e74705SXin Li               (__v16qi) __O, __M);
7704*67e74705SXin Li }
7705*67e74705SXin Li 
7706*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi32_epi8(__mmask16 __M,__m512i __A)7707*67e74705SXin Li _mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
7708*67e74705SXin Li {
7709*67e74705SXin Li   return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7710*67e74705SXin Li               (__v16qi) _mm_setzero_si128 (),
7711*67e74705SXin Li               __M);
7712*67e74705SXin Li }
7713*67e74705SXin Li 
7714*67e74705SXin Li static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi32_storeu_epi8(void * __P,__mmask16 __M,__m512i __A)7715*67e74705SXin Li _mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
7716*67e74705SXin Li {
7717*67e74705SXin Li   __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7718*67e74705SXin Li }
7719*67e74705SXin Li 
7720*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_cvtepi32_epi16(__m512i __A)7721*67e74705SXin Li _mm512_cvtepi32_epi16 (__m512i __A)
7722*67e74705SXin Li {
7723*67e74705SXin Li   return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7724*67e74705SXin Li               (__v16hi) _mm256_undefined_si256 (),
7725*67e74705SXin Li               (__mmask16) -1);
7726*67e74705SXin Li }
7727*67e74705SXin Li 
7728*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi32_epi16(__m256i __O,__mmask16 __M,__m512i __A)7729*67e74705SXin Li _mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
7730*67e74705SXin Li {
7731*67e74705SXin Li   return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7732*67e74705SXin Li               (__v16hi) __O, __M);
7733*67e74705SXin Li }
7734*67e74705SXin Li 
7735*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi32_epi16(__mmask16 __M,__m512i __A)7736*67e74705SXin Li _mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
7737*67e74705SXin Li {
7738*67e74705SXin Li   return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7739*67e74705SXin Li               (__v16hi) _mm256_setzero_si256 (),
7740*67e74705SXin Li               __M);
7741*67e74705SXin Li }
7742*67e74705SXin Li 
7743*67e74705SXin Li static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi32_storeu_epi16(void * __P,__mmask16 __M,__m512i __A)7744*67e74705SXin Li _mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
7745*67e74705SXin Li {
7746*67e74705SXin Li   __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
7747*67e74705SXin Li }
7748*67e74705SXin Li 
7749*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_cvtepi64_epi8(__m512i __A)7750*67e74705SXin Li _mm512_cvtepi64_epi8 (__m512i __A)
7751*67e74705SXin Li {
7752*67e74705SXin Li   return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7753*67e74705SXin Li               (__v16qi) _mm_undefined_si128 (),
7754*67e74705SXin Li               (__mmask8) -1);
7755*67e74705SXin Li }
7756*67e74705SXin Li 
7757*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi64_epi8(__m128i __O,__mmask8 __M,__m512i __A)7758*67e74705SXin Li _mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
7759*67e74705SXin Li {
7760*67e74705SXin Li   return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7761*67e74705SXin Li               (__v16qi) __O, __M);
7762*67e74705SXin Li }
7763*67e74705SXin Li 
7764*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi64_epi8(__mmask8 __M,__m512i __A)7765*67e74705SXin Li _mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
7766*67e74705SXin Li {
7767*67e74705SXin Li   return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7768*67e74705SXin Li               (__v16qi) _mm_setzero_si128 (),
7769*67e74705SXin Li               __M);
7770*67e74705SXin Li }
7771*67e74705SXin Li 
7772*67e74705SXin Li static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi64_storeu_epi8(void * __P,__mmask8 __M,__m512i __A)7773*67e74705SXin Li _mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
7774*67e74705SXin Li {
7775*67e74705SXin Li   __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7776*67e74705SXin Li }
7777*67e74705SXin Li 
7778*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_cvtepi64_epi32(__m512i __A)7779*67e74705SXin Li _mm512_cvtepi64_epi32 (__m512i __A)
7780*67e74705SXin Li {
7781*67e74705SXin Li   return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7782*67e74705SXin Li               (__v8si) _mm256_undefined_si256 (),
7783*67e74705SXin Li               (__mmask8) -1);
7784*67e74705SXin Li }
7785*67e74705SXin Li 
7786*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi64_epi32(__m256i __O,__mmask8 __M,__m512i __A)7787*67e74705SXin Li _mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7788*67e74705SXin Li {
7789*67e74705SXin Li   return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7790*67e74705SXin Li               (__v8si) __O, __M);
7791*67e74705SXin Li }
7792*67e74705SXin Li 
7793*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi64_epi32(__mmask8 __M,__m512i __A)7794*67e74705SXin Li _mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
7795*67e74705SXin Li {
7796*67e74705SXin Li   return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7797*67e74705SXin Li               (__v8si) _mm256_setzero_si256 (),
7798*67e74705SXin Li               __M);
7799*67e74705SXin Li }
7800*67e74705SXin Li 
7801*67e74705SXin Li static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi64_storeu_epi32(void * __P,__mmask8 __M,__m512i __A)7802*67e74705SXin Li _mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
7803*67e74705SXin Li {
7804*67e74705SXin Li   __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
7805*67e74705SXin Li }
7806*67e74705SXin Li 
7807*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_cvtepi64_epi16(__m512i __A)7808*67e74705SXin Li _mm512_cvtepi64_epi16 (__m512i __A)
7809*67e74705SXin Li {
7810*67e74705SXin Li   return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7811*67e74705SXin Li               (__v8hi) _mm_undefined_si128 (),
7812*67e74705SXin Li               (__mmask8) -1);
7813*67e74705SXin Li }
7814*67e74705SXin Li 
7815*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi64_epi16(__m128i __O,__mmask8 __M,__m512i __A)7816*67e74705SXin Li _mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7817*67e74705SXin Li {
7818*67e74705SXin Li   return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7819*67e74705SXin Li               (__v8hi) __O, __M);
7820*67e74705SXin Li }
7821*67e74705SXin Li 
7822*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi64_epi16(__mmask8 __M,__m512i __A)7823*67e74705SXin Li _mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
7824*67e74705SXin Li {
7825*67e74705SXin Li   return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7826*67e74705SXin Li               (__v8hi) _mm_setzero_si128 (),
7827*67e74705SXin Li               __M);
7828*67e74705SXin Li }
7829*67e74705SXin Li 
7830*67e74705SXin Li static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi64_storeu_epi16(void * __P,__mmask8 __M,__m512i __A)7831*67e74705SXin Li _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
7832*67e74705SXin Li {
7833*67e74705SXin Li   __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
7834*67e74705SXin Li }
7835*67e74705SXin Li 
/* Unmasked form.  Expands to one call of __builtin_ia32_extracti32x4_mask
   with: A (cast via __m512i to __v16si), imm (cast to int), an undefined
   __v4si vector and an all-ones __mmask8.  The expression has type __m128i;
   each macro argument is expanded exactly once. */
#define _mm512_extracti32x4_epi32(A, imm) \
  __extension__ ({ \
    (__m128i)__builtin_ia32_extracti32x4_mask( \
        (__v16si)(__m512i)(A), \
        (int)(imm), \
        (__v4si)_mm_undefined_si128(), \
        (__mmask8)-1); \
  })
7840*67e74705SXin Li 
/* _mask_ form.  Expands to one call of __builtin_ia32_extracti32x4_mask
   with: A (cast via __m512i to __v16si), imm (cast to int), W (cast via
   __m128i to __v4si) and U (cast to __mmask8).  The expression has type
   __m128i; each macro argument is expanded exactly once. */
#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \
  __extension__ ({ \
    (__m128i)__builtin_ia32_extracti32x4_mask( \
        (__v16si)(__m512i)(A), \
        (int)(imm), \
        (__v4si)(__m128i)(W), \
        (__mmask8)(U)); \
  })
7845*67e74705SXin Li 
/* _maskz_ form.  Expands to one call of __builtin_ia32_extracti32x4_mask
   with: A (cast via __m512i to __v16si), imm (cast to int), a zeroed __v4si
   vector and U (cast to __mmask8).  The expression has type __m128i; each
   macro argument is expanded exactly once. */
#define _mm512_maskz_extracti32x4_epi32(U, A, imm) \
  __extension__ ({ \
    (__m128i)__builtin_ia32_extracti32x4_mask( \
        (__v16si)(__m512i)(A), \
        (int)(imm), \
        (__v4si)_mm_setzero_si128(), \
        (__mmask8)(U)); \
  })
7850*67e74705SXin Li 
/* Unmasked form.  Expands to one call of __builtin_ia32_extracti64x4_mask
   with: A (cast via __m512i to __v8di), imm (cast to int), an undefined
   __v4di vector and an all-ones __mmask8.  The expression has type __m256i;
   each macro argument is expanded exactly once. */
#define _mm512_extracti64x4_epi64(A, imm) \
  __extension__ ({ \
    (__m256i)__builtin_ia32_extracti64x4_mask( \
        (__v8di)(__m512i)(A), \
        (int)(imm), \
        (__v4di)_mm256_undefined_si256(), \
        (__mmask8)-1); \
  })
7855*67e74705SXin Li 
/* _mask_ form.  Expands to one call of __builtin_ia32_extracti64x4_mask
   with: A (cast via __m512i to __v8di), imm (cast to int), W (cast via
   __m256i to __v4di) and U (cast to __mmask8).  The expression has type
   __m256i; each macro argument is expanded exactly once. */
#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \
  __extension__ ({ \
    (__m256i)__builtin_ia32_extracti64x4_mask( \
        (__v8di)(__m512i)(A), \
        (int)(imm), \
        (__v4di)(__m256i)(W), \
        (__mmask8)(U)); \
  })
7860*67e74705SXin Li 
/* _maskz_ form.  Expands to one call of __builtin_ia32_extracti64x4_mask
   with: A (cast via __m512i to __v8di), imm (cast to int), a zeroed __v4di
   vector and U (cast to __mmask8).  The expression has type __m256i; each
   macro argument is expanded exactly once. */
#define _mm512_maskz_extracti64x4_epi64(U, A, imm) \
  __extension__ ({ \
    (__m256i)__builtin_ia32_extracti64x4_mask( \
        (__v8di)(__m512i)(A), \
        (int)(imm), \
        (__v4di)_mm256_setzero_si256(), \
        (__mmask8)(U)); \
  })
7865*67e74705SXin Li 
/* Unmasked form.  Expands to one call of __builtin_ia32_insertf64x4_mask
   with: A (cast via __m512d to __v8df), B (cast via __m256d to __v4df),
   imm (cast to int), an undefined __v8df vector and an all-ones __mmask8.
   The expression has type __m512d; each macro argument is expanded exactly
   once. */
#define _mm512_insertf64x4(A, B, imm) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_insertf64x4_mask( \
        (__v8df)(__m512d)(A), \
        (__v4df)(__m256d)(B), \
        (int)(imm), \
        (__v8df)_mm512_undefined_pd(), \
        (__mmask8)-1); \
  })
7871*67e74705SXin Li 
/* _mask_ form.  Expands to one call of __builtin_ia32_insertf64x4_mask
   with: A (cast via __m512d to __v8df), B (cast via __m256d to __v4df),
   imm (cast to int), W (cast via __m512d to __v8df) and U (cast to
   __mmask8).  The expression has type __m512d; each macro argument is
   expanded exactly once. */
#define _mm512_mask_insertf64x4(W, U, A, B, imm) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_insertf64x4_mask( \
        (__v8df)(__m512d)(A), \
        (__v4df)(__m256d)(B), \
        (int)(imm), \
        (__v8df)(__m512d)(W), \
        (__mmask8)(U)); \
  })
7877*67e74705SXin Li 
/* _maskz_ form.  Expands to one call of __builtin_ia32_insertf64x4_mask
   with: A (cast via __m512d to __v8df), B (cast via __m256d to __v4df),
   imm (cast to int), a zeroed __v8df vector and U (cast to __mmask8).  The
   expression has type __m512d; each macro argument is expanded exactly
   once. */
#define _mm512_maskz_insertf64x4(U, A, B, imm) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_insertf64x4_mask( \
        (__v8df)(__m512d)(A), \
        (__v4df)(__m256d)(B), \
        (int)(imm), \
        (__v8df)_mm512_setzero_pd(), \
        (__mmask8)(U)); \
  })
7883*67e74705SXin Li 
/* Unmasked form.  Expands to one call of __builtin_ia32_inserti64x4_mask
   with: A (cast via __m512i to __v8di), B (cast via __m256i to __v4di),
   imm (cast to int), a zeroed __v8di vector (_mm512_setzero_si512()) and an
   all-ones __mmask8.  The expression has type __m512i; each macro argument
   is expanded exactly once. */
#define _mm512_inserti64x4(A, B, imm) \
  __extension__ ({ \
    (__m512i)__builtin_ia32_inserti64x4_mask( \
        (__v8di)(__m512i)(A), \
        (__v4di)(__m256i)(B), \
        (int)(imm), \
        (__v8di)_mm512_setzero_si512(), \
        (__mmask8)-1); \
  })
7889*67e74705SXin Li 
7890*67e74705SXin Li #define _mm512_mask_inserti64x4(W, U, A, B, imm) __extension__ ({ \
7891*67e74705SXin Li   (__m512i)__builtin_ia32_inserti64x4_mask((__v8di)(__m512i)(A), \
7892*67e74705SXin Li                                            (__v4di)(__m256i)(B), (int)(imm), \
7893*67e74705SXin Li                                            (__v8di)(__m512i)(W), \
7894*67e74705SXin Li                                            (__mmask8)(U)); })
7895*67e74705SXin Li 
7896*67e74705SXin Li #define _mm512_maskz_inserti64x4(U, A, B, imm) __extension__ ({ \
7897*67e74705SXin Li   (__m512i)__builtin_ia32_inserti64x4_mask((__v8di)(__m512i)(A), \
7898*67e74705SXin Li                                            (__v4di)(__m256i)(B), (int)(imm), \
7899*67e74705SXin Li                                            (__v8di)_mm512_setzero_si512(), \
7900*67e74705SXin Li                                            (__mmask8)(U)); })
7901*67e74705SXin Li 
/* Insert forms built on __builtin_ia32_insertf32x4_mask.
 * Every form passes A as __v16sf (from __m512), B as __v4sf (from __m128)
 * and imm as int, and evaluates to a __m512.  They differ only in the
 * pass-through vector and the 16-bit mask handed to the builtin:
 *   _mm512_insertf32x4       : _mm512_undefined_ps(), mask (__mmask16)-1
 *   _mm512_mask_insertf32x4  : W,                     mask U
 *   _mm512_maskz_insertf32x4 : _mm512_setzero_ps(),   mask U             */
#define _mm512_insertf32x4(A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_insertf32x4_mask((__v16sf)(__m512)(A), \
                                          (__v4sf)(__m128)(B), (int)(imm), \
                                          (__v16sf)_mm512_undefined_ps(), \
                                          (__mmask16)-1); })

#define _mm512_mask_insertf32x4(W, U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_insertf32x4_mask((__v16sf)(__m512)(A), \
                                          (__v4sf)(__m128)(B), (int)(imm), \
                                          (__v16sf)(__m512)(W), \
                                          (__mmask16)(U)); })

#define _mm512_maskz_insertf32x4(U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_insertf32x4_mask((__v16sf)(__m512)(A), \
                                          (__v4sf)(__m128)(B), (int)(imm), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U)); })

/* Insert forms built on __builtin_ia32_inserti32x4_mask.
 * Every form passes A as __v16si (from __m512i), B as __v4si (from __m128i)
 * and imm as int, and evaluates to a __m512i.  They differ only in the
 * pass-through vector and the 16-bit mask handed to the builtin:
 *   _mm512_inserti32x4       : _mm512_setzero_si512(), mask (__mmask16)-1
 *   _mm512_mask_inserti32x4  : W,                      mask U
 *   _mm512_maskz_inserti32x4 : _mm512_setzero_si512(), mask U             */
#define _mm512_inserti32x4(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_inserti32x4_mask((__v16si)(__m512i)(A), \
                                           (__v4si)(__m128i)(B), (int)(imm), \
                                           (__v16si)_mm512_setzero_si512(), \
                                           (__mmask16)-1); })

#define _mm512_mask_inserti32x4(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_inserti32x4_mask((__v16si)(__m512i)(A), \
                                           (__v4si)(__m128i)(B), (int)(imm), \
                                           (__v16si)(__m512i)(W), \
                                           (__mmask16)(U)); })

#define _mm512_maskz_inserti32x4(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_inserti32x4_mask((__v16si)(__m512i)(A), \
                                           (__v4si)(__m128i)(B), (int)(imm), \
                                           (__v16si)_mm512_setzero_si512(), \
                                           (__mmask16)(U)); })

/* getmant (double precision) with a caller-supplied rounding argument R.
 * Each form calls __builtin_ia32_getmantpd512_mask with A as __v8df, a
 * single int immediate packed as ((C) << 2) | (B), a pass-through vector,
 * an 8-bit mask, and (int)(R).  The result is a __m512d.
 *   _mm512_getmant_round_pd       : _mm512_undefined_pd(), mask (__mmask8)-1
 *   _mm512_mask_getmant_round_pd  : W,                     mask U
 *   _mm512_maskz_getmant_round_pd : _mm512_setzero_pd(),   mask U           */
#define _mm512_getmant_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R)); })

#define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_getmant_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)); })

/* getmant (double precision) using _MM_FROUND_CUR_DIRECTION as the rounding
 * argument.  Each form calls __builtin_ia32_getmantpd512_mask with A as
 * __v8df, a single int immediate packed as ((C) << 2) | (B), a pass-through
 * vector and an 8-bit mask.  The result is a __m512d.
 *   _mm512_getmant_pd       : _mm512_setzero_pd(), mask (__mmask8)-1
 *   _mm512_mask_getmant_pd  : W,                   mask U
 *   _mm512_maskz_getmant_pd : _mm512_setzero_pd(), mask U               */
#define _mm512_getmant_pd(A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)-1, \
                                            _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_getmant_pd(W, U, A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), \
                                            _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_getmant_pd(U, A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), \
                                            _MM_FROUND_CUR_DIRECTION); })

/* getmant (single precision) with a caller-supplied rounding argument R.
 * Each form calls __builtin_ia32_getmantps512_mask with A as __v16sf, a
 * single int immediate packed as ((C) << 2) | (B), a pass-through vector,
 * a 16-bit mask, and (int)(R).  The result is a __m512.
 *   _mm512_getmant_round_ps       : _mm512_undefined_ps(), mask (__mmask16)-1
 *   _mm512_mask_getmant_round_ps  : W,                     mask U
 *   _mm512_maskz_getmant_round_ps : _mm512_setzero_ps(),   mask U            */
#define _mm512_getmant_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, (int)(R)); })

#define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_getmant_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)); })

/* getmant (single precision) using _MM_FROUND_CUR_DIRECTION as the rounding
 * argument.  Each form calls __builtin_ia32_getmantps512_mask with A as
 * __v16sf, a single int immediate packed as ((C) << 2) | (B), a pass-through
 * vector and a 16-bit mask.  The result is a __m512.
 *   _mm512_getmant_ps       : _mm512_undefined_ps(), mask (__mmask16)-1
 *   _mm512_mask_getmant_ps  : W,                     mask U
 *   _mm512_maskz_getmant_ps : _mm512_setzero_ps(),   mask U              */
#define _mm512_getmant_ps(A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2)|(B)), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, \
                                           _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2)|(B)), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), \
                                           _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_getmant_ps(U, A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2)|(B)), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), \
                                           _MM_FROUND_CUR_DIRECTION); })

/* getexp (double precision) with a caller-supplied rounding argument R.
 * Each form calls __builtin_ia32_getexppd512_mask with A as __v8df, a
 * pass-through vector, an 8-bit mask, and (int)(R); the result is a __m512d.
 *   _mm512_getexp_round_pd       : _mm512_undefined_pd(), mask (__mmask8)-1
 *   _mm512_mask_getexp_round_pd  : W,                     mask U
 *   _mm512_maskz_getexp_round_pd : _mm512_setzero_pd(),   mask U           */
#define _mm512_getexp_round_pd(A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, (int)(R)); })

#define _mm512_mask_getexp_round_pd(W, U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_getexp_round_pd(U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })

8031*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_getexp_pd(__m512d __A)8032*67e74705SXin Li _mm512_getexp_pd (__m512d __A)
8033*67e74705SXin Li {
8034*67e74705SXin Li   return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8035*67e74705SXin Li                 (__v8df) _mm512_undefined_pd (),
8036*67e74705SXin Li                 (__mmask8) -1,
8037*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
8038*67e74705SXin Li }
8039*67e74705SXin Li 
8040*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_getexp_pd(__m512d __W,__mmask8 __U,__m512d __A)8041*67e74705SXin Li _mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
8042*67e74705SXin Li {
8043*67e74705SXin Li   return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8044*67e74705SXin Li                 (__v8df) __W,
8045*67e74705SXin Li                 (__mmask8) __U,
8046*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
8047*67e74705SXin Li }
8048*67e74705SXin Li 
8049*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_getexp_pd(__mmask8 __U,__m512d __A)8050*67e74705SXin Li _mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
8051*67e74705SXin Li {
8052*67e74705SXin Li   return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8053*67e74705SXin Li                 (__v8df) _mm512_setzero_pd (),
8054*67e74705SXin Li                 (__mmask8) __U,
8055*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
8056*67e74705SXin Li }
8057*67e74705SXin Li 
/* getexp (single precision) with a caller-supplied rounding argument R.
 * Each form calls __builtin_ia32_getexpps512_mask with A as __v16sf, a
 * pass-through vector, a 16-bit mask, and (int)(R); the result is a __m512.
 *   _mm512_getexp_round_ps       : _mm512_undefined_ps(), mask (__mmask16)-1
 *   _mm512_mask_getexp_round_ps  : W,                     mask U
 *   _mm512_maskz_getexp_round_ps : _mm512_setzero_ps(),   mask U            */
#define _mm512_getexp_round_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)_mm512_undefined_ps(), \
                                          (__mmask16)-1, (int)(R)); })

#define _mm512_mask_getexp_round_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(W), \
                                          (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_getexp_round_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), (int)(R)); })

8073*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_getexp_ps(__m512 __A)8074*67e74705SXin Li _mm512_getexp_ps (__m512 __A)
8075*67e74705SXin Li {
8076*67e74705SXin Li   return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8077*67e74705SXin Li                (__v16sf) _mm512_undefined_ps (),
8078*67e74705SXin Li                (__mmask16) -1,
8079*67e74705SXin Li                _MM_FROUND_CUR_DIRECTION);
8080*67e74705SXin Li }
8081*67e74705SXin Li 
8082*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_getexp_ps(__m512 __W,__mmask16 __U,__m512 __A)8083*67e74705SXin Li _mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
8084*67e74705SXin Li {
8085*67e74705SXin Li   return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8086*67e74705SXin Li                (__v16sf) __W,
8087*67e74705SXin Li                (__mmask16) __U,
8088*67e74705SXin Li                _MM_FROUND_CUR_DIRECTION);
8089*67e74705SXin Li }
8090*67e74705SXin Li 
8091*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_getexp_ps(__mmask16 __U,__m512 __A)8092*67e74705SXin Li _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
8093*67e74705SXin Li {
8094*67e74705SXin Li   return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8095*67e74705SXin Li                (__v16sf) _mm512_setzero_ps (),
8096*67e74705SXin Li                (__mmask16) __U,
8097*67e74705SXin Li                _MM_FROUND_CUR_DIRECTION);
8098*67e74705SXin Li }
8099*67e74705SXin Li 
/* Unmasked gather through __builtin_ia32_gatherdiv16sf.
 * Passes _mm256_undefined_ps() as the pass-through vector, addr as
 * float const *, index as __v8di (from __m512i), an all-ones 8-bit mask and
 * scale as int.  Evaluates to a __m256. */
#define _mm512_i64gather_ps(index, addr, scale) __extension__ ({ \
  (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \
                                       (float const *)(addr), \
                                       (__v8di)(__m512i)(index), (__mmask8)-1, \
                                       (int)(scale)); })

/* Masked gather through __builtin_ia32_gatherdiv16sf.
 * Passes v1_old as the pass-through vector (__v8sf, from __m256), addr as
 * float const *, index as __v8di (from __m512i), mask as __mmask8 and scale
 * as int.  Evaluates to a __m256.
 * Every argument is parenthesised and cast so that expression arguments
 * expand safely, matching the other gather macros in this header. */
#define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old), \
                                       (float const *)(addr), \
                                       (__v8di)(__m512i)(index), \
                                       (__mmask8)(mask), (int)(scale)); })

/* Gathers through __builtin_ia32_gatherdiv16si.
 * Both forms pass addr as int const *, index as __v8di (from __m512i) and
 * scale as int, and evaluate to a __m256i.
 *   _mm512_i64gather_epi32      : pass-through _mm256_undefined_ps() viewed
 *                                 as __v8si, mask (__mmask8)-1
 *   _mm512_mask_i64gather_epi32 : pass-through v1_old, mask `mask`
 * NOTE(review): the unmasked form takes its pass-through from the float
 * "undefined" helper and bit-casts it to __v8si; an integer-typed helper
 * would read more naturally if one is available in avxintrin.h - confirm. */
#define _mm512_i64gather_epi32(index, addr, scale) __extension__ ({\
  (__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_ps(), \
                                        (int const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)-1, (int)(scale)); })

#define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \
                                        (int const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)); })

/* Gathers through __builtin_ia32_gatherdiv8df.
 * Both forms pass addr as double const *, index as __v8di (from __m512i)
 * and scale as int, and evaluate to a __m512d.
 *   _mm512_i64gather_pd      : pass-through _mm512_undefined_pd(),
 *                              mask (__mmask8)-1
 *   _mm512_mask_i64gather_pd : pass-through v1_old, mask `mask`         */
#define _mm512_i64gather_pd(index, addr, scale) __extension__ ({\
  (__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \
                                       (double const *)(addr), \
                                       (__v8di)(__m512i)(index), (__mmask8)-1, \
                                       (int)(scale)); })

#define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \
                                       (double const *)(addr), \
                                       (__v8di)(__m512i)(index), \
                                       (__mmask8)(mask), (int)(scale)); })

/* Unmasked gather through __builtin_ia32_gatherdiv8di.
 * Passes addr as long long const *, index as __v8di (from __m512i), an
 * all-ones 8-bit mask and scale as int.  Evaluates to a __m512i.
 * The pass-through operand comes from the integer "undefined" helper
 * (_mm512_undefined_epi32) rather than the double-precision one, so the
 * integer gather does not route its placeholder through a floating-point
 * vector type. */
#define _mm512_i64gather_epi64(index, addr, scale) __extension__ ({\
  (__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_epi32(), \
                                       (long long const *)(addr), \
                                       (__v8di)(__m512i)(index), (__mmask8)-1, \
                                       (int)(scale)); })

/* Masked gather through __builtin_ia32_gatherdiv8di.
 * Passes v1_old as the pass-through vector (__v8di, from __m512i), addr as
 * long long const *, index as __v8di (from __m512i), mask as __mmask8 and
 * scale as int.  Evaluates to a __m512i. */
#define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \
                                       (long long const *)(addr), \
                                       (__v8di)(__m512i)(index), \
                                       (__mmask8)(mask), (int)(scale)); })

/* Gathers through __builtin_ia32_gathersiv16sf.
 * Both forms pass addr as float const * and scale as int, and evaluate to
 * a __m512.
 *   _mm512_i32gather_ps      : pass-through _mm512_undefined_ps(),
 *                              mask (__mmask16)-1
 *   _mm512_mask_i32gather_ps : pass-through v1_old, mask `mask`
 * NOTE(review): index is cast through __m512 / __v16sf here, whereas the
 * other i32 gathers in this header cast their index to an integer vector.
 * Presumably this mirrors the parameter type the builtin is declared with;
 * confirm against the builtin's signature before changing the cast. */
#define _mm512_i32gather_ps(index, addr, scale) __extension__ ({\
  (__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \
                                       (float const *)(addr), \
                                       (__v16sf)(__m512)(index), \
                                       (__mmask16)-1, (int)(scale)); })

#define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \
                                       (float const *)(addr), \
                                       (__v16sf)(__m512)(index), \
                                       (__mmask16)(mask), (int)(scale)); })

/* Gathers through __builtin_ia32_gathersiv16si.
 * Both forms pass addr as int const *, index as __v16si (from __m512i) and
 * scale as int, and evaluate to a __m512i.
 *   _mm512_i32gather_epi32      : pass-through _mm512_undefined_epi32(),
 *                                 mask (__mmask16)-1
 *   _mm512_mask_i32gather_epi32 : pass-through v1_old, mask `mask`       */
#define _mm512_i32gather_epi32(index, addr, scale) __extension__ ({\
  (__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \
                                        (int const *)(addr), \
                                        (__v16si)(__m512i)(index), \
                                        (__mmask16)-1, (int)(scale)); })

#define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \
                                        (int const *)(addr), \
                                        (__v16si)(__m512i)(index), \
                                        (__mmask16)(mask), (int)(scale)); })

/* Gathers through __builtin_ia32_gathersiv8df.
 * Both forms pass addr as double const *, index as __v8si (from __m256i)
 * and scale as int, and evaluate to a __m512d.
 *   _mm512_i32gather_pd      : pass-through _mm512_undefined_pd(),
 *                              mask (__mmask8)-1
 *   _mm512_mask_i32gather_pd : pass-through v1_old, mask `mask`         */
#define _mm512_i32gather_pd(index, addr, scale) __extension__ ({\
  (__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \
                                       (double const *)(addr), \
                                       (__v8si)(__m256i)(index), (__mmask8)-1, \
                                       (int)(scale)); })

#define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \
                                       (double const *)(addr), \
                                       (__v8si)(__m256i)(index), \
                                       (__mmask8)(mask), (int)(scale)); })

/* Gathers through __builtin_ia32_gathersiv8di.
 * Both forms pass addr as long long const *, index as __v8si (from __m256i)
 * and scale as int, and evaluate to a __m512i.
 *   _mm512_i32gather_epi64      : pass-through _mm512_undefined_epi32(),
 *                                 mask (__mmask8)-1
 *   _mm512_mask_i32gather_epi64 : pass-through v1_old, mask `mask`       */
#define _mm512_i32gather_epi64(index, addr, scale) __extension__ ({\
  (__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \
                                       (long long const *)(addr), \
                                       (__v8si)(__m256i)(index), (__mmask8)-1, \
                                       (int)(scale)); })

#define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \
                                       (long long const *)(addr), \
                                       (__v8si)(__m256i)(index), \
                                       (__mmask8)(mask), (int)(scale)); })

/* Scatters through __builtin_ia32_scatterdiv16sf.
 * Both forms pass addr as float *, index as __v8di (from __m512i), v1 as
 * __v8sf (from __m256) and scale as int.  The unmasked form supplies an
 * all-ones 8-bit mask; the masked form supplies `mask`. */
#define _mm512_i64scatter_ps(addr, index, v1, scale) __extension__ ({\
  __builtin_ia32_scatterdiv16sf((float *)(addr), (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), (int)(scale)); })

#define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({\
  __builtin_ia32_scatterdiv16sf((float *)(addr), (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), (int)(scale)); })

/* Scatters through __builtin_ia32_scatterdiv16si.
 * Both forms pass addr as int *, index as __v8di (from __m512i), v1 as
 * __v8si (from __m256i) and scale as int.  The unmasked form supplies an
 * all-ones 8-bit mask; the masked form supplies `mask`. */
#define _mm512_i64scatter_epi32(addr, index, v1, scale) __extension__ ({\
  __builtin_ia32_scatterdiv16si((int *)(addr), (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), (int)(scale)); })

#define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) __extension__ ({\
  __builtin_ia32_scatterdiv16si((int *)(addr), (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), (int)(scale)); })

/* Scatters through __builtin_ia32_scatterdiv8df.
 * Both forms pass addr as double *, index as __v8di (from __m512i), v1 as
 * __v8df (from __m512d) and scale as int.  The unmasked form supplies an
 * all-ones 8-bit mask; the masked form supplies `mask`. */
#define _mm512_i64scatter_pd(addr, index, v1, scale) __extension__ ({\
  __builtin_ia32_scatterdiv8df((double *)(addr), (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale)); })

#define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) __extension__ ({\
  __builtin_ia32_scatterdiv8df((double *)(addr), (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale)); })

/* Scatters through __builtin_ia32_scatterdiv8di.
 * Both forms pass addr as long long *, index as __v8di (from __m512i), v1
 * as __v8di (from __m512i) and scale as int.  The unmasked form supplies an
 * all-ones 8-bit mask; the masked form supplies `mask`. */
#define _mm512_i64scatter_epi64(addr, index, v1, scale) __extension__ ({\
  __builtin_ia32_scatterdiv8di((long long *)(addr), (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale)); })

#define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) __extension__ ({\
  __builtin_ia32_scatterdiv8di((long long *)(addr), (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale)); })

/* Scatters through __builtin_ia32_scattersiv16sf.
 * Both forms pass addr as float *, index as __v16si (from __m512i), v1 as
 * __v16sf (from __m512) and scale as int.  The unmasked form supplies an
 * all-ones 16-bit mask; the masked form supplies `mask`. */
#define _mm512_i32scatter_ps(addr, index, v1, scale) __extension__ ({\
  __builtin_ia32_scattersiv16sf((float *)(addr), (__mmask16)-1, \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), (int)(scale)); })

#define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({\
  __builtin_ia32_scattersiv16sf((float *)(addr), (__mmask16)(mask), \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), (int)(scale)); })

/* Scatters through __builtin_ia32_scattersiv16si.
 * Both forms pass addr as int *, index as __v16si (from __m512i), v1 as
 * __v16si (from __m512i) and scale as int.  The unmasked form supplies an
 * all-ones 16-bit mask; the masked form supplies `mask`. */
#define _mm512_i32scatter_epi32(addr, index, v1, scale) __extension__ ({\
  __builtin_ia32_scattersiv16si((int *)(addr), (__mmask16)-1, \
                                (__v16si)(__m512i)(index), \
                                (__v16si)(__m512i)(v1), (int)(scale)); })

#define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({\
  __builtin_ia32_scattersiv16si((int *)(addr), (__mmask16)(mask), \
                                (__v16si)(__m512i)(index), \
                                (__v16si)(__m512i)(v1), (int)(scale)); })

/* Scatters through __builtin_ia32_scattersiv8df.
 * Both forms pass addr as double *, index as __v8si (from __m256i), v1 as
 * __v8df (from __m512d) and scale as int.  The unmasked form supplies an
 * all-ones 8-bit mask; the masked form supplies `mask`. */
#define _mm512_i32scatter_pd(addr, index, v1, scale) __extension__ ({\
  __builtin_ia32_scattersiv8df((double *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale)); })

#define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) __extension__ ({\
  __builtin_ia32_scattersiv8df((double *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale)); })

/* Scatters through __builtin_ia32_scattersiv8di.
 * Both forms pass addr as long long *, index as __v8si (from __m256i), v1
 * as __v8di (from __m512i) and scale as int.  The unmasked form supplies an
 * all-ones 8-bit mask; the masked form supplies `mask`. */
#define _mm512_i32scatter_epi64(addr, index, v1, scale) __extension__ ({\
  __builtin_ia32_scattersiv8di((long long *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale)); })

#define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) __extension__ ({\
  __builtin_ia32_scattersiv8di((long long *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale)); })

8276*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_fmadd_ss(__m128 __W,__mmask8 __U,__m128 __A,__m128 __B)8277*67e74705SXin Li _mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8278*67e74705SXin Li {
8279*67e74705SXin Li  return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __A,
8280*67e74705SXin Li           (__v4sf) __B,
8281*67e74705SXin Li           (__v4sf) __W,
8282*67e74705SXin Li           (__mmask8) __U,
8283*67e74705SXin Li           _MM_FROUND_CUR_DIRECTION);
8284*67e74705SXin Li }
8285*67e74705SXin Li 
/* Masked scalar single-precision fmadd with a caller-supplied rounding
 * argument.  Calls __builtin_ia32_vfmaddss3_mask with the operands in the
 * order (A, B, W), U as the 8-bit mask and (int)(R) as the rounding
 * argument.  Evaluates to a __m128. */
#define _mm_mask_fmadd_round_ss(W, U, A, B, R) __extension__({\
  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
                                        (__v4sf)(__m128)(B), \
                                        (__v4sf)(__m128)(W), (__mmask8)(U), \
                                        (int)(R)); })

8292*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_fmadd_ss(__mmask8 __U,__m128 __A,__m128 __B,__m128 __C)8293*67e74705SXin Li _mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
8294*67e74705SXin Li {
8295*67e74705SXin Li  return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A,
8296*67e74705SXin Li           (__v4sf) __B,
8297*67e74705SXin Li           (__v4sf) __C,
8298*67e74705SXin Li           (__mmask8) __U,
8299*67e74705SXin Li           _MM_FROUND_CUR_DIRECTION);
8300*67e74705SXin Li }
8301*67e74705SXin Li 
/* Zero-masked scalar single-precision fmadd with a caller-supplied rounding
 * argument.  Calls __builtin_ia32_vfmaddss3_maskz with the operands in the
 * order (A, B, C), U as the 8-bit mask and (int)(R) as the rounding
 * argument.  Evaluates to a __m128.
 * R must be forwarded to the builtin: substituting
 * _MM_FROUND_CUR_DIRECTION here would silently discard the rounding mode
 * the caller asked for. */
#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) __extension__ ({\
  (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4sf)(__m128)(C), (__mmask8)(U), \
                                         (int)(R)); })

8308*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask3_fmadd_ss(__m128 __W,__m128 __X,__m128 __Y,__mmask8 __U)8309*67e74705SXin Li _mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
8310*67e74705SXin Li {
8311*67e74705SXin Li  return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
8312*67e74705SXin Li           (__v4sf) __X,
8313*67e74705SXin Li           (__v4sf) __Y,
8314*67e74705SXin Li           (__mmask8) __U,
8315*67e74705SXin Li           _MM_FROUND_CUR_DIRECTION);
8316*67e74705SXin Li }
8317*67e74705SXin Li 
/* Expands to __builtin_ia32_vfmaddss3_mask3 with operands (W, X, Y),
 * mask U, and the rounding argument R cast to int. */
#define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) __extension__ ({ \
  (__m128)__builtin_ia32_vfmaddss3_mask3( \
      (__v4sf)(__m128)(W), \
      (__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), \
      (__mmask8)(U), \
      (int)(R)); })
8323*67e74705SXin Li 
8324*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_fmsub_ss(__m128 __W,__mmask8 __U,__m128 __A,__m128 __B)8325*67e74705SXin Li _mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8326*67e74705SXin Li {
8327*67e74705SXin Li  return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __A,
8328*67e74705SXin Li           -(__v4sf) __B,
8329*67e74705SXin Li           (__v4sf) __W,
8330*67e74705SXin Li           (__mmask8) __U,
8331*67e74705SXin Li           _MM_FROUND_CUR_DIRECTION);
8332*67e74705SXin Li }
8333*67e74705SXin Li 
/* Expands to __builtin_ia32_vfmaddss3_mask with operands (A, -B, W),
 * mask U, and the rounding argument R cast to int. */
#define _mm_mask_fmsub_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_vfmaddss3_mask( \
      (__v4sf)(__m128)(A), \
      -(__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), \
      (int)(R)); })
8339*67e74705SXin Li 
8340*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_fmsub_ss(__mmask8 __U,__m128 __A,__m128 __B,__m128 __C)8341*67e74705SXin Li _mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
8342*67e74705SXin Li {
8343*67e74705SXin Li  return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A,
8344*67e74705SXin Li           (__v4sf) __B,
8345*67e74705SXin Li           -(__v4sf) __C,
8346*67e74705SXin Li           (__mmask8) __U,
8347*67e74705SXin Li           _MM_FROUND_CUR_DIRECTION);
8348*67e74705SXin Li }
8349*67e74705SXin Li 
/* Expands to __builtin_ia32_vfmaddss3_maskz with operands (A, B, -C),
 * mask U, and the rounding argument R cast to int. */
#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_vfmaddss3_maskz( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      -(__v4sf)(__m128)(C), \
      (__mmask8)(U), \
      (int)(R)); })
8355*67e74705SXin Li 
8356*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask3_fmsub_ss(__m128 __W,__m128 __X,__m128 __Y,__mmask8 __U)8357*67e74705SXin Li _mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
8358*67e74705SXin Li {
8359*67e74705SXin Li  return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
8360*67e74705SXin Li           (__v4sf) __X,
8361*67e74705SXin Li           -(__v4sf) __Y,
8362*67e74705SXin Li           (__mmask8) __U,
8363*67e74705SXin Li           _MM_FROUND_CUR_DIRECTION);
8364*67e74705SXin Li }
8365*67e74705SXin Li 
/* Expands to __builtin_ia32_vfmaddss3_mask3 with operands (W, X, -Y),
 * mask U, and the rounding argument R cast to int. */
#define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) __extension__ ({ \
  (__m128)__builtin_ia32_vfmaddss3_mask3( \
      (__v4sf)(__m128)(W), \
      (__v4sf)(__m128)(X), \
      -(__v4sf)(__m128)(Y), \
      (__mmask8)(U), \
      (int)(R)); })
8371*67e74705SXin Li 
8372*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_fnmadd_ss(__m128 __W,__mmask8 __U,__m128 __A,__m128 __B)8373*67e74705SXin Li _mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8374*67e74705SXin Li {
8375*67e74705SXin Li  return (__m128) __builtin_ia32_vfmaddss3_mask (-(__v4sf) __A,
8376*67e74705SXin Li           (__v4sf) __B,
8377*67e74705SXin Li           (__v4sf) __W,
8378*67e74705SXin Li           (__mmask8) __U,
8379*67e74705SXin Li           _MM_FROUND_CUR_DIRECTION);
8380*67e74705SXin Li }
8381*67e74705SXin Li 
/* Expands to __builtin_ia32_vfmaddss3_mask with operands (-A, B, W),
 * mask U, and the rounding argument R cast to int. */
#define _mm_mask_fnmadd_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_vfmaddss3_mask( \
      -(__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), \
      (int)(R)); })
8387*67e74705SXin Li 
8388*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_fnmadd_ss(__mmask8 __U,__m128 __A,__m128 __B,__m128 __C)8389*67e74705SXin Li _mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
8390*67e74705SXin Li {
8391*67e74705SXin Li  return (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf) __A,
8392*67e74705SXin Li           (__v4sf) __B,
8393*67e74705SXin Li           (__v4sf) __C,
8394*67e74705SXin Li           (__mmask8) __U,
8395*67e74705SXin Li           _MM_FROUND_CUR_DIRECTION);
8396*67e74705SXin Li }
8397*67e74705SXin Li 
/* Expands to __builtin_ia32_vfmaddss3_maskz with operands (-A, B, C),
 * mask U, and the rounding argument R cast to int. */
#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_vfmaddss3_maskz( \
      -(__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(C), \
      (__mmask8)(U), \
      (int)(R)); })
8403*67e74705SXin Li 
8404*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask3_fnmadd_ss(__m128 __W,__m128 __X,__m128 __Y,__mmask8 __U)8405*67e74705SXin Li _mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
8406*67e74705SXin Li {
8407*67e74705SXin Li  return (__m128) __builtin_ia32_vfmaddss3_mask3 (-(__v4sf) __W,
8408*67e74705SXin Li           (__v4sf) __X,
8409*67e74705SXin Li           (__v4sf) __Y,
8410*67e74705SXin Li           (__mmask8) __U,
8411*67e74705SXin Li           _MM_FROUND_CUR_DIRECTION);
8412*67e74705SXin Li }
8413*67e74705SXin Li 
/* Expands to __builtin_ia32_vfmaddss3_mask3 with operands (-W, X, Y),
 * mask U, and the rounding argument R cast to int. */
#define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) __extension__ ({ \
  (__m128)__builtin_ia32_vfmaddss3_mask3( \
      -(__v4sf)(__m128)(W), \
      (__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), \
      (__mmask8)(U), \
      (int)(R)); })
8419*67e74705SXin Li 
8420*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_fnmsub_ss(__m128 __W,__mmask8 __U,__m128 __A,__m128 __B)8421*67e74705SXin Li _mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8422*67e74705SXin Li {
8423*67e74705SXin Li  return (__m128) __builtin_ia32_vfmaddss3_mask (-(__v4sf) __A,
8424*67e74705SXin Li           -(__v4sf) __B,
8425*67e74705SXin Li           (__v4sf) __W,
8426*67e74705SXin Li           (__mmask8) __U,
8427*67e74705SXin Li           _MM_FROUND_CUR_DIRECTION);
8428*67e74705SXin Li }
8429*67e74705SXin Li 
/* Expands to __builtin_ia32_vfmaddss3_mask with operands (-A, -B, W),
 * mask U, and the rounding argument R cast to int. */
#define _mm_mask_fnmsub_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_vfmaddss3_mask( \
      -(__v4sf)(__m128)(A), \
      -(__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), \
      (int)(R)); })
8435*67e74705SXin Li 
8436*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_fnmsub_ss(__mmask8 __U,__m128 __A,__m128 __B,__m128 __C)8437*67e74705SXin Li _mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
8438*67e74705SXin Li {
8439*67e74705SXin Li  return (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf) __A,
8440*67e74705SXin Li           (__v4sf) __B,
8441*67e74705SXin Li           -(__v4sf) __C,
8442*67e74705SXin Li           (__mmask8) __U,
8443*67e74705SXin Li           _MM_FROUND_CUR_DIRECTION);
8444*67e74705SXin Li }
8445*67e74705SXin Li 
/* Expands to __builtin_ia32_vfmaddss3_maskz with operands (-A, B, -C),
 * mask U, and the caller-supplied rounding argument R cast to int.
 * R must be forwarded here: passing _MM_FROUND_CUR_DIRECTION instead would
 * make this macro indistinguishable from _mm_maskz_fnmsub_ss and silently
 * drop the rounding mode the caller asked for. */
#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) __extension__ ({\
  (__m128)__builtin_ia32_vfmaddss3_maskz(-(__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         -(__v4sf)(__m128)(C), (__mmask8)(U), \
                                         (int)(R)); })
8451*67e74705SXin Li 
8452*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask3_fnmsub_ss(__m128 __W,__m128 __X,__m128 __Y,__mmask8 __U)8453*67e74705SXin Li _mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
8454*67e74705SXin Li {
8455*67e74705SXin Li  return (__m128) __builtin_ia32_vfmaddss3_mask3 (-(__v4sf) __W,
8456*67e74705SXin Li           (__v4sf) __X,
8457*67e74705SXin Li           -(__v4sf) __Y,
8458*67e74705SXin Li           (__mmask8) __U,
8459*67e74705SXin Li           _MM_FROUND_CUR_DIRECTION);
8460*67e74705SXin Li }
8461*67e74705SXin Li 
/* Expands to __builtin_ia32_vfmaddss3_mask3 with operands (-W, X, -Y),
 * mask U, and the rounding argument R cast to int. */
#define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) __extension__ ({ \
  (__m128)__builtin_ia32_vfmaddss3_mask3( \
      -(__v4sf)(__m128)(W), \
      (__v4sf)(__m128)(X), \
      -(__v4sf)(__m128)(Y), \
      (__mmask8)(U), \
      (int)(R)); })
8467*67e74705SXin Li 
8468*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_fmadd_sd(__m128d __W,__mmask8 __U,__m128d __A,__m128d __B)8469*67e74705SXin Li _mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8470*67e74705SXin Li {
8471*67e74705SXin Li  return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __A,
8472*67e74705SXin Li           (__v2df) __B,
8473*67e74705SXin Li           (__v2df) __W,
8474*67e74705SXin Li           (__mmask8) __U,
8475*67e74705SXin Li           _MM_FROUND_CUR_DIRECTION);
8476*67e74705SXin Li }
8477*67e74705SXin Li 
/* Expands to __builtin_ia32_vfmaddsd3_mask with operands (A, B, W),
 * mask U, and the rounding argument R cast to int. */
#define _mm_mask_fmadd_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_vfmaddsd3_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), \
      (int)(R)); })
8483*67e74705SXin Li 
8484*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_fmadd_sd(__mmask8 __U,__m128d __A,__m128d __B,__m128d __C)8485*67e74705SXin Li _mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8486*67e74705SXin Li {
8487*67e74705SXin Li  return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A,
8488*67e74705SXin Li           (__v2df) __B,
8489*67e74705SXin Li           (__v2df) __C,
8490*67e74705SXin Li           (__mmask8) __U,
8491*67e74705SXin Li           _MM_FROUND_CUR_DIRECTION);
8492*67e74705SXin Li }
8493*67e74705SXin Li 
/* Expands to __builtin_ia32_vfmaddsd3_maskz with operands (A, B, C),
 * mask U, and the caller-supplied rounding argument R cast to int.
 * R must be forwarded here: passing _MM_FROUND_CUR_DIRECTION instead would
 * make this macro indistinguishable from _mm_maskz_fmadd_sd and silently
 * drop the rounding mode the caller asked for. */
#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) __extension__ ({\
  (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2df)(__m128d)(C), (__mmask8)(U), \
                                          (int)(R)); })
8499*67e74705SXin Li 
8500*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask3_fmadd_sd(__m128d __W,__m128d __X,__m128d __Y,__mmask8 __U)8501*67e74705SXin Li _mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8502*67e74705SXin Li {
8503*67e74705SXin Li  return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
8504*67e74705SXin Li           (__v2df) __X,
8505*67e74705SXin Li           (__v2df) __Y,
8506*67e74705SXin Li           (__mmask8) __U,
8507*67e74705SXin Li           _MM_FROUND_CUR_DIRECTION);
8508*67e74705SXin Li }
8509*67e74705SXin Li 
/* Expands to __builtin_ia32_vfmaddsd3_mask3 with operands (W, X, Y),
 * mask U, and the rounding argument R cast to int. */
#define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) __extension__ ({ \
  (__m128d)__builtin_ia32_vfmaddsd3_mask3( \
      (__v2df)(__m128d)(W), \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (__mmask8)(U), \
      (int)(R)); })
8515*67e74705SXin Li 
8516*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_fmsub_sd(__m128d __W,__mmask8 __U,__m128d __A,__m128d __B)8517*67e74705SXin Li _mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8518*67e74705SXin Li {
8519*67e74705SXin Li  return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __A,
8520*67e74705SXin Li           -(__v2df) __B,
8521*67e74705SXin Li           (__v2df) __W,
8522*67e74705SXin Li           (__mmask8) __U,
8523*67e74705SXin Li           _MM_FROUND_CUR_DIRECTION);
8524*67e74705SXin Li }
8525*67e74705SXin Li 
/* Expands to __builtin_ia32_vfmaddsd3_mask with operands (A, -B, W),
 * mask U, and the rounding argument R cast to int. */
#define _mm_mask_fmsub_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_vfmaddsd3_mask( \
      (__v2df)(__m128d)(A), \
      -(__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), \
      (int)(R)); })
8531*67e74705SXin Li 
8532*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_fmsub_sd(__mmask8 __U,__m128d __A,__m128d __B,__m128d __C)8533*67e74705SXin Li _mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8534*67e74705SXin Li {
8535*67e74705SXin Li  return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A,
8536*67e74705SXin Li           (__v2df) __B,
8537*67e74705SXin Li           -(__v2df) __C,
8538*67e74705SXin Li           (__mmask8) __U,
8539*67e74705SXin Li           _MM_FROUND_CUR_DIRECTION);
8540*67e74705SXin Li }
8541*67e74705SXin Li 
/* Expands to __builtin_ia32_vfmaddsd3_maskz with operands (A, B, -C),
 * mask U, and the rounding argument R cast to int. */
#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_vfmaddsd3_maskz( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      -(__v2df)(__m128d)(C), \
      (__mmask8)(U), \
      (int)(R)); })
8547*67e74705SXin Li 
8548*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask3_fmsub_sd(__m128d __W,__m128d __X,__m128d __Y,__mmask8 __U)8549*67e74705SXin Li _mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8550*67e74705SXin Li {
8551*67e74705SXin Li  return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
8552*67e74705SXin Li           (__v2df) __X,
8553*67e74705SXin Li           -(__v2df) __Y,
8554*67e74705SXin Li           (__mmask8) __U,
8555*67e74705SXin Li           _MM_FROUND_CUR_DIRECTION);
8556*67e74705SXin Li }
8557*67e74705SXin Li 
/* Expands to __builtin_ia32_vfmaddsd3_mask3 with operands (W, X, -Y),
 * mask U, and the rounding argument R cast to int. */
#define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) __extension__ ({ \
  (__m128d)__builtin_ia32_vfmaddsd3_mask3( \
      (__v2df)(__m128d)(W), \
      (__v2df)(__m128d)(X), \
      -(__v2df)(__m128d)(Y), \
      (__mmask8)(U), \
      (int)(R)); })
8563*67e74705SXin Li 
8564*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_fnmadd_sd(__m128d __W,__mmask8 __U,__m128d __A,__m128d __B)8565*67e74705SXin Li _mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8566*67e74705SXin Li {
8567*67e74705SXin Li  return (__m128d) __builtin_ia32_vfmaddsd3_mask ( -(__v2df) __A,
8568*67e74705SXin Li           (__v2df) __B,
8569*67e74705SXin Li           (__v2df) __W,
8570*67e74705SXin Li           (__mmask8) __U,
8571*67e74705SXin Li           _MM_FROUND_CUR_DIRECTION);
8572*67e74705SXin Li }
8573*67e74705SXin Li 
/* Expands to __builtin_ia32_vfmaddsd3_mask with operands (-A, B, W),
 * mask U, and the rounding argument R cast to int. */
#define _mm_mask_fnmadd_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_vfmaddsd3_mask( \
      -(__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), \
      (int)(R)); })
8579*67e74705SXin Li 
8580*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_fnmadd_sd(__mmask8 __U,__m128d __A,__m128d __B,__m128d __C)8581*67e74705SXin Li _mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8582*67e74705SXin Li {
8583*67e74705SXin Li  return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( -(__v2df) __A,
8584*67e74705SXin Li           (__v2df) __B,
8585*67e74705SXin Li           (__v2df) __C,
8586*67e74705SXin Li           (__mmask8) __U,
8587*67e74705SXin Li           _MM_FROUND_CUR_DIRECTION);
8588*67e74705SXin Li }
8589*67e74705SXin Li 
/* Expands to __builtin_ia32_vfmaddsd3_maskz with operands (-A, B, C),
 * mask U, and the rounding argument R cast to int. */
#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_vfmaddsd3_maskz( \
      -(__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(C), \
      (__mmask8)(U), \
      (int)(R)); })
8595*67e74705SXin Li 
8596*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask3_fnmadd_sd(__m128d __W,__m128d __X,__m128d __Y,__mmask8 __U)8597*67e74705SXin Li _mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8598*67e74705SXin Li {
8599*67e74705SXin Li  return (__m128d) __builtin_ia32_vfmaddsd3_mask3 (-(__v2df) __W,
8600*67e74705SXin Li           (__v2df) __X,
8601*67e74705SXin Li           (__v2df) __Y,
8602*67e74705SXin Li           (__mmask8) __U,
8603*67e74705SXin Li           _MM_FROUND_CUR_DIRECTION);
8604*67e74705SXin Li }
8605*67e74705SXin Li 
/* Expands to __builtin_ia32_vfmaddsd3_mask3 with operands (-W, X, Y),
 * mask U, and the rounding argument R cast to int. */
#define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) __extension__ ({ \
  (__m128d)__builtin_ia32_vfmaddsd3_mask3( \
      -(__v2df)(__m128d)(W), \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (__mmask8)(U), \
      (int)(R)); })
8611*67e74705SXin Li 
8612*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_fnmsub_sd(__m128d __W,__mmask8 __U,__m128d __A,__m128d __B)8613*67e74705SXin Li _mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8614*67e74705SXin Li {
8615*67e74705SXin Li  return (__m128d) __builtin_ia32_vfmaddsd3_mask ( -(__v2df) __A,
8616*67e74705SXin Li           -(__v2df) __B,
8617*67e74705SXin Li           (__v2df) __W,
8618*67e74705SXin Li           (__mmask8) __U,
8619*67e74705SXin Li           _MM_FROUND_CUR_DIRECTION);
8620*67e74705SXin Li }
8621*67e74705SXin Li 
/* Expands to __builtin_ia32_vfmaddsd3_mask with operands (-A, -B, W),
 * mask U, and the rounding argument R cast to int. */
#define _mm_mask_fnmsub_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_vfmaddsd3_mask( \
      -(__v2df)(__m128d)(A), \
      -(__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), \
      (int)(R)); })
8627*67e74705SXin Li 
8628*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_fnmsub_sd(__mmask8 __U,__m128d __A,__m128d __B,__m128d __C)8629*67e74705SXin Li _mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8630*67e74705SXin Li {
8631*67e74705SXin Li  return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( -(__v2df) __A,
8632*67e74705SXin Li           (__v2df) __B,
8633*67e74705SXin Li           -(__v2df) __C,
8634*67e74705SXin Li           (__mmask8) __U,
8635*67e74705SXin Li           _MM_FROUND_CUR_DIRECTION);
8636*67e74705SXin Li }
8637*67e74705SXin Li 
/* Expands to __builtin_ia32_vfmaddsd3_maskz with operands (-A, B, -C),
 * mask U, and the caller-supplied rounding argument R cast to int.
 * R must be forwarded here: passing _MM_FROUND_CUR_DIRECTION instead would
 * make this macro indistinguishable from _mm_maskz_fnmsub_sd and silently
 * drop the rounding mode the caller asked for. */
#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) __extension__ ({\
  (__m128d)__builtin_ia32_vfmaddsd3_maskz(-(__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          -(__v2df)(__m128d)(C), \
                                          (__mmask8)(U), \
                                          (int)(R)); })
8644*67e74705SXin Li 
8645*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask3_fnmsub_sd(__m128d __W,__m128d __X,__m128d __Y,__mmask8 __U)8646*67e74705SXin Li _mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8647*67e74705SXin Li {
8648*67e74705SXin Li  return (__m128d) __builtin_ia32_vfmaddsd3_mask3 (-(__v2df) (__W),
8649*67e74705SXin Li           (__v2df) __X,
8650*67e74705SXin Li           -(__v2df) (__Y),
8651*67e74705SXin Li           (__mmask8) __U,
8652*67e74705SXin Li           _MM_FROUND_CUR_DIRECTION);
8653*67e74705SXin Li }
8654*67e74705SXin Li 
/* Expands to __builtin_ia32_vfmaddsd3_mask3 with operands (-W, X, -Y),
 * mask U, and the rounding argument R cast to int. */
#define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) __extension__ ({ \
  (__m128d)__builtin_ia32_vfmaddsd3_mask3( \
      -(__v2df)(__m128d)(W), \
      (__v2df)(__m128d)(X), \
      -(__v2df)(__m128d)(Y), \
      (__mmask8)(U), \
      (int)(R)); })
8660*67e74705SXin Li 
/* Shuffle of X driven by the four 2-bit fields of C (bits 1:0, 3:2, 5:4,
 * 7:6). Result elements 0-3 use indices 0 + field, elements 4-7 use
 * indices 4 + the same fields. Every index is therefore in 0..7, so only
 * the first shuffle operand (X) is ever selected; the second operand is
 * _mm512_undefined_pd(). C is expanded eight times. */
#define _mm512_permutex_pd(X, C) __extension__ ({ \
  (__m512d)__builtin_shufflevector( \
      (__v8df)(__m512d)(X), (__v8df)_mm512_undefined_pd(), \
      0 + (((C) >> 0) & 0x3), 0 + (((C) >> 2) & 0x3), \
      0 + (((C) >> 4) & 0x3), 0 + (((C) >> 6) & 0x3), \
      4 + (((C) >> 0) & 0x3), 4 + (((C) >> 2) & 0x3), \
      4 + (((C) >> 4) & 0x3), 4 + (((C) >> 6) & 0x3)); })
8672*67e74705SXin Li 
/* Passes mask U, the result of _mm512_permutex_pd(X, C), and W (in that
 * order) to __builtin_ia32_selectpd_512. */
#define _mm512_mask_permutex_pd(W, U, X, C) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_permutex_pd((X), (C)), \
      (__v8df)(__m512d)(W)); })
8677*67e74705SXin Li 
/* Passes mask U, the result of _mm512_permutex_pd(X, C), and
 * _mm512_setzero_pd() (in that order) to __builtin_ia32_selectpd_512. */
#define _mm512_maskz_permutex_pd(U, X, C) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_permutex_pd((X), (C)), \
      (__v8df)_mm512_setzero_pd()); })
8682*67e74705SXin Li 
/* Shuffle of X (viewed as __v8di) driven by the four 2-bit fields of C
 * (bits 1:0, 3:2, 5:4, 7:6). Result elements 0-3 use indices 0 + field,
 * elements 4-7 use indices 4 + the same fields. Every index is therefore
 * in 0..7, so only the first shuffle operand (X) is ever selected; the
 * second operand is _mm512_undefined_epi32(). C is expanded eight times. */
#define _mm512_permutex_epi64(X, C) __extension__ ({ \
  (__m512i)__builtin_shufflevector( \
      (__v8di)(__m512i)(X), (__v8di)_mm512_undefined_epi32(), \
      0 + (((C) >> 0) & 0x3), 0 + (((C) >> 2) & 0x3), \
      0 + (((C) >> 4) & 0x3), 0 + (((C) >> 6) & 0x3), \
      4 + (((C) >> 0) & 0x3), 4 + (((C) >> 2) & 0x3), \
      4 + (((C) >> 4) & 0x3), 4 + (((C) >> 6) & 0x3)); })
8694*67e74705SXin Li 
/* Passes mask U, the result of _mm512_permutex_epi64(X, C), and W (in
 * that order) to __builtin_ia32_selectq_512. */
#define _mm512_mask_permutex_epi64(W, U, X, C) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_permutex_epi64((X), (C)), \
      (__v8di)(__m512i)(W)); })
8699*67e74705SXin Li 
/* Passes mask U, the result of _mm512_permutex_epi64(X, C), and
 * _mm512_setzero_si512() (in that order) to __builtin_ia32_selectq_512. */
#define _mm512_maskz_permutex_epi64(U, X, C) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_permutex_epi64((X), (C)), \
      (__v8di)_mm512_setzero_si512()); })
8704*67e74705SXin Li 
8705*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_permutexvar_pd(__m512i __X,__m512d __Y)8706*67e74705SXin Li _mm512_permutexvar_pd (__m512i __X, __m512d __Y)
8707*67e74705SXin Li {
8708*67e74705SXin Li   return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
8709*67e74705SXin Li                  (__v8di) __X,
8710*67e74705SXin Li                  (__v8df) _mm512_undefined_pd (),
8711*67e74705SXin Li                  (__mmask8) -1);
8712*67e74705SXin Li }
8713*67e74705SXin Li 
8714*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_permutexvar_pd(__m512d __W,__mmask8 __U,__m512i __X,__m512d __Y)8715*67e74705SXin Li _mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
8716*67e74705SXin Li {
8717*67e74705SXin Li   return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
8718*67e74705SXin Li                  (__v8di) __X,
8719*67e74705SXin Li                  (__v8df) __W,
8720*67e74705SXin Li                  (__mmask8) __U);
8721*67e74705SXin Li }
8722*67e74705SXin Li 
8723*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_permutexvar_pd(__mmask8 __U,__m512i __X,__m512d __Y)8724*67e74705SXin Li _mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
8725*67e74705SXin Li {
8726*67e74705SXin Li   return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
8727*67e74705SXin Li                  (__v8di) __X,
8728*67e74705SXin Li                  (__v8df) _mm512_setzero_pd (),
8729*67e74705SXin Li                  (__mmask8) __U);
8730*67e74705SXin Li }
8731*67e74705SXin Li 
8732*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_permutexvar_epi64(__mmask8 __M,__m512i __X,__m512i __Y)8733*67e74705SXin Li _mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
8734*67e74705SXin Li {
8735*67e74705SXin Li   return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
8736*67e74705SXin Li                  (__v8di) __X,
8737*67e74705SXin Li                  (__v8di) _mm512_setzero_si512 (),
8738*67e74705SXin Li                  __M);
8739*67e74705SXin Li }
8740*67e74705SXin Li 
8741*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_permutexvar_epi64(__m512i __X,__m512i __Y)8742*67e74705SXin Li _mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
8743*67e74705SXin Li {
8744*67e74705SXin Li   return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
8745*67e74705SXin Li                  (__v8di) __X,
8746*67e74705SXin Li                  (__v8di) _mm512_undefined_epi32 (),
8747*67e74705SXin Li                  (__mmask8) -1);
8748*67e74705SXin Li }
8749*67e74705SXin Li 
8750*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_permutexvar_epi64(__m512i __W,__mmask8 __M,__m512i __X,__m512i __Y)8751*67e74705SXin Li _mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
8752*67e74705SXin Li              __m512i __Y)
8753*67e74705SXin Li {
8754*67e74705SXin Li   return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
8755*67e74705SXin Li                  (__v8di) __X,
8756*67e74705SXin Li                  (__v8di) __W,
8757*67e74705SXin Li                  __M);
8758*67e74705SXin Li }
8759*67e74705SXin Li 
8760*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_permutexvar_ps(__m512i __X,__m512 __Y)8761*67e74705SXin Li _mm512_permutexvar_ps (__m512i __X, __m512 __Y)
8762*67e74705SXin Li {
8763*67e74705SXin Li   return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
8764*67e74705SXin Li                 (__v16si) __X,
8765*67e74705SXin Li                 (__v16sf) _mm512_undefined_ps (),
8766*67e74705SXin Li                 (__mmask16) -1);
8767*67e74705SXin Li }
8768*67e74705SXin Li 
8769*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_permutexvar_ps(__m512 __W,__mmask16 __U,__m512i __X,__m512 __Y)8770*67e74705SXin Li _mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
8771*67e74705SXin Li {
8772*67e74705SXin Li   return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
8773*67e74705SXin Li                 (__v16si) __X,
8774*67e74705SXin Li                 (__v16sf) __W,
8775*67e74705SXin Li                 (__mmask16) __U);
8776*67e74705SXin Li }
8777*67e74705SXin Li 
8778*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_permutexvar_ps(__mmask16 __U,__m512i __X,__m512 __Y)8779*67e74705SXin Li _mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
8780*67e74705SXin Li {
8781*67e74705SXin Li   return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
8782*67e74705SXin Li                 (__v16si) __X,
8783*67e74705SXin Li                 (__v16sf) _mm512_setzero_ps (),
8784*67e74705SXin Li                 (__mmask16) __U);
8785*67e74705SXin Li }
8786*67e74705SXin Li 
8787*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_permutexvar_epi32(__mmask16 __M,__m512i __X,__m512i __Y)8788*67e74705SXin Li _mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
8789*67e74705SXin Li {
8790*67e74705SXin Li   return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
8791*67e74705SXin Li                  (__v16si) __X,
8792*67e74705SXin Li                  (__v16si) _mm512_setzero_si512 (),
8793*67e74705SXin Li                  __M);
8794*67e74705SXin Li }
8795*67e74705SXin Li 
8796*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_permutexvar_epi32(__m512i __X,__m512i __Y)8797*67e74705SXin Li _mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
8798*67e74705SXin Li {
8799*67e74705SXin Li   return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
8800*67e74705SXin Li                  (__v16si) __X,
8801*67e74705SXin Li                  (__v16si) _mm512_undefined_epi32 (),
8802*67e74705SXin Li                  (__mmask16) -1);
8803*67e74705SXin Li }
8804*67e74705SXin Li 
8805*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_permutexvar_epi32(__m512i __W,__mmask16 __M,__m512i __X,__m512i __Y)8806*67e74705SXin Li _mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
8807*67e74705SXin Li              __m512i __Y)
8808*67e74705SXin Li {
8809*67e74705SXin Li   return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
8810*67e74705SXin Li                  (__v16si) __X,
8811*67e74705SXin Li                  (__v16si) __W,
8812*67e74705SXin Li                  __M);
8813*67e74705SXin Li }
8814*67e74705SXin Li 
8815*67e74705SXin Li static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kand(__mmask16 __A,__mmask16 __B)8816*67e74705SXin Li _mm512_kand (__mmask16 __A, __mmask16 __B)
8817*67e74705SXin Li {
8818*67e74705SXin Li   return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
8819*67e74705SXin Li }
8820*67e74705SXin Li 
8821*67e74705SXin Li static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kandn(__mmask16 __A,__mmask16 __B)8822*67e74705SXin Li _mm512_kandn (__mmask16 __A, __mmask16 __B)
8823*67e74705SXin Li {
8824*67e74705SXin Li   return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
8825*67e74705SXin Li }
8826*67e74705SXin Li 
8827*67e74705SXin Li static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kor(__mmask16 __A,__mmask16 __B)8828*67e74705SXin Li _mm512_kor (__mmask16 __A, __mmask16 __B)
8829*67e74705SXin Li {
8830*67e74705SXin Li   return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
8831*67e74705SXin Li }
8832*67e74705SXin Li 
8833*67e74705SXin Li static __inline__ int __DEFAULT_FN_ATTRS
_mm512_kortestc(__mmask16 __A,__mmask16 __B)8834*67e74705SXin Li _mm512_kortestc (__mmask16 __A, __mmask16 __B)
8835*67e74705SXin Li {
8836*67e74705SXin Li   return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B);
8837*67e74705SXin Li }
8838*67e74705SXin Li 
8839*67e74705SXin Li static __inline__ int __DEFAULT_FN_ATTRS
_mm512_kortestz(__mmask16 __A,__mmask16 __B)8840*67e74705SXin Li _mm512_kortestz (__mmask16 __A, __mmask16 __B)
8841*67e74705SXin Li {
8842*67e74705SXin Li   return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
8843*67e74705SXin Li }
8844*67e74705SXin Li 
8845*67e74705SXin Li static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kunpackb(__mmask16 __A,__mmask16 __B)8846*67e74705SXin Li _mm512_kunpackb (__mmask16 __A, __mmask16 __B)
8847*67e74705SXin Li {
8848*67e74705SXin Li   return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
8849*67e74705SXin Li }
8850*67e74705SXin Li 
8851*67e74705SXin Li static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kxnor(__mmask16 __A,__mmask16 __B)8852*67e74705SXin Li _mm512_kxnor (__mmask16 __A, __mmask16 __B)
8853*67e74705SXin Li {
8854*67e74705SXin Li   return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
8855*67e74705SXin Li }
8856*67e74705SXin Li 
8857*67e74705SXin Li static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kxor(__mmask16 __A,__mmask16 __B)8858*67e74705SXin Li _mm512_kxor (__mmask16 __A, __mmask16 __B)
8859*67e74705SXin Li {
8860*67e74705SXin Li   return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
8861*67e74705SXin Li }
8862*67e74705SXin Li 
8863*67e74705SXin Li static __inline__ void __DEFAULT_FN_ATTRS
_mm512_stream_si512(__m512i * __P,__m512i __A)8864*67e74705SXin Li _mm512_stream_si512 (__m512i * __P, __m512i __A)
8865*67e74705SXin Li {
8866*67e74705SXin Li   __builtin_nontemporal_store((__v8di)__A, (__v8di*)__P);
8867*67e74705SXin Li }
8868*67e74705SXin Li 
8869*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_stream_load_si512(void * __P)8870*67e74705SXin Li _mm512_stream_load_si512 (void *__P)
8871*67e74705SXin Li {
8872*67e74705SXin Li   return __builtin_ia32_movntdqa512 ((__v8di *)__P);
8873*67e74705SXin Li }
8874*67e74705SXin Li 
8875*67e74705SXin Li static __inline__ void __DEFAULT_FN_ATTRS
_mm512_stream_pd(double * __P,__m512d __A)8876*67e74705SXin Li _mm512_stream_pd (double *__P, __m512d __A)
8877*67e74705SXin Li {
8878*67e74705SXin Li   __builtin_nontemporal_store((__v8df)__A, (__v8df*)__P);
8879*67e74705SXin Li }
8880*67e74705SXin Li 
8881*67e74705SXin Li static __inline__ void __DEFAULT_FN_ATTRS
_mm512_stream_ps(float * __P,__m512 __A)8882*67e74705SXin Li _mm512_stream_ps (float *__P, __m512 __A)
8883*67e74705SXin Li {
8884*67e74705SXin Li   __builtin_nontemporal_store((__v16sf)__A, (__v16sf*)__P);
8885*67e74705SXin Li }
8886*67e74705SXin Li 
8887*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_compress_pd(__m512d __W,__mmask8 __U,__m512d __A)8888*67e74705SXin Li _mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
8889*67e74705SXin Li {
8890*67e74705SXin Li   return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8891*67e74705SXin Li                   (__v8df) __W,
8892*67e74705SXin Li                   (__mmask8) __U);
8893*67e74705SXin Li }
8894*67e74705SXin Li 
8895*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_compress_pd(__mmask8 __U,__m512d __A)8896*67e74705SXin Li _mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
8897*67e74705SXin Li {
8898*67e74705SXin Li   return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8899*67e74705SXin Li                   (__v8df)
8900*67e74705SXin Li                   _mm512_setzero_pd (),
8901*67e74705SXin Li                   (__mmask8) __U);
8902*67e74705SXin Li }
8903*67e74705SXin Li 
8904*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_compress_epi64(__m512i __W,__mmask8 __U,__m512i __A)8905*67e74705SXin Li _mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
8906*67e74705SXin Li {
8907*67e74705SXin Li   return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8908*67e74705SXin Li                   (__v8di) __W,
8909*67e74705SXin Li                   (__mmask8) __U);
8910*67e74705SXin Li }
8911*67e74705SXin Li 
8912*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_compress_epi64(__mmask8 __U,__m512i __A)8913*67e74705SXin Li _mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
8914*67e74705SXin Li {
8915*67e74705SXin Li   return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8916*67e74705SXin Li                   (__v8di)
8917*67e74705SXin Li                   _mm512_setzero_si512 (),
8918*67e74705SXin Li                   (__mmask8) __U);
8919*67e74705SXin Li }
8920*67e74705SXin Li 
8921*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_compress_ps(__m512 __W,__mmask16 __U,__m512 __A)8922*67e74705SXin Li _mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
8923*67e74705SXin Li {
8924*67e74705SXin Li   return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8925*67e74705SXin Li                  (__v16sf) __W,
8926*67e74705SXin Li                  (__mmask16) __U);
8927*67e74705SXin Li }
8928*67e74705SXin Li 
8929*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_compress_ps(__mmask16 __U,__m512 __A)8930*67e74705SXin Li _mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
8931*67e74705SXin Li {
8932*67e74705SXin Li   return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8933*67e74705SXin Li                  (__v16sf)
8934*67e74705SXin Li                  _mm512_setzero_ps (),
8935*67e74705SXin Li                  (__mmask16) __U);
8936*67e74705SXin Li }
8937*67e74705SXin Li 
8938*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_compress_epi32(__m512i __W,__mmask16 __U,__m512i __A)8939*67e74705SXin Li _mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
8940*67e74705SXin Li {
8941*67e74705SXin Li   return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8942*67e74705SXin Li                   (__v16si) __W,
8943*67e74705SXin Li                   (__mmask16) __U);
8944*67e74705SXin Li }
8945*67e74705SXin Li 
8946*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_compress_epi32(__mmask16 __U,__m512i __A)8947*67e74705SXin Li _mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
8948*67e74705SXin Li {
8949*67e74705SXin Li   return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8950*67e74705SXin Li                   (__v16si)
8951*67e74705SXin Li                   _mm512_setzero_si512 (),
8952*67e74705SXin Li                   (__mmask16) __U);
8953*67e74705SXin Li }
8954*67e74705SXin Li 
/* Scalar float compare yielding a mask.  Passes X, Y, the predicate P, a
 * mask of (__mmask8)-1 and the caller's R to __builtin_ia32_cmpss_mask.
 * Kept as a macro; presumably P and R must be compile-time constants. */
#define _mm_cmp_round_ss_mask(X, Y, P, R) \
  __extension__ ({ \
    (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                        (__v4sf)(__m128)(Y), \
                                        (int)(P), (__mmask8)-1, (int)(R)); \
  })
8959*67e74705SXin Li 
/* Scalar float compare yielding a mask, with a caller-supplied input mask.
 * Passes X, Y, the predicate P, the mask M and the caller's R to
 * __builtin_ia32_cmpss_mask. */
#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
  __extension__ ({ \
    (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                        (__v4sf)(__m128)(Y), \
                                        (int)(P), (__mmask8)(M), (int)(R)); \
  })
8964*67e74705SXin Li 
/* Scalar float compare yielding a mask.  Passes X, Y, the predicate P, a
 * mask of (__mmask8)-1 and _MM_FROUND_CUR_DIRECTION as the final operand
 * to __builtin_ia32_cmpss_mask. */
#define _mm_cmp_ss_mask(X, Y, P) \
  __extension__ ({ \
    (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                        (__v4sf)(__m128)(Y), \
                                        (int)(P), (__mmask8)-1, \
                                        _MM_FROUND_CUR_DIRECTION); \
  })
8970*67e74705SXin Li 
/* Scalar float compare yielding a mask, with a caller-supplied input mask.
 * Passes X, Y, the predicate P, the mask M and _MM_FROUND_CUR_DIRECTION as
 * the final operand to __builtin_ia32_cmpss_mask. */
#define _mm_mask_cmp_ss_mask(M, X, Y, P) \
  __extension__ ({ \
    (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                        (__v4sf)(__m128)(Y), \
                                        (int)(P), (__mmask8)(M), \
                                        _MM_FROUND_CUR_DIRECTION); \
  })
8976*67e74705SXin Li 
/* Scalar double compare yielding a mask.  Passes X, Y, the predicate P, a
 * mask of (__mmask8)-1 and the caller's R to __builtin_ia32_cmpsd_mask.
 * Kept as a macro; presumably P and R must be compile-time constants. */
#define _mm_cmp_round_sd_mask(X, Y, P, R) \
  __extension__ ({ \
    (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                        (__v2df)(__m128d)(Y), \
                                        (int)(P), (__mmask8)-1, (int)(R)); \
  })
8981*67e74705SXin Li 
/* Scalar double compare yielding a mask, with a caller-supplied input mask.
 * Passes X, Y, the predicate P, the mask M and the caller's R to
 * __builtin_ia32_cmpsd_mask. */
#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
  __extension__ ({ \
    (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                        (__v2df)(__m128d)(Y), \
                                        (int)(P), (__mmask8)(M), (int)(R)); \
  })
8986*67e74705SXin Li 
/* Scalar double compare yielding a mask.  Passes X, Y, the predicate P, a
 * mask of (__mmask8)-1 and _MM_FROUND_CUR_DIRECTION as the final operand
 * to __builtin_ia32_cmpsd_mask. */
#define _mm_cmp_sd_mask(X, Y, P) \
  __extension__ ({ \
    (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                        (__v2df)(__m128d)(Y), \
                                        (int)(P), (__mmask8)-1, \
                                        _MM_FROUND_CUR_DIRECTION); \
  })
8992*67e74705SXin Li 
/* Scalar double compare yielding a mask, with a caller-supplied input mask.
 * Passes X, Y, the predicate P, the mask M and _MM_FROUND_CUR_DIRECTION as
 * the final operand to __builtin_ia32_cmpsd_mask. */
#define _mm_mask_cmp_sd_mask(M, X, Y, P) \
  __extension__ ({ \
    (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                        (__v2df)(__m128d)(Y), \
                                        (int)(P), (__mmask8)(M), \
                                        _MM_FROUND_CUR_DIRECTION); \
  })
8998*67e74705SXin Li 
8999*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_movehdup_ps(__m512 __A)9000*67e74705SXin Li _mm512_movehdup_ps (__m512 __A)
9001*67e74705SXin Li {
9002*67e74705SXin Li   return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
9003*67e74705SXin Li                          1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
9004*67e74705SXin Li }
9005*67e74705SXin Li 
9006*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_movehdup_ps(__m512 __W,__mmask16 __U,__m512 __A)9007*67e74705SXin Li _mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
9008*67e74705SXin Li {
9009*67e74705SXin Li   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
9010*67e74705SXin Li                                              (__v16sf)_mm512_movehdup_ps(__A),
9011*67e74705SXin Li                                              (__v16sf)__W);
9012*67e74705SXin Li }
9013*67e74705SXin Li 
9014*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_movehdup_ps(__mmask16 __U,__m512 __A)9015*67e74705SXin Li _mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
9016*67e74705SXin Li {
9017*67e74705SXin Li   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
9018*67e74705SXin Li                                              (__v16sf)_mm512_movehdup_ps(__A),
9019*67e74705SXin Li                                              (__v16sf)_mm512_setzero_ps());
9020*67e74705SXin Li }
9021*67e74705SXin Li 
9022*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_moveldup_ps(__m512 __A)9023*67e74705SXin Li _mm512_moveldup_ps (__m512 __A)
9024*67e74705SXin Li {
9025*67e74705SXin Li   return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
9026*67e74705SXin Li                          0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
9027*67e74705SXin Li }
9028*67e74705SXin Li 
9029*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_moveldup_ps(__m512 __W,__mmask16 __U,__m512 __A)9030*67e74705SXin Li _mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
9031*67e74705SXin Li {
9032*67e74705SXin Li   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
9033*67e74705SXin Li                                              (__v16sf)_mm512_moveldup_ps(__A),
9034*67e74705SXin Li                                              (__v16sf)__W);
9035*67e74705SXin Li }
9036*67e74705SXin Li 
9037*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_moveldup_ps(__mmask16 __U,__m512 __A)9038*67e74705SXin Li _mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
9039*67e74705SXin Li {
9040*67e74705SXin Li   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
9041*67e74705SXin Li                                              (__v16sf)_mm512_moveldup_ps(__A),
9042*67e74705SXin Li                                              (__v16sf)_mm512_setzero_ps());
9043*67e74705SXin Li }
9044*67e74705SXin Li 
/* Shuffles 32-bit elements of A within each group of four elements.  The
 * four 2-bit fields of I (bits 1:0, 3:2, 5:4, 7:6) pick the source element
 * for result positions 0..3 of a group, and the same selection is applied
 * to the groups starting at elements 0, 4, 8 and 12.  Every index is below
 * 16, so only A is ever read; the second shuffle operand is an undefined
 * placeholder.  I is expanded sixteen times and feeds shuffle indices, so it
 * has to be a constant expression. */
#define _mm512_shuffle_epi32(A, I) \
  __extension__ ({ \
    (__m512i)__builtin_shufflevector( \
        (__v16si)(__m512i)(A), \
        (__v16si)_mm512_undefined_epi32(), \
        0  + (((I) >> 0) & 0x3), 0  + (((I) >> 2) & 0x3), \
        0  + (((I) >> 4) & 0x3), 0  + (((I) >> 6) & 0x3), \
        4  + (((I) >> 0) & 0x3), 4  + (((I) >> 2) & 0x3), \
        4  + (((I) >> 4) & 0x3), 4  + (((I) >> 6) & 0x3), \
        8  + (((I) >> 0) & 0x3), 8  + (((I) >> 2) & 0x3), \
        8  + (((I) >> 4) & 0x3), 8  + (((I) >> 6) & 0x3), \
        12 + (((I) >> 0) & 0x3), 12 + (((I) >> 2) & 0x3), \
        12 + (((I) >> 4) & 0x3), 12 + (((I) >> 6) & 0x3)); \
  })
9064*67e74705SXin Li 
/* Merge-masking form of _mm512_shuffle_epi32: the unmasked shuffle result,
 * W and the mask U are passed to __builtin_ia32_selectd_512. */
#define _mm512_mask_shuffle_epi32(W, U, A, I) \
  __extension__ ({ \
    (__m512i)__builtin_ia32_selectd_512( \
        (__mmask16)(U), \
        (__v16si)_mm512_shuffle_epi32((A), (I)), \
        (__v16si)(__m512i)(W)); \
  })
9069*67e74705SXin Li 
/* Zero-masking form of _mm512_shuffle_epi32: the unmasked shuffle result,
 * an all-zero vector and the mask U are passed to
 * __builtin_ia32_selectd_512. */
#define _mm512_maskz_shuffle_epi32(U, A, I) \
  __extension__ ({ \
    (__m512i)__builtin_ia32_selectd_512( \
        (__mmask16)(U), \
        (__v16si)_mm512_shuffle_epi32((A), (I)), \
        (__v16si)_mm512_setzero_si512()); \
  })
9074*67e74705SXin Li 
9075*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_expand_pd(__m512d __W,__mmask8 __U,__m512d __A)9076*67e74705SXin Li _mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
9077*67e74705SXin Li {
9078*67e74705SXin Li   return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
9079*67e74705SXin Li                 (__v8df) __W,
9080*67e74705SXin Li                 (__mmask8) __U);
9081*67e74705SXin Li }
9082*67e74705SXin Li 
9083*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_expand_pd(__mmask8 __U,__m512d __A)9084*67e74705SXin Li _mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
9085*67e74705SXin Li {
9086*67e74705SXin Li   return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
9087*67e74705SXin Li                 (__v8df) _mm512_setzero_pd (),
9088*67e74705SXin Li                 (__mmask8) __U);
9089*67e74705SXin Li }
9090*67e74705SXin Li 
9091*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_expand_epi64(__m512i __W,__mmask8 __U,__m512i __A)9092*67e74705SXin Li _mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
9093*67e74705SXin Li {
9094*67e74705SXin Li   return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
9095*67e74705SXin Li                 (__v8di) __W,
9096*67e74705SXin Li                 (__mmask8) __U);
9097*67e74705SXin Li }
9098*67e74705SXin Li 
9099*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_expand_epi64(__mmask8 __U,__m512i __A)9100*67e74705SXin Li _mm512_maskz_expand_epi64 ( __mmask8 __U, __m512i __A)
9101*67e74705SXin Li {
9102*67e74705SXin Li   return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
9103*67e74705SXin Li                 (__v8di) _mm512_setzero_pd (),
9104*67e74705SXin Li                 (__mmask8) __U);
9105*67e74705SXin Li }
9106*67e74705SXin Li 
9107*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_expandloadu_pd(__m512d __W,__mmask8 __U,void const * __P)9108*67e74705SXin Li _mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
9109*67e74705SXin Li {
9110*67e74705SXin Li   return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
9111*67e74705SXin Li               (__v8df) __W,
9112*67e74705SXin Li               (__mmask8) __U);
9113*67e74705SXin Li }
9114*67e74705SXin Li 
9115*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_expandloadu_pd(__mmask8 __U,void const * __P)9116*67e74705SXin Li _mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
9117*67e74705SXin Li {
9118*67e74705SXin Li   return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
9119*67e74705SXin Li               (__v8df) _mm512_setzero_pd(),
9120*67e74705SXin Li               (__mmask8) __U);
9121*67e74705SXin Li }
9122*67e74705SXin Li 
9123*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_expandloadu_epi64(__m512i __W,__mmask8 __U,void const * __P)9124*67e74705SXin Li _mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
9125*67e74705SXin Li {
9126*67e74705SXin Li   return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
9127*67e74705SXin Li               (__v8di) __W,
9128*67e74705SXin Li               (__mmask8) __U);
9129*67e74705SXin Li }
9130*67e74705SXin Li 
9131*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_expandloadu_epi64(__mmask8 __U,void const * __P)9132*67e74705SXin Li _mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
9133*67e74705SXin Li {
9134*67e74705SXin Li   return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
9135*67e74705SXin Li               (__v8di) _mm512_setzero_pd(),
9136*67e74705SXin Li               (__mmask8) __U);
9137*67e74705SXin Li }
9138*67e74705SXin Li 
9139*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_expandloadu_ps(__m512 __W,__mmask16 __U,void const * __P)9140*67e74705SXin Li _mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
9141*67e74705SXin Li {
9142*67e74705SXin Li   return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
9143*67e74705SXin Li                    (__v16sf) __W,
9144*67e74705SXin Li                    (__mmask16) __U);
9145*67e74705SXin Li }
9146*67e74705SXin Li 
9147*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_expandloadu_ps(__mmask16 __U,void const * __P)9148*67e74705SXin Li _mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P)
9149*67e74705SXin Li {
9150*67e74705SXin Li   return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
9151*67e74705SXin Li                    (__v16sf) _mm512_setzero_ps(),
9152*67e74705SXin Li                    (__mmask16) __U);
9153*67e74705SXin Li }
9154*67e74705SXin Li 
9155*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_expandloadu_epi32(__m512i __W,__mmask16 __U,void const * __P)9156*67e74705SXin Li _mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
9157*67e74705SXin Li {
9158*67e74705SXin Li   return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
9159*67e74705SXin Li               (__v16si) __W,
9160*67e74705SXin Li               (__mmask16) __U);
9161*67e74705SXin Li }
9162*67e74705SXin Li 
9163*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_expandloadu_epi32(__mmask16 __U,void const * __P)9164*67e74705SXin Li _mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P)
9165*67e74705SXin Li {
9166*67e74705SXin Li   return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
9167*67e74705SXin Li               (__v16si) _mm512_setzero_ps(),
9168*67e74705SXin Li               (__mmask16) __U);
9169*67e74705SXin Li }
9170*67e74705SXin Li 
9171*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_expand_ps(__m512 __W,__mmask16 __U,__m512 __A)9172*67e74705SXin Li _mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
9173*67e74705SXin Li {
9174*67e74705SXin Li   return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
9175*67e74705SXin Li                (__v16sf) __W,
9176*67e74705SXin Li                (__mmask16) __U);
9177*67e74705SXin Li }
9178*67e74705SXin Li 
9179*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_expand_ps(__mmask16 __U,__m512 __A)9180*67e74705SXin Li _mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
9181*67e74705SXin Li {
9182*67e74705SXin Li   return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
9183*67e74705SXin Li                (__v16sf) _mm512_setzero_ps(),
9184*67e74705SXin Li                (__mmask16) __U);
9185*67e74705SXin Li }
9186*67e74705SXin Li 
9187*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_expand_epi32(__m512i __W,__mmask16 __U,__m512i __A)9188*67e74705SXin Li _mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
9189*67e74705SXin Li {
9190*67e74705SXin Li   return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
9191*67e74705SXin Li                 (__v16si) __W,
9192*67e74705SXin Li                 (__mmask16) __U);
9193*67e74705SXin Li }
9194*67e74705SXin Li 
9195*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_expand_epi32(__mmask16 __U,__m512i __A)9196*67e74705SXin Li _mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
9197*67e74705SXin Li {
9198*67e74705SXin Li   return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
9199*67e74705SXin Li                 (__v16si) _mm512_setzero_ps(),
9200*67e74705SXin Li                 (__mmask16) __U);
9201*67e74705SXin Li }
9202*67e74705SXin Li 
/* Converts the eight floats of A to doubles via
 * __builtin_ia32_cvtps2pd512_mask, with an undefined second operand, a mask
 * of (__mmask8)-1 and the caller's R as the final operand.  Kept as a
 * macro; presumably R must be a compile-time constant. */
#define _mm512_cvt_roundps_pd(A, R) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                             (__v8df)_mm512_undefined_pd(), \
                                             (__mmask8)-1, (int)(R)); \
  })
9207*67e74705SXin Li 
/* Merge-masking float-to-double conversion: passes A, the vector W, the
 * mask U and the caller's R to __builtin_ia32_cvtps2pd512_mask. */
#define _mm512_mask_cvt_roundps_pd(W, U, A, R) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                             (__v8df)(__m512d)(W), \
                                             (__mmask8)(U), (int)(R)); \
  })
9212*67e74705SXin Li 
/* Zero-masking float-to-double conversion: passes A, an all-zero vector,
 * the mask U and the caller's R to __builtin_ia32_cvtps2pd512_mask. */
#define _mm512_maskz_cvt_roundps_pd(U, A, R) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)(U), (int)(R)); \
  })
9217*67e74705SXin Li 
9218*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_cvtps_pd(__m256 __A)9219*67e74705SXin Li _mm512_cvtps_pd (__m256 __A)
9220*67e74705SXin Li {
9221*67e74705SXin Li   return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
9222*67e74705SXin Li                 (__v8df)
9223*67e74705SXin Li                 _mm512_undefined_pd (),
9224*67e74705SXin Li                 (__mmask8) -1,
9225*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
9226*67e74705SXin Li }
9227*67e74705SXin Li 
9228*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_cvtps_pd(__m512d __W,__mmask8 __U,__m256 __A)9229*67e74705SXin Li _mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
9230*67e74705SXin Li {
9231*67e74705SXin Li   return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
9232*67e74705SXin Li                 (__v8df) __W,
9233*67e74705SXin Li                 (__mmask8) __U,
9234*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
9235*67e74705SXin Li }
9236*67e74705SXin Li 
9237*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_cvtps_pd(__mmask8 __U,__m256 __A)9238*67e74705SXin Li _mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
9239*67e74705SXin Li {
9240*67e74705SXin Li   return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
9241*67e74705SXin Li                 (__v8df)
9242*67e74705SXin Li                 _mm512_setzero_pd (),
9243*67e74705SXin Li                 (__mmask8) __U,
9244*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
9245*67e74705SXin Li }
9246*67e74705SXin Li 
9247*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_mov_pd(__m512d __W,__mmask8 __U,__m512d __A)9248*67e74705SXin Li _mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
9249*67e74705SXin Li {
9250*67e74705SXin Li   return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
9251*67e74705SXin Li               (__v8df) __A,
9252*67e74705SXin Li               (__v8df) __W);
9253*67e74705SXin Li }
9254*67e74705SXin Li 
9255*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_mov_pd(__mmask8 __U,__m512d __A)9256*67e74705SXin Li _mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
9257*67e74705SXin Li {
9258*67e74705SXin Li   return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
9259*67e74705SXin Li               (__v8df) __A,
9260*67e74705SXin Li               (__v8df) _mm512_setzero_pd ());
9261*67e74705SXin Li }
9262*67e74705SXin Li 
9263*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_mov_ps(__m512 __W,__mmask16 __U,__m512 __A)9264*67e74705SXin Li _mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
9265*67e74705SXin Li {
9266*67e74705SXin Li   return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
9267*67e74705SXin Li              (__v16sf) __A,
9268*67e74705SXin Li              (__v16sf) __W);
9269*67e74705SXin Li }
9270*67e74705SXin Li 
9271*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_mov_ps(__mmask16 __U,__m512 __A)9272*67e74705SXin Li _mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
9273*67e74705SXin Li {
9274*67e74705SXin Li   return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
9275*67e74705SXin Li              (__v16sf) __A,
9276*67e74705SXin Li              (__v16sf) _mm512_setzero_ps ());
9277*67e74705SXin Li }
9278*67e74705SXin Li 
9279*67e74705SXin Li static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_compressstoreu_pd(void * __P,__mmask8 __U,__m512d __A)9280*67e74705SXin Li _mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
9281*67e74705SXin Li {
9282*67e74705SXin Li   __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
9283*67e74705SXin Li             (__mmask8) __U);
9284*67e74705SXin Li }
9285*67e74705SXin Li 
9286*67e74705SXin Li static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_compressstoreu_epi64(void * __P,__mmask8 __U,__m512i __A)9287*67e74705SXin Li _mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
9288*67e74705SXin Li {
9289*67e74705SXin Li   __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
9290*67e74705SXin Li             (__mmask8) __U);
9291*67e74705SXin Li }
9292*67e74705SXin Li 
9293*67e74705SXin Li static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_compressstoreu_ps(void * __P,__mmask16 __U,__m512 __A)9294*67e74705SXin Li _mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
9295*67e74705SXin Li {
9296*67e74705SXin Li   __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
9297*67e74705SXin Li             (__mmask16) __U);
9298*67e74705SXin Li }
9299*67e74705SXin Li 
9300*67e74705SXin Li static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_compressstoreu_epi32(void * __P,__mmask16 __U,__m512i __A)9301*67e74705SXin Li _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
9302*67e74705SXin Li {
9303*67e74705SXin Li   __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
9304*67e74705SXin Li             (__mmask16) __U);
9305*67e74705SXin Li }
9306*67e74705SXin Li 
/* Scalar double-to-float conversion via __builtin_ia32_cvtsd2ss_round_mask:
 * passes the float vector A, the double vector B, an undefined third
 * operand, a mask of (__mmask8)-1 and the caller's R.  Kept as a macro;
 * presumably R must be a compile-time constant. */
#define _mm_cvt_roundsd_ss(A, B, R) \
  __extension__ ({ \
    (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v4sf)_mm_undefined_ps(), \
                                               (__mmask8)-1, (int)(R)); \
  })
9312*67e74705SXin Li 
/* Merge-masking scalar double-to-float conversion: passes A, B, the vector
 * W, the mask U and the caller's R to __builtin_ia32_cvtsd2ss_round_mask. */
#define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) \
  __extension__ ({ \
    (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v4sf)(__m128)(W), \
                                               (__mmask8)(U), (int)(R)); \
  })
9318*67e74705SXin Li 
/* Zero-masking scalar double-to-float conversion: passes A, B, an all-zero
 * vector, the mask U and the caller's R to
 * __builtin_ia32_cvtsd2ss_round_mask. */
#define _mm_maskz_cvt_roundsd_ss(U, A, B, R) \
  __extension__ ({ \
    (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), (int)(R)); \
  })
9324*67e74705SXin Li 
9325*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_cvtsd_ss(__m128 __W,__mmask8 __U,__m128 __A,__m128d __B)9326*67e74705SXin Li _mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
9327*67e74705SXin Li {
9328*67e74705SXin Li   return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(__A),
9329*67e74705SXin Li                                              (__v2df)(__B),
9330*67e74705SXin Li                                              (__v4sf)(__W),
9331*67e74705SXin Li                                              (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
9332*67e74705SXin Li }
9333*67e74705SXin Li 
9334*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_cvtsd_ss(__mmask8 __U,__m128 __A,__m128d __B)9335*67e74705SXin Li _mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B)
9336*67e74705SXin Li {
9337*67e74705SXin Li   return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(__A),
9338*67e74705SXin Li                                              (__v2df)(__B),
9339*67e74705SXin Li                                              (__v4sf)_mm_setzero_ps(),
9340*67e74705SXin Li                                              (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
9341*67e74705SXin Li }
9342*67e74705SXin Li 
/* Alternate spellings: every "i32"/"i64" name below is a plain alias for the
 * intrinsic with the matching "si32"/"si64" name. */

/* 32-bit integer forms. */
#define _mm_cvtss_i32 _mm_cvtss_si32
#define _mm_cvtsd_i32 _mm_cvtsd_si32
#define _mm_cvti32_sd _mm_cvtsi32_sd
#define _mm_cvti32_ss _mm_cvtsi32_ss

/* 64-bit integer forms. */
#define _mm_cvtss_i64 _mm_cvtss_si64
#define _mm_cvtsd_i64 _mm_cvtsd_si64
#define _mm_cvti64_sd _mm_cvtsi64_sd
#define _mm_cvti64_ss _mm_cvtsi64_ss
9351*67e74705SXin Li 
/* Expands to __builtin_ia32_cvtsi2sd64 with A (as __v2df), B converted to
 * long long, and R as the rounding argument; the result is cast to __m128d.
 * Same expansion as _mm_cvt_roundsi64_sd. */
#define _mm_cvt_roundi64_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtsi2sd64( \
      (__v2df)(__m128d)(A), (long long)(B), (int)(R)); })
9355*67e74705SXin Li 
/* Expands to __builtin_ia32_cvtsi2sd64 with A (as __v2df), B converted to
 * long long, and R as the rounding argument; the result is cast to __m128d. */
#define _mm_cvt_roundsi64_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtsi2sd64( \
      (__v2df)(__m128d)(A), (long long)(B), (int)(R)); })
9359*67e74705SXin Li 
/* Expands to __builtin_ia32_cvtsi2ss32 with A (as __v4sf), B converted to
 * int, and R as the rounding argument; the result is cast to __m128. */
#define _mm_cvt_roundsi32_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsi2ss32( \
      (__v4sf)(__m128)(A), (int)(B), (int)(R)); })
9362*67e74705SXin Li 
/* Expands to __builtin_ia32_cvtsi2ss32 with A (as __v4sf), B converted to
 * int, and R as the rounding argument; the result is cast to __m128.
 * Same expansion as _mm_cvt_roundsi32_ss. */
#define _mm_cvt_roundi32_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsi2ss32( \
      (__v4sf)(__m128)(A), (int)(B), (int)(R)); })
9365*67e74705SXin Li 
/* Expands to __builtin_ia32_cvtsi2ss64 with A (as __v4sf), B converted to
 * long long, and R as the rounding argument; the result is cast to __m128. */
#define _mm_cvt_roundsi64_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsi2ss64( \
      (__v4sf)(__m128)(A), (long long)(B), (int)(R)); })
9369*67e74705SXin Li 
/* Expands to __builtin_ia32_cvtsi2ss64 with A (as __v4sf), B converted to
 * long long, and R as the rounding argument; the result is cast to __m128.
 * Same expansion as _mm_cvt_roundsi64_ss. */
#define _mm_cvt_roundi64_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsi2ss64( \
      (__v4sf)(__m128)(A), (long long)(B), (int)(R)); })
9373*67e74705SXin Li 
/* Expands to __builtin_ia32_cvtss2sd_round_mask applied to A (as __v2df) and
 * B (as __v4sf), with an undefined pass-through vector, an all-ones mask and
 * R as the rounding argument.  The result is cast to __m128d. */
#define _mm_cvt_roundss_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtss2sd_round_mask( \
      (__v2df)(__m128d)(A), (__v4sf)(__m128)(B), \
      (__v2df)_mm_undefined_pd(), (__mmask8)-1, (int)(R)); })
9379*67e74705SXin Li 
/* Masked form: expands to __builtin_ia32_cvtss2sd_round_mask applied to A
 * (as __v2df) and B (as __v4sf), with W as the pass-through vector, U as the
 * mask and R as the rounding argument.  The result is cast to __m128d. */
#define _mm_mask_cvt_roundss_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtss2sd_round_mask( \
      (__v2df)(__m128d)(A), (__v4sf)(__m128)(B), \
      (__v2df)(__m128d)(W), (__mmask8)(U), (int)(R)); })
9385*67e74705SXin Li 
/* Zero-masked form: expands to __builtin_ia32_cvtss2sd_round_mask applied to
 * A (as __v2df) and B (as __v4sf), with an all-zero pass-through vector, U as
 * the mask and R as the rounding argument.  The result is cast to __m128d. */
#define _mm_maskz_cvt_roundss_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtss2sd_round_mask( \
      (__v2df)(__m128d)(A), (__v4sf)(__m128)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U), (int)(R)); })
9391*67e74705SXin Li 
9392*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_cvtss_sd(__m128d __W,__mmask8 __U,__m128d __A,__m128 __B)9393*67e74705SXin Li _mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
9394*67e74705SXin Li {
9395*67e74705SXin Li   return __builtin_ia32_cvtss2sd_round_mask((__v2df)(__A),
9396*67e74705SXin Li                                               (__v4sf)(__B),
9397*67e74705SXin Li                                               (__v2df)(__W),
9398*67e74705SXin Li                                               (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
9399*67e74705SXin Li }
9400*67e74705SXin Li 
9401*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_cvtss_sd(__mmask8 __U,__m128d __A,__m128 __B)9402*67e74705SXin Li _mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B)
9403*67e74705SXin Li {
9404*67e74705SXin Li   return __builtin_ia32_cvtss2sd_round_mask((__v2df)(__A),
9405*67e74705SXin Li                                               (__v4sf)(__B),
9406*67e74705SXin Li                                               (__v2df)_mm_setzero_pd(),
9407*67e74705SXin Li                                               (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
9408*67e74705SXin Li }
9409*67e74705SXin Li 
9410*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cvtu32_sd(__m128d __A,unsigned __B)9411*67e74705SXin Li _mm_cvtu32_sd (__m128d __A, unsigned __B)
9412*67e74705SXin Li {
9413*67e74705SXin Li   return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
9414*67e74705SXin Li }
9415*67e74705SXin Li 
/* Expands to __builtin_ia32_cvtusi2sd64 with A (as __v2df), B converted to
 * unsigned long long, and R as the rounding argument; the result is cast to
 * __m128d. */
#define _mm_cvt_roundu64_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtusi2sd64( \
      (__v2df)(__m128d)(A), (unsigned long long)(B), (int)(R)); })
9419*67e74705SXin Li 
9420*67e74705SXin Li static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cvtu64_sd(__m128d __A,unsigned long long __B)9421*67e74705SXin Li _mm_cvtu64_sd (__m128d __A, unsigned long long __B)
9422*67e74705SXin Li {
9423*67e74705SXin Li   return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
9424*67e74705SXin Li                  _MM_FROUND_CUR_DIRECTION);
9425*67e74705SXin Li }
9426*67e74705SXin Li 
/* Expands to __builtin_ia32_cvtusi2ss32 with A (as __v4sf), B converted to
 * unsigned int, and R as the rounding argument; the result is cast to
 * __m128. */
#define _mm_cvt_roundu32_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtusi2ss32( \
      (__v4sf)(__m128)(A), (unsigned int)(B), (int)(R)); })
9430*67e74705SXin Li 
9431*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cvtu32_ss(__m128 __A,unsigned __B)9432*67e74705SXin Li _mm_cvtu32_ss (__m128 __A, unsigned __B)
9433*67e74705SXin Li {
9434*67e74705SXin Li   return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
9435*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
9436*67e74705SXin Li }
9437*67e74705SXin Li 
/* Expands to __builtin_ia32_cvtusi2ss64 with A (as __v4sf), B converted to
 * unsigned long long, and R as the rounding argument; the result is cast to
 * __m128. */
#define _mm_cvt_roundu64_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtusi2ss64( \
      (__v4sf)(__m128)(A), (unsigned long long)(B), (int)(R)); })
9441*67e74705SXin Li 
9442*67e74705SXin Li static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cvtu64_ss(__m128 __A,unsigned long long __B)9443*67e74705SXin Li _mm_cvtu64_ss (__m128 __A, unsigned long long __B)
9444*67e74705SXin Li {
9445*67e74705SXin Li   return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
9446*67e74705SXin Li                 _MM_FROUND_CUR_DIRECTION);
9447*67e74705SXin Li }
9448*67e74705SXin Li 
9449*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_set1_epi32(__m512i __O,__mmask16 __M,int __A)9450*67e74705SXin Li _mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
9451*67e74705SXin Li {
9452*67e74705SXin Li   return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O,
9453*67e74705SXin Li                  __M);
9454*67e74705SXin Li }
9455*67e74705SXin Li 
9456*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_set1_epi64(__m512i __O,__mmask8 __M,long long __A)9457*67e74705SXin Li _mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
9458*67e74705SXin Li {
9459*67e74705SXin Li   return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
9460*67e74705SXin Li                  __M);
9461*67e74705SXin Li }
9462*67e74705SXin Li 
9463*67e74705SXin Li static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set_epi32(int __A,int __B,int __C,int __D,int __E,int __F,int __G,int __H,int __I,int __J,int __K,int __L,int __M,int __N,int __O,int __P)9464*67e74705SXin Li _mm512_set_epi32 (int __A, int __B, int __C, int __D,
9465*67e74705SXin Li      int __E, int __F, int __G, int __H,
9466*67e74705SXin Li      int __I, int __J, int __K, int __L,
9467*67e74705SXin Li      int __M, int __N, int __O, int __P)
9468*67e74705SXin Li {
9469*67e74705SXin Li   return __extension__ (__m512i)(__v16si)
9470*67e74705SXin Li   { __P, __O, __N, __M, __L, __K, __J, __I,
9471*67e74705SXin Li     __H, __G, __F, __E, __D, __C, __B, __A };
9472*67e74705SXin Li }
9473*67e74705SXin Li 
/* Reversed-order variant of _mm512_set_epi32: forwards its sixteen arguments
 * in the opposite order, so e0 ends up as the last argument of
 * _mm512_set_epi32 and e15 as the first. */
#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7, \
                          e8,e9,e10,e11,e12,e13,e14,e15) \
  _mm512_set_epi32((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8), \
                   (e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
9478*67e74705SXin Li 
9479*67e74705SXin Li static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_set_epi64(long long __A,long long __B,long long __C,long long __D,long long __E,long long __F,long long __G,long long __H)9480*67e74705SXin Li _mm512_set_epi64 (long long __A, long long __B, long long __C,
9481*67e74705SXin Li      long long __D, long long __E, long long __F,
9482*67e74705SXin Li      long long __G, long long __H)
9483*67e74705SXin Li {
9484*67e74705SXin Li   return __extension__ (__m512i) (__v8di)
9485*67e74705SXin Li   { __H, __G, __F, __E, __D, __C, __B, __A };
9486*67e74705SXin Li }
9487*67e74705SXin Li 
/* Reversed-order variant of _mm512_set_epi64: forwards its eight arguments in
 * the opposite order, so e0 ends up as the last argument and e7 as the
 * first. */
#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7) \
  _mm512_set_epi64((e7),(e6),(e5),(e4), \
                   (e3),(e2),(e1),(e0))
9490*67e74705SXin Li 
9491*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_set_pd(double __A,double __B,double __C,double __D,double __E,double __F,double __G,double __H)9492*67e74705SXin Li _mm512_set_pd (double __A, double __B, double __C, double __D,
9493*67e74705SXin Li         double __E, double __F, double __G, double __H)
9494*67e74705SXin Li {
9495*67e74705SXin Li   return __extension__ (__m512d)
9496*67e74705SXin Li   { __H, __G, __F, __E, __D, __C, __B, __A };
9497*67e74705SXin Li }
9498*67e74705SXin Li 
/* Reversed-order variant of _mm512_set_pd: forwards its eight arguments in
 * the opposite order, so e0 ends up as the last argument and e7 as the
 * first. */
#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \
  _mm512_set_pd((e7),(e6),(e5),(e4), \
                (e3),(e2),(e1),(e0))
9501*67e74705SXin Li 
9502*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_set_ps(float __A,float __B,float __C,float __D,float __E,float __F,float __G,float __H,float __I,float __J,float __K,float __L,float __M,float __N,float __O,float __P)9503*67e74705SXin Li _mm512_set_ps (float __A, float __B, float __C, float __D,
9504*67e74705SXin Li         float __E, float __F, float __G, float __H,
9505*67e74705SXin Li         float __I, float __J, float __K, float __L,
9506*67e74705SXin Li         float __M, float __N, float __O, float __P)
9507*67e74705SXin Li {
9508*67e74705SXin Li   return __extension__ (__m512)
9509*67e74705SXin Li   { __P, __O, __N, __M, __L, __K, __J, __I,
9510*67e74705SXin Li     __H, __G, __F, __E, __D, __C, __B, __A };
9511*67e74705SXin Li }
9512*67e74705SXin Li 
/* Reversed-order variant of _mm512_set_ps: forwards its sixteen arguments in
 * the opposite order, so e0 ends up as the last argument and e15 as the
 * first. */
#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7, \
                       e8,e9,e10,e11,e12,e13,e14,e15) \
  _mm512_set_ps((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8), \
                (e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
9516*67e74705SXin Li 
9517*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_abs_ps(__m512 A)9518*67e74705SXin Li _mm512_abs_ps(__m512 A)
9519*67e74705SXin Li {
9520*67e74705SXin Li   return (__m512)_mm512_and_epi32(_mm512_set1_epi32(0x7FFFFFFF),(__m512i)A) ;
9521*67e74705SXin Li }
9522*67e74705SXin Li 
9523*67e74705SXin Li static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_abs_ps(__m512 W,__mmask16 K,__m512 A)9524*67e74705SXin Li _mm512_mask_abs_ps(__m512 W, __mmask16 K, __m512 A)
9525*67e74705SXin Li {
9526*67e74705SXin Li   return (__m512)_mm512_mask_and_epi32((__m512i)W, K, _mm512_set1_epi32(0x7FFFFFFF),(__m512i)A) ;
9527*67e74705SXin Li }
9528*67e74705SXin Li 
9529*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_abs_pd(__m512d A)9530*67e74705SXin Li _mm512_abs_pd(__m512d A)
9531*67e74705SXin Li {
9532*67e74705SXin Li   return (__m512d)_mm512_and_epi64(_mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)A) ;
9533*67e74705SXin Li }
9534*67e74705SXin Li 
9535*67e74705SXin Li static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_abs_pd(__m512d W,__mmask8 K,__m512d A)9536*67e74705SXin Li _mm512_mask_abs_pd(__m512d W, __mmask8 K, __m512d A)
9537*67e74705SXin Li {
9538*67e74705SXin Li   return (__m512d)_mm512_mask_and_epi64((__v8di)W, K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)A);
9539*67e74705SXin Li }
9540*67e74705SXin Li 
9541*67e74705SXin Li #undef __DEFAULT_FN_ATTRS
9542*67e74705SXin Li 
9543*67e74705SXin Li #endif // __AVX512FINTRIN_H
9544