xref: /aosp_15_r20/external/clang/lib/Headers/avx512vlcdintrin.h (revision 67e74705e28f6214e480b399dd47ea732279e315)
/*===---- avx512vlcdintrin.h - AVX512VL and AVX512CD intrinsics ---------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
23*67e74705SXin Li #ifndef __IMMINTRIN_H
24*67e74705SXin Li #error "Never use <avx512vlcdintrin.h> directly; include <immintrin.h> instead."
25*67e74705SXin Li #endif
26*67e74705SXin Li 
27*67e74705SXin Li #ifndef __AVX512VLCDINTRIN_H
28*67e74705SXin Li #define __AVX512VLCDINTRIN_H
29*67e74705SXin Li 
30*67e74705SXin Li /* Define the default attributes for the functions in this file. */
31*67e74705SXin Li #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512cd")))
32*67e74705SXin Li 
33*67e74705SXin Li 
34*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_broadcastmb_epi64(__mmask8 __A)35*67e74705SXin Li _mm_broadcastmb_epi64 (__mmask8 __A)
36*67e74705SXin Li {
37*67e74705SXin Li   return (__m128i) __builtin_ia32_broadcastmb128 (__A);
38*67e74705SXin Li }
39*67e74705SXin Li 
40*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_broadcastmb_epi64(__mmask8 __A)41*67e74705SXin Li _mm256_broadcastmb_epi64 (__mmask8 __A)
42*67e74705SXin Li {
43*67e74705SXin Li   return (__m256i) __builtin_ia32_broadcastmb256 (__A);
44*67e74705SXin Li }
45*67e74705SXin Li 
46*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_broadcastmw_epi32(__mmask16 __A)47*67e74705SXin Li _mm_broadcastmw_epi32 (__mmask16 __A)
48*67e74705SXin Li {
49*67e74705SXin Li   return (__m128i) __builtin_ia32_broadcastmw128 (__A);
50*67e74705SXin Li }
51*67e74705SXin Li 
52*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_broadcastmw_epi32(__mmask16 __A)53*67e74705SXin Li _mm256_broadcastmw_epi32 (__mmask16 __A)
54*67e74705SXin Li {
55*67e74705SXin Li   return (__m256i) __builtin_ia32_broadcastmw256 (__A);
56*67e74705SXin Li }
57*67e74705SXin Li 
58*67e74705SXin Li 
59*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_conflict_epi64(__m128i __A)60*67e74705SXin Li _mm_conflict_epi64 (__m128i __A)
61*67e74705SXin Li {
62*67e74705SXin Li   return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
63*67e74705SXin Li                (__v2di) _mm_undefined_si128 (),
64*67e74705SXin Li                (__mmask8) -1);
65*67e74705SXin Li }
66*67e74705SXin Li 
67*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mask_conflict_epi64(__m128i __W,__mmask8 __U,__m128i __A)68*67e74705SXin Li _mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
69*67e74705SXin Li {
70*67e74705SXin Li   return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
71*67e74705SXin Li                (__v2di) __W,
72*67e74705SXin Li                (__mmask8) __U);
73*67e74705SXin Li }
74*67e74705SXin Li 
75*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maskz_conflict_epi64(__mmask8 __U,__m128i __A)76*67e74705SXin Li _mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A)
77*67e74705SXin Li {
78*67e74705SXin Li   return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
79*67e74705SXin Li                (__v2di)
80*67e74705SXin Li                _mm_setzero_di (),
81*67e74705SXin Li                (__mmask8) __U);
82*67e74705SXin Li }
83*67e74705SXin Li 
84*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_conflict_epi64(__m256i __A)85*67e74705SXin Li _mm256_conflict_epi64 (__m256i __A)
86*67e74705SXin Li {
87*67e74705SXin Li   return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
88*67e74705SXin Li                (__v4di)  _mm256_undefined_si256 (),
89*67e74705SXin Li                (__mmask8) -1);
90*67e74705SXin Li }
91*67e74705SXin Li 
92*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_conflict_epi64(__m256i __W,__mmask8 __U,__m256i __A)93*67e74705SXin Li _mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
94*67e74705SXin Li {
95*67e74705SXin Li   return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
96*67e74705SXin Li                (__v4di) __W,
97*67e74705SXin Li                (__mmask8) __U);
98*67e74705SXin Li }
99*67e74705SXin Li 
100*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_conflict_epi64(__mmask8 __U,__m256i __A)101*67e74705SXin Li _mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A)
102*67e74705SXin Li {
103*67e74705SXin Li   return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
104*67e74705SXin Li                (__v4di) _mm256_setzero_si256 (),
105*67e74705SXin Li                (__mmask8) __U);
106*67e74705SXin Li }
107*67e74705SXin Li 
108*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_conflict_epi32(__m128i __A)109*67e74705SXin Li _mm_conflict_epi32 (__m128i __A)
110*67e74705SXin Li {
111*67e74705SXin Li   return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
112*67e74705SXin Li                (__v4si) _mm_undefined_si128 (),
113*67e74705SXin Li                (__mmask8) -1);
114*67e74705SXin Li }
115*67e74705SXin Li 
116*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mask_conflict_epi32(__m128i __W,__mmask8 __U,__m128i __A)117*67e74705SXin Li _mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
118*67e74705SXin Li {
119*67e74705SXin Li   return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
120*67e74705SXin Li                (__v4si) __W,
121*67e74705SXin Li                (__mmask8) __U);
122*67e74705SXin Li }
123*67e74705SXin Li 
124*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maskz_conflict_epi32(__mmask8 __U,__m128i __A)125*67e74705SXin Li _mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A)
126*67e74705SXin Li {
127*67e74705SXin Li   return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
128*67e74705SXin Li                (__v4si) _mm_setzero_si128 (),
129*67e74705SXin Li                (__mmask8) __U);
130*67e74705SXin Li }
131*67e74705SXin Li 
132*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_conflict_epi32(__m256i __A)133*67e74705SXin Li _mm256_conflict_epi32 (__m256i __A)
134*67e74705SXin Li {
135*67e74705SXin Li   return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
136*67e74705SXin Li                (__v8si) _mm256_undefined_si256 (),
137*67e74705SXin Li                (__mmask8) -1);
138*67e74705SXin Li }
139*67e74705SXin Li 
140*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_conflict_epi32(__m256i __W,__mmask8 __U,__m256i __A)141*67e74705SXin Li _mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
142*67e74705SXin Li {
143*67e74705SXin Li   return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
144*67e74705SXin Li                (__v8si) __W,
145*67e74705SXin Li                (__mmask8) __U);
146*67e74705SXin Li }
147*67e74705SXin Li 
148*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_conflict_epi32(__mmask8 __U,__m256i __A)149*67e74705SXin Li _mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A)
150*67e74705SXin Li {
151*67e74705SXin Li   return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
152*67e74705SXin Li                (__v8si)
153*67e74705SXin Li                _mm256_setzero_si256 (),
154*67e74705SXin Li                (__mmask8) __U);
155*67e74705SXin Li }
156*67e74705SXin Li 
157*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_lzcnt_epi32(__m128i __A)158*67e74705SXin Li _mm_lzcnt_epi32 (__m128i __A)
159*67e74705SXin Li {
160*67e74705SXin Li   return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
161*67e74705SXin Li                  (__v4si)
162*67e74705SXin Li                  _mm_setzero_si128 (),
163*67e74705SXin Li                  (__mmask8) -1);
164*67e74705SXin Li }
165*67e74705SXin Li 
166*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mask_lzcnt_epi32(__m128i __W,__mmask8 __U,__m128i __A)167*67e74705SXin Li _mm_mask_lzcnt_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
168*67e74705SXin Li {
169*67e74705SXin Li   return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
170*67e74705SXin Li                  (__v4si) __W,
171*67e74705SXin Li                  (__mmask8) __U);
172*67e74705SXin Li }
173*67e74705SXin Li 
174*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maskz_lzcnt_epi32(__mmask8 __U,__m128i __A)175*67e74705SXin Li _mm_maskz_lzcnt_epi32 (__mmask8 __U, __m128i __A)
176*67e74705SXin Li {
177*67e74705SXin Li   return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
178*67e74705SXin Li                  (__v4si)
179*67e74705SXin Li                  _mm_setzero_si128 (),
180*67e74705SXin Li                  (__mmask8) __U);
181*67e74705SXin Li }
182*67e74705SXin Li 
183*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_lzcnt_epi32(__m256i __A)184*67e74705SXin Li _mm256_lzcnt_epi32 (__m256i __A)
185*67e74705SXin Li {
186*67e74705SXin Li   return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
187*67e74705SXin Li                  (__v8si)
188*67e74705SXin Li                  _mm256_setzero_si256 (),
189*67e74705SXin Li                  (__mmask8) -1);
190*67e74705SXin Li }
191*67e74705SXin Li 
192*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_lzcnt_epi32(__m256i __W,__mmask8 __U,__m256i __A)193*67e74705SXin Li _mm256_mask_lzcnt_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
194*67e74705SXin Li {
195*67e74705SXin Li   return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
196*67e74705SXin Li                  (__v8si) __W,
197*67e74705SXin Li                  (__mmask8) __U);
198*67e74705SXin Li }
199*67e74705SXin Li 
200*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_lzcnt_epi32(__mmask8 __U,__m256i __A)201*67e74705SXin Li _mm256_maskz_lzcnt_epi32 (__mmask8 __U, __m256i __A)
202*67e74705SXin Li {
203*67e74705SXin Li   return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
204*67e74705SXin Li                  (__v8si)
205*67e74705SXin Li                  _mm256_setzero_si256 (),
206*67e74705SXin Li                  (__mmask8) __U);
207*67e74705SXin Li }
208*67e74705SXin Li 
209*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_lzcnt_epi64(__m128i __A)210*67e74705SXin Li _mm_lzcnt_epi64 (__m128i __A)
211*67e74705SXin Li {
212*67e74705SXin Li   return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
213*67e74705SXin Li                  (__v2di)
214*67e74705SXin Li                  _mm_setzero_di (),
215*67e74705SXin Li                  (__mmask8) -1);
216*67e74705SXin Li }
217*67e74705SXin Li 
218*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mask_lzcnt_epi64(__m128i __W,__mmask8 __U,__m128i __A)219*67e74705SXin Li _mm_mask_lzcnt_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
220*67e74705SXin Li {
221*67e74705SXin Li   return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
222*67e74705SXin Li                  (__v2di) __W,
223*67e74705SXin Li                  (__mmask8) __U);
224*67e74705SXin Li }
225*67e74705SXin Li 
226*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maskz_lzcnt_epi64(__mmask8 __U,__m128i __A)227*67e74705SXin Li _mm_maskz_lzcnt_epi64 (__mmask8 __U, __m128i __A)
228*67e74705SXin Li {
229*67e74705SXin Li   return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
230*67e74705SXin Li                  (__v2di)
231*67e74705SXin Li                  _mm_setzero_di (),
232*67e74705SXin Li                  (__mmask8) __U);
233*67e74705SXin Li }
234*67e74705SXin Li 
235*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_lzcnt_epi64(__m256i __A)236*67e74705SXin Li _mm256_lzcnt_epi64 (__m256i __A)
237*67e74705SXin Li {
238*67e74705SXin Li   return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
239*67e74705SXin Li                  (__v4di)
240*67e74705SXin Li                  _mm256_setzero_si256 (),
241*67e74705SXin Li                  (__mmask8) -1);
242*67e74705SXin Li }
243*67e74705SXin Li 
244*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_lzcnt_epi64(__m256i __W,__mmask8 __U,__m256i __A)245*67e74705SXin Li _mm256_mask_lzcnt_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
246*67e74705SXin Li {
247*67e74705SXin Li   return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
248*67e74705SXin Li                  (__v4di) __W,
249*67e74705SXin Li                  (__mmask8) __U);
250*67e74705SXin Li }
251*67e74705SXin Li 
252*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_lzcnt_epi64(__mmask8 __U,__m256i __A)253*67e74705SXin Li _mm256_maskz_lzcnt_epi64 (__mmask8 __U, __m256i __A)
254*67e74705SXin Li {
255*67e74705SXin Li   return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
256*67e74705SXin Li                  (__v4di)
257*67e74705SXin Li                  _mm256_setzero_si256 (),
258*67e74705SXin Li                  (__mmask8) __U);
259*67e74705SXin Li }
260*67e74705SXin Li 
261*67e74705SXin Li #undef __DEFAULT_FN_ATTRS
262*67e74705SXin Li 
263*67e74705SXin Li #endif /* __AVX512VLCDINTRIN_H */
264