xref: /aosp_15_r20/external/clang/lib/Headers/avx512vbmivlintrin.h (revision 67e74705e28f6214e480b399dd47ea732279e315)
/*===------------- avx512vbmivlintrin.h - VBMI intrinsics ------------------===
 *
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
24*67e74705SXin Li #ifndef __IMMINTRIN_H
25*67e74705SXin Li #error "Never use <avx512vbmivlintrin.h> directly; include <immintrin.h> instead."
26*67e74705SXin Li #endif
27*67e74705SXin Li 
28*67e74705SXin Li #ifndef __VBMIVLINTRIN_H
29*67e74705SXin Li #define __VBMIVLINTRIN_H
30*67e74705SXin Li 
/*
 * Attribute list attached to the function definitions in this file:
 * __always_inline__, __nodebug__, and a __target__ of
 * "avx512vbmi,avx512vl".
 */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, \
                 __target__("avx512vbmi,avx512vl")))
33*67e74705SXin Li 
34*67e74705SXin Li 
35*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mask2_permutex2var_epi8(__m128i __A,__m128i __I,__mmask16 __U,__m128i __B)36*67e74705SXin Li _mm_mask2_permutex2var_epi8 (__m128i __A, __m128i __I, __mmask16 __U,
37*67e74705SXin Li             __m128i __B)
38*67e74705SXin Li {
39*67e74705SXin Li   return (__m128i) __builtin_ia32_vpermi2varqi128_mask ((__v16qi) __A,
40*67e74705SXin Li               (__v16qi) __I
41*67e74705SXin Li               /* idx */ ,
42*67e74705SXin Li               (__v16qi) __B,
43*67e74705SXin Li               (__mmask16)
44*67e74705SXin Li               __U);
45*67e74705SXin Li }
46*67e74705SXin Li 
47*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask2_permutex2var_epi8(__m256i __A,__m256i __I,__mmask32 __U,__m256i __B)48*67e74705SXin Li _mm256_mask2_permutex2var_epi8 (__m256i __A, __m256i __I,
49*67e74705SXin Li          __mmask32 __U, __m256i __B)
50*67e74705SXin Li {
51*67e74705SXin Li   return (__m256i) __builtin_ia32_vpermi2varqi256_mask ((__v32qi) __A,
52*67e74705SXin Li               (__v32qi) __I
53*67e74705SXin Li               /* idx */ ,
54*67e74705SXin Li               (__v32qi) __B,
55*67e74705SXin Li               (__mmask32)
56*67e74705SXin Li               __U);
57*67e74705SXin Li }
58*67e74705SXin Li 
59*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_permutex2var_epi8(__m128i __A,__m128i __I,__m128i __B)60*67e74705SXin Li _mm_permutex2var_epi8 (__m128i __A, __m128i __I, __m128i __B)
61*67e74705SXin Li {
62*67e74705SXin Li   return (__m128i) __builtin_ia32_vpermt2varqi128_mask ((__v16qi) __I
63*67e74705SXin Li               /* idx */ ,
64*67e74705SXin Li               (__v16qi) __A,
65*67e74705SXin Li               (__v16qi) __B,
66*67e74705SXin Li               (__mmask16) -
67*67e74705SXin Li               1);
68*67e74705SXin Li }
69*67e74705SXin Li 
70*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mask_permutex2var_epi8(__m128i __A,__mmask16 __U,__m128i __I,__m128i __B)71*67e74705SXin Li _mm_mask_permutex2var_epi8 (__m128i __A, __mmask16 __U, __m128i __I,
72*67e74705SXin Li            __m128i __B)
73*67e74705SXin Li {
74*67e74705SXin Li   return (__m128i) __builtin_ia32_vpermt2varqi128_mask ((__v16qi) __I
75*67e74705SXin Li               /* idx */ ,
76*67e74705SXin Li               (__v16qi) __A,
77*67e74705SXin Li               (__v16qi) __B,
78*67e74705SXin Li               (__mmask16)
79*67e74705SXin Li               __U);
80*67e74705SXin Li }
81*67e74705SXin Li 
82*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maskz_permutex2var_epi8(__mmask16 __U,__m128i __A,__m128i __I,__m128i __B)83*67e74705SXin Li _mm_maskz_permutex2var_epi8 (__mmask16 __U, __m128i __A, __m128i __I,
84*67e74705SXin Li             __m128i __B)
85*67e74705SXin Li {
86*67e74705SXin Li   return (__m128i) __builtin_ia32_vpermt2varqi128_maskz ((__v16qi) __I
87*67e74705SXin Li                /* idx */ ,
88*67e74705SXin Li                (__v16qi) __A,
89*67e74705SXin Li                (__v16qi) __B,
90*67e74705SXin Li                (__mmask16)
91*67e74705SXin Li                __U);
92*67e74705SXin Li }
93*67e74705SXin Li 
94*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_permutex2var_epi8(__m256i __A,__m256i __I,__m256i __B)95*67e74705SXin Li _mm256_permutex2var_epi8 (__m256i __A, __m256i __I, __m256i __B)
96*67e74705SXin Li {
97*67e74705SXin Li   return (__m256i) __builtin_ia32_vpermt2varqi256_mask ((__v32qi) __I
98*67e74705SXin Li               /* idx */ ,
99*67e74705SXin Li               (__v32qi) __A,
100*67e74705SXin Li               (__v32qi) __B,
101*67e74705SXin Li               (__mmask32) -
102*67e74705SXin Li               1);
103*67e74705SXin Li }
104*67e74705SXin Li 
105*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_permutex2var_epi8(__m256i __A,__mmask32 __U,__m256i __I,__m256i __B)106*67e74705SXin Li _mm256_mask_permutex2var_epi8 (__m256i __A, __mmask32 __U,
107*67e74705SXin Li         __m256i __I, __m256i __B)
108*67e74705SXin Li {
109*67e74705SXin Li   return (__m256i) __builtin_ia32_vpermt2varqi256_mask ((__v32qi) __I
110*67e74705SXin Li               /* idx */ ,
111*67e74705SXin Li               (__v32qi) __A,
112*67e74705SXin Li               (__v32qi) __B,
113*67e74705SXin Li               (__mmask32)
114*67e74705SXin Li               __U);
115*67e74705SXin Li }
116*67e74705SXin Li 
117*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_permutex2var_epi8(__mmask32 __U,__m256i __A,__m256i __I,__m256i __B)118*67e74705SXin Li _mm256_maskz_permutex2var_epi8 (__mmask32 __U, __m256i __A,
119*67e74705SXin Li          __m256i __I, __m256i __B)
120*67e74705SXin Li {
121*67e74705SXin Li   return (__m256i) __builtin_ia32_vpermt2varqi256_maskz ((__v32qi) __I
122*67e74705SXin Li                /* idx */ ,
123*67e74705SXin Li                (__v32qi) __A,
124*67e74705SXin Li                (__v32qi) __B,
125*67e74705SXin Li                (__mmask32)
126*67e74705SXin Li                __U);
127*67e74705SXin Li }
128*67e74705SXin Li 
129*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_permutexvar_epi8(__m128i __A,__m128i __B)130*67e74705SXin Li _mm_permutexvar_epi8 (__m128i __A, __m128i __B)
131*67e74705SXin Li {
132*67e74705SXin Li   return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B,
133*67e74705SXin Li                  (__v16qi) __A,
134*67e74705SXin Li                  (__v16qi) _mm_undefined_si128 (),
135*67e74705SXin Li                  (__mmask16) -1);
136*67e74705SXin Li }
137*67e74705SXin Li 
138*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maskz_permutexvar_epi8(__mmask16 __M,__m128i __A,__m128i __B)139*67e74705SXin Li _mm_maskz_permutexvar_epi8 (__mmask16 __M, __m128i __A, __m128i __B)
140*67e74705SXin Li {
141*67e74705SXin Li   return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B,
142*67e74705SXin Li                  (__v16qi) __A,
143*67e74705SXin Li                  (__v16qi) _mm_setzero_si128 (),
144*67e74705SXin Li                  (__mmask16) __M);
145*67e74705SXin Li }
146*67e74705SXin Li 
147*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mask_permutexvar_epi8(__m128i __W,__mmask16 __M,__m128i __A,__m128i __B)148*67e74705SXin Li _mm_mask_permutexvar_epi8 (__m128i __W, __mmask16 __M, __m128i __A,
149*67e74705SXin Li           __m128i __B)
150*67e74705SXin Li {
151*67e74705SXin Li   return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B,
152*67e74705SXin Li                  (__v16qi) __A,
153*67e74705SXin Li                  (__v16qi) __W,
154*67e74705SXin Li                  (__mmask16) __M);
155*67e74705SXin Li }
156*67e74705SXin Li 
157*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_permutexvar_epi8(__m256i __A,__m256i __B)158*67e74705SXin Li _mm256_permutexvar_epi8 (__m256i __A, __m256i __B)
159*67e74705SXin Li {
160*67e74705SXin Li   return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B,
161*67e74705SXin Li                  (__v32qi) __A,
162*67e74705SXin Li                  (__v32qi) _mm256_undefined_si256 (),
163*67e74705SXin Li                  (__mmask32) -1);
164*67e74705SXin Li }
165*67e74705SXin Li 
166*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_permutexvar_epi8(__mmask32 __M,__m256i __A,__m256i __B)167*67e74705SXin Li _mm256_maskz_permutexvar_epi8 (__mmask32 __M, __m256i __A,
168*67e74705SXin Li         __m256i __B)
169*67e74705SXin Li {
170*67e74705SXin Li   return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B,
171*67e74705SXin Li                  (__v32qi) __A,
172*67e74705SXin Li                  (__v32qi) _mm256_setzero_si256 (),
173*67e74705SXin Li                  (__mmask32) __M);
174*67e74705SXin Li }
175*67e74705SXin Li 
176*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_permutexvar_epi8(__m256i __W,__mmask32 __M,__m256i __A,__m256i __B)177*67e74705SXin Li _mm256_mask_permutexvar_epi8 (__m256i __W, __mmask32 __M, __m256i __A,
178*67e74705SXin Li              __m256i __B)
179*67e74705SXin Li {
180*67e74705SXin Li   return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B,
181*67e74705SXin Li                  (__v32qi) __A,
182*67e74705SXin Li                  (__v32qi) __W,
183*67e74705SXin Li                  (__mmask32) __M);
184*67e74705SXin Li }
185*67e74705SXin Li 
186*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mask_multishift_epi64_epi8(__m128i __W,__mmask16 __M,__m128i __X,__m128i __Y)187*67e74705SXin Li _mm_mask_multishift_epi64_epi8 (__m128i __W, __mmask16 __M, __m128i __X, __m128i __Y)
188*67e74705SXin Li {
189*67e74705SXin Li   return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
190*67e74705SXin Li                 (__v16qi) __Y,
191*67e74705SXin Li                 (__v16qi) __W,
192*67e74705SXin Li                 (__mmask16) __M);
193*67e74705SXin Li }
194*67e74705SXin Li 
195*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maskz_multishift_epi64_epi8(__mmask16 __M,__m128i __X,__m128i __Y)196*67e74705SXin Li _mm_maskz_multishift_epi64_epi8 (__mmask16 __M, __m128i __X, __m128i __Y)
197*67e74705SXin Li {
198*67e74705SXin Li   return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
199*67e74705SXin Li                 (__v16qi) __Y,
200*67e74705SXin Li                 (__v16qi)
201*67e74705SXin Li                 _mm_setzero_si128 (),
202*67e74705SXin Li                 (__mmask16) __M);
203*67e74705SXin Li }
204*67e74705SXin Li 
205*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_multishift_epi64_epi8(__m128i __X,__m128i __Y)206*67e74705SXin Li _mm_multishift_epi64_epi8 (__m128i __X, __m128i __Y)
207*67e74705SXin Li {
208*67e74705SXin Li   return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
209*67e74705SXin Li                 (__v16qi) __Y,
210*67e74705SXin Li                 (__v16qi)
211*67e74705SXin Li                 _mm_undefined_si128 (),
212*67e74705SXin Li                 (__mmask16) -1);
213*67e74705SXin Li }
214*67e74705SXin Li 
215*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_multishift_epi64_epi8(__m256i __W,__mmask32 __M,__m256i __X,__m256i __Y)216*67e74705SXin Li _mm256_mask_multishift_epi64_epi8 (__m256i __W, __mmask32 __M, __m256i __X, __m256i __Y)
217*67e74705SXin Li {
218*67e74705SXin Li   return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
219*67e74705SXin Li                 (__v32qi) __Y,
220*67e74705SXin Li                 (__v32qi) __W,
221*67e74705SXin Li                 (__mmask32) __M);
222*67e74705SXin Li }
223*67e74705SXin Li 
224*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_multishift_epi64_epi8(__mmask32 __M,__m256i __X,__m256i __Y)225*67e74705SXin Li _mm256_maskz_multishift_epi64_epi8 (__mmask32 __M, __m256i __X, __m256i __Y)
226*67e74705SXin Li {
227*67e74705SXin Li   return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
228*67e74705SXin Li                 (__v32qi) __Y,
229*67e74705SXin Li                 (__v32qi)
230*67e74705SXin Li                 _mm256_setzero_si256 (),
231*67e74705SXin Li                 (__mmask32) __M);
232*67e74705SXin Li }
233*67e74705SXin Li 
234*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_multishift_epi64_epi8(__m256i __X,__m256i __Y)235*67e74705SXin Li _mm256_multishift_epi64_epi8 (__m256i __X, __m256i __Y)
236*67e74705SXin Li {
237*67e74705SXin Li   return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
238*67e74705SXin Li                 (__v32qi) __Y,
239*67e74705SXin Li                 (__v32qi)
240*67e74705SXin Li                 _mm256_undefined_si256 (),
241*67e74705SXin Li                 (__mmask32) -1);
242*67e74705SXin Li }
243*67e74705SXin Li 
244*67e74705SXin Li 
245*67e74705SXin Li #undef __DEFAULT_FN_ATTRS
246*67e74705SXin Li 
247*67e74705SXin Li #endif
248