/* xref: /aosp_15_r20/external/clang/lib/Headers/avx512ifmavlintrin.h (revision 67e74705e28f6214e480b399dd47ea732279e315) */
/*===------------- avx512ifmavlintrin.h - IFMA intrinsics ------------------===
 *
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error "Never use <avx512ifmavlintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __IFMAVLINTRIN_H
#define __IFMAVLINTRIN_H

/* Default attributes for every intrinsic wrapper defined in this file:
 * always inlined, marked __nodebug__, and built with the "avx512ifma" and
 * "avx512vl" target features enabled. */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, \
                 __target__("avx512ifma,avx512vl")))

36*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_madd52hi_epu64(__m128i __X,__m128i __Y,__m128i __Z)37*67e74705SXin Li _mm_madd52hi_epu64 (__m128i __X, __m128i __Y, __m128i __Z)
38*67e74705SXin Li {
39*67e74705SXin Li   return (__m128i) __builtin_ia32_vpmadd52huq128_mask ((__v2di) __X,
40*67e74705SXin Li                    (__v2di) __Y,
41*67e74705SXin Li                    (__v2di) __Z,
42*67e74705SXin Li                    (__mmask8) -1);
43*67e74705SXin Li }
44*67e74705SXin Li 
45*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mask_madd52hi_epu64(__m128i __W,__mmask8 __M,__m128i __X,__m128i __Y)46*67e74705SXin Li _mm_mask_madd52hi_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
47*67e74705SXin Li {
48*67e74705SXin Li   return (__m128i) __builtin_ia32_vpmadd52huq128_mask ((__v2di) __W,
49*67e74705SXin Li                    (__v2di) __X,
50*67e74705SXin Li                    (__v2di) __Y,
51*67e74705SXin Li                    (__mmask8) __M);
52*67e74705SXin Li }
53*67e74705SXin Li 
54*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maskz_madd52hi_epu64(__mmask8 __M,__m128i __X,__m128i __Y,__m128i __Z)55*67e74705SXin Li _mm_maskz_madd52hi_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z)
56*67e74705SXin Li {
57*67e74705SXin Li   return (__m128i) __builtin_ia32_vpmadd52huq128_maskz ((__v2di) __X,
58*67e74705SXin Li               (__v2di) __Y,
59*67e74705SXin Li               (__v2di) __Z,
60*67e74705SXin Li               (__mmask8) __M);
61*67e74705SXin Li }
62*67e74705SXin Li 
63*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_madd52hi_epu64(__m256i __X,__m256i __Y,__m256i __Z)64*67e74705SXin Li _mm256_madd52hi_epu64 (__m256i __X, __m256i __Y, __m256i __Z)
65*67e74705SXin Li {
66*67e74705SXin Li   return (__m256i) __builtin_ia32_vpmadd52huq256_mask ((__v4di) __X,
67*67e74705SXin Li                    (__v4di) __Y,
68*67e74705SXin Li                    (__v4di) __Z,
69*67e74705SXin Li                    (__mmask8) -1);
70*67e74705SXin Li }
71*67e74705SXin Li 
72*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_madd52hi_epu64(__m256i __W,__mmask8 __M,__m256i __X,__m256i __Y)73*67e74705SXin Li _mm256_mask_madd52hi_epu64 (__m256i __W, __mmask8 __M, __m256i __X,
74*67e74705SXin Li           __m256i __Y)
75*67e74705SXin Li {
76*67e74705SXin Li   return (__m256i) __builtin_ia32_vpmadd52huq256_mask ((__v4di) __W,
77*67e74705SXin Li                    (__v4di) __X,
78*67e74705SXin Li                    (__v4di) __Y,
79*67e74705SXin Li                    (__mmask8) __M);
80*67e74705SXin Li }
81*67e74705SXin Li 
82*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_madd52hi_epu64(__mmask8 __M,__m256i __X,__m256i __Y,__m256i __Z)83*67e74705SXin Li _mm256_maskz_madd52hi_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z)
84*67e74705SXin Li {
85*67e74705SXin Li   return (__m256i) __builtin_ia32_vpmadd52huq256_maskz ((__v4di) __X,
86*67e74705SXin Li               (__v4di) __Y,
87*67e74705SXin Li               (__v4di) __Z,
88*67e74705SXin Li               (__mmask8) __M);
89*67e74705SXin Li }
90*67e74705SXin Li 
91*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_madd52lo_epu64(__m128i __X,__m128i __Y,__m128i __Z)92*67e74705SXin Li _mm_madd52lo_epu64 (__m128i __X, __m128i __Y, __m128i __Z)
93*67e74705SXin Li {
94*67e74705SXin Li   return (__m128i) __builtin_ia32_vpmadd52luq128_mask ((__v2di) __X,
95*67e74705SXin Li                    (__v2di) __Y,
96*67e74705SXin Li                    (__v2di) __Z,
97*67e74705SXin Li                    (__mmask8) -1);
98*67e74705SXin Li }
99*67e74705SXin Li 
100*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mask_madd52lo_epu64(__m128i __W,__mmask8 __M,__m128i __X,__m128i __Y)101*67e74705SXin Li _mm_mask_madd52lo_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
102*67e74705SXin Li {
103*67e74705SXin Li   return (__m128i) __builtin_ia32_vpmadd52luq128_mask ((__v2di) __W,
104*67e74705SXin Li                    (__v2di) __X,
105*67e74705SXin Li                    (__v2di) __Y,
106*67e74705SXin Li                    (__mmask8) __M);
107*67e74705SXin Li }
108*67e74705SXin Li 
109*67e74705SXin Li static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maskz_madd52lo_epu64(__mmask8 __M,__m128i __X,__m128i __Y,__m128i __Z)110*67e74705SXin Li _mm_maskz_madd52lo_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z)
111*67e74705SXin Li {
112*67e74705SXin Li   return (__m128i) __builtin_ia32_vpmadd52luq128_maskz ((__v2di) __X,
113*67e74705SXin Li               (__v2di) __Y,
114*67e74705SXin Li               (__v2di) __Z,
115*67e74705SXin Li               (__mmask8) __M);
116*67e74705SXin Li }
117*67e74705SXin Li 
118*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_madd52lo_epu64(__m256i __X,__m256i __Y,__m256i __Z)119*67e74705SXin Li _mm256_madd52lo_epu64 (__m256i __X, __m256i __Y, __m256i __Z)
120*67e74705SXin Li {
121*67e74705SXin Li   return (__m256i) __builtin_ia32_vpmadd52luq256_mask ((__v4di) __X,
122*67e74705SXin Li                    (__v4di) __Y,
123*67e74705SXin Li                    (__v4di) __Z,
124*67e74705SXin Li                    (__mmask8) -1);
125*67e74705SXin Li }
126*67e74705SXin Li 
127*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_madd52lo_epu64(__m256i __W,__mmask8 __M,__m256i __X,__m256i __Y)128*67e74705SXin Li _mm256_mask_madd52lo_epu64 (__m256i __W, __mmask8 __M, __m256i __X,
129*67e74705SXin Li           __m256i __Y)
130*67e74705SXin Li {
131*67e74705SXin Li   return (__m256i) __builtin_ia32_vpmadd52luq256_mask ((__v4di) __W,
132*67e74705SXin Li                    (__v4di) __X,
133*67e74705SXin Li                    (__v4di) __Y,
134*67e74705SXin Li                    (__mmask8) __M);
135*67e74705SXin Li }
136*67e74705SXin Li 
137*67e74705SXin Li static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_madd52lo_epu64(__mmask8 __M,__m256i __X,__m256i __Y,__m256i __Z)138*67e74705SXin Li _mm256_maskz_madd52lo_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z)
139*67e74705SXin Li {
140*67e74705SXin Li   return (__m256i) __builtin_ia32_vpmadd52luq256_maskz ((__v4di) __X,
141*67e74705SXin Li               (__v4di) __Y,
142*67e74705SXin Li               (__v4di) __Z,
143*67e74705SXin Li               (__mmask8) __M);
144*67e74705SXin Li }
145*67e74705SXin Li 
146*67e74705SXin Li 
#undef __DEFAULT_FN_ATTRS

#endif