1*bed243d3SAndroid Build Coastguard Worker /*===----------------- avxifmaintrin.h - IFMA intrinsics -------------------===
2*bed243d3SAndroid Build Coastguard Worker *
3*bed243d3SAndroid Build Coastguard Worker * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*bed243d3SAndroid Build Coastguard Worker * See https://llvm.org/LICENSE.txt for license information.
5*bed243d3SAndroid Build Coastguard Worker * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*bed243d3SAndroid Build Coastguard Worker *
7*bed243d3SAndroid Build Coastguard Worker *===-----------------------------------------------------------------------===
8*bed243d3SAndroid Build Coastguard Worker */
9*bed243d3SAndroid Build Coastguard Worker
10*bed243d3SAndroid Build Coastguard Worker #ifndef __IMMINTRIN_H
11*bed243d3SAndroid Build Coastguard Worker #error "Never use <avxifmaintrin.h> directly; include <immintrin.h> instead."
12*bed243d3SAndroid Build Coastguard Worker #endif
13*bed243d3SAndroid Build Coastguard Worker
14*bed243d3SAndroid Build Coastguard Worker #ifndef __AVXIFMAINTRIN_H
15*bed243d3SAndroid Build Coastguard Worker #define __AVXIFMAINTRIN_H
16*bed243d3SAndroid Build Coastguard Worker
/* Default attributes for the intrinsics defined in this file.
 *
 * Both macros apply __always_inline__, __nodebug__ and __target__("avxifma");
 * they differ only in the value given to __min_vector_width__ (128 for the
 * __m128i intrinsics, 256 for the __m256i intrinsics). */
#define __DEFAULT_FN_ATTRS128                                                  \
  __attribute__((__always_inline__, __nodebug__, __target__("avxifma"),        \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256                                                  \
  __attribute__((__always_inline__, __nodebug__, __target__("avxifma"),        \
                 __min_vector_width__(256)))
24*bed243d3SAndroid Build Coastguard Worker
// The intrinsics below must be VEX-encoded.
26*bed243d3SAndroid Build Coastguard Worker
27*bed243d3SAndroid Build Coastguard Worker /// Multiply packed unsigned 52-bit integers in each 64-bit element of \a __Y
28*bed243d3SAndroid Build Coastguard Worker /// and \a __Z to form a 104-bit intermediate result. Add the high 52-bit
29*bed243d3SAndroid Build Coastguard Worker /// unsigned integer from the intermediate result with the corresponding
30*bed243d3SAndroid Build Coastguard Worker /// unsigned 64-bit integer in \a __X, and store the results in \a dst.
31*bed243d3SAndroid Build Coastguard Worker ///
32*bed243d3SAndroid Build Coastguard Worker /// \headerfile <immintrin.h>
33*bed243d3SAndroid Build Coastguard Worker ///
34*bed243d3SAndroid Build Coastguard Worker /// \code
35*bed243d3SAndroid Build Coastguard Worker /// __m128i
36*bed243d3SAndroid Build Coastguard Worker /// _mm_madd52hi_avx_epu64 (__m128i __X, __m128i __Y, __m128i __Z)
37*bed243d3SAndroid Build Coastguard Worker /// \endcode
38*bed243d3SAndroid Build Coastguard Worker ///
39*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VPMADD52HUQ instruction.
40*bed243d3SAndroid Build Coastguard Worker ///
41*bed243d3SAndroid Build Coastguard Worker /// \return
42*bed243d3SAndroid Build Coastguard Worker /// return __m128i dst.
43*bed243d3SAndroid Build Coastguard Worker /// \param __X
44*bed243d3SAndroid Build Coastguard Worker /// A 128-bit vector of [2 x i64]
45*bed243d3SAndroid Build Coastguard Worker /// \param __Y
46*bed243d3SAndroid Build Coastguard Worker /// A 128-bit vector of [2 x i64]
47*bed243d3SAndroid Build Coastguard Worker /// \param __Z
48*bed243d3SAndroid Build Coastguard Worker /// A 128-bit vector of [2 x i64]
49*bed243d3SAndroid Build Coastguard Worker ///
50*bed243d3SAndroid Build Coastguard Worker /// \code{.operation}
51*bed243d3SAndroid Build Coastguard Worker /// FOR j := 0 to 1
52*bed243d3SAndroid Build Coastguard Worker /// i := j*64
53*bed243d3SAndroid Build Coastguard Worker /// tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i])
54*bed243d3SAndroid Build Coastguard Worker /// dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[103:52])
55*bed243d3SAndroid Build Coastguard Worker /// ENDFOR
56*bed243d3SAndroid Build Coastguard Worker /// dst[MAX:128] := 0
57*bed243d3SAndroid Build Coastguard Worker /// \endcode
58*bed243d3SAndroid Build Coastguard Worker static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_madd52hi_avx_epu64(__m128i __X,__m128i __Y,__m128i __Z)59*bed243d3SAndroid Build Coastguard Worker _mm_madd52hi_avx_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
60*bed243d3SAndroid Build Coastguard Worker return (__m128i)__builtin_ia32_vpmadd52huq128((__v2di)__X, (__v2di)__Y,
61*bed243d3SAndroid Build Coastguard Worker (__v2di)__Z);
62*bed243d3SAndroid Build Coastguard Worker }
63*bed243d3SAndroid Build Coastguard Worker
64*bed243d3SAndroid Build Coastguard Worker /// Multiply packed unsigned 52-bit integers in each 64-bit element of \a __Y
65*bed243d3SAndroid Build Coastguard Worker /// and \a __Z to form a 104-bit intermediate result. Add the high 52-bit
66*bed243d3SAndroid Build Coastguard Worker /// unsigned integer from the intermediate result with the corresponding
67*bed243d3SAndroid Build Coastguard Worker /// unsigned 64-bit integer in \a __X, and store the results in \a dst.
68*bed243d3SAndroid Build Coastguard Worker ///
69*bed243d3SAndroid Build Coastguard Worker /// \headerfile <immintrin.h>
70*bed243d3SAndroid Build Coastguard Worker ///
71*bed243d3SAndroid Build Coastguard Worker /// \code
72*bed243d3SAndroid Build Coastguard Worker /// __m256i
73*bed243d3SAndroid Build Coastguard Worker /// _mm256_madd52hi_avx_epu64 (__m256i __X, __m256i __Y, __m256i __Z)
74*bed243d3SAndroid Build Coastguard Worker /// \endcode
75*bed243d3SAndroid Build Coastguard Worker ///
76*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VPMADD52HUQ instruction.
77*bed243d3SAndroid Build Coastguard Worker ///
78*bed243d3SAndroid Build Coastguard Worker /// \return
79*bed243d3SAndroid Build Coastguard Worker /// return __m256i dst.
80*bed243d3SAndroid Build Coastguard Worker /// \param __X
81*bed243d3SAndroid Build Coastguard Worker /// A 256-bit vector of [4 x i64]
82*bed243d3SAndroid Build Coastguard Worker /// \param __Y
83*bed243d3SAndroid Build Coastguard Worker /// A 256-bit vector of [4 x i64]
84*bed243d3SAndroid Build Coastguard Worker /// \param __Z
85*bed243d3SAndroid Build Coastguard Worker /// A 256-bit vector of [4 x i64]
86*bed243d3SAndroid Build Coastguard Worker ///
87*bed243d3SAndroid Build Coastguard Worker /// \code{.operation}
88*bed243d3SAndroid Build Coastguard Worker /// FOR j := 0 to 3
89*bed243d3SAndroid Build Coastguard Worker /// i := j*64
90*bed243d3SAndroid Build Coastguard Worker /// tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i])
91*bed243d3SAndroid Build Coastguard Worker /// dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[103:52])
92*bed243d3SAndroid Build Coastguard Worker /// ENDFOR
93*bed243d3SAndroid Build Coastguard Worker /// dst[MAX:256] := 0
94*bed243d3SAndroid Build Coastguard Worker /// \endcode
95*bed243d3SAndroid Build Coastguard Worker static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_madd52hi_avx_epu64(__m256i __X,__m256i __Y,__m256i __Z)96*bed243d3SAndroid Build Coastguard Worker _mm256_madd52hi_avx_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
97*bed243d3SAndroid Build Coastguard Worker return (__m256i)__builtin_ia32_vpmadd52huq256((__v4di)__X, (__v4di)__Y,
98*bed243d3SAndroid Build Coastguard Worker (__v4di)__Z);
99*bed243d3SAndroid Build Coastguard Worker }
100*bed243d3SAndroid Build Coastguard Worker
101*bed243d3SAndroid Build Coastguard Worker /// Multiply packed unsigned 52-bit integers in each 64-bit element of \a __Y
102*bed243d3SAndroid Build Coastguard Worker /// and \a __Z to form a 104-bit intermediate result. Add the low 52-bit
103*bed243d3SAndroid Build Coastguard Worker /// unsigned integer from the intermediate result with the corresponding
104*bed243d3SAndroid Build Coastguard Worker /// unsigned 64-bit integer in \a __X, and store the results in \a dst.
105*bed243d3SAndroid Build Coastguard Worker ///
106*bed243d3SAndroid Build Coastguard Worker /// \headerfile <immintrin.h>
107*bed243d3SAndroid Build Coastguard Worker ///
108*bed243d3SAndroid Build Coastguard Worker /// \code
109*bed243d3SAndroid Build Coastguard Worker /// __m128i
110*bed243d3SAndroid Build Coastguard Worker /// _mm_madd52lo_avx_epu64 (__m128i __X, __m128i __Y, __m128i __Z)
111*bed243d3SAndroid Build Coastguard Worker /// \endcode
112*bed243d3SAndroid Build Coastguard Worker ///
113*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VPMADD52LUQ instruction.
114*bed243d3SAndroid Build Coastguard Worker ///
115*bed243d3SAndroid Build Coastguard Worker /// \return
116*bed243d3SAndroid Build Coastguard Worker /// return __m128i dst.
117*bed243d3SAndroid Build Coastguard Worker /// \param __X
118*bed243d3SAndroid Build Coastguard Worker /// A 128-bit vector of [2 x i64]
119*bed243d3SAndroid Build Coastguard Worker /// \param __Y
120*bed243d3SAndroid Build Coastguard Worker /// A 128-bit vector of [2 x i64]
121*bed243d3SAndroid Build Coastguard Worker /// \param __Z
122*bed243d3SAndroid Build Coastguard Worker /// A 128-bit vector of [2 x i64]
123*bed243d3SAndroid Build Coastguard Worker ///
124*bed243d3SAndroid Build Coastguard Worker /// \code{.operation}
125*bed243d3SAndroid Build Coastguard Worker /// FOR j := 0 to 1
126*bed243d3SAndroid Build Coastguard Worker /// i := j*64
127*bed243d3SAndroid Build Coastguard Worker /// tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i])
128*bed243d3SAndroid Build Coastguard Worker /// dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[51:0])
129*bed243d3SAndroid Build Coastguard Worker /// ENDFOR
130*bed243d3SAndroid Build Coastguard Worker /// dst[MAX:128] := 0
131*bed243d3SAndroid Build Coastguard Worker /// \endcode
132*bed243d3SAndroid Build Coastguard Worker static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_madd52lo_avx_epu64(__m128i __X,__m128i __Y,__m128i __Z)133*bed243d3SAndroid Build Coastguard Worker _mm_madd52lo_avx_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
134*bed243d3SAndroid Build Coastguard Worker return (__m128i)__builtin_ia32_vpmadd52luq128((__v2di)__X, (__v2di)__Y,
135*bed243d3SAndroid Build Coastguard Worker (__v2di)__Z);
136*bed243d3SAndroid Build Coastguard Worker }
137*bed243d3SAndroid Build Coastguard Worker
138*bed243d3SAndroid Build Coastguard Worker /// Multiply packed unsigned 52-bit integers in each 64-bit element of \a __Y
139*bed243d3SAndroid Build Coastguard Worker /// and \a __Z to form a 104-bit intermediate result. Add the low 52-bit
140*bed243d3SAndroid Build Coastguard Worker /// unsigned integer from the intermediate result with the corresponding
141*bed243d3SAndroid Build Coastguard Worker /// unsigned 64-bit integer in \a __X, and store the results in \a dst.
142*bed243d3SAndroid Build Coastguard Worker ///
143*bed243d3SAndroid Build Coastguard Worker /// \headerfile <immintrin.h>
144*bed243d3SAndroid Build Coastguard Worker ///
145*bed243d3SAndroid Build Coastguard Worker /// \code
146*bed243d3SAndroid Build Coastguard Worker /// __m256i
147*bed243d3SAndroid Build Coastguard Worker /// _mm256_madd52lo_avx_epu64 (__m256i __X, __m256i __Y, __m256i __Z)
148*bed243d3SAndroid Build Coastguard Worker /// \endcode
149*bed243d3SAndroid Build Coastguard Worker ///
150*bed243d3SAndroid Build Coastguard Worker /// This intrinsic corresponds to the \c VPMADD52LUQ instruction.
151*bed243d3SAndroid Build Coastguard Worker ///
152*bed243d3SAndroid Build Coastguard Worker /// \return
153*bed243d3SAndroid Build Coastguard Worker /// return __m256i dst.
154*bed243d3SAndroid Build Coastguard Worker /// \param __X
155*bed243d3SAndroid Build Coastguard Worker /// A 256-bit vector of [4 x i64]
156*bed243d3SAndroid Build Coastguard Worker /// \param __Y
157*bed243d3SAndroid Build Coastguard Worker /// A 256-bit vector of [4 x i64]
158*bed243d3SAndroid Build Coastguard Worker /// \param __Z
159*bed243d3SAndroid Build Coastguard Worker /// A 256-bit vector of [4 x i64]
160*bed243d3SAndroid Build Coastguard Worker ///
161*bed243d3SAndroid Build Coastguard Worker /// \code{.operation}
162*bed243d3SAndroid Build Coastguard Worker /// FOR j := 0 to 3
163*bed243d3SAndroid Build Coastguard Worker /// i := j*64
164*bed243d3SAndroid Build Coastguard Worker /// tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i])
165*bed243d3SAndroid Build Coastguard Worker /// dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[51:0])
166*bed243d3SAndroid Build Coastguard Worker /// ENDFOR
167*bed243d3SAndroid Build Coastguard Worker /// dst[MAX:256] := 0
168*bed243d3SAndroid Build Coastguard Worker /// \endcode
169*bed243d3SAndroid Build Coastguard Worker static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_madd52lo_avx_epu64(__m256i __X,__m256i __Y,__m256i __Z)170*bed243d3SAndroid Build Coastguard Worker _mm256_madd52lo_avx_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
171*bed243d3SAndroid Build Coastguard Worker return (__m256i)__builtin_ia32_vpmadd52luq256((__v4di)__X, (__v4di)__Y,
172*bed243d3SAndroid Build Coastguard Worker (__v4di)__Z);
173*bed243d3SAndroid Build Coastguard Worker }
174*bed243d3SAndroid Build Coastguard Worker #undef __DEFAULT_FN_ATTRS128
175*bed243d3SAndroid Build Coastguard Worker #undef __DEFAULT_FN_ATTRS256
176*bed243d3SAndroid Build Coastguard Worker
177*bed243d3SAndroid Build Coastguard Worker #endif // __AVXIFMAINTRIN_H
178