1*67e74705SXin Li /*===---- f16cintrin.h - F16C intrinsics -----------------------------------===
2*67e74705SXin Li *
3*67e74705SXin Li * Permission is hereby granted, free of charge, to any person obtaining a copy
4*67e74705SXin Li * of this software and associated documentation files (the "Software"), to deal
5*67e74705SXin Li * in the Software without restriction, including without limitation the rights
6*67e74705SXin Li * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7*67e74705SXin Li * copies of the Software, and to permit persons to whom the Software is
8*67e74705SXin Li * furnished to do so, subject to the following conditions:
9*67e74705SXin Li *
10*67e74705SXin Li * The above copyright notice and this permission notice shall be included in
11*67e74705SXin Li * all copies or substantial portions of the Software.
12*67e74705SXin Li *
13*67e74705SXin Li * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14*67e74705SXin Li * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15*67e74705SXin Li * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16*67e74705SXin Li * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17*67e74705SXin Li * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18*67e74705SXin Li * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19*67e74705SXin Li * THE SOFTWARE.
20*67e74705SXin Li *
21*67e74705SXin Li *===-----------------------------------------------------------------------===
22*67e74705SXin Li */
23*67e74705SXin Li
24*67e74705SXin Li #if !defined __X86INTRIN_H && !defined __EMMINTRIN_H && !defined __IMMINTRIN_H
25*67e74705SXin Li #error "Never use <f16cintrin.h> directly; include <emmintrin.h> instead."
26*67e74705SXin Li #endif
27*67e74705SXin Li
28*67e74705SXin Li #ifndef __F16CINTRIN_H
29*67e74705SXin Li #define __F16CINTRIN_H
30*67e74705SXin Li
/* Attributes applied to every function defined in this header. The macro is
 * removed again with #undef at the end of the file. */
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__)) __attribute__((__nodebug__))              \
  __attribute__((__target__("f16c")))
34*67e74705SXin Li
35*67e74705SXin Li /// \brief Converts a 16-bit half-precision float value into a 32-bit float
36*67e74705SXin Li /// value.
37*67e74705SXin Li ///
38*67e74705SXin Li /// \headerfile <x86intrin.h>
39*67e74705SXin Li ///
40*67e74705SXin Li /// This intrinsic corresponds to the \c VCVTPH2PS instruction.
41*67e74705SXin Li ///
42*67e74705SXin Li /// \param __a
43*67e74705SXin Li /// A 16-bit half-precision float value.
44*67e74705SXin Li /// \returns The converted 32-bit float value.
45*67e74705SXin Li static __inline float __DEFAULT_FN_ATTRS
_cvtsh_ss(unsigned short __a)46*67e74705SXin Li _cvtsh_ss(unsigned short __a)
47*67e74705SXin Li {
48*67e74705SXin Li __v8hi v = {(short)__a, 0, 0, 0, 0, 0, 0, 0};
49*67e74705SXin Li __v4sf r = __builtin_ia32_vcvtph2ps(v);
50*67e74705SXin Li return r[0];
51*67e74705SXin Li }
52*67e74705SXin Li
/// \brief Converts a 32-bit single-precision float value to a 16-bit
///    half-precision float value.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned short _cvtss_sh(float a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the \c VCVTPS2PH instruction.
///
/// The argument \a a is explicitly converted to \c float before it is placed
/// in element 0 of the temporary vector, so the macro behaves like the
/// prototype shown above.
///
/// \param a
///    A 32-bit single-precision float value to be converted to a 16-bit
///    half-precision float value.
/// \param imm
///    An immediate value controlling rounding using bits [2:0]:
///    000: Nearest
///    001: Down
///    010: Up
///    011: Truncate
///    1XX: Use MXCSR.RC for rounding
/// \returns The converted 16-bit half-precision float value.
#define _cvtss_sh(a, imm) \
  ((unsigned short)(((__v8hi)__builtin_ia32_vcvtps2ph( \
      (__v4sf){(float)(a), 0, 0, 0}, (imm)))[0]))
78*67e74705SXin Li
/// \brief Converts the 32-bit float values of a 128-bit vector into 16-bit
///    half-precision float values, returned in a 128-bit vector.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_cvtps_ph(__m128 a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the \c VCVTPS2PH instruction.
///
/// \param a
///    A 128-bit vector containing 32-bit float values.
/// \param imm
///    An immediate value whose bits [2:0] select the rounding behavior:
///    000: Nearest
///    001: Down
///    010: Up
///    011: Truncate
///    1XX: Use MXCSR.RC for rounding
/// \returns A 128-bit vector whose lower 64 bits hold the converted 16-bit
///    half-precision floating-point values.
#define _mm_cvtps_ph(a, imm)                                 \
  ((__m128i)                                                 \
   __builtin_ia32_vcvtps2ph((__v4sf)(__m128)(a), (imm)))
104*67e74705SXin Li
105*67e74705SXin Li /// \brief Converts a 128-bit vector containing 16-bit half-precision float
106*67e74705SXin Li /// values into a 128-bit vector containing 32-bit float values.
107*67e74705SXin Li ///
108*67e74705SXin Li /// \headerfile <x86intrin.h>
109*67e74705SXin Li ///
110*67e74705SXin Li /// This intrinsic corresponds to the \c VCVTPH2PS instruction.
111*67e74705SXin Li ///
112*67e74705SXin Li /// \param __a
113*67e74705SXin Li /// A 128-bit vector containing 16-bit half-precision float values. The lower
114*67e74705SXin Li /// 64 bits are used in the conversion.
115*67e74705SXin Li /// \returns A 128-bit vector of [4 x float] containing converted float values.
116*67e74705SXin Li static __inline __m128 __DEFAULT_FN_ATTRS
_mm_cvtph_ps(__m128i __a)117*67e74705SXin Li _mm_cvtph_ps(__m128i __a)
118*67e74705SXin Li {
119*67e74705SXin Li return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)__a);
120*67e74705SXin Li }
121*67e74705SXin Li
122*67e74705SXin Li #undef __DEFAULT_FN_ATTRS
123*67e74705SXin Li
124*67e74705SXin Li #endif /* __F16CINTRIN_H */
125