xref: /aosp_15_r20/external/OpenCL-CTS/dependencies/ocl-headers/CL/cl_half.h (revision 6467f958c7de8070b317fc65bcb0f6472e388d82)
/*******************************************************************************
 * Copyright (c) 2019-2020 The Khronos Group Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
16*6467f958SSadaf Ebrahimi 
/**
 * This is a header-only utility library that provides OpenCL host code with
 * routines for converting to/from cl_half values.
 *
 * Example usage:
 *
 *    #include <CL/cl_half.h>
 *    ...
 *    cl_half h = cl_half_from_float(0.5f, CL_HALF_RTE);
 *    cl_float f = cl_half_to_float(h);
 */
28*6467f958SSadaf Ebrahimi 
29*6467f958SSadaf Ebrahimi #ifndef OPENCL_CL_HALF_H
30*6467f958SSadaf Ebrahimi #define OPENCL_CL_HALF_H
31*6467f958SSadaf Ebrahimi 
32*6467f958SSadaf Ebrahimi #include <CL/cl_platform.h>
33*6467f958SSadaf Ebrahimi 
34*6467f958SSadaf Ebrahimi #include <stdint.h>
35*6467f958SSadaf Ebrahimi 
36*6467f958SSadaf Ebrahimi #ifdef __cplusplus
37*6467f958SSadaf Ebrahimi extern "C" {
38*6467f958SSadaf Ebrahimi #endif
39*6467f958SSadaf Ebrahimi 
40*6467f958SSadaf Ebrahimi 
/**
 * Rounding mode used when converting to cl_half.
 *
 * The enumerators keep their default consecutive values starting at zero.
 */
typedef enum
{
  CL_HALF_RTE = 0, /* round to nearest, ties to even */
  CL_HALF_RTZ = 1, /* round towards zero */
  CL_HALF_RTP = 2, /* round towards positive infinity */
  CL_HALF_RTN = 3  /* round towards negative infinity */
} cl_half_rounding_mode;
51*6467f958SSadaf Ebrahimi 
52*6467f958SSadaf Ebrahimi 
/*
 * Private utility macros, used only by the conversion helpers in this header.
 */
#define CL_HALF_EXP_MASK 0x7C00       /* exponent field of a half, all bits set */
#define CL_HALF_MAX_FINITE_MAG 0x7BFF /* one below the exponent mask: largest finite magnitude */
56*6467f958SSadaf Ebrahimi 
57*6467f958SSadaf Ebrahimi 
58*6467f958SSadaf Ebrahimi /*
59*6467f958SSadaf Ebrahimi  * Utility to deal with values that overflow when converting to half precision.
60*6467f958SSadaf Ebrahimi  */
cl_half_handle_overflow(cl_half_rounding_mode rounding_mode,uint16_t sign)61*6467f958SSadaf Ebrahimi static inline cl_half cl_half_handle_overflow(cl_half_rounding_mode rounding_mode,
62*6467f958SSadaf Ebrahimi                                               uint16_t sign)
63*6467f958SSadaf Ebrahimi {
64*6467f958SSadaf Ebrahimi   if (rounding_mode == CL_HALF_RTZ)
65*6467f958SSadaf Ebrahimi   {
66*6467f958SSadaf Ebrahimi     // Round overflow towards zero -> largest finite number (preserving sign)
67*6467f958SSadaf Ebrahimi     return (sign << 15) | CL_HALF_MAX_FINITE_MAG;
68*6467f958SSadaf Ebrahimi   }
69*6467f958SSadaf Ebrahimi   else if (rounding_mode == CL_HALF_RTP && sign)
70*6467f958SSadaf Ebrahimi   {
71*6467f958SSadaf Ebrahimi     // Round negative overflow towards positive infinity -> most negative finite number
72*6467f958SSadaf Ebrahimi     return (1 << 15) | CL_HALF_MAX_FINITE_MAG;
73*6467f958SSadaf Ebrahimi   }
74*6467f958SSadaf Ebrahimi   else if (rounding_mode == CL_HALF_RTN && !sign)
75*6467f958SSadaf Ebrahimi   {
76*6467f958SSadaf Ebrahimi     // Round positive overflow towards negative infinity -> largest finite number
77*6467f958SSadaf Ebrahimi     return CL_HALF_MAX_FINITE_MAG;
78*6467f958SSadaf Ebrahimi   }
79*6467f958SSadaf Ebrahimi 
80*6467f958SSadaf Ebrahimi   // Overflow to infinity
81*6467f958SSadaf Ebrahimi   return (sign << 15) | CL_HALF_EXP_MASK;
82*6467f958SSadaf Ebrahimi }
83*6467f958SSadaf Ebrahimi 
84*6467f958SSadaf Ebrahimi /*
85*6467f958SSadaf Ebrahimi  * Utility to deal with values that underflow when converting to half precision.
86*6467f958SSadaf Ebrahimi  */
cl_half_handle_underflow(cl_half_rounding_mode rounding_mode,uint16_t sign)87*6467f958SSadaf Ebrahimi static inline cl_half cl_half_handle_underflow(cl_half_rounding_mode rounding_mode,
88*6467f958SSadaf Ebrahimi                                                uint16_t sign)
89*6467f958SSadaf Ebrahimi {
90*6467f958SSadaf Ebrahimi   if (rounding_mode == CL_HALF_RTP && !sign)
91*6467f958SSadaf Ebrahimi   {
92*6467f958SSadaf Ebrahimi     // Round underflow towards positive infinity -> smallest positive value
93*6467f958SSadaf Ebrahimi     return (sign << 15) | 1;
94*6467f958SSadaf Ebrahimi   }
95*6467f958SSadaf Ebrahimi   else if (rounding_mode == CL_HALF_RTN && sign)
96*6467f958SSadaf Ebrahimi   {
97*6467f958SSadaf Ebrahimi     // Round underflow towards negative infinity -> largest negative value
98*6467f958SSadaf Ebrahimi     return (sign << 15) | 1;
99*6467f958SSadaf Ebrahimi   }
100*6467f958SSadaf Ebrahimi 
101*6467f958SSadaf Ebrahimi   // Flush to zero
102*6467f958SSadaf Ebrahimi   return (sign << 15);
103*6467f958SSadaf Ebrahimi }
104*6467f958SSadaf Ebrahimi 
105*6467f958SSadaf Ebrahimi 
106*6467f958SSadaf Ebrahimi /**
107*6467f958SSadaf Ebrahimi  * Convert a cl_float to a cl_half.
108*6467f958SSadaf Ebrahimi  */
cl_half_from_float(cl_float f,cl_half_rounding_mode rounding_mode)109*6467f958SSadaf Ebrahimi static inline cl_half cl_half_from_float(cl_float f, cl_half_rounding_mode rounding_mode)
110*6467f958SSadaf Ebrahimi {
111*6467f958SSadaf Ebrahimi   // Type-punning to get direct access to underlying bits
112*6467f958SSadaf Ebrahimi   union
113*6467f958SSadaf Ebrahimi   {
114*6467f958SSadaf Ebrahimi     cl_float f;
115*6467f958SSadaf Ebrahimi     uint32_t i;
116*6467f958SSadaf Ebrahimi   } f32;
117*6467f958SSadaf Ebrahimi   f32.f = f;
118*6467f958SSadaf Ebrahimi 
119*6467f958SSadaf Ebrahimi   // Extract sign bit
120*6467f958SSadaf Ebrahimi   uint16_t sign = f32.i >> 31;
121*6467f958SSadaf Ebrahimi 
122*6467f958SSadaf Ebrahimi   // Extract FP32 exponent and mantissa
123*6467f958SSadaf Ebrahimi   uint32_t f_exp = (f32.i >> (CL_FLT_MANT_DIG - 1)) & 0xFF;
124*6467f958SSadaf Ebrahimi   uint32_t f_mant = f32.i & ((1 << (CL_FLT_MANT_DIG - 1)) - 1);
125*6467f958SSadaf Ebrahimi 
126*6467f958SSadaf Ebrahimi   // Remove FP32 exponent bias
127*6467f958SSadaf Ebrahimi   int32_t exp = f_exp - CL_FLT_MAX_EXP + 1;
128*6467f958SSadaf Ebrahimi 
129*6467f958SSadaf Ebrahimi   // Add FP16 exponent bias
130*6467f958SSadaf Ebrahimi   uint16_t h_exp = (uint16_t)(exp + CL_HALF_MAX_EXP - 1);
131*6467f958SSadaf Ebrahimi 
132*6467f958SSadaf Ebrahimi   // Position of the bit that will become the FP16 mantissa LSB
133*6467f958SSadaf Ebrahimi   uint32_t lsb_pos = CL_FLT_MANT_DIG - CL_HALF_MANT_DIG;
134*6467f958SSadaf Ebrahimi 
135*6467f958SSadaf Ebrahimi   // Check for NaN / infinity
136*6467f958SSadaf Ebrahimi   if (f_exp == 0xFF)
137*6467f958SSadaf Ebrahimi   {
138*6467f958SSadaf Ebrahimi     if (f_mant)
139*6467f958SSadaf Ebrahimi     {
140*6467f958SSadaf Ebrahimi       // NaN -> propagate mantissa and silence it
141*6467f958SSadaf Ebrahimi       uint16_t h_mant = (uint16_t)(f_mant >> lsb_pos);
142*6467f958SSadaf Ebrahimi       h_mant |= 0x200;
143*6467f958SSadaf Ebrahimi       return (sign << 15) | CL_HALF_EXP_MASK | h_mant;
144*6467f958SSadaf Ebrahimi     }
145*6467f958SSadaf Ebrahimi     else
146*6467f958SSadaf Ebrahimi     {
147*6467f958SSadaf Ebrahimi       // Infinity -> zero mantissa
148*6467f958SSadaf Ebrahimi       return (sign << 15) | CL_HALF_EXP_MASK;
149*6467f958SSadaf Ebrahimi     }
150*6467f958SSadaf Ebrahimi   }
151*6467f958SSadaf Ebrahimi 
152*6467f958SSadaf Ebrahimi   // Check for zero
153*6467f958SSadaf Ebrahimi   if (!f_exp && !f_mant)
154*6467f958SSadaf Ebrahimi   {
155*6467f958SSadaf Ebrahimi     return (sign << 15);
156*6467f958SSadaf Ebrahimi   }
157*6467f958SSadaf Ebrahimi 
158*6467f958SSadaf Ebrahimi   // Check for overflow
159*6467f958SSadaf Ebrahimi   if (exp >= CL_HALF_MAX_EXP)
160*6467f958SSadaf Ebrahimi   {
161*6467f958SSadaf Ebrahimi     return cl_half_handle_overflow(rounding_mode, sign);
162*6467f958SSadaf Ebrahimi   }
163*6467f958SSadaf Ebrahimi 
164*6467f958SSadaf Ebrahimi   // Check for underflow
165*6467f958SSadaf Ebrahimi   if (exp < (CL_HALF_MIN_EXP - CL_HALF_MANT_DIG - 1))
166*6467f958SSadaf Ebrahimi   {
167*6467f958SSadaf Ebrahimi     return cl_half_handle_underflow(rounding_mode, sign);
168*6467f958SSadaf Ebrahimi   }
169*6467f958SSadaf Ebrahimi 
170*6467f958SSadaf Ebrahimi   // Check for value that will become denormal
171*6467f958SSadaf Ebrahimi   if (exp < -14)
172*6467f958SSadaf Ebrahimi   {
173*6467f958SSadaf Ebrahimi     // Denormal -> include the implicit 1 from the FP32 mantissa
174*6467f958SSadaf Ebrahimi     h_exp = 0;
175*6467f958SSadaf Ebrahimi     f_mant |= 1 << (CL_FLT_MANT_DIG - 1);
176*6467f958SSadaf Ebrahimi 
177*6467f958SSadaf Ebrahimi     // Mantissa shift amount depends on exponent
178*6467f958SSadaf Ebrahimi     lsb_pos = -exp + (CL_FLT_MANT_DIG - 25);
179*6467f958SSadaf Ebrahimi   }
180*6467f958SSadaf Ebrahimi 
181*6467f958SSadaf Ebrahimi   // Generate FP16 mantissa by shifting FP32 mantissa
182*6467f958SSadaf Ebrahimi   uint16_t h_mant = (uint16_t)(f_mant >> lsb_pos);
183*6467f958SSadaf Ebrahimi 
184*6467f958SSadaf Ebrahimi   // Check whether we need to round
185*6467f958SSadaf Ebrahimi   uint32_t halfway = 1 << (lsb_pos - 1);
186*6467f958SSadaf Ebrahimi   uint32_t mask = (halfway << 1) - 1;
187*6467f958SSadaf Ebrahimi   switch (rounding_mode)
188*6467f958SSadaf Ebrahimi   {
189*6467f958SSadaf Ebrahimi     case CL_HALF_RTE:
190*6467f958SSadaf Ebrahimi       if ((f_mant & mask) > halfway)
191*6467f958SSadaf Ebrahimi       {
192*6467f958SSadaf Ebrahimi         // More than halfway -> round up
193*6467f958SSadaf Ebrahimi         h_mant += 1;
194*6467f958SSadaf Ebrahimi       }
195*6467f958SSadaf Ebrahimi       else if ((f_mant & mask) == halfway)
196*6467f958SSadaf Ebrahimi       {
197*6467f958SSadaf Ebrahimi         // Exactly halfway -> round to nearest even
198*6467f958SSadaf Ebrahimi         if (h_mant & 0x1)
199*6467f958SSadaf Ebrahimi           h_mant += 1;
200*6467f958SSadaf Ebrahimi       }
201*6467f958SSadaf Ebrahimi       break;
202*6467f958SSadaf Ebrahimi     case CL_HALF_RTZ:
203*6467f958SSadaf Ebrahimi       // Mantissa has already been truncated -> do nothing
204*6467f958SSadaf Ebrahimi       break;
205*6467f958SSadaf Ebrahimi     case CL_HALF_RTP:
206*6467f958SSadaf Ebrahimi       if ((f_mant & mask) && !sign)
207*6467f958SSadaf Ebrahimi       {
208*6467f958SSadaf Ebrahimi         // Round positive numbers up
209*6467f958SSadaf Ebrahimi         h_mant += 1;
210*6467f958SSadaf Ebrahimi       }
211*6467f958SSadaf Ebrahimi       break;
212*6467f958SSadaf Ebrahimi     case CL_HALF_RTN:
213*6467f958SSadaf Ebrahimi       if ((f_mant & mask) && sign)
214*6467f958SSadaf Ebrahimi       {
215*6467f958SSadaf Ebrahimi         // Round negative numbers down
216*6467f958SSadaf Ebrahimi         h_mant += 1;
217*6467f958SSadaf Ebrahimi       }
218*6467f958SSadaf Ebrahimi       break;
219*6467f958SSadaf Ebrahimi   }
220*6467f958SSadaf Ebrahimi 
221*6467f958SSadaf Ebrahimi   // Check for mantissa overflow
222*6467f958SSadaf Ebrahimi   if (h_mant & 0x400)
223*6467f958SSadaf Ebrahimi   {
224*6467f958SSadaf Ebrahimi     h_exp += 1;
225*6467f958SSadaf Ebrahimi     h_mant = 0;
226*6467f958SSadaf Ebrahimi   }
227*6467f958SSadaf Ebrahimi 
228*6467f958SSadaf Ebrahimi   return (sign << 15) | (h_exp << 10) | h_mant;
229*6467f958SSadaf Ebrahimi }
230*6467f958SSadaf Ebrahimi 
231*6467f958SSadaf Ebrahimi 
232*6467f958SSadaf Ebrahimi /**
233*6467f958SSadaf Ebrahimi  * Convert a cl_double to a cl_half.
234*6467f958SSadaf Ebrahimi  */
cl_half_from_double(cl_double d,cl_half_rounding_mode rounding_mode)235*6467f958SSadaf Ebrahimi static inline cl_half cl_half_from_double(cl_double d, cl_half_rounding_mode rounding_mode)
236*6467f958SSadaf Ebrahimi {
237*6467f958SSadaf Ebrahimi   // Type-punning to get direct access to underlying bits
238*6467f958SSadaf Ebrahimi   union
239*6467f958SSadaf Ebrahimi   {
240*6467f958SSadaf Ebrahimi     cl_double d;
241*6467f958SSadaf Ebrahimi     uint64_t i;
242*6467f958SSadaf Ebrahimi   } f64;
243*6467f958SSadaf Ebrahimi   f64.d = d;
244*6467f958SSadaf Ebrahimi 
245*6467f958SSadaf Ebrahimi   // Extract sign bit
246*6467f958SSadaf Ebrahimi   uint16_t sign = f64.i >> 63;
247*6467f958SSadaf Ebrahimi 
248*6467f958SSadaf Ebrahimi   // Extract FP64 exponent and mantissa
249*6467f958SSadaf Ebrahimi   uint64_t d_exp = (f64.i >> (CL_DBL_MANT_DIG - 1)) & 0x7FF;
250*6467f958SSadaf Ebrahimi   uint64_t d_mant = f64.i & (((uint64_t)1 << (CL_DBL_MANT_DIG - 1)) - 1);
251*6467f958SSadaf Ebrahimi 
252*6467f958SSadaf Ebrahimi   // Remove FP64 exponent bias
253*6467f958SSadaf Ebrahimi   int64_t exp = d_exp - CL_DBL_MAX_EXP + 1;
254*6467f958SSadaf Ebrahimi 
255*6467f958SSadaf Ebrahimi   // Add FP16 exponent bias
256*6467f958SSadaf Ebrahimi   uint16_t h_exp = (uint16_t)(exp + CL_HALF_MAX_EXP - 1);
257*6467f958SSadaf Ebrahimi 
258*6467f958SSadaf Ebrahimi   // Position of the bit that will become the FP16 mantissa LSB
259*6467f958SSadaf Ebrahimi   uint32_t lsb_pos = CL_DBL_MANT_DIG - CL_HALF_MANT_DIG;
260*6467f958SSadaf Ebrahimi 
261*6467f958SSadaf Ebrahimi   // Check for NaN / infinity
262*6467f958SSadaf Ebrahimi   if (d_exp == 0x7FF)
263*6467f958SSadaf Ebrahimi   {
264*6467f958SSadaf Ebrahimi     if (d_mant)
265*6467f958SSadaf Ebrahimi     {
266*6467f958SSadaf Ebrahimi       // NaN -> propagate mantissa and silence it
267*6467f958SSadaf Ebrahimi       uint16_t h_mant = (uint16_t)(d_mant >> lsb_pos);
268*6467f958SSadaf Ebrahimi       h_mant |= 0x200;
269*6467f958SSadaf Ebrahimi       return (sign << 15) | CL_HALF_EXP_MASK | h_mant;
270*6467f958SSadaf Ebrahimi     }
271*6467f958SSadaf Ebrahimi     else
272*6467f958SSadaf Ebrahimi     {
273*6467f958SSadaf Ebrahimi       // Infinity -> zero mantissa
274*6467f958SSadaf Ebrahimi       return (sign << 15) | CL_HALF_EXP_MASK;
275*6467f958SSadaf Ebrahimi     }
276*6467f958SSadaf Ebrahimi   }
277*6467f958SSadaf Ebrahimi 
278*6467f958SSadaf Ebrahimi   // Check for zero
279*6467f958SSadaf Ebrahimi   if (!d_exp && !d_mant)
280*6467f958SSadaf Ebrahimi   {
281*6467f958SSadaf Ebrahimi     return (sign << 15);
282*6467f958SSadaf Ebrahimi   }
283*6467f958SSadaf Ebrahimi 
284*6467f958SSadaf Ebrahimi   // Check for overflow
285*6467f958SSadaf Ebrahimi   if (exp >= CL_HALF_MAX_EXP)
286*6467f958SSadaf Ebrahimi   {
287*6467f958SSadaf Ebrahimi     return cl_half_handle_overflow(rounding_mode, sign);
288*6467f958SSadaf Ebrahimi   }
289*6467f958SSadaf Ebrahimi 
290*6467f958SSadaf Ebrahimi   // Check for underflow
291*6467f958SSadaf Ebrahimi   if (exp < (CL_HALF_MIN_EXP - CL_HALF_MANT_DIG - 1))
292*6467f958SSadaf Ebrahimi   {
293*6467f958SSadaf Ebrahimi     return cl_half_handle_underflow(rounding_mode, sign);
294*6467f958SSadaf Ebrahimi   }
295*6467f958SSadaf Ebrahimi 
296*6467f958SSadaf Ebrahimi   // Check for value that will become denormal
297*6467f958SSadaf Ebrahimi   if (exp < -14)
298*6467f958SSadaf Ebrahimi   {
299*6467f958SSadaf Ebrahimi     // Include the implicit 1 from the FP64 mantissa
300*6467f958SSadaf Ebrahimi     h_exp = 0;
301*6467f958SSadaf Ebrahimi     d_mant |= (uint64_t)1 << (CL_DBL_MANT_DIG - 1);
302*6467f958SSadaf Ebrahimi 
303*6467f958SSadaf Ebrahimi     // Mantissa shift amount depends on exponent
304*6467f958SSadaf Ebrahimi     lsb_pos = (uint32_t)(-exp + (CL_DBL_MANT_DIG - 25));
305*6467f958SSadaf Ebrahimi   }
306*6467f958SSadaf Ebrahimi 
307*6467f958SSadaf Ebrahimi   // Generate FP16 mantissa by shifting FP64 mantissa
308*6467f958SSadaf Ebrahimi   uint16_t h_mant = (uint16_t)(d_mant >> lsb_pos);
309*6467f958SSadaf Ebrahimi 
310*6467f958SSadaf Ebrahimi   // Check whether we need to round
311*6467f958SSadaf Ebrahimi   uint64_t halfway = (uint64_t)1 << (lsb_pos - 1);
312*6467f958SSadaf Ebrahimi   uint64_t mask = (halfway << 1) - 1;
313*6467f958SSadaf Ebrahimi   switch (rounding_mode)
314*6467f958SSadaf Ebrahimi   {
315*6467f958SSadaf Ebrahimi     case CL_HALF_RTE:
316*6467f958SSadaf Ebrahimi       if ((d_mant & mask) > halfway)
317*6467f958SSadaf Ebrahimi       {
318*6467f958SSadaf Ebrahimi         // More than halfway -> round up
319*6467f958SSadaf Ebrahimi         h_mant += 1;
320*6467f958SSadaf Ebrahimi       }
321*6467f958SSadaf Ebrahimi       else if ((d_mant & mask) == halfway)
322*6467f958SSadaf Ebrahimi       {
323*6467f958SSadaf Ebrahimi         // Exactly halfway -> round to nearest even
324*6467f958SSadaf Ebrahimi         if (h_mant & 0x1)
325*6467f958SSadaf Ebrahimi           h_mant += 1;
326*6467f958SSadaf Ebrahimi       }
327*6467f958SSadaf Ebrahimi       break;
328*6467f958SSadaf Ebrahimi     case CL_HALF_RTZ:
329*6467f958SSadaf Ebrahimi       // Mantissa has already been truncated -> do nothing
330*6467f958SSadaf Ebrahimi       break;
331*6467f958SSadaf Ebrahimi     case CL_HALF_RTP:
332*6467f958SSadaf Ebrahimi       if ((d_mant & mask) && !sign)
333*6467f958SSadaf Ebrahimi       {
334*6467f958SSadaf Ebrahimi         // Round positive numbers up
335*6467f958SSadaf Ebrahimi         h_mant += 1;
336*6467f958SSadaf Ebrahimi       }
337*6467f958SSadaf Ebrahimi       break;
338*6467f958SSadaf Ebrahimi     case CL_HALF_RTN:
339*6467f958SSadaf Ebrahimi       if ((d_mant & mask) && sign)
340*6467f958SSadaf Ebrahimi       {
341*6467f958SSadaf Ebrahimi         // Round negative numbers down
342*6467f958SSadaf Ebrahimi         h_mant += 1;
343*6467f958SSadaf Ebrahimi       }
344*6467f958SSadaf Ebrahimi       break;
345*6467f958SSadaf Ebrahimi   }
346*6467f958SSadaf Ebrahimi 
347*6467f958SSadaf Ebrahimi   // Check for mantissa overflow
348*6467f958SSadaf Ebrahimi   if (h_mant & 0x400)
349*6467f958SSadaf Ebrahimi   {
350*6467f958SSadaf Ebrahimi     h_exp += 1;
351*6467f958SSadaf Ebrahimi     h_mant = 0;
352*6467f958SSadaf Ebrahimi   }
353*6467f958SSadaf Ebrahimi 
354*6467f958SSadaf Ebrahimi   return (sign << 15) | (h_exp << 10) | h_mant;
355*6467f958SSadaf Ebrahimi }
356*6467f958SSadaf Ebrahimi 
357*6467f958SSadaf Ebrahimi 
358*6467f958SSadaf Ebrahimi /**
359*6467f958SSadaf Ebrahimi  * Convert a cl_half to a cl_float.
360*6467f958SSadaf Ebrahimi  */
cl_half_to_float(cl_half h)361*6467f958SSadaf Ebrahimi static inline cl_float cl_half_to_float(cl_half h)
362*6467f958SSadaf Ebrahimi {
363*6467f958SSadaf Ebrahimi   // Type-punning to get direct access to underlying bits
364*6467f958SSadaf Ebrahimi   union
365*6467f958SSadaf Ebrahimi   {
366*6467f958SSadaf Ebrahimi     cl_float f;
367*6467f958SSadaf Ebrahimi     uint32_t i;
368*6467f958SSadaf Ebrahimi   } f32;
369*6467f958SSadaf Ebrahimi 
370*6467f958SSadaf Ebrahimi   // Extract sign bit
371*6467f958SSadaf Ebrahimi   uint16_t sign = h >> 15;
372*6467f958SSadaf Ebrahimi 
373*6467f958SSadaf Ebrahimi   // Extract FP16 exponent and mantissa
374*6467f958SSadaf Ebrahimi   uint16_t h_exp = (h >> (CL_HALF_MANT_DIG - 1)) & 0x1F;
375*6467f958SSadaf Ebrahimi   uint16_t h_mant = h & 0x3FF;
376*6467f958SSadaf Ebrahimi 
377*6467f958SSadaf Ebrahimi   // Remove FP16 exponent bias
378*6467f958SSadaf Ebrahimi   int32_t exp = h_exp - CL_HALF_MAX_EXP + 1;
379*6467f958SSadaf Ebrahimi 
380*6467f958SSadaf Ebrahimi   // Add FP32 exponent bias
381*6467f958SSadaf Ebrahimi   uint32_t f_exp = exp + CL_FLT_MAX_EXP - 1;
382*6467f958SSadaf Ebrahimi 
383*6467f958SSadaf Ebrahimi   // Check for NaN / infinity
384*6467f958SSadaf Ebrahimi   if (h_exp == 0x1F)
385*6467f958SSadaf Ebrahimi   {
386*6467f958SSadaf Ebrahimi     if (h_mant)
387*6467f958SSadaf Ebrahimi     {
388*6467f958SSadaf Ebrahimi       // NaN -> propagate mantissa and silence it
389*6467f958SSadaf Ebrahimi       uint32_t f_mant = h_mant << (CL_FLT_MANT_DIG - CL_HALF_MANT_DIG);
390*6467f958SSadaf Ebrahimi       f_mant |= 0x400000;
391*6467f958SSadaf Ebrahimi       f32.i = (sign << 31) | 0x7F800000 | f_mant;
392*6467f958SSadaf Ebrahimi       return f32.f;
393*6467f958SSadaf Ebrahimi     }
394*6467f958SSadaf Ebrahimi     else
395*6467f958SSadaf Ebrahimi     {
396*6467f958SSadaf Ebrahimi       // Infinity -> zero mantissa
397*6467f958SSadaf Ebrahimi       f32.i = (sign << 31) | 0x7F800000;
398*6467f958SSadaf Ebrahimi       return f32.f;
399*6467f958SSadaf Ebrahimi     }
400*6467f958SSadaf Ebrahimi   }
401*6467f958SSadaf Ebrahimi 
402*6467f958SSadaf Ebrahimi   // Check for zero / denormal
403*6467f958SSadaf Ebrahimi   if (h_exp == 0)
404*6467f958SSadaf Ebrahimi   {
405*6467f958SSadaf Ebrahimi     if (h_mant == 0)
406*6467f958SSadaf Ebrahimi     {
407*6467f958SSadaf Ebrahimi       // Zero -> zero exponent
408*6467f958SSadaf Ebrahimi       f_exp = 0;
409*6467f958SSadaf Ebrahimi     }
410*6467f958SSadaf Ebrahimi     else
411*6467f958SSadaf Ebrahimi     {
412*6467f958SSadaf Ebrahimi       // Denormal -> normalize it
413*6467f958SSadaf Ebrahimi       // - Shift mantissa to make most-significant 1 implicit
414*6467f958SSadaf Ebrahimi       // - Adjust exponent accordingly
415*6467f958SSadaf Ebrahimi       uint32_t shift = 0;
416*6467f958SSadaf Ebrahimi       while ((h_mant & 0x400) == 0)
417*6467f958SSadaf Ebrahimi       {
418*6467f958SSadaf Ebrahimi         h_mant <<= 1;
419*6467f958SSadaf Ebrahimi         shift++;
420*6467f958SSadaf Ebrahimi       }
421*6467f958SSadaf Ebrahimi       h_mant &= 0x3FF;
422*6467f958SSadaf Ebrahimi       f_exp -= shift - 1;
423*6467f958SSadaf Ebrahimi     }
424*6467f958SSadaf Ebrahimi   }
425*6467f958SSadaf Ebrahimi 
426*6467f958SSadaf Ebrahimi   f32.i = (sign << 31) | (f_exp << 23) | (h_mant << 13);
427*6467f958SSadaf Ebrahimi   return f32.f;
428*6467f958SSadaf Ebrahimi }
429*6467f958SSadaf Ebrahimi 
430*6467f958SSadaf Ebrahimi 
431*6467f958SSadaf Ebrahimi #undef CL_HALF_EXP_MASK
432*6467f958SSadaf Ebrahimi #undef CL_HALF_MAX_FINITE_MAG
433*6467f958SSadaf Ebrahimi 
434*6467f958SSadaf Ebrahimi 
435*6467f958SSadaf Ebrahimi #ifdef __cplusplus
436*6467f958SSadaf Ebrahimi }
437*6467f958SSadaf Ebrahimi #endif
438*6467f958SSadaf Ebrahimi 
439*6467f958SSadaf Ebrahimi 
440*6467f958SSadaf Ebrahimi #endif  /* OPENCL_CL_HALF_H */
441