/*******************************************************************************
 * Copyright (c) 2019-2020 The Khronos Group Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/

/**
 * This is a header-only utility library that provides OpenCL host code with
 * routines for converting to/from cl_half values.
 *
 * Example usage:
 *
 *    #include <CL/cl_half.h>
 *    ...
 *    cl_half h = cl_half_from_float(0.5f, CL_HALF_RTE);
 *    cl_float f = cl_half_to_float(h);
 */
28*6467f958SSadaf Ebrahimi
29*6467f958SSadaf Ebrahimi #ifndef OPENCL_CL_HALF_H
30*6467f958SSadaf Ebrahimi #define OPENCL_CL_HALF_H
31*6467f958SSadaf Ebrahimi
32*6467f958SSadaf Ebrahimi #include <CL/cl_platform.h>
33*6467f958SSadaf Ebrahimi
34*6467f958SSadaf Ebrahimi #include <stdint.h>
35*6467f958SSadaf Ebrahimi
36*6467f958SSadaf Ebrahimi #ifdef __cplusplus
37*6467f958SSadaf Ebrahimi extern "C" {
38*6467f958SSadaf Ebrahimi #endif
39*6467f958SSadaf Ebrahimi
40*6467f958SSadaf Ebrahimi
/**
 * Rounding mode used when converting to cl_half.
 *
 * These mirror the IEEE-754 rounding directions supported by OpenCL.
 */
typedef enum
{
  CL_HALF_RTE, // round to nearest even
  CL_HALF_RTZ, // round towards zero
  CL_HALF_RTP, // round towards positive infinity
  CL_HALF_RTN, // round towards negative infinity
} cl_half_rounding_mode;
51*6467f958SSadaf Ebrahimi
52*6467f958SSadaf Ebrahimi
/* Private utility macros (undefined again at the end of this header). */

/* All five FP16 exponent bits set: the encoding used by infinity and NaN. */
#define CL_HALF_EXP_MASK 0x7C00

/* Bit pattern of the largest finite FP16 magnitude. */
#define CL_HALF_MAX_FINITE_MAG 0x7BFF
56*6467f958SSadaf Ebrahimi
57*6467f958SSadaf Ebrahimi
58*6467f958SSadaf Ebrahimi /*
59*6467f958SSadaf Ebrahimi * Utility to deal with values that overflow when converting to half precision.
60*6467f958SSadaf Ebrahimi */
cl_half_handle_overflow(cl_half_rounding_mode rounding_mode,uint16_t sign)61*6467f958SSadaf Ebrahimi static inline cl_half cl_half_handle_overflow(cl_half_rounding_mode rounding_mode,
62*6467f958SSadaf Ebrahimi uint16_t sign)
63*6467f958SSadaf Ebrahimi {
64*6467f958SSadaf Ebrahimi if (rounding_mode == CL_HALF_RTZ)
65*6467f958SSadaf Ebrahimi {
66*6467f958SSadaf Ebrahimi // Round overflow towards zero -> largest finite number (preserving sign)
67*6467f958SSadaf Ebrahimi return (sign << 15) | CL_HALF_MAX_FINITE_MAG;
68*6467f958SSadaf Ebrahimi }
69*6467f958SSadaf Ebrahimi else if (rounding_mode == CL_HALF_RTP && sign)
70*6467f958SSadaf Ebrahimi {
71*6467f958SSadaf Ebrahimi // Round negative overflow towards positive infinity -> most negative finite number
72*6467f958SSadaf Ebrahimi return (1 << 15) | CL_HALF_MAX_FINITE_MAG;
73*6467f958SSadaf Ebrahimi }
74*6467f958SSadaf Ebrahimi else if (rounding_mode == CL_HALF_RTN && !sign)
75*6467f958SSadaf Ebrahimi {
76*6467f958SSadaf Ebrahimi // Round positive overflow towards negative infinity -> largest finite number
77*6467f958SSadaf Ebrahimi return CL_HALF_MAX_FINITE_MAG;
78*6467f958SSadaf Ebrahimi }
79*6467f958SSadaf Ebrahimi
80*6467f958SSadaf Ebrahimi // Overflow to infinity
81*6467f958SSadaf Ebrahimi return (sign << 15) | CL_HALF_EXP_MASK;
82*6467f958SSadaf Ebrahimi }
83*6467f958SSadaf Ebrahimi
84*6467f958SSadaf Ebrahimi /*
85*6467f958SSadaf Ebrahimi * Utility to deal with values that underflow when converting to half precision.
86*6467f958SSadaf Ebrahimi */
cl_half_handle_underflow(cl_half_rounding_mode rounding_mode,uint16_t sign)87*6467f958SSadaf Ebrahimi static inline cl_half cl_half_handle_underflow(cl_half_rounding_mode rounding_mode,
88*6467f958SSadaf Ebrahimi uint16_t sign)
89*6467f958SSadaf Ebrahimi {
90*6467f958SSadaf Ebrahimi if (rounding_mode == CL_HALF_RTP && !sign)
91*6467f958SSadaf Ebrahimi {
92*6467f958SSadaf Ebrahimi // Round underflow towards positive infinity -> smallest positive value
93*6467f958SSadaf Ebrahimi return (sign << 15) | 1;
94*6467f958SSadaf Ebrahimi }
95*6467f958SSadaf Ebrahimi else if (rounding_mode == CL_HALF_RTN && sign)
96*6467f958SSadaf Ebrahimi {
97*6467f958SSadaf Ebrahimi // Round underflow towards negative infinity -> largest negative value
98*6467f958SSadaf Ebrahimi return (sign << 15) | 1;
99*6467f958SSadaf Ebrahimi }
100*6467f958SSadaf Ebrahimi
101*6467f958SSadaf Ebrahimi // Flush to zero
102*6467f958SSadaf Ebrahimi return (sign << 15);
103*6467f958SSadaf Ebrahimi }
104*6467f958SSadaf Ebrahimi
105*6467f958SSadaf Ebrahimi
106*6467f958SSadaf Ebrahimi /**
107*6467f958SSadaf Ebrahimi * Convert a cl_float to a cl_half.
108*6467f958SSadaf Ebrahimi */
cl_half_from_float(cl_float f,cl_half_rounding_mode rounding_mode)109*6467f958SSadaf Ebrahimi static inline cl_half cl_half_from_float(cl_float f, cl_half_rounding_mode rounding_mode)
110*6467f958SSadaf Ebrahimi {
111*6467f958SSadaf Ebrahimi // Type-punning to get direct access to underlying bits
112*6467f958SSadaf Ebrahimi union
113*6467f958SSadaf Ebrahimi {
114*6467f958SSadaf Ebrahimi cl_float f;
115*6467f958SSadaf Ebrahimi uint32_t i;
116*6467f958SSadaf Ebrahimi } f32;
117*6467f958SSadaf Ebrahimi f32.f = f;
118*6467f958SSadaf Ebrahimi
119*6467f958SSadaf Ebrahimi // Extract sign bit
120*6467f958SSadaf Ebrahimi uint16_t sign = f32.i >> 31;
121*6467f958SSadaf Ebrahimi
122*6467f958SSadaf Ebrahimi // Extract FP32 exponent and mantissa
123*6467f958SSadaf Ebrahimi uint32_t f_exp = (f32.i >> (CL_FLT_MANT_DIG - 1)) & 0xFF;
124*6467f958SSadaf Ebrahimi uint32_t f_mant = f32.i & ((1 << (CL_FLT_MANT_DIG - 1)) - 1);
125*6467f958SSadaf Ebrahimi
126*6467f958SSadaf Ebrahimi // Remove FP32 exponent bias
127*6467f958SSadaf Ebrahimi int32_t exp = f_exp - CL_FLT_MAX_EXP + 1;
128*6467f958SSadaf Ebrahimi
129*6467f958SSadaf Ebrahimi // Add FP16 exponent bias
130*6467f958SSadaf Ebrahimi uint16_t h_exp = (uint16_t)(exp + CL_HALF_MAX_EXP - 1);
131*6467f958SSadaf Ebrahimi
132*6467f958SSadaf Ebrahimi // Position of the bit that will become the FP16 mantissa LSB
133*6467f958SSadaf Ebrahimi uint32_t lsb_pos = CL_FLT_MANT_DIG - CL_HALF_MANT_DIG;
134*6467f958SSadaf Ebrahimi
135*6467f958SSadaf Ebrahimi // Check for NaN / infinity
136*6467f958SSadaf Ebrahimi if (f_exp == 0xFF)
137*6467f958SSadaf Ebrahimi {
138*6467f958SSadaf Ebrahimi if (f_mant)
139*6467f958SSadaf Ebrahimi {
140*6467f958SSadaf Ebrahimi // NaN -> propagate mantissa and silence it
141*6467f958SSadaf Ebrahimi uint16_t h_mant = (uint16_t)(f_mant >> lsb_pos);
142*6467f958SSadaf Ebrahimi h_mant |= 0x200;
143*6467f958SSadaf Ebrahimi return (sign << 15) | CL_HALF_EXP_MASK | h_mant;
144*6467f958SSadaf Ebrahimi }
145*6467f958SSadaf Ebrahimi else
146*6467f958SSadaf Ebrahimi {
147*6467f958SSadaf Ebrahimi // Infinity -> zero mantissa
148*6467f958SSadaf Ebrahimi return (sign << 15) | CL_HALF_EXP_MASK;
149*6467f958SSadaf Ebrahimi }
150*6467f958SSadaf Ebrahimi }
151*6467f958SSadaf Ebrahimi
152*6467f958SSadaf Ebrahimi // Check for zero
153*6467f958SSadaf Ebrahimi if (!f_exp && !f_mant)
154*6467f958SSadaf Ebrahimi {
155*6467f958SSadaf Ebrahimi return (sign << 15);
156*6467f958SSadaf Ebrahimi }
157*6467f958SSadaf Ebrahimi
158*6467f958SSadaf Ebrahimi // Check for overflow
159*6467f958SSadaf Ebrahimi if (exp >= CL_HALF_MAX_EXP)
160*6467f958SSadaf Ebrahimi {
161*6467f958SSadaf Ebrahimi return cl_half_handle_overflow(rounding_mode, sign);
162*6467f958SSadaf Ebrahimi }
163*6467f958SSadaf Ebrahimi
164*6467f958SSadaf Ebrahimi // Check for underflow
165*6467f958SSadaf Ebrahimi if (exp < (CL_HALF_MIN_EXP - CL_HALF_MANT_DIG - 1))
166*6467f958SSadaf Ebrahimi {
167*6467f958SSadaf Ebrahimi return cl_half_handle_underflow(rounding_mode, sign);
168*6467f958SSadaf Ebrahimi }
169*6467f958SSadaf Ebrahimi
170*6467f958SSadaf Ebrahimi // Check for value that will become denormal
171*6467f958SSadaf Ebrahimi if (exp < -14)
172*6467f958SSadaf Ebrahimi {
173*6467f958SSadaf Ebrahimi // Denormal -> include the implicit 1 from the FP32 mantissa
174*6467f958SSadaf Ebrahimi h_exp = 0;
175*6467f958SSadaf Ebrahimi f_mant |= 1 << (CL_FLT_MANT_DIG - 1);
176*6467f958SSadaf Ebrahimi
177*6467f958SSadaf Ebrahimi // Mantissa shift amount depends on exponent
178*6467f958SSadaf Ebrahimi lsb_pos = -exp + (CL_FLT_MANT_DIG - 25);
179*6467f958SSadaf Ebrahimi }
180*6467f958SSadaf Ebrahimi
181*6467f958SSadaf Ebrahimi // Generate FP16 mantissa by shifting FP32 mantissa
182*6467f958SSadaf Ebrahimi uint16_t h_mant = (uint16_t)(f_mant >> lsb_pos);
183*6467f958SSadaf Ebrahimi
184*6467f958SSadaf Ebrahimi // Check whether we need to round
185*6467f958SSadaf Ebrahimi uint32_t halfway = 1 << (lsb_pos - 1);
186*6467f958SSadaf Ebrahimi uint32_t mask = (halfway << 1) - 1;
187*6467f958SSadaf Ebrahimi switch (rounding_mode)
188*6467f958SSadaf Ebrahimi {
189*6467f958SSadaf Ebrahimi case CL_HALF_RTE:
190*6467f958SSadaf Ebrahimi if ((f_mant & mask) > halfway)
191*6467f958SSadaf Ebrahimi {
192*6467f958SSadaf Ebrahimi // More than halfway -> round up
193*6467f958SSadaf Ebrahimi h_mant += 1;
194*6467f958SSadaf Ebrahimi }
195*6467f958SSadaf Ebrahimi else if ((f_mant & mask) == halfway)
196*6467f958SSadaf Ebrahimi {
197*6467f958SSadaf Ebrahimi // Exactly halfway -> round to nearest even
198*6467f958SSadaf Ebrahimi if (h_mant & 0x1)
199*6467f958SSadaf Ebrahimi h_mant += 1;
200*6467f958SSadaf Ebrahimi }
201*6467f958SSadaf Ebrahimi break;
202*6467f958SSadaf Ebrahimi case CL_HALF_RTZ:
203*6467f958SSadaf Ebrahimi // Mantissa has already been truncated -> do nothing
204*6467f958SSadaf Ebrahimi break;
205*6467f958SSadaf Ebrahimi case CL_HALF_RTP:
206*6467f958SSadaf Ebrahimi if ((f_mant & mask) && !sign)
207*6467f958SSadaf Ebrahimi {
208*6467f958SSadaf Ebrahimi // Round positive numbers up
209*6467f958SSadaf Ebrahimi h_mant += 1;
210*6467f958SSadaf Ebrahimi }
211*6467f958SSadaf Ebrahimi break;
212*6467f958SSadaf Ebrahimi case CL_HALF_RTN:
213*6467f958SSadaf Ebrahimi if ((f_mant & mask) && sign)
214*6467f958SSadaf Ebrahimi {
215*6467f958SSadaf Ebrahimi // Round negative numbers down
216*6467f958SSadaf Ebrahimi h_mant += 1;
217*6467f958SSadaf Ebrahimi }
218*6467f958SSadaf Ebrahimi break;
219*6467f958SSadaf Ebrahimi }
220*6467f958SSadaf Ebrahimi
221*6467f958SSadaf Ebrahimi // Check for mantissa overflow
222*6467f958SSadaf Ebrahimi if (h_mant & 0x400)
223*6467f958SSadaf Ebrahimi {
224*6467f958SSadaf Ebrahimi h_exp += 1;
225*6467f958SSadaf Ebrahimi h_mant = 0;
226*6467f958SSadaf Ebrahimi }
227*6467f958SSadaf Ebrahimi
228*6467f958SSadaf Ebrahimi return (sign << 15) | (h_exp << 10) | h_mant;
229*6467f958SSadaf Ebrahimi }
230*6467f958SSadaf Ebrahimi
231*6467f958SSadaf Ebrahimi
232*6467f958SSadaf Ebrahimi /**
233*6467f958SSadaf Ebrahimi * Convert a cl_double to a cl_half.
234*6467f958SSadaf Ebrahimi */
cl_half_from_double(cl_double d,cl_half_rounding_mode rounding_mode)235*6467f958SSadaf Ebrahimi static inline cl_half cl_half_from_double(cl_double d, cl_half_rounding_mode rounding_mode)
236*6467f958SSadaf Ebrahimi {
237*6467f958SSadaf Ebrahimi // Type-punning to get direct access to underlying bits
238*6467f958SSadaf Ebrahimi union
239*6467f958SSadaf Ebrahimi {
240*6467f958SSadaf Ebrahimi cl_double d;
241*6467f958SSadaf Ebrahimi uint64_t i;
242*6467f958SSadaf Ebrahimi } f64;
243*6467f958SSadaf Ebrahimi f64.d = d;
244*6467f958SSadaf Ebrahimi
245*6467f958SSadaf Ebrahimi // Extract sign bit
246*6467f958SSadaf Ebrahimi uint16_t sign = f64.i >> 63;
247*6467f958SSadaf Ebrahimi
248*6467f958SSadaf Ebrahimi // Extract FP64 exponent and mantissa
249*6467f958SSadaf Ebrahimi uint64_t d_exp = (f64.i >> (CL_DBL_MANT_DIG - 1)) & 0x7FF;
250*6467f958SSadaf Ebrahimi uint64_t d_mant = f64.i & (((uint64_t)1 << (CL_DBL_MANT_DIG - 1)) - 1);
251*6467f958SSadaf Ebrahimi
252*6467f958SSadaf Ebrahimi // Remove FP64 exponent bias
253*6467f958SSadaf Ebrahimi int64_t exp = d_exp - CL_DBL_MAX_EXP + 1;
254*6467f958SSadaf Ebrahimi
255*6467f958SSadaf Ebrahimi // Add FP16 exponent bias
256*6467f958SSadaf Ebrahimi uint16_t h_exp = (uint16_t)(exp + CL_HALF_MAX_EXP - 1);
257*6467f958SSadaf Ebrahimi
258*6467f958SSadaf Ebrahimi // Position of the bit that will become the FP16 mantissa LSB
259*6467f958SSadaf Ebrahimi uint32_t lsb_pos = CL_DBL_MANT_DIG - CL_HALF_MANT_DIG;
260*6467f958SSadaf Ebrahimi
261*6467f958SSadaf Ebrahimi // Check for NaN / infinity
262*6467f958SSadaf Ebrahimi if (d_exp == 0x7FF)
263*6467f958SSadaf Ebrahimi {
264*6467f958SSadaf Ebrahimi if (d_mant)
265*6467f958SSadaf Ebrahimi {
266*6467f958SSadaf Ebrahimi // NaN -> propagate mantissa and silence it
267*6467f958SSadaf Ebrahimi uint16_t h_mant = (uint16_t)(d_mant >> lsb_pos);
268*6467f958SSadaf Ebrahimi h_mant |= 0x200;
269*6467f958SSadaf Ebrahimi return (sign << 15) | CL_HALF_EXP_MASK | h_mant;
270*6467f958SSadaf Ebrahimi }
271*6467f958SSadaf Ebrahimi else
272*6467f958SSadaf Ebrahimi {
273*6467f958SSadaf Ebrahimi // Infinity -> zero mantissa
274*6467f958SSadaf Ebrahimi return (sign << 15) | CL_HALF_EXP_MASK;
275*6467f958SSadaf Ebrahimi }
276*6467f958SSadaf Ebrahimi }
277*6467f958SSadaf Ebrahimi
278*6467f958SSadaf Ebrahimi // Check for zero
279*6467f958SSadaf Ebrahimi if (!d_exp && !d_mant)
280*6467f958SSadaf Ebrahimi {
281*6467f958SSadaf Ebrahimi return (sign << 15);
282*6467f958SSadaf Ebrahimi }
283*6467f958SSadaf Ebrahimi
284*6467f958SSadaf Ebrahimi // Check for overflow
285*6467f958SSadaf Ebrahimi if (exp >= CL_HALF_MAX_EXP)
286*6467f958SSadaf Ebrahimi {
287*6467f958SSadaf Ebrahimi return cl_half_handle_overflow(rounding_mode, sign);
288*6467f958SSadaf Ebrahimi }
289*6467f958SSadaf Ebrahimi
290*6467f958SSadaf Ebrahimi // Check for underflow
291*6467f958SSadaf Ebrahimi if (exp < (CL_HALF_MIN_EXP - CL_HALF_MANT_DIG - 1))
292*6467f958SSadaf Ebrahimi {
293*6467f958SSadaf Ebrahimi return cl_half_handle_underflow(rounding_mode, sign);
294*6467f958SSadaf Ebrahimi }
295*6467f958SSadaf Ebrahimi
296*6467f958SSadaf Ebrahimi // Check for value that will become denormal
297*6467f958SSadaf Ebrahimi if (exp < -14)
298*6467f958SSadaf Ebrahimi {
299*6467f958SSadaf Ebrahimi // Include the implicit 1 from the FP64 mantissa
300*6467f958SSadaf Ebrahimi h_exp = 0;
301*6467f958SSadaf Ebrahimi d_mant |= (uint64_t)1 << (CL_DBL_MANT_DIG - 1);
302*6467f958SSadaf Ebrahimi
303*6467f958SSadaf Ebrahimi // Mantissa shift amount depends on exponent
304*6467f958SSadaf Ebrahimi lsb_pos = (uint32_t)(-exp + (CL_DBL_MANT_DIG - 25));
305*6467f958SSadaf Ebrahimi }
306*6467f958SSadaf Ebrahimi
307*6467f958SSadaf Ebrahimi // Generate FP16 mantissa by shifting FP64 mantissa
308*6467f958SSadaf Ebrahimi uint16_t h_mant = (uint16_t)(d_mant >> lsb_pos);
309*6467f958SSadaf Ebrahimi
310*6467f958SSadaf Ebrahimi // Check whether we need to round
311*6467f958SSadaf Ebrahimi uint64_t halfway = (uint64_t)1 << (lsb_pos - 1);
312*6467f958SSadaf Ebrahimi uint64_t mask = (halfway << 1) - 1;
313*6467f958SSadaf Ebrahimi switch (rounding_mode)
314*6467f958SSadaf Ebrahimi {
315*6467f958SSadaf Ebrahimi case CL_HALF_RTE:
316*6467f958SSadaf Ebrahimi if ((d_mant & mask) > halfway)
317*6467f958SSadaf Ebrahimi {
318*6467f958SSadaf Ebrahimi // More than halfway -> round up
319*6467f958SSadaf Ebrahimi h_mant += 1;
320*6467f958SSadaf Ebrahimi }
321*6467f958SSadaf Ebrahimi else if ((d_mant & mask) == halfway)
322*6467f958SSadaf Ebrahimi {
323*6467f958SSadaf Ebrahimi // Exactly halfway -> round to nearest even
324*6467f958SSadaf Ebrahimi if (h_mant & 0x1)
325*6467f958SSadaf Ebrahimi h_mant += 1;
326*6467f958SSadaf Ebrahimi }
327*6467f958SSadaf Ebrahimi break;
328*6467f958SSadaf Ebrahimi case CL_HALF_RTZ:
329*6467f958SSadaf Ebrahimi // Mantissa has already been truncated -> do nothing
330*6467f958SSadaf Ebrahimi break;
331*6467f958SSadaf Ebrahimi case CL_HALF_RTP:
332*6467f958SSadaf Ebrahimi if ((d_mant & mask) && !sign)
333*6467f958SSadaf Ebrahimi {
334*6467f958SSadaf Ebrahimi // Round positive numbers up
335*6467f958SSadaf Ebrahimi h_mant += 1;
336*6467f958SSadaf Ebrahimi }
337*6467f958SSadaf Ebrahimi break;
338*6467f958SSadaf Ebrahimi case CL_HALF_RTN:
339*6467f958SSadaf Ebrahimi if ((d_mant & mask) && sign)
340*6467f958SSadaf Ebrahimi {
341*6467f958SSadaf Ebrahimi // Round negative numbers down
342*6467f958SSadaf Ebrahimi h_mant += 1;
343*6467f958SSadaf Ebrahimi }
344*6467f958SSadaf Ebrahimi break;
345*6467f958SSadaf Ebrahimi }
346*6467f958SSadaf Ebrahimi
347*6467f958SSadaf Ebrahimi // Check for mantissa overflow
348*6467f958SSadaf Ebrahimi if (h_mant & 0x400)
349*6467f958SSadaf Ebrahimi {
350*6467f958SSadaf Ebrahimi h_exp += 1;
351*6467f958SSadaf Ebrahimi h_mant = 0;
352*6467f958SSadaf Ebrahimi }
353*6467f958SSadaf Ebrahimi
354*6467f958SSadaf Ebrahimi return (sign << 15) | (h_exp << 10) | h_mant;
355*6467f958SSadaf Ebrahimi }
356*6467f958SSadaf Ebrahimi
357*6467f958SSadaf Ebrahimi
358*6467f958SSadaf Ebrahimi /**
359*6467f958SSadaf Ebrahimi * Convert a cl_half to a cl_float.
360*6467f958SSadaf Ebrahimi */
cl_half_to_float(cl_half h)361*6467f958SSadaf Ebrahimi static inline cl_float cl_half_to_float(cl_half h)
362*6467f958SSadaf Ebrahimi {
363*6467f958SSadaf Ebrahimi // Type-punning to get direct access to underlying bits
364*6467f958SSadaf Ebrahimi union
365*6467f958SSadaf Ebrahimi {
366*6467f958SSadaf Ebrahimi cl_float f;
367*6467f958SSadaf Ebrahimi uint32_t i;
368*6467f958SSadaf Ebrahimi } f32;
369*6467f958SSadaf Ebrahimi
370*6467f958SSadaf Ebrahimi // Extract sign bit
371*6467f958SSadaf Ebrahimi uint16_t sign = h >> 15;
372*6467f958SSadaf Ebrahimi
373*6467f958SSadaf Ebrahimi // Extract FP16 exponent and mantissa
374*6467f958SSadaf Ebrahimi uint16_t h_exp = (h >> (CL_HALF_MANT_DIG - 1)) & 0x1F;
375*6467f958SSadaf Ebrahimi uint16_t h_mant = h & 0x3FF;
376*6467f958SSadaf Ebrahimi
377*6467f958SSadaf Ebrahimi // Remove FP16 exponent bias
378*6467f958SSadaf Ebrahimi int32_t exp = h_exp - CL_HALF_MAX_EXP + 1;
379*6467f958SSadaf Ebrahimi
380*6467f958SSadaf Ebrahimi // Add FP32 exponent bias
381*6467f958SSadaf Ebrahimi uint32_t f_exp = exp + CL_FLT_MAX_EXP - 1;
382*6467f958SSadaf Ebrahimi
383*6467f958SSadaf Ebrahimi // Check for NaN / infinity
384*6467f958SSadaf Ebrahimi if (h_exp == 0x1F)
385*6467f958SSadaf Ebrahimi {
386*6467f958SSadaf Ebrahimi if (h_mant)
387*6467f958SSadaf Ebrahimi {
388*6467f958SSadaf Ebrahimi // NaN -> propagate mantissa and silence it
389*6467f958SSadaf Ebrahimi uint32_t f_mant = h_mant << (CL_FLT_MANT_DIG - CL_HALF_MANT_DIG);
390*6467f958SSadaf Ebrahimi f_mant |= 0x400000;
391*6467f958SSadaf Ebrahimi f32.i = (sign << 31) | 0x7F800000 | f_mant;
392*6467f958SSadaf Ebrahimi return f32.f;
393*6467f958SSadaf Ebrahimi }
394*6467f958SSadaf Ebrahimi else
395*6467f958SSadaf Ebrahimi {
396*6467f958SSadaf Ebrahimi // Infinity -> zero mantissa
397*6467f958SSadaf Ebrahimi f32.i = (sign << 31) | 0x7F800000;
398*6467f958SSadaf Ebrahimi return f32.f;
399*6467f958SSadaf Ebrahimi }
400*6467f958SSadaf Ebrahimi }
401*6467f958SSadaf Ebrahimi
402*6467f958SSadaf Ebrahimi // Check for zero / denormal
403*6467f958SSadaf Ebrahimi if (h_exp == 0)
404*6467f958SSadaf Ebrahimi {
405*6467f958SSadaf Ebrahimi if (h_mant == 0)
406*6467f958SSadaf Ebrahimi {
407*6467f958SSadaf Ebrahimi // Zero -> zero exponent
408*6467f958SSadaf Ebrahimi f_exp = 0;
409*6467f958SSadaf Ebrahimi }
410*6467f958SSadaf Ebrahimi else
411*6467f958SSadaf Ebrahimi {
412*6467f958SSadaf Ebrahimi // Denormal -> normalize it
413*6467f958SSadaf Ebrahimi // - Shift mantissa to make most-significant 1 implicit
414*6467f958SSadaf Ebrahimi // - Adjust exponent accordingly
415*6467f958SSadaf Ebrahimi uint32_t shift = 0;
416*6467f958SSadaf Ebrahimi while ((h_mant & 0x400) == 0)
417*6467f958SSadaf Ebrahimi {
418*6467f958SSadaf Ebrahimi h_mant <<= 1;
419*6467f958SSadaf Ebrahimi shift++;
420*6467f958SSadaf Ebrahimi }
421*6467f958SSadaf Ebrahimi h_mant &= 0x3FF;
422*6467f958SSadaf Ebrahimi f_exp -= shift - 1;
423*6467f958SSadaf Ebrahimi }
424*6467f958SSadaf Ebrahimi }
425*6467f958SSadaf Ebrahimi
426*6467f958SSadaf Ebrahimi f32.i = (sign << 31) | (f_exp << 23) | (h_mant << 13);
427*6467f958SSadaf Ebrahimi return f32.f;
428*6467f958SSadaf Ebrahimi }
429*6467f958SSadaf Ebrahimi
430*6467f958SSadaf Ebrahimi
431*6467f958SSadaf Ebrahimi #undef CL_HALF_EXP_MASK
432*6467f958SSadaf Ebrahimi #undef CL_HALF_MAX_FINITE_MAG
433*6467f958SSadaf Ebrahimi
434*6467f958SSadaf Ebrahimi
435*6467f958SSadaf Ebrahimi #ifdef __cplusplus
436*6467f958SSadaf Ebrahimi }
437*6467f958SSadaf Ebrahimi #endif
438*6467f958SSadaf Ebrahimi
439*6467f958SSadaf Ebrahimi
440*6467f958SSadaf Ebrahimi #endif /* OPENCL_CL_HALF_H */
441