xref: /aosp_15_r20/external/mesa3d/src/util/softfloat.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * License for Berkeley SoftFloat Release 3e
3  *
4  * John R. Hauser
5  * 2018 January 20
6  *
7  * The following applies to the whole of SoftFloat Release 3e as well as to
8  * each source file individually.
9  *
10  * Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018 The Regents of the
11  * University of California.  All rights reserved.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions are met:
15  *
16  *  1. Redistributions of source code must retain the above copyright notice,
17  *     this list of conditions, and the following disclaimer.
18  *
19  *  2. Redistributions in binary form must reproduce the above copyright
20  *     notice, this list of conditions, and the following disclaimer in the
21  *     documentation and/or other materials provided with the distribution.
22  *
23  *  3. Neither the name of the University nor the names of its contributors
24  *     may be used to endorse or promote products derived from this software
25  *     without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
28  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
29  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
30  * DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
31  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
32  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
33  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
34  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
36  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37  *
38  *
39  * The functions listed in this file are modified versions of the ones
40  * from the Berkeley SoftFloat 3e Library.
41  *
42  * Their implementation correctness has been checked with the Berkeley
43  * TestFloat Release 3e tool for x86_64.
44  */
45 
46 #include "rounding.h"
47 #include "bitscan.h"
48 #include "softfloat.h"
49 
50 #if defined(BIG_ENDIAN)
51 #define word_incr -1
52 #define index_word(total, n) ((total) - 1 - (n))
53 #define index_word_hi(total) 0
54 #define index_word_lo(total) ((total) - 1)
55 #define index_multiword_hi(total, n) 0
56 #define index_multiword_lo(total, n) ((total) - (n))
57 #define index_multiword_hi_but(total, n) 0
58 #define index_multiword_lo_but(total, n) (n)
59 #else
60 #define word_incr 1
61 #define index_word(total, n) (n)
62 #define index_word_hi(total) ((total) - 1)
63 #define index_word_lo(total) 0
64 #define index_multiword_hi(total, n) ((total) - (n))
65 #define index_multiword_lo(total, n) 0
66 #define index_multiword_hi_but(total, n) (n)
67 #define index_multiword_lo_but(total, n) 0
68 #endif
69 
70 typedef union { double f; int64_t i; uint64_t u; } di_type;
71 typedef union { float f; int32_t i; uint32_t u; } fi_type;
72 
73 /**
74  * \brief Shifts 'a' right by the number of bits given in 'dist', which must be in
75  * the range 1 to 63.  If any nonzero bits are shifted off, they are "jammed"
76  * into the least-significant bit of the shifted value by setting the
77  * least-significant bit to 1.  This shifted-and-jammed value is returned.
78  *
79  * From softfloat_shortShiftRightJam64()
80  */
81 static inline
_mesa_short_shift_right_jam64(uint64_t a,uint8_t dist)82 uint64_t _mesa_short_shift_right_jam64(uint64_t a, uint8_t dist)
83 {
84     return a >> dist | ((a & (((uint64_t) 1 << dist) - 1)) != 0);
85 }
86 
87 /**
88  * \brief Shifts 'a' right by the number of bits given in 'dist', which must not
89  * be zero.  If any nonzero bits are shifted off, they are "jammed" into the
90  * least-significant bit of the shifted value by setting the least-significant
91  * bit to 1.  This shifted-and-jammed value is returned.
92  * The value of 'dist' can be arbitrarily large.  In particular, if 'dist' is
93  * greater than 64, the result will be either 0 or 1, depending on whether 'a'
94  * is zero or nonzero.
95  *
96  * From softfloat_shiftRightJam64()
97  */
98 static inline
_mesa_shift_right_jam64(uint64_t a,uint32_t dist)99 uint64_t _mesa_shift_right_jam64(uint64_t a, uint32_t dist)
100 {
101     return
102         (dist < 63) ? a >> dist | ((uint64_t) (a << (-dist & 63)) != 0) : (a != 0);
103 }
104 
105 /**
106  * \brief Shifts 'a' right by the number of bits given in 'dist', which must not be
107  * zero.  If any nonzero bits are shifted off, they are "jammed" into the
108  * least-significant bit of the shifted value by setting the least-significant
109  * bit to 1.  This shifted-and-jammed value is returned.
110  * The value of 'dist' can be arbitrarily large.  In particular, if 'dist' is
111  * greater than 32, the result will be either 0 or 1, depending on whether 'a'
112  * is zero or nonzero.
113  *
114  * From softfloat_shiftRightJam32()
115  */
116 static inline
_mesa_shift_right_jam32(uint32_t a,uint16_t dist)117 uint32_t _mesa_shift_right_jam32(uint32_t a, uint16_t dist)
118 {
119     return
120         (dist < 31) ? a >> dist | ((uint32_t) (a << (-dist & 31)) != 0) : (a != 0);
121 }
122 
123 /**
124  * \brief Extracted from softfloat_roundPackToF64()
125  */
126 static inline
_mesa_roundtozero_f64(int64_t s,int64_t e,int64_t m)127 double _mesa_roundtozero_f64(int64_t s, int64_t e, int64_t m)
128 {
129     di_type result;
130 
131     if ((uint64_t) e >= 0x7fd) {
132         if (e < 0) {
133             m = _mesa_shift_right_jam64(m, -e);
134             e = 0;
135         } else if ((e > 0x7fd) || (0x8000000000000000 <= m)) {
136             e = 0x7ff;
137             m = 0;
138             result.u = (s << 63) + (e << 52) + m;
139             result.u -= 1;
140             return result.f;
141         }
142     }
143 
144     m >>= 10;
145     if (m == 0)
146         e = 0;
147 
148     result.u = (s << 63) + (e << 52) + m;
149     return result.f;
150 }
151 
152 /**
153  * \brief Extracted from softfloat_roundPackToF32()
154  */
155 static inline
_mesa_round_f32(int32_t s,int32_t e,int32_t m,bool rtz)156 float _mesa_round_f32(int32_t s, int32_t e, int32_t m, bool rtz)
157 {
158     fi_type result;
159     uint8_t round_increment = rtz ? 0 : 0x40;
160 
161     if ((uint32_t) e >= 0xfd) {
162         if (e < 0) {
163             m = _mesa_shift_right_jam32(m, -e);
164             e = 0;
165         } else if ((e > 0xfd) || (0x80000000 <= m + round_increment)) {
166             e = 0xff;
167             m = 0;
168             result.u = (s << 31) + (e << 23) + m;
169             result.u -= !round_increment;
170             return result.f;
171         }
172     }
173 
174     uint8_t round_bits;
175     round_bits = m & 0x7f;
176     m = ((uint32_t) m + round_increment) >> 7;
177     m &= ~(uint32_t) (! (round_bits ^ 0x40) & !rtz);
178     if (m == 0)
179         e = 0;
180 
181     result.u = (s << 31) + (e << 23) + m;
182     return result.f;
183 }
184 
185 /**
186  * \brief Extracted from softfloat_roundPackToF16()
187  */
188 static inline
_mesa_roundtozero_f16(int16_t s,int16_t e,int16_t m)189 uint16_t _mesa_roundtozero_f16(int16_t s, int16_t e, int16_t m)
190 {
191     if ((uint16_t) e >= 0x1d) {
192         if (e < 0) {
193             m = _mesa_shift_right_jam32(m, -e);
194             e = 0;
195         } else if (e > 0x1d) {
196             e = 0x1f;
197             m = 0;
198             return (s << 15) + (e << 10) + m - 1;
199         }
200     }
201 
202     m >>= 4;
203     if (m == 0)
204         e = 0;
205 
206     return (s << 15) + (e << 10) + m;
207 }
208 
209 /**
210  * \brief Shifts the N-bit unsigned integer pointed to by 'a' left by the number of
211  * bits given in 'dist', where N = 'size_words' * 32.  The value of 'dist'
212  * must be in the range 1 to 31.  Any nonzero bits shifted off are lost.  The
213  * shifted N-bit result is stored at the location pointed to by 'm_out'.  Each
214  * of 'a' and 'm_out' points to a 'size_words'-long array of 32-bit elements
215  * that concatenate in the platform's normal endian order to form an N-bit
216  * integer.
217  *
218  * From softfloat_shortShiftLeftM()
219  */
220 static inline void
_mesa_short_shift_left_m(uint8_t size_words,const uint32_t * a,uint8_t dist,uint32_t * m_out)221 _mesa_short_shift_left_m(uint8_t size_words, const uint32_t *a, uint8_t dist, uint32_t *m_out)
222 {
223     uint8_t neg_dist;
224     unsigned index, last_index;
225     uint32_t part_word, a_word;
226 
227     neg_dist = -dist;
228     index = index_word_hi(size_words);
229     last_index = index_word_lo(size_words);
230     part_word = a[index] << dist;
231     while (index != last_index) {
232         a_word = a[index - word_incr];
233         m_out[index] = part_word | a_word >> (neg_dist & 31);
234         index -= word_incr;
235         part_word = a_word << dist;
236     }
237     m_out[index] = part_word;
238 }
239 
240 /**
241  * \brief Shifts the N-bit unsigned integer pointed to by 'a' left by the number of
242  * bits given in 'dist', where N = 'size_words' * 32.  The value of 'dist'
243  * must not be zero.  Any nonzero bits shifted off are lost.  The shifted
244  * N-bit result is stored at the location pointed to by 'm_out'.  Each of 'a'
245  * and 'm_out' points to a 'size_words'-long array of 32-bit elements that
246  * concatenate in the platform's normal endian order to form an N-bit
247  * integer. The value of 'dist' can be arbitrarily large.  In particular, if
248  * 'dist' is greater than N, the stored result will be 0.
249  *
250  * From softfloat_shiftLeftM()
251  */
252 static inline void
_mesa_shift_left_m(uint8_t size_words,const uint32_t * a,uint32_t dist,uint32_t * m_out)253 _mesa_shift_left_m(uint8_t size_words, const uint32_t *a, uint32_t dist, uint32_t *m_out)
254 {
255     uint32_t word_dist;
256     uint8_t inner_dist;
257     uint8_t i;
258 
259     word_dist = dist >> 5;
260     if (word_dist < size_words) {
261         a += index_multiword_lo_but(size_words, word_dist);
262         inner_dist = dist & 31;
263         if (inner_dist) {
264             _mesa_short_shift_left_m(size_words - word_dist, a, inner_dist,
265                                      m_out + index_multiword_hi_but(size_words, word_dist));
266             if (!word_dist)
267                 return;
268         } else {
269             uint32_t *dest = m_out + index_word_hi(size_words);
270             a += index_word_hi(size_words - word_dist);
271             for (i = size_words - word_dist; i; --i) {
272                 *dest = *a;
273                 a -= word_incr;
274                 dest -= word_incr;
275             }
276         }
277         m_out += index_multiword_lo(size_words, word_dist);
278     } else {
279         word_dist = size_words;
280     }
281     do {
282         *m_out++ = 0;
283         --word_dist;
284     } while (word_dist);
285 }
286 
287 /**
288  * \brief Shifts the N-bit unsigned integer pointed to by 'a' right by the number of
289  * bits given in 'dist', where N = 'size_words' * 32.  The value of 'dist'
290  * must be in the range 1 to 31.  Any nonzero bits shifted off are lost.  The
291  * shifted N-bit result is stored at the location pointed to by 'm_out'.  Each
292  * of 'a' and 'm_out' points to a 'size_words'-long array of 32-bit elements
293  * that concatenate in the platform's normal endian order to form an N-bit
294  * integer.
295  *
296  * From softfloat_shortShiftRightM()
297  */
298 static inline void
_mesa_short_shift_right_m(uint8_t size_words,const uint32_t * a,uint8_t dist,uint32_t * m_out)299 _mesa_short_shift_right_m(uint8_t size_words, const uint32_t *a, uint8_t dist, uint32_t *m_out)
300 {
301     uint8_t neg_dist;
302     unsigned index, last_index;
303     uint32_t part_word, a_word;
304 
305     neg_dist = -dist;
306     index = index_word_lo(size_words);
307     last_index = index_word_hi(size_words);
308     part_word = a[index] >> dist;
309     while (index != last_index) {
310         a_word = a[index + word_incr];
311         m_out[index] = a_word << (neg_dist & 31) | part_word;
312         index += word_incr;
313         part_word = a_word >> dist;
314     }
315     m_out[index] = part_word;
316 }
317 
318 /**
319  * \brief Shifts the N-bit unsigned integer pointed to by 'a' right by the number of
320  * bits given in 'dist', where N = 'size_words' * 32.  The value of 'dist'
321  * must be in the range 1 to 31.  If any nonzero bits are shifted off, they
322  * are "jammed" into the least-significant bit of the shifted value by setting
323  * the least-significant bit to 1.  This shifted-and-jammed N-bit result is
324  * stored at the location pointed to by 'm_out'.  Each of 'a' and 'm_out'
325  * points to a 'size_words'-long array of 32-bit elements that concatenate in
326  * the platform's normal endian order to form an N-bit integer.
327  *
328  *
329  * From softfloat_shortShiftRightJamM()
330  */
331 static inline void
_mesa_short_shift_right_jam_m(uint8_t size_words,const uint32_t * a,uint8_t dist,uint32_t * m_out)332 _mesa_short_shift_right_jam_m(uint8_t size_words, const uint32_t *a, uint8_t dist, uint32_t *m_out)
333 {
334     uint8_t neg_dist;
335     unsigned index, last_index;
336     uint64_t part_word, a_word;
337 
338     neg_dist = -dist;
339     index = index_word_lo(size_words);
340     last_index = index_word_hi(size_words);
341     a_word = a[index];
342     part_word = a_word >> dist;
343     if (part_word << dist != a_word )
344         part_word |= 1;
345     while (index != last_index) {
346         a_word = a[index + word_incr];
347         m_out[index] = a_word << (neg_dist & 31) | part_word;
348         index += word_incr;
349         part_word = a_word >> dist;
350     }
351     m_out[index] = part_word;
352 }
353 
354 /**
355  * \brief Shifts the N-bit unsigned integer pointed to by 'a' right by the number of
356  * bits given in 'dist', where N = 'size_words' * 32.  The value of 'dist'
357  * must not be zero.  If any nonzero bits are shifted off, they are "jammed"
358  * into the least-significant bit of the shifted value by setting the
359  * least-significant bit to 1.  This shifted-and-jammed N-bit result is stored
360  * at the location pointed to by 'm_out'.  Each of 'a' and 'm_out' points to a
361  * 'size_words'-long array of 32-bit elements that concatenate in the
362  * platform's normal endian order to form an N-bit integer.  The value of
363  * 'dist' can be arbitrarily large.  In particular, if 'dist' is greater than
364  * N, the stored result will be either 0 or 1, depending on whether the
365  * original N bits are all zeros.
366  *
367  * From softfloat_shiftRightJamM()
368  */
369 static inline void
_mesa_shift_right_jam_m(uint8_t size_words,const uint32_t * a,uint32_t dist,uint32_t * m_out)370 _mesa_shift_right_jam_m(uint8_t size_words, const uint32_t *a, uint32_t dist, uint32_t *m_out)
371 {
372     uint32_t word_jam, word_dist, *tmp;
373     uint8_t i, inner_dist;
374 
375     word_jam = 0;
376     word_dist = dist >> 5;
377     tmp = NULL;
378     if (word_dist) {
379         if (size_words < word_dist)
380             word_dist = size_words;
381         tmp = (uint32_t *) (a + index_multiword_lo(size_words, word_dist));
382         i = word_dist;
383         do {
384             word_jam = *tmp++;
385             if (word_jam)
386                 break;
387             --i;
388         } while (i);
389         tmp = m_out;
390     }
391     if (word_dist < size_words) {
392         a += index_multiword_hi_but(size_words, word_dist);
393         inner_dist = dist & 31;
394         if (inner_dist) {
395             _mesa_short_shift_right_jam_m(size_words - word_dist, a, inner_dist,
396                                           m_out + index_multiword_lo_but(size_words, word_dist));
397             if (!word_dist) {
398                 if (word_jam)
399                     m_out[index_word_lo(size_words)] |= 1;
400                 return;
401             }
402         } else {
403             a += index_word_lo(size_words - word_dist);
404             tmp = m_out + index_word_lo(size_words);
405             for (i = size_words - word_dist; i; --i) {
406                 *tmp = *a;
407                 a += word_incr;
408                 tmp += word_incr;
409             }
410         }
411         tmp = m_out + index_multiword_hi(size_words, word_dist);
412     }
413     if (tmp) {
414        do {
415            *tmp++ = 0;
416            --word_dist;
417        } while (word_dist);
418     }
419     if (word_jam)
420         m_out[index_word_lo(size_words)] |= 1;
421 }
422 
423 /**
424  * \brief Calculate a + b but rounding to zero.
425  *
426  * Notice that this mainly differs from the original Berkeley SoftFloat 3e
427  * implementation in that we don't really treat NaNs, Zeroes nor the
428  * signalling flags. Any NaN is good for us and the sign of the Zero is not
429  * important.
430  *
431  * From f64_add()
432  */
433 double
_mesa_double_add_rtz(double a,double b)434 _mesa_double_add_rtz(double a, double b)
435 {
436     const di_type a_di = {a};
437     uint64_t a_flt_m = a_di.u & 0x0fffffffffffff;
438     uint64_t a_flt_e = (a_di.u >> 52) & 0x7ff;
439     uint64_t a_flt_s = (a_di.u >> 63) & 0x1;
440     const di_type b_di = {b};
441     uint64_t b_flt_m = b_di.u & 0x0fffffffffffff;
442     uint64_t b_flt_e = (b_di.u >> 52) & 0x7ff;
443     uint64_t b_flt_s = (b_di.u >> 63) & 0x1;
444     int64_t s, e, m = 0;
445 
446     s = a_flt_s;
447 
448     const int64_t exp_diff = a_flt_e - b_flt_e;
449 
450     /* Handle special cases */
451 
452     if (a_flt_s != b_flt_s) {
453         return _mesa_double_sub_rtz(a, -b);
454     } else if ((a_flt_e == 0) && (a_flt_m == 0)) {
455         /* 'a' is zero, return 'b' */
456         return b;
457     } else if ((b_flt_e == 0) && (b_flt_m == 0)) {
458         /* 'b' is zero, return 'a' */
459         return a;
460     } else if (a_flt_e == 0x7ff && a_flt_m != 0) {
461         /* 'a' is a NaN, return NaN */
462         return a;
463     } else if (b_flt_e == 0x7ff && b_flt_m != 0) {
464         /* 'b' is a NaN, return NaN */
465         return b;
466     } else if (a_flt_e == 0x7ff && a_flt_m == 0) {
467         /* Inf + x = Inf */
468         return a;
469     } else if (b_flt_e == 0x7ff && b_flt_m == 0) {
470         /* x + Inf = Inf */
471         return b;
472     } else if (exp_diff == 0 && a_flt_e == 0) {
473         di_type result_di;
474         result_di.u = a_di.u + b_flt_m;
475         return result_di.f;
476     } else if (exp_diff == 0) {
477         e = a_flt_e;
478         m = 0x0020000000000000 + a_flt_m + b_flt_m;
479         m <<= 9;
480     } else if (exp_diff < 0) {
481         a_flt_m <<= 9;
482         b_flt_m <<= 9;
483         e = b_flt_e;
484 
485         if (a_flt_e != 0)
486             a_flt_m += 0x2000000000000000;
487         else
488             a_flt_m <<= 1;
489 
490         a_flt_m = _mesa_shift_right_jam64(a_flt_m, -exp_diff);
491         m = 0x2000000000000000 + a_flt_m + b_flt_m;
492         if (m < 0x4000000000000000) {
493             --e;
494             m <<= 1;
495         }
496     } else {
497         a_flt_m <<= 9;
498         b_flt_m <<= 9;
499         e = a_flt_e;
500 
501         if (b_flt_e != 0)
502             b_flt_m += 0x2000000000000000;
503         else
504             b_flt_m <<= 1;
505 
506         b_flt_m = _mesa_shift_right_jam64(b_flt_m, exp_diff);
507         m = 0x2000000000000000 + a_flt_m + b_flt_m;
508         if (m < 0x4000000000000000) {
509             --e;
510             m <<= 1;
511         }
512     }
513 
514     return _mesa_roundtozero_f64(s, e, m);
515 }
516 
517 /**
518  * \brief Returns the number of leading 0 bits before the most-significant 1 bit of
519  * 'a'.  If 'a' is zero, 64 is returned.
520  */
521 static inline unsigned
_mesa_count_leading_zeros64(uint64_t a)522 _mesa_count_leading_zeros64(uint64_t a)
523 {
524     return 64 - util_last_bit64(a);
525 }
526 
527 /**
528  * \brief Returns the number of leading 0 bits before the most-significant 1 bit of
529  * 'a'.  If 'a' is zero, 32 is returned.
530  */
531 static inline unsigned
_mesa_count_leading_zeros32(uint32_t a)532 _mesa_count_leading_zeros32(uint32_t a)
533 {
534     return 32 - util_last_bit(a);
535 }
536 
537 static inline double
_mesa_norm_round_pack_f64(int64_t s,int64_t e,int64_t m)538 _mesa_norm_round_pack_f64(int64_t s, int64_t e, int64_t m)
539 {
540     int8_t shift_dist;
541 
542     shift_dist = _mesa_count_leading_zeros64(m) - 1;
543     e -= shift_dist;
544     if ((10 <= shift_dist) && ((unsigned) e < 0x7fd)) {
545         di_type result;
546         result.u = (s << 63) + ((m ? e : 0) << 52) + (m << (shift_dist - 10));
547         return result.f;
548     } else {
549         return _mesa_roundtozero_f64(s, e, m << shift_dist);
550     }
551 }
552 
553 /**
554  * \brief Replaces the N-bit unsigned integer pointed to by 'm_out' by the
555  * 2s-complement of itself, where N = 'size_words' * 32.  Argument 'm_out'
556  * points to a 'size_words'-long array of 32-bit elements that concatenate in
557  * the platform's normal endian order to form an N-bit integer.
558  *
559  * From softfloat_negXM()
560  */
561 static inline void
_mesa_neg_x_m(uint8_t size_words,uint32_t * m_out)562 _mesa_neg_x_m(uint8_t size_words, uint32_t *m_out)
563 {
564     unsigned index, last_index;
565     uint8_t carry;
566     uint32_t word;
567 
568     index = index_word_lo(size_words);
569     last_index = index_word_hi(size_words);
570     carry = 1;
571     for (;;) {
572         word = ~m_out[index] + carry;
573         m_out[index] = word;
574         if (index == last_index)
575             break;
576         index += word_incr;
577         if (word)
578             carry = 0;
579     }
580 }
581 
582 /**
583  * \brief Adds the two N-bit integers pointed to by 'a' and 'b', where N =
584  * 'size_words' * 32.  The addition is modulo 2^N, so any carry out is
585  * lost. The N-bit sum is stored at the location pointed to by 'm_out'.  Each
586  * of 'a', 'b', and 'm_out' points to a 'size_words'-long array of 32-bit
587  * elements that concatenate in the platform's normal endian order to form an
588  * N-bit integer.
589  *
590  * From softfloat_addM()
591  */
592 static inline void
_mesa_add_m(uint8_t size_words,const uint32_t * a,const uint32_t * b,uint32_t * m_out)593 _mesa_add_m(uint8_t size_words, const uint32_t *a, const uint32_t *b, uint32_t *m_out)
594 {
595     unsigned index, last_index;
596     uint8_t carry;
597     uint32_t a_word, word;
598 
599     index = index_word_lo(size_words);
600     last_index = index_word_hi(size_words);
601     carry = 0;
602     for (;;) {
603         a_word = a[index];
604         word = a_word + b[index] + carry;
605         m_out[index] = word;
606         if (index == last_index)
607             break;
608         if (word != a_word)
609             carry = (word < a_word);
610         index += word_incr;
611     }
612 }
613 
614 /**
615  * \brief Subtracts the two N-bit integers pointed to by 'a' and 'b', where N =
616  * 'size_words' * 32.  The subtraction is modulo 2^N, so any borrow out (carry
617  * out) is lost.  The N-bit difference is stored at the location pointed to by
618  * 'm_out'.  Each of 'a', 'b', and 'm_out' points to a 'size_words'-long array
619  * of 32-bit elements that concatenate in the platform's normal endian order
620  * to form an N-bit integer.
621  *
622  * From softfloat_subM()
623  */
624 static inline void
_mesa_sub_m(uint8_t size_words,const uint32_t * a,const uint32_t * b,uint32_t * m_out)625 _mesa_sub_m(uint8_t size_words, const uint32_t *a, const uint32_t *b, uint32_t *m_out)
626 {
627     unsigned index, last_index;
628     uint8_t borrow;
629     uint32_t a_word, b_word;
630 
631     index = index_word_lo(size_words);
632     last_index = index_word_hi(size_words);
633     borrow = 0;
634     for (;;) {
635         a_word = a[index];
636         b_word = b[index];
637         m_out[index] = a_word - b_word - borrow;
638         if (index == last_index)
639             break;
640         borrow = borrow ? (a_word <= b_word) : (a_word < b_word);
641         index += word_incr;
642     }
643 }
644 
645 /* Calculate a - b but rounding to zero.
646  *
647  * Notice that this mainly differs from the original Berkeley SoftFloat 3e
648  * implementation in that we don't really treat NaNs, Zeroes nor the
649  * signalling flags. Any NaN is good for us and the sign of the Zero is not
650  * important.
651  *
652  * From f64_sub()
653  */
654 double
_mesa_double_sub_rtz(double a,double b)655 _mesa_double_sub_rtz(double a, double b)
656 {
657     const di_type a_di = {a};
658     uint64_t a_flt_m = a_di.u & 0x0fffffffffffff;
659     uint64_t a_flt_e = (a_di.u >> 52) & 0x7ff;
660     uint64_t a_flt_s = (a_di.u >> 63) & 0x1;
661     const di_type b_di = {b};
662     uint64_t b_flt_m = b_di.u & 0x0fffffffffffff;
663     uint64_t b_flt_e = (b_di.u >> 52) & 0x7ff;
664     uint64_t b_flt_s = (b_di.u >> 63) & 0x1;
665     int64_t s, e, m = 0;
666     int64_t m_diff = 0;
667     unsigned shift_dist = 0;
668 
669     s = a_flt_s;
670 
671     const int64_t exp_diff = a_flt_e - b_flt_e;
672 
673     /* Handle special cases */
674 
675     if (a_flt_s != b_flt_s) {
676         return _mesa_double_add_rtz(a, -b);
677     } else if ((a_flt_e == 0) && (a_flt_m == 0)) {
678         /* 'a' is zero, return '-b' */
679         return -b;
680     } else if ((b_flt_e == 0) && (b_flt_m == 0)) {
681         /* 'b' is zero, return 'a' */
682         return a;
683     } else if (a_flt_e == 0x7ff && a_flt_m != 0) {
684         /* 'a' is a NaN, return NaN */
685         return a;
686     } else if (b_flt_e == 0x7ff && b_flt_m != 0) {
687         /* 'b' is a NaN, return NaN */
688         return b;
689     } else if (a_flt_e == 0x7ff && a_flt_m == 0) {
690         if (b_flt_e == 0x7ff && b_flt_m == 0) {
691             /* Inf - Inf =  NaN */
692             di_type result;
693             e = 0x7ff;
694             result.u = (s << 63) + (e << 52) + 0x1;
695             return result.f;
696         }
697         /* Inf - x = Inf */
698         return a;
699     } else if (b_flt_e == 0x7ff && b_flt_m == 0) {
700         /* x - Inf = -Inf */
701         return -b;
702     } else if (exp_diff == 0) {
703         m_diff = a_flt_m - b_flt_m;
704 
705         if (m_diff == 0)
706             return 0;
707         if (a_flt_e)
708             --a_flt_e;
709         if (m_diff < 0) {
710             s = !s;
711             m_diff = -m_diff;
712         }
713 
714         shift_dist = _mesa_count_leading_zeros64(m_diff) - 11;
715         e = a_flt_e - shift_dist;
716         if (e < 0) {
717             shift_dist = a_flt_e;
718             e = 0;
719         }
720 
721         di_type result;
722         result.u = (s << 63) + (e << 52) + (m_diff << shift_dist);
723         return result.f;
724     } else if (exp_diff < 0) {
725         a_flt_m <<= 10;
726         b_flt_m <<= 10;
727         s = !s;
728 
729         a_flt_m += (a_flt_e) ? 0x4000000000000000 : a_flt_m;
730         a_flt_m = _mesa_shift_right_jam64(a_flt_m, -exp_diff);
731         b_flt_m |= 0x4000000000000000;
732         e = b_flt_e;
733         m = b_flt_m - a_flt_m;
734     } else {
735         a_flt_m <<= 10;
736         b_flt_m <<= 10;
737 
738         b_flt_m += (b_flt_e) ? 0x4000000000000000 : b_flt_m;
739         b_flt_m = _mesa_shift_right_jam64(b_flt_m, exp_diff);
740         a_flt_m |= 0x4000000000000000;
741         e = a_flt_e;
742         m = a_flt_m - b_flt_m;
743     }
744 
745     return _mesa_norm_round_pack_f64(s, e - 1, m);
746 }
747 
748 static inline void
_mesa_norm_subnormal_mantissa_f64(uint64_t m,uint64_t * exp,uint64_t * m_out)749 _mesa_norm_subnormal_mantissa_f64(uint64_t m, uint64_t *exp, uint64_t *m_out)
750 {
751     int shift_dist;
752 
753     shift_dist = _mesa_count_leading_zeros64(m) - 11;
754     *exp = 1 - shift_dist;
755     *m_out = m << shift_dist;
756 }
757 
758 static inline void
_mesa_norm_subnormal_mantissa_f32(uint32_t m,uint32_t * exp,uint32_t * m_out)759 _mesa_norm_subnormal_mantissa_f32(uint32_t m, uint32_t *exp, uint32_t *m_out)
760 {
761     int shift_dist;
762 
763     shift_dist = _mesa_count_leading_zeros32(m) - 8;
764     *exp = 1 - shift_dist;
765     *m_out = m << shift_dist;
766 }
767 
768 /**
769  * \brief Multiplies 'a' and 'b' and stores the 128-bit product at the location
770  * pointed to by 'zPtr'.  Argument 'zPtr' points to an array of four 32-bit
771  * elements that concatenate in the platform's normal endian order to form a
772  * 128-bit integer.
773  *
774  * From softfloat_mul64To128M()
775  */
776 static inline void
_mesa_softfloat_mul_f64_to_f128_m(uint64_t a,uint64_t b,uint32_t * m_out)777 _mesa_softfloat_mul_f64_to_f128_m(uint64_t a, uint64_t b, uint32_t *m_out)
778 {
779     uint32_t a32, a0, b32, b0;
780     uint64_t z0, mid1, z64, mid;
781 
782     a32 = a >> 32;
783     a0 = a;
784     b32 = b >> 32;
785     b0 = b;
786     z0 = (uint64_t) a0 * b0;
787     mid1 = (uint64_t) a32 * b0;
788     mid = mid1 + (uint64_t) a0 * b32;
789     z64 = (uint64_t) a32 * b32;
790     z64 += (uint64_t) (mid < mid1) << 32 | mid >> 32;
791     mid <<= 32;
792     z0 += mid;
793     m_out[index_word(4, 1)] = z0 >> 32;
794     m_out[index_word(4, 0)] = z0;
795     z64 += (z0 < mid);
796     m_out[index_word(4, 3)] = z64 >> 32;
797     m_out[index_word(4, 2)] = z64;
798 }
799 
800 /* Calculate a * b but rounding to zero.
801  *
802  * Notice that this mainly differs from the original Berkeley SoftFloat 3e
803  * implementation in that we don't really treat NaNs, Zeroes nor the
804  * signalling flags. Any NaN is good for us and the sign of the Zero is not
805  * important.
806  *
807  * From f64_mul()
808  */
809 double
_mesa_double_mul_rtz(double a,double b)810 _mesa_double_mul_rtz(double a, double b)
811 {
812     const di_type a_di = {a};
813     uint64_t a_flt_m = a_di.u & 0x0fffffffffffff;
814     uint64_t a_flt_e = (a_di.u >> 52) & 0x7ff;
815     uint64_t a_flt_s = (a_di.u >> 63) & 0x1;
816     const di_type b_di = {b};
817     uint64_t b_flt_m = b_di.u & 0x0fffffffffffff;
818     uint64_t b_flt_e = (b_di.u >> 52) & 0x7ff;
819     uint64_t b_flt_s = (b_di.u >> 63) & 0x1;
820     int64_t s, e, m = 0;
821 
822     s = a_flt_s ^ b_flt_s;
823 
824     if (a_flt_e == 0x7ff) {
825         if (a_flt_m != 0) {
826             /* 'a' is a NaN, return NaN */
827             return a;
828         } else if (b_flt_e == 0x7ff && b_flt_m != 0) {
829             /* 'b' is a NaN, return NaN */
830             return b;
831         }
832 
833         if (!(b_flt_e | b_flt_m)) {
834             /* Inf * 0 = NaN */
835             di_type result;
836             e = 0x7ff;
837             result.u = (s << 63) + (e << 52) + 0x1;
838             return result.f;
839         }
840         /* Inf * x = Inf */
841         di_type result;
842         e = 0x7ff;
843         result.u = (s << 63) + (e << 52) + 0;
844         return result.f;
845     }
846 
847     if (b_flt_e == 0x7ff) {
848         if (b_flt_m != 0) {
849             /* 'b' is a NaN, return NaN */
850             return b;
851         }
852         if (!(a_flt_e | a_flt_m)) {
853             /* 0 * Inf = NaN */
854             di_type result;
855             e = 0x7ff;
856             result.u = (s << 63) + (e << 52) + 0x1;
857             return result.f;
858         }
859         /* x * Inf = Inf */
860         di_type result;
861         e = 0x7ff;
862         result.u = (s << 63) + (e << 52) + 0;
863         return result.f;
864     }
865 
866     if (a_flt_e == 0) {
867         if (a_flt_m == 0) {
868             /* 'a' is zero. Return zero */
869             di_type result;
870             result.u = (s << 63) + 0;
871             return result.f;
872         }
873         _mesa_norm_subnormal_mantissa_f64(a_flt_m , &a_flt_e, &a_flt_m);
874     }
875     if (b_flt_e == 0) {
876         if (b_flt_m == 0) {
877             /* 'b' is zero. Return zero */
878             di_type result;
879             result.u = (s << 63) + 0;
880             return result.f;
881         }
882         _mesa_norm_subnormal_mantissa_f64(b_flt_m , &b_flt_e, &b_flt_m);
883     }
884 
885     e = a_flt_e + b_flt_e - 0x3ff;
886     a_flt_m = (a_flt_m | 0x0010000000000000) << 10;
887     b_flt_m = (b_flt_m | 0x0010000000000000) << 11;
888 
889     uint32_t m_128[4];
890     _mesa_softfloat_mul_f64_to_f128_m(a_flt_m, b_flt_m, m_128);
891 
892     m = (uint64_t) m_128[index_word(4, 3)] << 32 | m_128[index_word(4, 2)];
893     if (m_128[index_word(4, 1)] || m_128[index_word(4, 0)])
894         m |= 1;
895 
896     if (m < 0x4000000000000000) {
897         --e;
898         m <<= 1;
899     }
900 
901     return _mesa_roundtozero_f64(s, e, m);
902 }
903 
904 
905 /**
906  * \brief Calculate a * b + c but rounding to zero.
907  *
908  * Notice that this mainly differs from the original Berkeley SoftFloat 3e
909  * implementation in that we don't really treat NaNs, Zeroes nor the
910  * signalling flags. Any NaN is good for us and the sign of the Zero is not
911  * important.
912  *
913  * From f64_mulAdd()
914  */
915 double
_mesa_double_fma_rtz(double a,double b,double c)916 _mesa_double_fma_rtz(double a, double b, double c)
917 {
918     const di_type a_di = {a};
919     uint64_t a_flt_m = a_di.u & 0x0fffffffffffff;
920     uint64_t a_flt_e = (a_di.u >> 52) & 0x7ff;
921     uint64_t a_flt_s = (a_di.u >> 63) & 0x1;
922     const di_type b_di = {b};
923     uint64_t b_flt_m = b_di.u & 0x0fffffffffffff;
924     uint64_t b_flt_e = (b_di.u >> 52) & 0x7ff;
925     uint64_t b_flt_s = (b_di.u >> 63) & 0x1;
926     const di_type c_di = {c};
927     uint64_t c_flt_m = c_di.u & 0x0fffffffffffff;
928     uint64_t c_flt_e = (c_di.u >> 52) & 0x7ff;
929     uint64_t c_flt_s = (c_di.u >> 63) & 0x1;
930     int64_t s, e, m = 0;
931 
932     c_flt_s ^= 0;
933     s = a_flt_s ^ b_flt_s ^ 0;
934 
935     if (a_flt_e == 0x7ff) {
936         if (a_flt_m != 0) {
937             /* 'a' is a NaN, return NaN */
938             return a;
939         } else if (b_flt_e == 0x7ff && b_flt_m != 0) {
940             /* 'b' is a NaN, return NaN */
941             return b;
942         } else if (c_flt_e == 0x7ff && c_flt_m != 0) {
943             /* 'c' is a NaN, return NaN */
944             return c;
945         }
946 
947         if (!(b_flt_e | b_flt_m)) {
948             /* Inf * 0 + y = NaN */
949             di_type result;
950             e = 0x7ff;
951             result.u = (s << 63) + (e << 52) + 0x1;
952             return result.f;
953         }
954 
955         if ((c_flt_e == 0x7ff && c_flt_m == 0) && (s != c_flt_s)) {
956             /* Inf * x - Inf = NaN */
957             di_type result;
958             e = 0x7ff;
959             result.u = (s << 63) + (e << 52) + 0x1;
960             return result.f;
961         }
962 
963         /* Inf * x + y = Inf */
964         di_type result;
965         e = 0x7ff;
966         result.u = (s << 63) + (e << 52) + 0;
967         return result.f;
968     }
969 
970     if (b_flt_e == 0x7ff) {
971         if (b_flt_m != 0) {
972             /* 'b' is a NaN, return NaN */
973             return b;
974         } else if (c_flt_e == 0x7ff && c_flt_m != 0) {
975             /* 'c' is a NaN, return NaN */
976             return c;
977         }
978 
979         if (!(a_flt_e | a_flt_m)) {
980             /* 0 * Inf + y = NaN */
981             di_type result;
982             e = 0x7ff;
983             result.u = (s << 63) + (e << 52) + 0x1;
984             return result.f;
985         }
986 
987         if ((c_flt_e == 0x7ff && c_flt_m == 0) && (s != c_flt_s)) {
988             /* x * Inf - Inf = NaN */
989             di_type result;
990             e = 0x7ff;
991             result.u = (s << 63) + (e << 52) + 0x1;
992             return result.f;
993         }
994 
995         /* x * Inf + y = Inf */
996         di_type result;
997         e = 0x7ff;
998         result.u = (s << 63) + (e << 52) + 0;
999         return result.f;
1000     }
1001 
1002     if (c_flt_e == 0x7ff) {
1003         if (c_flt_m != 0) {
1004             /* 'c' is a NaN, return NaN */
1005             return c;
1006         }
1007 
1008         /* x * y + Inf = Inf */
1009         return c;
1010     }
1011 
1012     if (a_flt_e == 0) {
1013         if (a_flt_m == 0) {
1014             /* 'a' is zero, return 'c' */
1015             return c;
1016         }
1017         _mesa_norm_subnormal_mantissa_f64(a_flt_m , &a_flt_e, &a_flt_m);
1018     }
1019 
1020     if (b_flt_e == 0) {
1021         if (b_flt_m == 0) {
1022             /* 'b' is zero, return 'c' */
1023             return c;
1024         }
1025         _mesa_norm_subnormal_mantissa_f64(b_flt_m , &b_flt_e, &b_flt_m);
1026     }
1027 
1028     e = a_flt_e + b_flt_e - 0x3fe;
1029     a_flt_m = (a_flt_m | 0x0010000000000000) << 10;
1030     b_flt_m = (b_flt_m | 0x0010000000000000) << 11;
1031 
1032     uint32_t m_128[4];
1033     _mesa_softfloat_mul_f64_to_f128_m(a_flt_m, b_flt_m, m_128);
1034 
1035     m = (uint64_t) m_128[index_word(4, 3)] << 32 | m_128[index_word(4, 2)];
1036 
1037     int64_t shift_dist = 0;
1038     if (!(m & 0x4000000000000000)) {
1039         --e;
1040         shift_dist = -1;
1041     }
1042 
1043     if (c_flt_e == 0) {
1044         if (c_flt_m == 0) {
1045             /* 'c' is zero, return 'a * b' */
1046             if (shift_dist)
1047                 m <<= 1;
1048 
1049             if (m_128[index_word(4, 1)] || m_128[index_word(4, 0)])
1050                 m |= 1;
1051             return _mesa_roundtozero_f64(s, e - 1, m);
1052         }
1053         _mesa_norm_subnormal_mantissa_f64(c_flt_m , &c_flt_e, &c_flt_m);
1054     }
1055     c_flt_m = (c_flt_m | 0x0010000000000000) << 10;
1056 
1057     uint32_t c_flt_m_128[4];
1058     int64_t exp_diff = e - c_flt_e;
1059     if (exp_diff < 0) {
1060         e = c_flt_e;
1061         if ((s == c_flt_s) || (exp_diff < -1)) {
1062             shift_dist -= exp_diff;
1063             if (shift_dist) {
1064                 m = _mesa_shift_right_jam64(m, shift_dist);
1065             }
1066         } else {
1067             if (!shift_dist) {
1068                 _mesa_short_shift_right_m(4, m_128, 1, m_128);
1069             }
1070         }
1071     } else {
1072         if (shift_dist)
1073             _mesa_add_m(4, m_128, m_128, m_128);
1074         if (!exp_diff) {
1075             m = (uint64_t) m_128[index_word(4, 3)] << 32
1076                 | m_128[index_word(4, 2)];
1077         } else {
1078             c_flt_m_128[index_word(4, 3)] = c_flt_m >> 32;
1079             c_flt_m_128[index_word(4, 2)] = c_flt_m;
1080             c_flt_m_128[index_word(4, 1)] = 0;
1081             c_flt_m_128[index_word(4, 0)] = 0;
1082             _mesa_shift_right_jam_m(4, c_flt_m_128, exp_diff, c_flt_m_128);
1083         }
1084     }
1085 
1086     if (s == c_flt_s) {
1087         if (exp_diff <= 0) {
1088             m += c_flt_m;
1089         } else {
1090             _mesa_add_m(4, m_128, c_flt_m_128, m_128);
1091             m = (uint64_t) m_128[index_word(4, 3)] << 32
1092                 | m_128[index_word(4, 2)];
1093         }
1094         if (m & 0x8000000000000000) {
1095             e++;
1096             m = _mesa_short_shift_right_jam64(m, 1);
1097         }
1098     } else {
1099         if (exp_diff < 0) {
1100             s = c_flt_s;
1101             if (exp_diff < -1) {
1102                 m = c_flt_m - m;
1103                 if (m_128[index_word(4, 1)] || m_128[index_word(4, 0)]) {
1104                     m = (m - 1) | 1;
1105                 }
1106                 if (!(m & 0x4000000000000000)) {
1107                     --e;
1108                     m <<= 1;
1109                 }
1110                 return _mesa_roundtozero_f64(s, e - 1, m);
1111             } else {
1112                 c_flt_m_128[index_word(4, 3)] = c_flt_m >> 32;
1113                 c_flt_m_128[index_word(4, 2)] = c_flt_m;
1114                 c_flt_m_128[index_word(4, 1)] = 0;
1115                 c_flt_m_128[index_word(4, 0)] = 0;
1116                 _mesa_sub_m(4, c_flt_m_128, m_128, m_128);
1117             }
1118         } else if (!exp_diff) {
1119             m -= c_flt_m;
1120             if (!m && !m_128[index_word(4, 1)] && !m_128[index_word(4, 0)]) {
1121                 /* Return zero */
1122                 di_type result;
1123                 result.u = (s << 63) + 0;
1124                 return result.f;
1125             }
1126             m_128[index_word(4, 3)] = m >> 32;
1127             m_128[index_word(4, 2)] = m;
1128             if (m & 0x8000000000000000) {
1129                 s = !s;
1130                 _mesa_neg_x_m(4, m_128);
1131             }
1132         } else {
1133             _mesa_sub_m(4, m_128, c_flt_m_128, m_128);
1134             if (1 < exp_diff) {
1135                 m = (uint64_t) m_128[index_word(4, 3)] << 32
1136                     | m_128[index_word(4, 2)];
1137                 if (!(m & 0x4000000000000000)) {
1138                     --e;
1139                     m <<= 1;
1140                 }
1141                 if (m_128[index_word(4, 1)] || m_128[index_word(4, 0)])
1142                     m |= 1;
1143                 return _mesa_roundtozero_f64(s, e - 1, m);
1144             }
1145         }
1146 
1147         shift_dist = 0;
1148         m = (uint64_t) m_128[index_word(4, 3)] << 32
1149             | m_128[index_word(4, 2)];
1150         if (!m) {
1151             shift_dist = 64;
1152             m = (uint64_t) m_128[index_word(4, 1)] << 32
1153                 | m_128[index_word(4, 0)];
1154         }
1155         shift_dist += _mesa_count_leading_zeros64(m) - 1;
1156         if (shift_dist) {
1157             e -= shift_dist;
1158             _mesa_shift_left_m(4, m_128, shift_dist, m_128);
1159             m = (uint64_t) m_128[index_word(4, 3)] << 32
1160                 | m_128[index_word(4, 2)];
1161         }
1162     }
1163 
1164     if (m_128[index_word(4, 1)] || m_128[index_word(4, 0)])
1165         m |= 1;
1166     return _mesa_roundtozero_f64(s, e - 1, m);
1167 }
1168 
1169 
1170 /**
1171  * \brief Calculate a * b + c but rounding to zero.
1172  *
1173  * Notice that this mainly differs from the original Berkeley SoftFloat 3e
1174  * implementation in that we don't really treat NaNs, Zeroes nor the
1175  * signalling flags. Any NaN is good for us and the sign of the Zero is not
1176  * important.
1177  *
1178  * From f32_mulAdd()
1179  */
1180 float
_mesa_float_fma_rtz(float a,float b,float c)1181 _mesa_float_fma_rtz(float a, float b, float c)
1182 {
1183     const fi_type a_fi = {a};
1184     uint32_t a_flt_m = a_fi.u & 0x07fffff;
1185     uint32_t a_flt_e = (a_fi.u >> 23) & 0xff;
1186     uint32_t a_flt_s = (a_fi.u >> 31) & 0x1;
1187     const fi_type b_fi = {b};
1188     uint32_t b_flt_m = b_fi.u & 0x07fffff;
1189     uint32_t b_flt_e = (b_fi.u >> 23) & 0xff;
1190     uint32_t b_flt_s = (b_fi.u >> 31) & 0x1;
1191     const fi_type c_fi = {c};
1192     uint32_t c_flt_m = c_fi.u & 0x07fffff;
1193     uint32_t c_flt_e = (c_fi.u >> 23) & 0xff;
1194     uint32_t c_flt_s = (c_fi.u >> 31) & 0x1;
1195     int32_t s, e, m = 0;
1196 
1197     c_flt_s ^= 0;
1198     s = a_flt_s ^ b_flt_s ^ 0;
1199 
1200     if (a_flt_e == 0xff) {
1201         if (a_flt_m != 0) {
1202             /* 'a' is a NaN, return NaN */
1203             return a;
1204         } else if (b_flt_e == 0xff && b_flt_m != 0) {
1205             /* 'b' is a NaN, return NaN */
1206             return b;
1207         } else if (c_flt_e == 0xff && c_flt_m != 0) {
1208             /* 'c' is a NaN, return NaN */
1209             return c;
1210         }
1211 
1212         if (!(b_flt_e | b_flt_m)) {
1213             /* Inf * 0 + y = NaN */
1214             fi_type result;
1215             e = 0xff;
1216             result.u = (s << 31) + (e << 23) + 0x1;
1217             return result.f;
1218         }
1219 
1220         if ((c_flt_e == 0xff && c_flt_m == 0) && (s != c_flt_s)) {
1221             /* Inf * x - Inf = NaN */
1222             fi_type result;
1223             e = 0xff;
1224             result.u = (s << 31) + (e << 23) + 0x1;
1225             return result.f;
1226         }
1227 
1228         /* Inf * x + y = Inf */
1229         fi_type result;
1230         e = 0xff;
1231         result.u = (s << 31) + (e << 23) + 0;
1232         return result.f;
1233     }
1234 
1235     if (b_flt_e == 0xff) {
1236         if (b_flt_m != 0) {
1237             /* 'b' is a NaN, return NaN */
1238             return b;
1239         } else if (c_flt_e == 0xff && c_flt_m != 0) {
1240             /* 'c' is a NaN, return NaN */
1241             return c;
1242         }
1243 
1244         if (!(a_flt_e | a_flt_m)) {
1245             /* 0 * Inf + y = NaN */
1246             fi_type result;
1247             e = 0xff;
1248             result.u = (s << 31) + (e << 23) + 0x1;
1249             return result.f;
1250         }
1251 
1252         if ((c_flt_e == 0xff && c_flt_m == 0) && (s != c_flt_s)) {
1253             /* x * Inf - Inf = NaN */
1254             fi_type result;
1255             e = 0xff;
1256             result.u = (s << 31) + (e << 23) + 0x1;
1257             return result.f;
1258         }
1259 
1260         /* x * Inf + y = Inf */
1261         fi_type result;
1262         e = 0xff;
1263         result.u = (s << 31) + (e << 23) + 0;
1264         return result.f;
1265     }
1266 
1267     if (c_flt_e == 0xff) {
1268         if (c_flt_m != 0) {
1269             /* 'c' is a NaN, return NaN */
1270             return c;
1271         }
1272 
1273         /* x * y + Inf = Inf */
1274         return c;
1275     }
1276 
1277     if (a_flt_e == 0) {
1278         if (a_flt_m == 0) {
1279             /* 'a' is zero, return 'c' */
1280             return c;
1281         }
1282         _mesa_norm_subnormal_mantissa_f32(a_flt_m , &a_flt_e, &a_flt_m);
1283     }
1284 
1285     if (b_flt_e == 0) {
1286         if (b_flt_m == 0) {
1287             /* 'b' is zero, return 'c' */
1288             return c;
1289         }
1290         _mesa_norm_subnormal_mantissa_f32(b_flt_m , &b_flt_e, &b_flt_m);
1291     }
1292 
1293     e = a_flt_e + b_flt_e - 0x7e;
1294     a_flt_m = (a_flt_m | 0x00800000) << 7;
1295     b_flt_m = (b_flt_m | 0x00800000) << 7;
1296 
1297     uint64_t m_64 = (uint64_t) a_flt_m * b_flt_m;
1298     if (m_64 < 0x2000000000000000) {
1299         --e;
1300         m_64 <<= 1;
1301     }
1302 
1303     if (c_flt_e == 0) {
1304         if (c_flt_m == 0) {
1305             /* 'c' is zero, return 'a * b' */
1306             m = _mesa_short_shift_right_jam64(m_64, 31);
1307             return _mesa_round_f32(s, e - 1, m, true);
1308         }
1309         _mesa_norm_subnormal_mantissa_f32(c_flt_m , &c_flt_e, &c_flt_m);
1310     }
1311     c_flt_m = (c_flt_m | 0x00800000) << 6;
1312 
1313     int16_t exp_diff = e - c_flt_e;
1314     if (s == c_flt_s) {
1315         if (exp_diff <= 0) {
1316             e = c_flt_e;
1317             m = c_flt_m + _mesa_shift_right_jam64(m_64, 32 - exp_diff);
1318         } else {
1319             m_64 += _mesa_shift_right_jam64((uint64_t) c_flt_m << 32, exp_diff);
1320             m = _mesa_short_shift_right_jam64(m_64, 32);
1321         }
1322         if (m < 0x40000000) {
1323             --e;
1324             m <<= 1;
1325         }
1326     } else {
1327         uint64_t c_flt_m_64 = (uint64_t) c_flt_m << 32;
1328         if (exp_diff < 0) {
1329             s = c_flt_s;
1330             e = c_flt_e;
1331             m_64 = c_flt_m_64 - _mesa_shift_right_jam64(m_64, -exp_diff);
1332         } else if (!exp_diff) {
1333             m_64 -= c_flt_m_64;
1334             if (!m_64) {
1335                 /* Return zero */
1336                 fi_type result;
1337                 result.u = (s << 31) + 0;
1338                 return result.f;
1339             }
1340             if (m_64 & 0x8000000000000000) {
1341                 s = !s;
1342                 m_64 = -m_64;
1343             }
1344         } else {
1345             m_64 -= _mesa_shift_right_jam64(c_flt_m_64, exp_diff);
1346         }
1347         int8_t shift_dist = _mesa_count_leading_zeros64(m_64) - 1;
1348         e -= shift_dist;
1349         shift_dist -= 32;
1350         if (shift_dist < 0) {
1351             m = _mesa_short_shift_right_jam64(m_64, -shift_dist);
1352         } else {
1353             m = (uint32_t) m_64 << shift_dist;
1354         }
1355     }
1356 
1357     return _mesa_round_f32(s, e, m, true);
1358 }
1359 
1360 
1361 /**
1362  * \brief Converts from 64bits to 32bits float and rounds according to
1363  * instructed.
1364  *
1365  * From f64_to_f32()
1366  */
1367 float
_mesa_double_to_f32(double val,bool rtz)1368 _mesa_double_to_f32(double val, bool rtz)
1369 {
1370     const di_type di = {val};
1371     uint64_t flt_m = di.u & 0x0fffffffffffff;
1372     uint64_t flt_e = (di.u >> 52) & 0x7ff;
1373     uint64_t flt_s = (di.u >> 63) & 0x1;
1374     int32_t s, e, m = 0;
1375 
1376     s = flt_s;
1377 
1378     if (flt_e == 0x7ff) {
1379         if (flt_m != 0) {
1380             /* 'val' is a NaN, return NaN */
1381             fi_type result;
1382             e = 0xff;
1383             m = 0x1;
1384             result.u = (s << 31) + (e << 23) + m;
1385             return result.f;
1386         }
1387 
1388         /* 'val' is Inf, return Inf */
1389         fi_type result;
1390         e = 0xff;
1391         result.u = (s << 31) + (e << 23) + m;
1392         return result.f;
1393     }
1394 
1395     if (!(flt_e | flt_m)) {
1396         /* 'val' is zero, return zero */
1397         fi_type result;
1398         e = 0;
1399         result.u = (s << 31) + (e << 23) + m;
1400         return result.f;
1401     }
1402 
1403     m = _mesa_short_shift_right_jam64(flt_m, 22);
1404     if ( ! (flt_e | m) ) {
1405         /* 'val' is denorm, return zero */
1406         fi_type result;
1407         e = 0;
1408         result.u = (s << 31) + (e << 23) + m;
1409         return result.f;
1410     }
1411 
1412     return _mesa_round_f32(s, flt_e - 0x381, m | 0x40000000, rtz);
1413 }
1414 
1415 
1416 /**
1417  * \brief Converts from 32bits to 16bits float and rounds the result to zero.
1418  *
1419  * From f32_to_f16()
1420  */
1421 uint16_t
_mesa_float_to_half_rtz_slow(float val)1422 _mesa_float_to_half_rtz_slow(float val)
1423 {
1424     const fi_type fi = {val};
1425     const uint32_t flt_m = fi.u & 0x7fffff;
1426     const uint32_t flt_e = (fi.u >> 23) & 0xff;
1427     const uint32_t flt_s = (fi.u >> 31) & 0x1;
1428     int16_t s, e, m = 0;
1429 
1430     s = flt_s;
1431 
1432     if (flt_e == 0xff) {
1433         if (flt_m != 0) {
1434             /* 'val' is a NaN, return NaN */
1435             e = 0x1f;
1436             /* Retain the top bits of a NaN to make sure that the quiet/signaling
1437             * status stays the same.
1438             */
1439             m = flt_m >> 13;
1440             if (!m)
1441                m = 1;
1442             return (s << 15) + (e << 10) + m;
1443         }
1444 
1445         /* 'val' is Inf, return Inf */
1446         e = 0x1f;
1447         return (s << 15) + (e << 10) + m;
1448     }
1449 
1450     if (!(flt_e | flt_m)) {
1451         /* 'val' is zero, return zero */
1452         e = 0;
1453         return (s << 15) + (e << 10) + m;
1454     }
1455 
1456     m = flt_m >> 9 | ((flt_m & 0x1ff) != 0);
1457     if ( ! (flt_e | m) ) {
1458         /* 'val' is denorm, return zero */
1459         e = 0;
1460         return (s << 15) + (e << 10) + m;
1461     }
1462 
1463     return _mesa_roundtozero_f16(s, flt_e - 0x71, m | 0x4000);
1464 }
1465