/*
 * The implementations contained in this file are heavily based on the
 * implementations found in the Berkeley SoftFloat library. As such, they are
 * licensed under the same 3-clause BSD license:
 *
 * License for Berkeley SoftFloat Release 3e
 *
 * John R. Hauser
 * 2018 January 20
 *
 * The following applies to the whole of SoftFloat Release 3e as well as to
 * each source file individually.
 *
 * Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018 The Regents of the
 * University of California. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions, and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions, and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#version 400
#extension GL_ARB_gpu_shader_int64 : enable
#extension GL_ARB_shader_bit_encoding : enable
#extension GL_EXT_shader_integer_mix : enable
#extension GL_MESA_shader_integer_functions : enable

#pragma warning(off)

/* Software IEEE floating-point rounding mode.
 * GLSL spec section "4.7.1 Range and Precision":
 * The rounding mode cannot be set and is undefined.
 * But here, we can define the rounding mode at compile time.
 */
#define FLOAT_ROUND_NEAREST_EVEN    0
#define FLOAT_ROUND_TO_ZERO         1
#define FLOAT_ROUND_DOWN            2
#define FLOAT_ROUND_UP              3
#define FLOAT_ROUNDING_MODE         FLOAT_ROUND_NEAREST_EVEN

/* Relax propagation of NaN. Binary operations with a NaN source will still
 * produce a NaN result, but it won't follow strict IEEE rules.
 */
#define RELAXED_NAN_PROPAGATION

/* Absolute value of a Float64:
 * clear the sign bit.
 */
uint64_t
__fabs64(uint64_t __a)
{
   uvec2 a = unpackUint2x32(__a);
   a.y &= 0x7FFFFFFFu;
   return packUint2x32(a);
}

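/* Illustrative usage sketch (not part of the original file): the constant
 * below is the bit pattern of -2.0; clearing the sign bit yields
 * 0x4000000000000000ul, the bit pattern of +2.0.
 */
uint64_t
__example_fabs_neg_two()
{
   return __fabs64(0xC000000000000000ul); /* == 0x4000000000000000ul */
}
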
/* Returns true if the double-precision floating-point value `a' is a NaN;
 * otherwise returns false.
 */
bool
__is_nan(uint64_t __a)
{
   uvec2 a = unpackUint2x32(__a);
   return (0xFFE00000u <= (a.y<<1)) &&
      ((a.x != 0u) || ((a.y & 0x000FFFFFu) != 0u));
}

/* Negate value of a Float64:
 * toggle the sign bit.
 */
uint64_t
__fneg64(uint64_t __a)
{
   uvec2 a = unpackUint2x32(__a);
   a.y ^= (1u << 31);
   return packUint2x32(a);
}

uint64_t
__fsign64(uint64_t __a)
{
   uvec2 a = unpackUint2x32(__a);
   uvec2 retval;
   retval.x = 0u;
   retval.y = mix((a.y & 0x80000000u) | 0x3FF00000u, 0u, (a.y << 1 | a.x) == 0u);
   return packUint2x32(retval);
}

/* Returns the fraction bits of the double-precision floating-point value `a'. */
uint
__extractFloat64FracLo(uint64_t a)
{
   return unpackUint2x32(a).x;
}

uint
__extractFloat64FracHi(uint64_t a)
{
   return unpackUint2x32(a).y & 0x000FFFFFu;
}

/* Returns the exponent bits of the double-precision floating-point value `a'. */
int
__extractFloat64Exp(uint64_t __a)
{
   uvec2 a = unpackUint2x32(__a);
   return int((a.y>>20) & 0x7FFu);
}

bool
__feq64_nonnan(uint64_t __a, uint64_t __b)
{
   uvec2 a = unpackUint2x32(__a);
   uvec2 b = unpackUint2x32(__b);
   return (a.x == b.x) &&
          ((a.y == b.y) || ((a.x == 0u) && (((a.y | b.y)<<1) == 0u)));
}

/* Returns true if the double-precision floating-point value `a' is equal to the
 * corresponding value `b', and false otherwise. The comparison is performed
 * according to the IEEE Standard for Floating-Point Arithmetic.
 */
bool
__feq64(uint64_t a, uint64_t b)
{
   if (__is_nan(a) || __is_nan(b))
      return false;

   return __feq64_nonnan(a, b);
}

/* Returns true if the double-precision floating-point value `a' is not equal
 * to the corresponding value `b', and false otherwise. The comparison is
 * performed according to the IEEE Standard for Floating-Point Arithmetic.
 */
bool
__fneu64(uint64_t a, uint64_t b)
{
   if (__is_nan(a) || __is_nan(b))
      return true;

   return !__feq64_nonnan(a, b);
}

/* Returns the sign bit of the double-precision floating-point value `a'. */
uint
__extractFloat64Sign(uint64_t a)
{
   return unpackUint2x32(a).y & 0x80000000u;
}

/* Returns true if the signed 64-bit value formed by concatenating `a0' and
 * `a1' is less than the signed 64-bit value formed by concatenating `b0' and
 * `b1'. Otherwise, returns false.
 */
bool
ilt64(uint a0, uint a1, uint b0, uint b1)
{
   return (int(a0) < int(b0)) || ((a0 == b0) && (a1 < b1));
}

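/* Hedged usage sketch (not in the original source): ilt64 treats (a0, a1)
 * as a signed 64-bit value with a0 as the high word, so -1, which is
 * (0xFFFFFFFFu, 0xFFFFFFFFu), compares less than 0.
 */
bool
__example_ilt64_neg_one_lt_zero()
{
   return ilt64(0xFFFFFFFFu, 0xFFFFFFFFu, 0u, 0u); /* true */
}
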
bool
__flt64_nonnan(uint64_t __a, uint64_t __b)
{
   uvec2 a = unpackUint2x32(__a);
   uvec2 b = unpackUint2x32(__b);

   /* IEEE 754 floating point numbers are specifically designed so that, with
    * two exceptions, values can be compared by bit-casting to signed integers
    * with the same number of bits.
    *
    * From https://en.wikipedia.org/wiki/IEEE_754-1985#Comparing_floating-point_numbers:
    *
    *    When comparing as 2's-complement integers: If the sign bits differ,
    *    the negative number precedes the positive number, so 2's complement
    *    gives the correct result (except that negative zero and positive zero
    *    should be considered equal). If both values are positive, the 2's
    *    complement comparison again gives the correct result. Otherwise (two
    *    negative numbers), the correct FP ordering is the opposite of the 2's
    *    complement ordering.
    *
    * The logic implied by the above quotation is:
    *
    *    !both_are_zero(a, b) && (both_negative(a, b) ? a > b : a < b)
    *
    * This is equivalent to
    *
    *    fneu(a, b) && (both_negative(a, b) ? a >= b : a < b)
    *
    *    fneu(a, b) && (both_negative(a, b) ? !(a < b) : a < b)
    *
    *    fneu(a, b) && ((both_negative(a, b) && !(a < b)) ||
    *                   (!both_negative(a, b) && (a < b)))
    *
    * (A!|B)&(A|!B) is (A xor B) which is implemented here using !=.
    *
    *    fneu(a, b) && (both_negative(a, b) != (a < b))
    */
   bool lt = ilt64(a.y, a.x, b.y, b.x);
   bool both_negative = (a.y & b.y & 0x80000000u) != 0u;

   return !__feq64_nonnan(__a, __b) && (lt != both_negative);
}

bool
__flt64_nonnan_minmax(uint64_t __a, uint64_t __b)
{
   uvec2 a = unpackUint2x32(__a);
   uvec2 b = unpackUint2x32(__b);

   /* See __flt64_nonnan. For implementing fmin/fmax, we compare -0 < 0, so the
    * implied logic is a bit simpler:
    *
    *    both_negative(a, b) ? a > b : a < b
    *
    * If a == b, it doesn't matter what we return, so that's equivalent to:
    *
    *    both_negative(a, b) ? a >= b : a < b
    *    both_negative(a, b) ? !(a < b) : a < b
    *    both_negative(a, b) ^ (a < b)
    *
    * XOR is again implemented using !=.
    */
   bool lt = ilt64(a.y, a.x, b.y, b.x);
   bool both_negative = (a.y & b.y & 0x80000000u) != 0u;

   return (lt != both_negative);
}

/* Returns true if the double-precision floating-point value `a' is less than
 * the corresponding value `b', and false otherwise. The comparison is
 * performed according to the IEEE Standard for Floating-Point Arithmetic.
 */
bool
__flt64(uint64_t a, uint64_t b)
{
   /* This weird layout matters. Doing the "obvious" thing results in extra
    * flow control being inserted to implement the short-circuit evaluation
    * rules. Flow control is bad!
    */
   bool x = !__is_nan(a);
   bool y = !__is_nan(b);
   bool z = __flt64_nonnan(a, b);

   return (x && y && z);
}

/* Returns true if the double-precision floating-point value `a' is greater
 * than or equal to the corresponding value `b', and false otherwise. The
 * comparison is performed according to the IEEE Standard for Floating-Point
 * Arithmetic.
 */
bool
__fge64(uint64_t a, uint64_t b)
{
   /* This weird layout matters. Doing the "obvious" thing results in extra
    * flow control being inserted to implement the short-circuit evaluation
    * rules. Flow control is bad!
    */
   bool x = !__is_nan(a);
   bool y = !__is_nan(b);
   bool z = !__flt64_nonnan(a, b);

   return (x && y && z);
}

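/* Worked check of the XOR trick above (illustrative, not from the source):
 * for a = -1.0 (0xBFF0000000000000) and b = -2.0 (0xC000000000000000),
 * the two's-complement compare gives lt = true (-0x4010... < -0x4000...),
 * both_negative = true, and true != true is false - matching the fact
 * that -1.0 < -2.0 is false.
 */
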
uint64_t
__fsat64(uint64_t __a)
{
   uvec2 a = unpackUint2x32(__a);

   /* fsat(NaN) should be zero. */
   if (__is_nan(__a) || int(a.y) < 0)
      return 0ul;

   /* IEEE 754 floating point numbers are specifically designed so that, with
    * two exceptions, values can be compared by bit-casting to signed integers
    * with the same number of bits.
    *
    * From https://en.wikipedia.org/wiki/IEEE_754-1985#Comparing_floating-point_numbers:
    *
    *    When comparing as 2's-complement integers: If the sign bits differ,
    *    the negative number precedes the positive number, so 2's complement
    *    gives the correct result (except that negative zero and positive zero
    *    should be considered equal). If both values are positive, the 2's
    *    complement comparison again gives the correct result. Otherwise (two
    *    negative numbers), the correct FP ordering is the opposite of the 2's
    *    complement ordering.
    *
    * We know that both values are not negative, and we know that at least one
    * value is not zero. Therefore, we can just use the 2's complement
    * comparison ordering.
    */
   if (ilt64(0x3FF00000u, 0x00000000u, a.y, a.x))
      return 0x3FF0000000000000ul;

   return __a;
}

/* Adds the 64-bit value formed by concatenating `a0' and `a1' to the 64-bit
 * value formed by concatenating `b0' and `b1'. Addition is modulo 2^64, so
 * any carry out is lost. The result is broken into two 32-bit pieces which
 * are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
 */
void
__add64(uint a0, uint a1, uint b0, uint b1,
        out uint z0Ptr,
        out uint z1Ptr)
{
   uint z1 = a1 + b1;
   z1Ptr = z1;
   z0Ptr = a0 + b0 + uint(z1 < a1);
}

/* Subtracts the 64-bit value formed by concatenating `b0' and `b1' from the
 * 64-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo
 * 2^64, so any borrow out (carry out) is lost. The result is broken into two
 * 32-bit pieces which are stored at the locations pointed to by `z0Ptr' and
 * `z1Ptr'.
 */
void
__sub64(uint a0, uint a1, uint b0, uint b1,
        out uint z0Ptr,
        out uint z1Ptr)
{
   z1Ptr = a1 - b1;
   z0Ptr = a0 - b0 - uint(a1 < b1);
}

/* Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the
 * number of bits given in `count'. If any nonzero bits are shifted off, they
 * are "jammed" into the least significant bit of the result by setting the
 * least significant bit to 1. The value of `count' can be arbitrarily large;
 * in particular, if `count' is greater than 64, the result will be either 0
 * or 1, depending on whether the concatenation of `a0' and `a1' is zero or
 * nonzero. The result is broken into two 32-bit pieces which are stored at
 * the locations pointed to by `z0Ptr' and `z1Ptr'.
 */
void
__shift64RightJamming(uint a0,
                      uint a1,
                      int count,
                      out uint z0Ptr,
                      out uint z1Ptr)
{
   uint z0;
   uint z1;
   int negCount = (-count) & 31;

   z0 = mix(0u, a0, count == 0);
   z0 = mix(z0, (a0 >> count), count < 32);

   z1 = uint((a0 | a1) != 0u); /* count >= 64 */
   uint z1_lt64 = (a0>>(count & 31)) | uint(((a0<<negCount) | a1) != 0u);
   z1 = mix(z1, z1_lt64, count < 64);
   z1 = mix(z1, (a0 | uint(a1 != 0u)), count == 32);
   uint z1_lt32 = (a0<<negCount) | (a1>>count) | uint((a1<<negCount) != 0u);
   z1 = mix(z1, z1_lt32, count < 32);
   z1 = mix(z1, a1, count == 0);
   z1Ptr = z1;
   z0Ptr = z0;
}

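/* Illustrative worked example (not from the original source): shifting the
 * 64-bit value 0x0000000100000010 right by 8 with jamming.  The bits 0x10
 * are shifted off, so the least significant bit of the result is forced
 * to 1.
 */
void
__example_jamming(out uint hi, out uint lo)
{
   __shift64RightJamming(0x00000001u, 0x00000010u, 8, hi, lo);
   /* hi == 0x00000000u, lo == 0x01000001u */
}
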
/* Shifts the 96-bit value formed by concatenating `a0', `a1', and `a2' right
 * by 32 _plus_ the number of bits given in `count'. The shifted result is
 * at most 64 nonzero bits; these are broken into two 32-bit pieces which are
 * stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted
 * off form a third 32-bit result as follows: The _last_ bit shifted off is
 * the most-significant bit of the extra result, and the other 31 bits of the
 * extra result are all zero if and only if _all_but_the_last_ bits shifted off
 * were all zero. This extra result is stored in the location pointed to by
 * `z2Ptr'. The value of `count' can be arbitrarily large.
 *    (This routine makes more sense if `a0', `a1', and `a2' are considered
 * to form a fixed-point value with binary point between `a1' and `a2'. This
 * fixed-point value is shifted right by the number of bits given in `count',
 * and the integer part of the result is returned at the locations pointed to
 * by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly
 * corrupted as described above, and is returned at the location pointed to by
 * `z2Ptr'.)
 */
void
__shift64ExtraRightJamming(uint a0, uint a1, uint a2,
                           int count,
                           out uint z0Ptr,
                           out uint z1Ptr,
                           out uint z2Ptr)
{
   uint z0 = 0u;
   uint z1;
   uint z2;
   int negCount = (-count) & 31;

   z2 = mix(uint(a0 != 0u), a0, count == 64);
   z2 = mix(z2, a0 << negCount, count < 64);
   z2 = mix(z2, a1 << negCount, count < 32);

   z1 = mix(0u, (a0 >> (count & 31)), count < 64);
   z1 = mix(z1, (a0<<negCount) | (a1>>count), count < 32);

   a2 = mix(a2 | a1, a2, count < 32);
   z0 = mix(z0, a0 >> count, count < 32);
   z2 |= uint(a2 != 0u);

   z0 = mix(z0, 0u, (count == 32));
   z1 = mix(z1, a0, (count == 32));
   z2 = mix(z2, a1, (count == 32));
   z0 = mix(z0, a0, (count == 0));
   z1 = mix(z1, a1, (count == 0));
   z2 = mix(z2, a2, (count == 0));
   z2Ptr = z2;
   z1Ptr = z1;
   z0Ptr = z0;
}

/* Shifts the 64-bit value formed by concatenating `a0' and `a1' left by the
 * number of bits given in `count'. Any bits shifted off are lost. The value
 * of `count' must be less than 32. The result is broken into two 32-bit
 * pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
 */
void
__shortShift64Left(uint a0, uint a1,
                   int count,
                   out uint z0Ptr,
                   out uint z1Ptr)
{
   z1Ptr = a1<<count;
   z0Ptr = mix((a0 << count | (a1 >> ((-count) & 31))), a0, count == 0);
}

/* Packs the sign `zSign', the exponent `zExp', and the significand formed by
 * the concatenation of `zFrac0' and `zFrac1' into a double-precision floating-
 * point value, returning the result. After being shifted into the proper
 * positions, the three fields `zSign', `zExp', and `zFrac0' are simply added
 * together to form the most significant 32 bits of the result. This means
 * that any integer portion of `zFrac0' will be added into the exponent. Since
 * a properly normalized significand will have an integer portion equal to 1,
 * the `zExp' input should be 1 less than the desired result exponent whenever
 * `zFrac0' and `zFrac1' concatenated form a complete, normalized significand.
 */
uint64_t
__packFloat64(uint zSign, int zExp, uint zFrac0, uint zFrac1)
{
   uvec2 z;

   z.y = zSign + (uint(zExp) << 20) + zFrac0;
   z.x = zFrac1;
   return packUint2x32(z);
}

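/* Worked example (illustrative, not in the original): packing +1.0.  With
 * zSign = 0u and zExp = 0x3FE (one less than the true biased exponent
 * 0x3FF, because the integer bit of the normalized significand sits in
 * bit 20 of zFrac0 and carries into the exponent field):
 *
 *    __packFloat64(0u, 0x3FE, 0x00100000u, 0u) == 0x3FF0000000000000ul
 */
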
/* Takes an abstract floating-point value having sign `zSign', exponent `zExp',
 * and extended significand formed by the concatenation of `zFrac0', `zFrac1',
 * and `zFrac2', and returns the proper double-precision floating-point value
 * corresponding to the abstract input. Ordinarily, the abstract value is
 * simply rounded and packed into the double-precision format, with the inexact
 * exception raised if the abstract input cannot be represented exactly.
 * However, if the abstract value is too large, the overflow and inexact
 * exceptions are raised and an infinity or maximal finite value is returned.
 * If the abstract value is too small, the input value is rounded to a
 * subnormal number, and the underflow and inexact exceptions are raised if the
 * abstract input cannot be represented exactly as a subnormal double-precision
 * floating-point number.
 *    The input significand must be normalized or smaller. If the input
 * significand is not normalized, `zExp' must be 0; in that case, the result
 * returned is a subnormal number, and it must not require rounding. In the
 * usual case that the input significand is normalized, `zExp' must be 1 less
 * than the "true" floating-point exponent. The handling of underflow and
 * overflow follows the IEEE Standard for Floating-Point Arithmetic.
 */
uint64_t
__roundAndPackFloat64(uint zSign,
                      int zExp,
                      uint zFrac0,
                      uint zFrac1,
                      uint zFrac2)
{
   bool roundNearestEven;
   bool increment;

   roundNearestEven = FLOAT_ROUNDING_MODE == FLOAT_ROUND_NEAREST_EVEN;
   increment = int(zFrac2) < 0;
   if (!roundNearestEven) {
      if (FLOAT_ROUNDING_MODE == FLOAT_ROUND_TO_ZERO) {
         increment = false;
      } else {
         if (zSign != 0u) {
            increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) &&
               (zFrac2 != 0u);
         } else {
            increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) &&
               (zFrac2 != 0u);
         }
      }
   }
   if (0x7FD <= zExp) {
      if ((0x7FD < zExp) ||
          ((zExp == 0x7FD) &&
           (0x001FFFFFu == zFrac0 && 0xFFFFFFFFu == zFrac1) &&
           increment)) {
         if ((FLOAT_ROUNDING_MODE == FLOAT_ROUND_TO_ZERO) ||
             ((zSign != 0u) && (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP)) ||
             ((zSign == 0u) && (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN))) {
            return __packFloat64(zSign, 0x7FE, 0x000FFFFFu, 0xFFFFFFFFu);
         }
         return __packFloat64(zSign, 0x7FF, 0u, 0u);
      }
   }

   if (zExp < 0) {
      __shift64ExtraRightJamming(
         zFrac0, zFrac1, zFrac2, -zExp, zFrac0, zFrac1, zFrac2);
      zExp = 0;
      if (roundNearestEven) {
         /* zFrac2 is unsigned, so test the sign bit explicitly rather than
          * writing `zFrac2 < 0u', which is always false.
          */
         increment = int(zFrac2) < 0;
      } else {
         if (zSign != 0u) {
            increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) &&
               (zFrac2 != 0u);
         } else {
            increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) &&
               (zFrac2 != 0u);
         }
      }
   }

   if (increment) {
      __add64(zFrac0, zFrac1, 0u, 1u, zFrac0, zFrac1);
      /* Round to nearest even: clear the low bit when the discarded bits
       * were exactly one half.  `zFrac2 + zFrac2' wraps to zero only for
       * 0u and 0x80000000u, and zFrac2 cannot be 0u here.
       */
      zFrac1 &= ~(uint((zFrac2 + zFrac2) == 0u) & uint(roundNearestEven));
   } else {
      zExp = mix(zExp, 0, (zFrac0 | zFrac1) == 0u);
   }
   return __packFloat64(zSign, zExp, zFrac0, zFrac1);
}

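/* Worked rounding example (illustrative, not from the source): under
 * round-to-nearest-even, zFrac2 == 0x80000000u marks an exact tie.  A
 * significand ending in ...01 with a tie increments to ...10 (already
 * even), while ...10 with a tie increments to ...11 and then has its low
 * bit cleared back to ...10; any zFrac2 above 0x80000000u simply rounds
 * up.
 */
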
uint64_t
__roundAndPackUInt64(uint zSign, uint zFrac0, uint zFrac1, uint zFrac2)
{
   bool roundNearestEven;
   bool increment;
   uint64_t default_nan = 0xFFFFFFFFFFFFFFFFUL;

   roundNearestEven = FLOAT_ROUNDING_MODE == FLOAT_ROUND_NEAREST_EVEN;

   /* Initialize the increment for round-to-nearest-even from the sign bit
    * of the extra word, as in __roundAndPackFloat64.
    */
   increment = zFrac2 >= 0x80000000u;

   if (!roundNearestEven) {
      if (zSign != 0u) {
         if ((FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) && (zFrac2 != 0u)) {
            increment = false;
         }
      } else {
         increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) &&
            (zFrac2 != 0u);
      }
   }

   if (increment) {
      __add64(zFrac0, zFrac1, 0u, 1u, zFrac0, zFrac1);
      if ((zFrac0 | zFrac1) != 0u)
         /* Round to nearest even: clear the low bit on an exact tie
          * (zFrac2 == 0x80000000u).
          */
         zFrac1 &= ~(uint((zFrac2 + zFrac2) == 0u) & uint(roundNearestEven));
   }
   return mix(packUint2x32(uvec2(zFrac1, zFrac0)), default_nan,
              (zSign != 0u && (zFrac0 | zFrac1) != 0u));
}

int64_t
__roundAndPackInt64(uint zSign, uint zFrac0, uint zFrac1, uint zFrac2)
{
   bool roundNearestEven;
   bool increment;
   int64_t default_NegNaN = -0x7FFFFFFFFFFFFFFEL;
   int64_t default_PosNaN = 0xFFFFFFFFFFFFFFFFL;

   roundNearestEven = FLOAT_ROUNDING_MODE == FLOAT_ROUND_NEAREST_EVEN;

   /* Initialize the increment for round-to-nearest-even from the sign bit
    * of the extra word, as in __roundAndPackFloat64.
    */
   increment = zFrac2 >= 0x80000000u;

   if (!roundNearestEven) {
      if (zSign != 0u) {
         increment = ((FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) &&
            (zFrac2 != 0u));
      } else {
         increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) &&
            (zFrac2 != 0u);
      }
   }

   if (increment) {
      __add64(zFrac0, zFrac1, 0u, 1u, zFrac0, zFrac1);
      if ((zFrac0 | zFrac1) != 0u)
         /* Round to nearest even: clear the low bit on an exact tie
          * (zFrac2 == 0x80000000u).
          */
         zFrac1 &= ~(uint((zFrac2 + zFrac2) == 0u) & uint(roundNearestEven));
   }

   int64_t absZ = mix(int64_t(packUint2x32(uvec2(zFrac1, zFrac0))),
                      -int64_t(packUint2x32(uvec2(zFrac1, zFrac0))),
                      zSign != 0u);
   int64_t nan = mix(default_PosNaN, default_NegNaN, zSign != 0u);
   return mix(absZ, nan, ((zSign != 0u) != (absZ < 0)) && bool(absZ));
}

/* Returns the number of leading 0 bits before the most-significant 1 bit of
 * `a'. If `a' is zero, 32 is returned.
 */
int
__countLeadingZeros32(uint a)
{
   return 31 - findMSB(a);
}

/* Takes an abstract floating-point value having sign `zSign', exponent `zExp',
 * and significand formed by the concatenation of `zFrac0' and `zFrac1', and
 * returns the proper double-precision floating-point value corresponding
 * to the abstract input. This routine is just like `__roundAndPackFloat64'
 * except that the input significand has fewer bits and does not have to be
 * normalized. In all cases, `zExp' must be 1 less than the "true" floating-
 * point exponent.
 */
uint64_t
__normalizeRoundAndPackFloat64(uint zSign,
                               int zExp,
                               uint zFrac0,
                               uint zFrac1)
{
   int shiftCount;
   uint zFrac2;

   if (zFrac0 == 0u) {
      zExp -= 32;
      zFrac0 = zFrac1;
      zFrac1 = 0u;
   }

   shiftCount = __countLeadingZeros32(zFrac0) - 11;
   if (0 <= shiftCount) {
      zFrac2 = 0u;
      __shortShift64Left(zFrac0, zFrac1, shiftCount, zFrac0, zFrac1);
   } else {
      __shift64ExtraRightJamming(
         zFrac0, zFrac1, 0u, -shiftCount, zFrac0, zFrac1, zFrac2);
   }
   zExp -= shiftCount;
   return __roundAndPackFloat64(zSign, zExp, zFrac0, zFrac1, zFrac2);
}

/* Takes two double-precision floating-point values `a' and `b', one of which
 * is a NaN, and returns the appropriate NaN result.
 */
uint64_t
__propagateFloat64NaN(uint64_t __a, uint64_t __b)
{
#if defined RELAXED_NAN_PROPAGATION
   uvec2 a = unpackUint2x32(__a);
   uvec2 b = unpackUint2x32(__b);

   return packUint2x32(uvec2(a.x | b.x, a.y | b.y));
#else
   bool aIsNaN = __is_nan(__a);
   bool bIsNaN = __is_nan(__b);
   uvec2 a = unpackUint2x32(__a);
   uvec2 b = unpackUint2x32(__b);
   a.y |= 0x00080000u;
   b.y |= 0x00080000u;

   return packUint2x32(mix(b, mix(a, b, bvec2(bIsNaN, bIsNaN)), bvec2(aIsNaN, aIsNaN)));
#endif
}

/* If a shader is in the soft-fp64 path, it almost certainly has register
 * pressure problems. Choose a method to exchange two values that does not
 * require a temporary.
 */
#define EXCHANGE(a, b) \
   do {                \
      a ^= b;          \
      b ^= a;          \
      a ^= b;          \
   } while (false)

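/* Hedged illustration (not part of the original file): the three-XOR swap
 * above exchanges two values without a temporary register, e.g. (1u, 2u)
 * becomes (2u, 1u).
 */
void
__example_exchange(inout uint x, inout uint y)
{
   EXCHANGE(x, y);
}
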
/* Returns the result of adding the double-precision floating-point values
 * `a' and `b'. The operation is performed according to the IEEE Standard for
 * Floating-Point Arithmetic.
 */
uint64_t
__fadd64(uint64_t a, uint64_t b)
{
   uint aSign = __extractFloat64Sign(a);
   uint bSign = __extractFloat64Sign(b);
   uint aFracLo = __extractFloat64FracLo(a);
   uint aFracHi = __extractFloat64FracHi(a);
   uint bFracLo = __extractFloat64FracLo(b);
   uint bFracHi = __extractFloat64FracHi(b);
   int aExp = __extractFloat64Exp(a);
   int bExp = __extractFloat64Exp(b);
   int expDiff = aExp - bExp;
   if (aSign == bSign) {
      uint zFrac0;
      uint zFrac1;
      uint zFrac2;
      int zExp;

      if (expDiff == 0) {
         if (aExp == 0x7FF) {
            bool propagate = ((aFracHi | bFracHi) | (aFracLo | bFracLo)) != 0u;
            return mix(a, __propagateFloat64NaN(a, b), propagate);
         }
         __add64(aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1);
         if (aExp == 0)
            return __packFloat64(aSign, 0, zFrac0, zFrac1);
         zFrac2 = 0u;
         zFrac0 |= 0x00200000u;
         zExp = aExp;
         __shift64ExtraRightJamming(
            zFrac0, zFrac1, zFrac2, 1, zFrac0, zFrac1, zFrac2);
      } else {
         if (expDiff < 0) {
            EXCHANGE(aFracHi, bFracHi);
            EXCHANGE(aFracLo, bFracLo);
            EXCHANGE(aExp, bExp);
         }

         if (aExp == 0x7FF) {
            bool propagate = (aFracHi | aFracLo) != 0u;
            return mix(__packFloat64(aSign, 0x7ff, 0u, 0u), __propagateFloat64NaN(a, b), propagate);
         }

         expDiff = mix(abs(expDiff), abs(expDiff) - 1, bExp == 0);
         bFracHi = mix(bFracHi | 0x00100000u, bFracHi, bExp == 0);
         __shift64ExtraRightJamming(
            bFracHi, bFracLo, 0u, expDiff, bFracHi, bFracLo, zFrac2);
         zExp = aExp;

         aFracHi |= 0x00100000u;
         __add64(aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1);
         --zExp;
         if (!(zFrac0 < 0x00200000u)) {
            __shift64ExtraRightJamming(zFrac0, zFrac1, zFrac2, 1, zFrac0, zFrac1, zFrac2);
            ++zExp;
         }
      }
      return __roundAndPackFloat64(aSign, zExp, zFrac0, zFrac1, zFrac2);

   } else {
      int zExp;

      __shortShift64Left(aFracHi, aFracLo, 10, aFracHi, aFracLo);
      __shortShift64Left(bFracHi, bFracLo, 10, bFracHi, bFracLo);
      if (expDiff != 0) {
         uint zFrac0;
         uint zFrac1;

         if (expDiff < 0) {
            EXCHANGE(aFracHi, bFracHi);
            EXCHANGE(aFracLo, bFracLo);
            EXCHANGE(aExp, bExp);
            aSign ^= 0x80000000u;
         }

         if (aExp == 0x7FF) {
            bool propagate = (aFracHi | aFracLo) != 0u;
            return mix(__packFloat64(aSign, 0x7ff, 0u, 0u), __propagateFloat64NaN(a, b), propagate);
         }

         expDiff = mix(abs(expDiff), abs(expDiff) - 1, bExp == 0);
         bFracHi = mix(bFracHi | 0x40000000u, bFracHi, bExp == 0);
         __shift64RightJamming(bFracHi, bFracLo, expDiff, bFracHi, bFracLo);
         aFracHi |= 0x40000000u;
         __sub64(aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1);
         zExp = aExp;
         --zExp;
         return __normalizeRoundAndPackFloat64(aSign, zExp - 10, zFrac0, zFrac1);
      }
      if (aExp == 0x7FF) {
         bool propagate = ((aFracHi | bFracHi) | (aFracLo | bFracLo)) != 0u;
         return mix(0xFFFFFFFFFFFFFFFFUL, __propagateFloat64NaN(a, b), propagate);
      }
      bExp = mix(bExp, 1, aExp == 0);
      aExp = mix(aExp, 1, aExp == 0);

      uint zFrac0;
      uint zFrac1;
      uint sign_of_difference = 0u;
      if (bFracHi < aFracHi) {
         __sub64(aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1);
      }
      else if (aFracHi < bFracHi) {
         __sub64(bFracHi, bFracLo, aFracHi, aFracLo, zFrac0, zFrac1);
         sign_of_difference = 0x80000000u;
      }
      else if (bFracLo <= aFracLo) {
         /* It is possible that zFrac0 and zFrac1 may be zero after this. */
         __sub64(aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1);
      }
      else {
         __sub64(bFracHi, bFracLo, aFracHi, aFracLo, zFrac0, zFrac1);
         sign_of_difference = 0x80000000u;
      }
      zExp = mix(bExp, aExp, sign_of_difference == 0u);
      aSign ^= sign_of_difference;
      uint64_t retval_0 = __packFloat64(uint(FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) << 31, 0, 0u, 0u);
      uint64_t retval_1 = __normalizeRoundAndPackFloat64(aSign, zExp - 11, zFrac0, zFrac1);
      return mix(retval_0, retval_1, zFrac0 != 0u || zFrac1 != 0u);
   }
}

/* Multiplies the 64-bit value formed by concatenating `a0' and `a1' by the
 * 64-bit value formed by concatenating `b0' and `b1' to obtain a 128-bit
 * product. The product is broken into four 32-bit pieces which are stored at
 * the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
 */
void
__mul64To128(uint a0, uint a1, uint b0, uint b1,
             out uint z0Ptr,
             out uint z1Ptr,
             out uint z2Ptr,
             out uint z3Ptr)
{
   uint z0 = 0u;
   uint z1 = 0u;
   uint z2 = 0u;
   uint z3 = 0u;
   uint more1 = 0u;
   uint more2 = 0u;

   umulExtended(a1, b1, z2, z3);
   umulExtended(a1, b0, z1, more2);
   __add64(z1, more2, 0u, z2, z1, z2);
   umulExtended(a0, b0, z0, more1);
   __add64(z0, more1, 0u, z1, z0, z1);
   umulExtended(a0, b1, more1, more2);
   __add64(more1, more2, 0u, z2, more1, z2);
   __add64(z0, z1, 0u, more1, z0, z1);
   z3Ptr = z3;
   z2Ptr = z2;
   z1Ptr = z1;
   z0Ptr = z0;
}

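/* Worked example (illustrative, not in the original): squaring 2^32 + 1,
 * i.e. __mul64To128(1u, 1u, 1u, 1u, z0, z1, z2, z3), yields
 * (z0, z1, z2, z3) = (0u, 1u, 2u, 1u), which is the 128-bit value
 * 2^64 + 2^33 + 1.
 */
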
/* Normalizes the subnormal double-precision floating-point value represented
 * by the denormalized significand formed by the concatenation of `aFrac0' and
 * `aFrac1'. The normalized exponent is stored at the location pointed to by
 * `zExpPtr'. The most significant 21 bits of the normalized significand are
 * stored at the location pointed to by `zFrac0Ptr', and the least significant
 * 32 bits of the normalized significand are stored at the location pointed to
 * by `zFrac1Ptr'.
 */
void
__normalizeFloat64Subnormal(uint aFrac0, uint aFrac1,
                            out int zExpPtr,
                            out uint zFrac0Ptr,
                            out uint zFrac1Ptr)
{
   int shiftCount;
   uint temp_zfrac0, temp_zfrac1;
   shiftCount = __countLeadingZeros32(mix(aFrac0, aFrac1, aFrac0 == 0u)) - 11;
   zExpPtr = mix(1 - shiftCount, -shiftCount - 31, aFrac0 == 0u);

   temp_zfrac0 = mix(aFrac1<<shiftCount, aFrac1>>(-shiftCount), shiftCount < 0);
   temp_zfrac1 = mix(0u, aFrac1<<(shiftCount & 31), shiftCount < 0);

   __shortShift64Left(aFrac0, aFrac1, shiftCount, zFrac0Ptr, zFrac1Ptr);

   zFrac0Ptr = mix(zFrac0Ptr, temp_zfrac0, aFrac0 == 0u);
   zFrac1Ptr = mix(zFrac1Ptr, temp_zfrac1, aFrac0 == 0u);
}

/* Returns the result of multiplying the double-precision floating-point values
 * `a' and `b'. The operation is performed according to the IEEE Standard for
 * Floating-Point Arithmetic.
 */
uint64_t
__fmul64(uint64_t a, uint64_t b)
{
   uint zFrac0 = 0u;
   uint zFrac1 = 0u;
   uint zFrac2 = 0u;
   uint zFrac3 = 0u;
   int zExp;

   uint aFracLo = __extractFloat64FracLo(a);
   uint aFracHi = __extractFloat64FracHi(a);
   uint bFracLo = __extractFloat64FracLo(b);
   uint bFracHi = __extractFloat64FracHi(b);
   int aExp = __extractFloat64Exp(a);
   uint aSign = __extractFloat64Sign(a);
   int bExp = __extractFloat64Exp(b);
   uint bSign = __extractFloat64Sign(b);
   uint zSign = aSign ^ bSign;
   if (aExp == 0x7FF) {
      if (((aFracHi | aFracLo) != 0u) ||
          ((bExp == 0x7FF) && ((bFracHi | bFracLo) != 0u))) {
         return __propagateFloat64NaN(a, b);
      }
      if ((uint(bExp) | bFracHi | bFracLo) == 0u)
         return 0xFFFFFFFFFFFFFFFFUL;
      return __packFloat64(zSign, 0x7FF, 0u, 0u);
   }
   if (bExp == 0x7FF) {
      /* a cannot be NaN, but is b NaN? */
      if ((bFracHi | bFracLo) != 0u)
#if defined RELAXED_NAN_PROPAGATION
         return b;
#else
         return __propagateFloat64NaN(a, b);
#endif
      if ((uint(aExp) | aFracHi | aFracLo) == 0u)
         return 0xFFFFFFFFFFFFFFFFUL;
      return __packFloat64(zSign, 0x7FF, 0u, 0u);
   }
   if (aExp == 0) {
      if ((aFracHi | aFracLo) == 0u)
         return __packFloat64(zSign, 0, 0u, 0u);
      __normalizeFloat64Subnormal(aFracHi, aFracLo, aExp, aFracHi, aFracLo);
   }
   if (bExp == 0) {
      if ((bFracHi | bFracLo) == 0u)
         return __packFloat64(zSign, 0, 0u, 0u);
      __normalizeFloat64Subnormal(bFracHi, bFracLo, bExp, bFracHi, bFracLo);
   }
   zExp = aExp + bExp - 0x400;
   aFracHi |= 0x00100000u;
   __shortShift64Left(bFracHi, bFracLo, 12, bFracHi, bFracLo);
   __mul64To128(
      aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1, zFrac2, zFrac3);
   __add64(zFrac0, zFrac1, aFracHi, aFracLo, zFrac0, zFrac1);
   zFrac2 |= uint(zFrac3 != 0u);
   if (0x00200000u <= zFrac0) {
      __shift64ExtraRightJamming(
         zFrac0, zFrac1, zFrac2, 1, zFrac0, zFrac1, zFrac2);
      ++zExp;
   }
   return __roundAndPackFloat64(zSign, zExp, zFrac0, zFrac1, zFrac2);
}

uint64_t
__ffma64(uint64_t a, uint64_t b, uint64_t c)
{
   return __fadd64(__fmul64(a, b), c);
}

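/* Illustrative check (not in the original): multiplying the bit patterns
 * of 2.0 and 3.0 yields the bit pattern of 6.0:
 *
 *    __fmul64(0x4000000000000000ul, 0x4008000000000000ul)
 *       == 0x4018000000000000ul
 *
 * Note that __ffma64 above is implemented as a multiply followed by a
 * separate add, so unlike a hardware fma it rounds twice.
 */
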
/* Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the
 * number of bits given in `count'. Any bits shifted off are lost. The value
 * of `count' can be arbitrarily large; in particular, if `count' is greater
 * than 64, the result will be 0. The result is broken into two 32-bit pieces
 * which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
 */
void
__shift64Right(uint a0, uint a1,
               int count,
               out uint z0Ptr,
               out uint z1Ptr)
{
   uint z0;
   uint z1;
   int negCount = (-count) & 31;

   z0 = 0u;
   z0 = mix(z0, (a0 >> count), count < 32);
   z0 = mix(z0, a0, count == 0);

   z1 = mix(0u, (a0 >> (count & 31)), count < 64);
   z1 = mix(z1, (a0<<negCount) | (a1>>count), count < 32);
   z1 = mix(z1, a0, count == 0);

   z1Ptr = z1;
   z0Ptr = z0;
}

/* Returns the result of converting the double-precision floating-point value
 * `a' to the unsigned integer format. The conversion is performed according
 * to the IEEE Standard for Floating-Point Arithmetic.
 */
uint
__fp64_to_uint(uint64_t a)
{
   uint aFracLo = __extractFloat64FracLo(a);
   uint aFracHi = __extractFloat64FracHi(a);
   int aExp = __extractFloat64Exp(a);
   uint aSign = __extractFloat64Sign(a);

   if ((aExp == 0x7FF) && ((aFracHi | aFracLo) != 0u))
      return 0xFFFFFFFFu;

   aFracHi |= mix(0u, 0x00100000u, aExp != 0);

   int shiftDist = 0x427 - aExp;
   if (0 < shiftDist)
      __shift64RightJamming(aFracHi, aFracLo, shiftDist, aFracHi, aFracLo);

   if ((aFracHi & 0xFFFFF000u) != 0u)
      return mix(~0u, 0u, aSign != 0u);

   uint z = 0u;
   uint zero = 0u;
   __shift64Right(aFracHi, aFracLo, 12, zero, z);

   uint expt = mix(~0u, 0u, aSign != 0u);

   return mix(z, expt, (aSign != 0u) && (z != 0u));
}

uint64_t
__uint_to_fp64(uint a)
{
   if (a == 0u)
      return 0ul;

   int shiftDist = __countLeadingZeros32(a) + 21;

   uint aHigh = 0u;
   uint aLow = 0u;
   int negCount = (-shiftDist) & 31;

   aHigh = mix(0u, a << (shiftDist - 32), shiftDist < 64);
   aLow = 0u;
   aHigh = mix(aHigh, 0u, shiftDist == 0);
   aLow = mix(aLow, a, shiftDist == 0);
   aHigh = mix(aHigh, a >> negCount, shiftDist < 32);
   aLow = mix(aLow, a << shiftDist, shiftDist < 32);

   return __packFloat64(0u, 0x432 - shiftDist, aHigh, aLow);
}

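/* Worked example (illustrative, not from the source): __uint_to_fp64(1u)
 * computes shiftDist = 31 + 21 = 52, places the hidden bit in aHigh, and
 * packs zExp = 0x432 - 52 = 0x3FE, producing 0x3FF0000000000000ul, the
 * bit pattern of 1.0.
 */
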
uint64_t
__uint64_to_fp64(uint64_t a)
{
   if (a == 0u)
      return 0ul;

   uvec2 aFrac = unpackUint2x32(a);
   uint aFracLo = __extractFloat64FracLo(a);
   uint aFracHi = __extractFloat64FracHi(a);

   if ((aFracHi & 0x80000000u) != 0u) {
      __shift64RightJamming(aFracHi, aFracLo, 1, aFracHi, aFracLo);
      return __roundAndPackFloat64(0u, 0x433, aFracHi, aFracLo, 0u);
   } else {
      return __normalizeRoundAndPackFloat64(0u, 0x432, aFrac.y, aFrac.x);
   }
}

uint64_t
__fp64_to_uint64(uint64_t a)
{
   uint aFracLo = __extractFloat64FracLo(a);
   uint aFracHi = __extractFloat64FracHi(a);
   int aExp = __extractFloat64Exp(a);
   uint aSign = __extractFloat64Sign(a);
   uint zFrac2 = 0u;
   uint64_t default_nan = 0xFFFFFFFFFFFFFFFFUL;

   aFracHi = mix(aFracHi, aFracHi | 0x00100000u, aExp != 0);
   int shiftCount = 0x433 - aExp;

   if (shiftCount <= 0) {
      if (shiftCount < -11 && aExp == 0x7FF) {
         if ((aFracHi | aFracLo) != 0u)
            return __propagateFloat64NaN(a, a);
         return mix(default_nan, a, aSign == 0u);
      }
      __shortShift64Left(aFracHi, aFracLo, -shiftCount, aFracHi, aFracLo);
   } else {
      __shift64ExtraRightJamming(aFracHi, aFracLo, zFrac2, shiftCount,
                                 aFracHi, aFracLo, zFrac2);
   }
   return __roundAndPackUInt64(aSign, aFracHi, aFracLo, zFrac2);
}

int64_t
__fp64_to_int64(uint64_t a)
{
   uint zFrac2 = 0u;
   uint aFracLo = __extractFloat64FracLo(a);
   uint aFracHi = __extractFloat64FracHi(a);
   int aExp = __extractFloat64Exp(a);
   uint aSign = __extractFloat64Sign(a);
   int64_t default_NegNaN = -0x7FFFFFFFFFFFFFFEL;
   int64_t default_PosNaN = 0xFFFFFFFFFFFFFFFFL;

   aFracHi = mix(aFracHi, aFracHi | 0x00100000u, aExp != 0);
   int shiftCount = 0x433 - aExp;

   if (shiftCount <= 0) {
      if (shiftCount < -11 && aExp == 0x7FF) {
         if ((aFracHi | aFracLo) != 0u)
            return default_NegNaN;
         return mix(default_NegNaN, default_PosNaN, aSign == 0u);
      }
      __shortShift64Left(aFracHi, aFracLo, -shiftCount, aFracHi, aFracLo);
   } else {
      __shift64ExtraRightJamming(aFracHi, aFracLo, zFrac2, shiftCount,
                                 aFracHi, aFracLo, zFrac2);
   }

   return __roundAndPackInt64(aSign, aFracHi, aFracLo, zFrac2);
}

uint64_t
__int64_to_fp64(int64_t a)
{
   if (a == 0)
      return 0ul;

   uint64_t absA = mix(uint64_t(a), uint64_t(-a), a < 0);
   uint aFracHi = __extractFloat64FracHi(absA);
   uvec2 aFrac = unpackUint2x32(absA);
   uint zSign = uint(unpackInt2x32(a).y) & 0x80000000u;

   if ((aFracHi & 0x80000000u) != 0u) {
      return mix(0ul, __packFloat64(0x80000000u, 0x434, 0u, 0u), a < 0);
   }

   return __normalizeRoundAndPackFloat64(zSign, 0x432, aFrac.y, aFrac.x);
}

/* Returns the result of converting the double-precision floating-point value
 * `a' to the 32-bit two's complement integer format. The conversion is
 * performed according to the IEEE Standard for Floating-Point Arithmetic---
 * which means in particular that the conversion is rounded according to the
 * current rounding mode. If `a' is a NaN, the largest positive integer is
 * returned. Otherwise, if the conversion overflows, the largest integer with
 * the same sign as `a' is returned.
 */
int
__fp64_to_int(uint64_t a)
{
   uint aFracLo = __extractFloat64FracLo(a);
   uint aFracHi = __extractFloat64FracHi(a);
   int aExp = __extractFloat64Exp(a);
   uint aSign = __extractFloat64Sign(a);

   uint absZ = 0u;
   uint aFracExtra = 0u;
   int shiftCount = aExp - 0x413;

   if (0 <= shiftCount) {
      if (0x41E < aExp) {
         if ((aExp == 0x7FF) && bool(aFracHi | aFracLo))
            aSign = 0u;
         return mix(0x7FFFFFFF, 0x80000000, aSign != 0u);
      }
      __shortShift64Left(aFracHi | 0x00100000u, aFracLo, shiftCount, absZ, aFracExtra);
   } else {
      if (aExp < 0x3FF)
         return 0;

      aFracHi |= 0x00100000u;
      aFracExtra = (aFracHi << (shiftCount & 31)) | aFracLo;
      absZ = aFracHi >> (-shiftCount);
   }

   int z = mix(int(absZ), -int(absZ), aSign != 0u);
   int nan = mix(0x7FFFFFFF, 0x80000000, aSign != 0u);
   return mix(z, nan, ((aSign != 0u) != (z < 0)) && bool(z));
}

/* Returns the result of converting the 32-bit two's complement integer `a'
 * to the double-precision floating-point format. The conversion is performed
 * according to the IEEE Standard for Floating-Point Arithmetic.
 */
uint64_t
__int_to_fp64(int a)
{
   uint zFrac0 = 0u;
   uint zFrac1 = 0u;
   if (a == 0)
      return __packFloat64(0u, 0, 0u, 0u);
   uint zSign = uint(a) & 0x80000000u;
   uint absA = mix(uint(a), uint(-a), a < 0);
   int shiftCount = __countLeadingZeros32(absA) - 11;
   if (0 <= shiftCount) {
      zFrac0 = absA << shiftCount;
      zFrac1 = 0u;
   } else {
      __shift64Right(absA, 0u, -shiftCount, zFrac0, zFrac1);
   }
   return __packFloat64(zSign, 0x412 - shiftCount, zFrac0, zFrac1);
}

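/* Worked example (illustrative, not in the original): __int_to_fp64(-1)
 * extracts zSign = 0x80000000u, absA = 1u, shiftCount = 20, and packs
 * zExp = 0x412 - 20 = 0x3FE, producing 0xBFF0000000000000ul, the bit
 * pattern of -1.0.
 */
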
uint64_t
__bool_to_fp64(bool a)
{
   return packUint2x32(uvec2(0x00000000u, uint(-int(a) & 0x3ff00000)));
}

/* Packs the sign `zSign', exponent `zExp', and significand `zFrac' into a
 * single-precision floating-point value, returning the result. After being
 * shifted into the proper positions, the three fields are simply added
 * together to form the result. This means that any integer portion of `zFrac'
 * will be added into the exponent. Since a properly normalized significand
 * will have an integer portion equal to 1, the `zExp' input should be 1 less
 * than the desired result exponent whenever `zFrac' is a complete, normalized
 * significand.
 */
float
__packFloat32(uint zSign, int zExp, uint zFrac)
{
   return uintBitsToFloat(zSign + (uint(zExp)<<23) + zFrac);
}

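/* Worked example (illustrative, not from the source): packing +1.0f.
 * __packFloat32(0u, 0x7E, 0x00800000u) lets the integer bit of the
 * significand carry into the exponent field, giving bits 0x3F800000u,
 * i.e. 1.0f.
 */
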
/* Takes an abstract floating-point value having sign `zSign', exponent `zExp',
 * and significand `zFrac', and returns the proper single-precision floating-
 * point value corresponding to the abstract input. Ordinarily, the abstract
 * value is simply rounded and packed into the single-precision format, with
 * the inexact exception raised if the abstract input cannot be represented
 * exactly. However, if the abstract value is too large, the overflow and
 * inexact exceptions are raised and an infinity or maximal finite value is
 * returned. If the abstract value is too small, the input value is rounded to
 * a subnormal number, and the underflow and inexact exceptions are raised if
 * the abstract input cannot be represented exactly as a subnormal single-
 * precision floating-point number.
 *    The input significand `zFrac' has its binary point between bits 30
 * and 29, which is 7 bits to the left of the usual location. This shifted
 * significand must be normalized or smaller. If `zFrac' is not normalized,
 * `zExp' must be 0; in that case, the result returned is a subnormal number,
 * and it must not require rounding. In the usual case that `zFrac' is
 * normalized, `zExp' must be 1 less than the "true" floating-point exponent.
 * The handling of underflow and overflow follows the IEEE Standard for
 * Floating-Point Arithmetic.
 */
float
__roundAndPackFloat32(uint zSign, int zExp, uint zFrac)
{
   bool roundNearestEven;
   int roundIncrement;
   int roundBits;

   roundNearestEven = FLOAT_ROUNDING_MODE == FLOAT_ROUND_NEAREST_EVEN;
   roundIncrement = 0x40;
   if (!roundNearestEven) {
      if (FLOAT_ROUNDING_MODE == FLOAT_ROUND_TO_ZERO) {
         roundIncrement = 0;
      } else {
         roundIncrement = 0x7F;
         if (zSign != 0u) {
            if (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP)
               roundIncrement = 0;
         } else {
            if (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN)
               roundIncrement = 0;
         }
      }
   }
   roundBits = int(zFrac & 0x7Fu);
   if (0xFDu <= uint(zExp)) {
      if ((0xFD < zExp) || ((zExp == 0xFD) && (int(zFrac) + roundIncrement) < 0))
         return __packFloat32(zSign, 0xFF, 0u) - float(roundIncrement == 0);
      int count = -zExp;
      bool zexp_lt0 = zExp < 0;
      uint zFrac_lt0 = mix(uint(zFrac != 0u), (zFrac>>count) | uint((zFrac<<((-count) & 31)) != 0u), (-zExp) < 32);
      zFrac = mix(zFrac, zFrac_lt0, zexp_lt0);
      roundBits = mix(roundBits, int(zFrac) & 0x7f, zexp_lt0);
      zExp = mix(zExp, 0, zexp_lt0);
   }
   zFrac = (zFrac + uint(roundIncrement))>>7;
   zFrac &= ~uint(((roundBits ^ 0x40) == 0) && roundNearestEven);

   return __packFloat32(zSign, mix(zExp, 0, zFrac == 0u), zFrac);
}

/* Returns the result of converting the double-precision floating-point value
 * `a' to the single-precision floating-point format. The conversion is
 * performed according to the IEEE Standard for Floating-Point Arithmetic.
 */
float
__fp64_to_fp32(uint64_t __a)
{
   uvec2 a = unpackUint2x32(__a);
   uint zFrac = 0u;
   uint allZero = 0u;

   uint aFracLo = __extractFloat64FracLo(__a);
   uint aFracHi = __extractFloat64FracHi(__a);
   int aExp = __extractFloat64Exp(__a);
   uint aSign = __extractFloat64Sign(__a);
   if (aExp == 0x7FF) {
      __shortShift64Left(a.y, a.x, 12, a.y, a.x);
      float rval = uintBitsToFloat(aSign | 0x7FC00000u | (a.y>>9));
      rval = mix(__packFloat32(aSign, 0xFF, 0u), rval, (aFracHi | aFracLo) != 0u);
      return rval;
   }
   __shift64RightJamming(aFracHi, aFracLo, 22, allZero, zFrac);
   zFrac = mix(zFrac, zFrac | 0x40000000u, aExp != 0);
   return __roundAndPackFloat32(aSign, aExp - 0x381, zFrac);
}

/* Returns the result of converting the single-precision floating-point value
 * `a' to the double-precision floating-point format.
 */
uint64_t
__fp32_to_fp64(float f)
{
   uint a = floatBitsToUint(f);
   uint aFrac = a & 0x007FFFFFu;
   int aExp = int((a>>23) & 0xFFu);
   uint aSign = a & 0x80000000u;
   uint zFrac0 = 0u;
   uint zFrac1 = 0u;

   if (aExp == 0xFF) {
      if (aFrac != 0u) {
         uint nanLo = 0u;
         uint nanHi = a<<9;
         __shift64Right(nanHi, nanLo, 12, nanHi, nanLo);
         nanHi |= aSign | 0x7FF80000u;
         return packUint2x32(uvec2(nanLo, nanHi));
      }
      return __packFloat64(aSign, 0x7FF, 0u, 0u);
   }

   if (aExp == 0) {
      if (aFrac == 0u)
         return __packFloat64(aSign, 0, 0u, 0u);
      /* Normalize subnormal */
      int shiftCount = __countLeadingZeros32(aFrac) - 8;
      aFrac <<= shiftCount;
      aExp = 1 - shiftCount;
      --aExp;
   }

   __shift64Right(aFrac, 0u, 3, zFrac0, zFrac1);
   return __packFloat64(aSign, aExp + 0x380, zFrac0, zFrac1);
}

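/* Illustrative check (not in the original): __fp32_to_fp64(1.0) sees
 * aExp = 0x7F and aFrac = 0u, and packs aExp + 0x380 = 0x3FF, giving
 * 0x3FF0000000000000ul - the bit pattern of 1.0 as a double.
 */
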
/* Adds the 96-bit value formed by concatenating `a0', `a1', and `a2' to the
 * 96-bit value formed by concatenating `b0', `b1', and `b2'. Addition is
 * modulo 2^96, so any carry out is lost. The result is broken into three
 * 32-bit pieces which are stored at the locations pointed to by `z0Ptr',
 * `z1Ptr', and `z2Ptr'.
 */
void
__add96(uint a0, uint a1, uint a2,
        uint b0, uint b1, uint b2,
        out uint z0Ptr,
        out uint z1Ptr,
        out uint z2Ptr)
{
   uint z2 = a2 + b2;
   uint carry1 = uint(z2 < a2);
   uint z1 = a1 + b1;
   uint carry0 = uint(z1 < a1);
   uint z0 = a0 + b0;
   z1 += carry1;
   z0 += uint(z1 < carry1);
   z0 += carry0;
   z2Ptr = z2;
   z1Ptr = z1;
   z0Ptr = z0;
}

/* Subtracts the 96-bit value formed by concatenating `b0', `b1', and `b2' from
 * the 96-bit value formed by concatenating `a0', `a1', and `a2'. Subtraction
 * is modulo 2^96, so any borrow out (carry out) is lost. The result is broken
 * into three 32-bit pieces which are stored at the locations pointed to by
 * `z0Ptr', `z1Ptr', and `z2Ptr'.
 */
void
__sub96(uint a0, uint a1, uint a2,
        uint b0, uint b1, uint b2,
        out uint z0Ptr,
        out uint z1Ptr,
        out uint z2Ptr)
{
   uint z2 = a2 - b2;
   uint borrow1 = uint(a2 < b2);
   uint z1 = a1 - b1;
   uint borrow0 = uint(a1 < b1);
   uint z0 = a0 - b0;
   z0 -= uint(z1 < borrow1);
   z1 -= borrow1;
   z0 -= borrow0;
   z2Ptr = z2;
   z1Ptr = z1;
   z0Ptr = z0;
}

/* Returns an approximation to the 32-bit integer quotient obtained by dividing
 * `b' into the 64-bit value formed by concatenating `a0' and `a1'. The
 * divisor `b' must be at least 2^31. If q is the exact quotient truncated
 * toward zero, the approximation returned lies between q and q + 2 inclusive.
 * If the exact quotient q is larger than 32 bits, the maximum positive 32-bit
 * unsigned integer is returned.
 */
uint
__estimateDiv64To32(uint a0, uint a1, uint b)
{
   uint b0;
   uint b1;
   uint rem0 = 0u;
   uint rem1 = 0u;
   uint term0 = 0u;
   uint term1 = 0u;
   uint z;

   if (b <= a0)
      return 0xFFFFFFFFu;
   b0 = b>>16;
   z = (b0<<16 <= a0) ? 0xFFFF0000u : (a0 / b0)<<16;
   umulExtended(b, z, term0, term1);
   __sub64(a0, a1, term0, term1, rem0, rem1);
   while (int(rem0) < 0) {
      z -= 0x10000u;
      b1 = b<<16;
      __add64(rem0, rem1, b0, b1, rem0, rem1);
   }
   rem0 = (rem0<<16) | (rem1>>16);
   z |= (b0<<16 <= rem0) ? 0xFFFFu : rem0 / b0;
   return z;
}

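/* Worked example (illustrative, not from the source): dividing 1 * 2^32
 * (a0 = 1u, a1 = 0u) by 2^31 gives exactly 2, and
 * __estimateDiv64To32(1u, 0u, 0x80000000u) returns 2u; in general the
 * estimate may exceed the true quotient by up to 2.
 */
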
uint
__sqrtOddAdjustments(int index)
{
   uint res = 0u;
   if (index == 0)
      res = 0x0004u;
   if (index == 1)
      res = 0x0022u;
   if (index == 2)
      res = 0x005Du;
   if (index == 3)
      res = 0x00B1u;
   if (index == 4)
      res = 0x011Du;
   if (index == 5)
      res = 0x019Fu;
   if (index == 6)
      res = 0x0236u;
   if (index == 7)
      res = 0x02E0u;
   if (index == 8)
      res = 0x039Cu;
   if (index == 9)
      res = 0x0468u;
   if (index == 10)
      res = 0x0545u;
   if (index == 11)
      res = 0x0631u;
   if (index == 12)
      res = 0x072Bu;
   if (index == 13)
      res = 0x0832u;
   if (index == 14)
      res = 0x0946u;
   if (index == 15)
      res = 0x0A67u;

   return res;
}

uint
__sqrtEvenAdjustments(int index)
{
   uint res = 0u;
   if (index == 0)
      res = 0x0A2Du;
   if (index == 1)
      res = 0x08AFu;
   if (index == 2)
      res = 0x075Au;
   if (index == 3)
      res = 0x0629u;
   if (index == 4)
      res = 0x051Au;
   if (index == 5)
      res = 0x0429u;
   if (index == 6)
      res = 0x0356u;
   if (index == 7)
      res = 0x029Eu;
   if (index == 8)
      res = 0x0200u;
   if (index == 9)
      res = 0x0179u;
   if (index == 10)
      res = 0x0109u;
   if (index == 11)
      res = 0x00AFu;
   if (index == 12)
      res = 0x0068u;
   if (index == 13)
      res = 0x0034u;
   if (index == 14)
      res = 0x0012u;
   if (index == 15)
      res = 0x0002u;

   return res;
}

/* Returns an approximation to the square root of the 32-bit significand given
 * by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of
 * `aExp' (the least significant bit) is 1, the integer returned approximates
 * 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp'
 * is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either
 * case, the approximation returned lies strictly within +/-2 of the exact
 * value.
 */
uint
__estimateSqrt32(int aExp, uint a)
{
   uint z;

   int index = int(a>>27 & 15u);
   if ((aExp & 1) != 0) {
      z = 0x4000u + (a>>17) - __sqrtOddAdjustments(index);
      z = ((a / z)<<14) + (z<<15);
      a >>= 1;
   } else {
      z = 0x8000u + (a>>17) - __sqrtEvenAdjustments(index);
      z = a / z + z;
      z = (0x20000u <= z) ? 0xFFFF8000u : (z<<15);
      if (z <= a)
         return uint(int(a)>>1);
   }
   return ((__estimateDiv64To32(a, 0u, z))>>1) + (z>>1);
}

/* Returns the square root of the double-precision floating-point value `a'.
 * The operation is performed according to the IEEE Standard for Floating-Point
 * Arithmetic.
 */
uint64_t
__fsqrt64(uint64_t a)
{
   uint zFrac0 = 0u;
   uint zFrac1 = 0u;
   uint zFrac2 = 0u;
   uint doubleZFrac0 = 0u;
   uint rem0 = 0u;
   uint rem1 = 0u;
   uint rem2 = 0u;
   uint rem3 = 0u;
   uint term0 = 0u;
   uint term1 = 0u;
   uint term2 = 0u;
   uint term3 = 0u;
   uint64_t default_nan = 0xFFFFFFFFFFFFFFFFUL;

   uint aFracLo = __extractFloat64FracLo(a);
   uint aFracHi = __extractFloat64FracHi(a);
   int aExp = __extractFloat64Exp(a);
   uint aSign = __extractFloat64Sign(a);
   if (aExp == 0x7FF) {
      if ((aFracHi | aFracLo) != 0u)
         return __propagateFloat64NaN(a, a);
      if (aSign == 0u)
         return a;
      return default_nan;
   }
   if (aSign != 0u) {
      if ((uint(aExp) | aFracHi | aFracLo) == 0u)
         return a;
      return default_nan;
   }
   if (aExp == 0) {
      if ((aFracHi | aFracLo) == 0u)
         return __packFloat64(0u, 0, 0u, 0u);
      __normalizeFloat64Subnormal(aFracHi, aFracLo, aExp, aFracHi, aFracLo);
   }
   int zExp = ((aExp - 0x3FF)>>1) + 0x3FE;
   aFracHi |= 0x00100000u;
   __shortShift64Left(aFracHi, aFracLo, 11, term0, term1);
   zFrac0 = (__estimateSqrt32(aExp, term0)>>1) + 1u;
   if (zFrac0 == 0u)
      zFrac0 = 0x7FFFFFFFu;
   doubleZFrac0 = zFrac0 + zFrac0;
   __shortShift64Left(aFracHi, aFracLo, 9 - (aExp & 1), aFracHi, aFracLo);
   umulExtended(zFrac0, zFrac0, term0, term1);
   __sub64(aFracHi, aFracLo, term0, term1, rem0, rem1);
   while (int(rem0) < 0) {
      --zFrac0;
      doubleZFrac0 -= 2u;
      __add64(rem0, rem1, 0u, doubleZFrac0 | 1u, rem0, rem1);
   }
   zFrac1 = __estimateDiv64To32(rem1, 0u, doubleZFrac0);
   if ((zFrac1 & 0x1FFu) <= 5u) {
      if (zFrac1 == 0u)
         zFrac1 = 1u;
      umulExtended(doubleZFrac0, zFrac1, term1, term2);
      __sub64(rem1, 0u, term1, term2, rem1, rem2);
      umulExtended(zFrac1, zFrac1, term2, term3);
      __sub96(rem1, rem2, 0u, 0u, term2, term3, rem1, rem2, rem3);
      while (int(rem1) < 0) {
         --zFrac1;
         __shortShift64Left(0u, zFrac1, 1, term2, term3);
         term3 |= 1u;
         term2 |= doubleZFrac0;
         __add96(rem1, rem2, rem3, 0u, term2, term3, rem1, rem2, rem3);
      }
      zFrac1 |= uint((rem1 | rem2 | rem3) != 0u);
   }
   __shift64ExtraRightJamming(zFrac0, zFrac1, 0u, 10, zFrac0, zFrac1, zFrac2);
   return __roundAndPackFloat64(0u, zExp, zFrac0, zFrac1, zFrac2);
}

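/* Illustrative expectation (not part of the original file): the bit
 * pattern of 4.0 is 0x4010000000000000ul, and __fsqrt64 of it should
 * produce 0x4000000000000000ul, the bit pattern of 2.0.
 */
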
uint64_t
__ftrunc64(uint64_t __a)
{
   uvec2 a = unpackUint2x32(__a);
   int aExp = __extractFloat64Exp(__a);
   uint zLo;
   uint zHi;

   int unbiasedExp = aExp - 1023;
   int fracBits = 52 - unbiasedExp;
   uint maskLo = mix(~0u << fracBits, 0u, fracBits >= 32);
   uint maskHi = mix(~0u << (fracBits - 32), ~0u, fracBits < 33);
   zLo = maskLo & a.x;
   zHi = maskHi & a.y;

   zLo = mix(zLo, 0u, unbiasedExp < 0);
   zHi = mix(zHi, 0u, unbiasedExp < 0);
   zLo = mix(zLo, a.x, unbiasedExp > 52);
   zHi = mix(zHi, a.y, unbiasedExp > 52);
   return packUint2x32(uvec2(zLo, zHi));
}

uint64_t
__ffloor64(uint64_t a)
{
   /* The big assumption is that when 'a' is NaN, __ftrunc(a) returns a.  Based
    * on that assumption, NaN values that don't have the sign bit will safely
    * return NaN (identity).  This is guarded by RELAXED_NAN_PROPAGATION
    * because otherwise the NaN should have the "signal" bit set.  The
    * __fadd64 will ensure that occurs.
    */
   bool is_positive =
#if defined RELAXED_NAN_PROPAGATION
      int(unpackUint2x32(a).y) >= 0
#else
      __fge64(a, 0ul)
#endif
      ;
   uint64_t tr = __ftrunc64(a);

   if (is_positive || __feq64(tr, a)) {
      return tr;
   } else {
      return __fadd64(tr, 0xbff0000000000000ul /* -1.0 */);
   }
}

uint64_t
__fround64(uint64_t __a)
{
   uvec2 a = unpackUint2x32(__a);
   int unbiasedExp = __extractFloat64Exp(__a) - 1023;
   uint aHi = a.y;
   uint aLo = a.x;

   if (unbiasedExp < 20) {
      if (unbiasedExp < 0) {
         if ((aHi & 0x80000000u) != 0u && aLo == 0u) {
            return 0ul;
         }
         aHi &= 0x80000000u;
         if ((a.y & 0x000FFFFFu) == 0u && a.x == 0u) {
            aLo = 0u;
            return packUint2x32(uvec2(aLo, aHi));
         }
         aHi = mix(aHi, (aHi | 0x3FF00000u), unbiasedExp == -1);
         aLo = 0u;
      } else {
         uint maskExp = 0x000FFFFFu >> unbiasedExp;
         uint lastBit = maskExp + 1;
         aHi += 0x00080000u >> unbiasedExp;
         if ((aHi & maskExp) == 0u)
            aHi &= ~lastBit;
         aHi &= ~maskExp;
         aLo = 0u;
      }
   } else if (unbiasedExp > 51 || unbiasedExp == 1024) {
      return __a;
   } else {
      uint maskExp = 0xFFFFFFFFu >> (unbiasedExp - 20);
      if ((aLo & maskExp) == 0u)
         return __a;
      uint tmp = aLo + (1u << (51 - unbiasedExp));
      if (tmp < aLo)
         aHi += 1u;
      aLo = tmp;
      aLo &= ~maskExp;
   }

   return packUint2x32(uvec2(aLo, aHi));
}

uint64_t
__fmin64(uint64_t a, uint64_t b)
{
   /* This weird layout matters. Doing the "obvious" thing results in extra
    * flow control being inserted to implement the short-circuit evaluation
    * rules. Flow control is bad!
    */
   bool b_nan = __is_nan(b);
   bool a_lt_b = __flt64_nonnan_minmax(a, b);
   bool a_nan = __is_nan(a);

   return (b_nan || a_lt_b) && !a_nan ? a : b;
}

uint64_t
__fmax64(uint64_t a, uint64_t b)
{
   /* This weird layout matters. Doing the "obvious" thing results in extra
    * flow control being inserted to implement the short-circuit evaluation
    * rules. Flow control is bad!
    */
   bool b_nan = __is_nan(b);
   bool a_lt_b = __flt64_nonnan_minmax(a, b);
   bool a_nan = __is_nan(a);

   return (b_nan || a_lt_b) && !a_nan ? b : a;
}

uint64_t
__ffract64(uint64_t a)
{
   return __fadd64(a, __fneg64(__ffloor64(a)));
}

bool
__fisfinite64(uint64_t __a)
{
   return __extractFloat64Exp(__a) != 0x7ff;
}

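/* Hedged end-of-file sketch (not part of the original library): a sanity
 * check combining several of the helpers above.  1.5 is
 * 0x3FF8000000000000ul; truncation gives 1.0 and the fractional part is
 * 0.5 (0x3FE0000000000000ul).
 */
bool
__example_fract_of_one_and_a_half()
{
   return __feq64(__ffract64(0x3FF8000000000000ul), 0x3FE0000000000000ul);
}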