1 // Copyright (c) 2015-2016 The Khronos Group Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef LIBSPIRV_UTIL_HEX_FLOAT_H_ 16 #define LIBSPIRV_UTIL_HEX_FLOAT_H_ 17 18 #include <cassert> 19 #include <cctype> 20 #include <cmath> 21 #include <cstdint> 22 #include <iomanip> 23 #include <limits> 24 #include <sstream> 25 26 #include "bitutils.h" 27 28 namespace spvutils { 29 30 class Float16 { 31 public: Float16(uint16_t v)32 Float16(uint16_t v) : val(v) {} Float16()33 Float16() {} isNan(const Float16 & val)34 static bool isNan(const Float16& val) { 35 return ((val.val & 0x7C00) == 0x7C00) && ((val.val & 0x3FF) != 0); 36 } 37 // Returns true if the given value is any kind of infinity. isInfinity(const Float16 & val)38 static bool isInfinity(const Float16& val) { 39 return ((val.val & 0x7C00) == 0x7C00) && ((val.val & 0x3FF) == 0); 40 } Float16(const Float16 & other)41 Float16(const Float16& other) { val = other.val; } get_value()42 uint16_t get_value() const { return val; } 43 44 // Returns the maximum normal value. max()45 static Float16 max() { return Float16(0x7bff); } 46 // Returns the lowest normal value. lowest()47 static Float16 lowest() { return Float16(0xfbff); } 48 49 private: 50 uint16_t val; 51 }; 52 53 // To specialize this type, you must override uint_type to define 54 // an unsigned integer that can fit your floating point type. 55 // You must also add a isNan function that returns true if 56 // a value is Nan. 57 template <typename T> 58 struct FloatProxyTraits { 59 typedef void uint_type; 60 }; 61 62 template <> 63 struct FloatProxyTraits<float> { 64 typedef uint32_t uint_type; 65 static bool isNan(float f) { return std::isnan(f); } 66 // Returns true if the given value is any kind of infinity. 67 static bool isInfinity(float f) { return std::isinf(f); } 68 // Returns the maximum normal value. 69 static float max() { return std::numeric_limits<float>::max(); } 70 // Returns the lowest normal value. 71 static float lowest() { return std::numeric_limits<float>::lowest(); } 72 }; 73 74 template <> 75 struct FloatProxyTraits<double> { 76 typedef uint64_t uint_type; 77 static bool isNan(double f) { return std::isnan(f); } 78 // Returns true if the given value is any kind of infinity. 79 static bool isInfinity(double f) { return std::isinf(f); } 80 // Returns the maximum normal value. 81 static double max() { return std::numeric_limits<double>::max(); } 82 // Returns the lowest normal value. 83 static double lowest() { return std::numeric_limits<double>::lowest(); } 84 }; 85 86 template <> 87 struct FloatProxyTraits<Float16> { 88 typedef uint16_t uint_type; 89 static bool isNan(Float16 f) { return Float16::isNan(f); } 90 // Returns true if the given value is any kind of infinity. 91 static bool isInfinity(Float16 f) { return Float16::isInfinity(f); } 92 // Returns the maximum normal value. 93 static Float16 max() { return Float16::max(); } 94 // Returns the lowest normal value. 95 static Float16 lowest() { return Float16::lowest(); } 96 }; 97 98 // Since copying a floating point number (especially if it is NaN) 99 // does not guarantee that bits are preserved, this class lets us 100 // store the type and use it as a float when necessary. 101 template <typename T> 102 class FloatProxy { 103 public: 104 typedef typename FloatProxyTraits<T>::uint_type uint_type; 105 106 // Since this is to act similar to the normal floats, 107 // do not initialize the data by default. 108 FloatProxy() {} 109 110 // Intentionally non-explicit. This is a proxy type so 111 // implicit conversions allow us to use it more transparently. 112 FloatProxy(T val) { data_ = BitwiseCast<uint_type>(val); } 113 114 // Intentionally non-explicit. This is a proxy type so 115 // implicit conversions allow us to use it more transparently. 116 FloatProxy(uint_type val) { data_ = val; } 117 118 // This is helpful to have and is guaranteed not to stomp bits. 119 FloatProxy<T> operator-() const { 120 return static_cast<uint_type>(data_ ^ 121 (uint_type(0x1) << (sizeof(T) * 8 - 1))); 122 } 123 124 // Returns the data as a floating point value. 125 T getAsFloat() const { return BitwiseCast<T>(data_); } 126 127 // Returns the raw data. 128 uint_type data() const { return data_; } 129 130 // Returns true if the value represents any type of NaN. 131 bool isNan() { return FloatProxyTraits<T>::isNan(getAsFloat()); } 132 // Returns true if the value represents any type of infinity. 133 bool isInfinity() { return FloatProxyTraits<T>::isInfinity(getAsFloat()); } 134 135 // Returns the maximum normal value. 136 static FloatProxy<T> max() { 137 return FloatProxy<T>(FloatProxyTraits<T>::max()); 138 } 139 // Returns the lowest normal value. 140 static FloatProxy<T> lowest() { 141 return FloatProxy<T>(FloatProxyTraits<T>::lowest()); 142 } 143 144 private: 145 uint_type data_; 146 }; 147 148 template <typename T> 149 bool operator==(const FloatProxy<T>& first, const FloatProxy<T>& second) { 150 return first.data() == second.data(); 151 } 152 153 // Reads a FloatProxy value as a normal float from a stream. 154 template <typename T> 155 std::istream& operator>>(std::istream& is, FloatProxy<T>& value) { 156 T float_val; 157 is >> float_val; 158 value = FloatProxy<T>(float_val); 159 return is; 160 } 161 162 // This is an example traits. It is not meant to be used in practice, but will 163 // be the default for any non-specialized type. 164 template <typename T> 165 struct HexFloatTraits { 166 // Integer type that can store this hex-float. 167 typedef void uint_type; 168 // Signed integer type that can store this hex-float. 169 typedef void int_type; 170 // The numerical type that this HexFloat represents. 171 typedef void underlying_type; 172 // The type needed to construct the underlying type. 173 typedef void native_type; 174 // The number of bits that are actually relevant in the uint_type. 175 // This allows us to deal with, for example, 24-bit values in a 32-bit 176 // integer. 177 static const uint32_t num_used_bits = 0; 178 // Number of bits that represent the exponent. 179 static const uint32_t num_exponent_bits = 0; 180 // Number of bits that represent the fractional part. 181 static const uint32_t num_fraction_bits = 0; 182 // The bias of the exponent. (How much we need to subtract from the stored 183 // value to get the correct value.) 184 static const uint32_t exponent_bias = 0; 185 }; 186 187 // Traits for IEEE float. 188 // 1 sign bit, 8 exponent bits, 23 fractional bits. 189 template <> 190 struct HexFloatTraits<FloatProxy<float>> { 191 typedef uint32_t uint_type; 192 typedef int32_t int_type; 193 typedef FloatProxy<float> underlying_type; 194 typedef float native_type; 195 static const uint_type num_used_bits = 32; 196 static const uint_type num_exponent_bits = 8; 197 static const uint_type num_fraction_bits = 23; 198 static const uint_type exponent_bias = 127; 199 }; 200 201 // Traits for IEEE double. 202 // 1 sign bit, 11 exponent bits, 52 fractional bits. 203 template <> 204 struct HexFloatTraits<FloatProxy<double>> { 205 typedef uint64_t uint_type; 206 typedef int64_t int_type; 207 typedef FloatProxy<double> underlying_type; 208 typedef double native_type; 209 static const uint_type num_used_bits = 64; 210 static const uint_type num_exponent_bits = 11; 211 static const uint_type num_fraction_bits = 52; 212 static const uint_type exponent_bias = 1023; 213 }; 214 215 // Traits for IEEE half. 216 // 1 sign bit, 5 exponent bits, 10 fractional bits. 217 template <> 218 struct HexFloatTraits<FloatProxy<Float16>> { 219 typedef uint16_t uint_type; 220 typedef int16_t int_type; 221 typedef uint16_t underlying_type; 222 typedef uint16_t native_type; 223 static const uint_type num_used_bits = 16; 224 static const uint_type num_exponent_bits = 5; 225 static const uint_type num_fraction_bits = 10; 226 static const uint_type exponent_bias = 15; 227 }; 228 229 enum round_direction { 230 kRoundToZero, 231 kRoundToNearestEven, 232 kRoundToPositiveInfinity, 233 kRoundToNegativeInfinity 234 }; 235 236 // Template class that houses a floating pointer number. 237 // It exposes a number of constants based on the provided traits to 238 // assist in interpreting the bits of the value. 239 template <typename T, typename Traits = HexFloatTraits<T>> 240 class HexFloat { 241 public: 242 typedef typename Traits::uint_type uint_type; 243 typedef typename Traits::int_type int_type; 244 typedef typename Traits::underlying_type underlying_type; 245 typedef typename Traits::native_type native_type; 246 247 explicit HexFloat(T f) : value_(f) {} 248 249 T value() const { return value_; } 250 void set_value(T f) { value_ = f; } 251 252 // These are all written like this because it is convenient to have 253 // compile-time constants for all of these values. 254 255 // Pass-through values to save typing. 256 static const uint32_t num_used_bits = Traits::num_used_bits; 257 static const uint32_t exponent_bias = Traits::exponent_bias; 258 static const uint32_t num_exponent_bits = Traits::num_exponent_bits; 259 static const uint32_t num_fraction_bits = Traits::num_fraction_bits; 260 261 // Number of bits to shift left to set the highest relevant bit. 262 static const uint32_t top_bit_left_shift = num_used_bits - 1; 263 // How many nibbles (hex characters) the fractional part takes up. 264 static const uint32_t fraction_nibbles = (num_fraction_bits + 3) / 4; 265 // If the fractional part does not fit evenly into a hex character (4-bits) 266 // then we have to left-shift to get rid of leading 0s. This is the amount 267 // we have to shift (might be 0). 268 static const uint32_t num_overflow_bits = 269 fraction_nibbles * 4 - num_fraction_bits; 270 271 // The representation of the fraction, not the actual bits. This 272 // includes the leading bit that is usually implicit. 273 static const uint_type fraction_represent_mask = 274 spvutils::SetBits<uint_type, 0, 275 num_fraction_bits + num_overflow_bits>::get; 276 277 // The topmost bit in the nibble-aligned fraction. 278 static const uint_type fraction_top_bit = 279 uint_type(1) << (num_fraction_bits + num_overflow_bits - 1); 280 281 // The least significant bit in the exponent, which is also the bit 282 // immediately to the left of the significand. 283 static const uint_type first_exponent_bit = uint_type(1) 284 << (num_fraction_bits); 285 286 // The mask for the encoded fraction. It does not include the 287 // implicit bit. 288 static const uint_type fraction_encode_mask = 289 spvutils::SetBits<uint_type, 0, num_fraction_bits>::get; 290 291 // The bit that is used as a sign. 292 static const uint_type sign_mask = uint_type(1) << top_bit_left_shift; 293 294 // The bits that represent the exponent. 295 static const uint_type exponent_mask = 296 spvutils::SetBits<uint_type, num_fraction_bits, num_exponent_bits>::get; 297 298 // How far left the exponent is shifted. 299 static const uint32_t exponent_left_shift = num_fraction_bits; 300 301 // How far from the right edge the fraction is shifted. 302 static const uint32_t fraction_right_shift = 303 static_cast<uint32_t>(sizeof(uint_type) * 8) - num_fraction_bits; 304 305 // The maximum representable unbiased exponent. 306 static const int_type max_exponent = 307 (exponent_mask >> num_fraction_bits) - exponent_bias; 308 // The minimum representable exponent for normalized numbers. 309 static const int_type min_exponent = -static_cast<int_type>(exponent_bias); 310 311 // Returns the bits associated with the value. 312 uint_type getBits() const { return spvutils::BitwiseCast<uint_type>(value_); } 313 314 // Returns the bits associated with the value, without the leading sign bit. 315 uint_type getUnsignedBits() const { 316 return static_cast<uint_type>(spvutils::BitwiseCast<uint_type>(value_) & 317 ~sign_mask); 318 } 319 320 // Returns the bits associated with the exponent, shifted to start at the 321 // lsb of the type. 322 const uint_type getExponentBits() const { 323 return static_cast<uint_type>((getBits() & exponent_mask) >> 324 num_fraction_bits); 325 } 326 327 // Returns the exponent in unbiased form. This is the exponent in the 328 // human-friendly form. 329 const int_type getUnbiasedExponent() const { 330 return static_cast<int_type>(getExponentBits() - exponent_bias); 331 } 332 333 // Returns just the significand bits from the value. 334 const uint_type getSignificandBits() const { 335 return getBits() & fraction_encode_mask; 336 } 337 338 // If the number was normalized, returns the unbiased exponent. 339 // If the number was denormal, normalize the exponent first. 340 const int_type getUnbiasedNormalizedExponent() const { 341 if ((getBits() & ~sign_mask) == 0) { // special case if everything is 0 342 return 0; 343 } 344 int_type exp = getUnbiasedExponent(); 345 if (exp == min_exponent) { // We are in denorm land. 346 uint_type significand_bits = getSignificandBits(); 347 while ((significand_bits & (first_exponent_bit >> 1)) == 0) { 348 significand_bits = static_cast<uint_type>(significand_bits << 1); 349 exp = static_cast<int_type>(exp - 1); 350 } 351 significand_bits &= fraction_encode_mask; 352 } 353 return exp; 354 } 355 356 // Returns the signficand after it has been normalized. 357 const uint_type getNormalizedSignificand() const { 358 int_type unbiased_exponent = getUnbiasedNormalizedExponent(); 359 uint_type significand = getSignificandBits(); 360 for (int_type i = unbiased_exponent; i <= min_exponent; ++i) { 361 significand = static_cast<uint_type>(significand << 1); 362 } 363 significand &= fraction_encode_mask; 364 return significand; 365 } 366 367 // Returns true if this number represents a negative value. 368 bool isNegative() const { return (getBits() & sign_mask) != 0; } 369 370 // Sets this HexFloat from the individual components. 371 // Note this assumes EVERY significand is normalized, and has an implicit 372 // leading one. This means that the only way that this method will set 0, 373 // is if you set a number so denormalized that it underflows. 374 // Do not use this method with raw bits extracted from a subnormal number, 375 // since subnormals do not have an implicit leading 1 in the significand. 376 // The significand is also expected to be in the 377 // lowest-most num_fraction_bits of the uint_type. 378 // The exponent is expected to be unbiased, meaning an exponent of 379 // 0 actually means 0. 380 // If underflow_round_up is set, then on underflow, if a number is non-0 381 // and would underflow, we round up to the smallest denorm. 382 void setFromSignUnbiasedExponentAndNormalizedSignificand( 383 bool negative, int_type exponent, uint_type significand, 384 bool round_denorm_up) { 385 bool significand_is_zero = significand == 0; 386 387 if (exponent <= min_exponent) { 388 // If this was denormalized, then we have to shift the bit on, meaning 389 // the significand is not zero. 390 significand_is_zero = false; 391 significand |= first_exponent_bit; 392 significand = static_cast<uint_type>(significand >> 1); 393 } 394 395 while (exponent < min_exponent) { 396 significand = static_cast<uint_type>(significand >> 1); 397 ++exponent; 398 } 399 400 if (exponent == min_exponent) { 401 if (significand == 0 && !significand_is_zero && round_denorm_up) { 402 significand = static_cast<uint_type>(0x1); 403 } 404 } 405 406 uint_type new_value = 0; 407 if (negative) { 408 new_value = static_cast<uint_type>(new_value | sign_mask); 409 } 410 exponent = static_cast<int_type>(exponent + exponent_bias); 411 assert(exponent >= 0); 412 413 // put it all together 414 exponent = static_cast<uint_type>((exponent << exponent_left_shift) & 415 exponent_mask); 416 significand = static_cast<uint_type>(significand & fraction_encode_mask); 417 new_value = static_cast<uint_type>(new_value | (exponent | significand)); 418 value_ = BitwiseCast<T>(new_value); 419 } 420 421 // Increments the significand of this number by the given amount. 422 // If this would spill the significand into the implicit bit, 423 // carry is set to true and the significand is shifted to fit into 424 // the correct location, otherwise carry is set to false. 425 // All significands and to_increment are assumed to be within the bounds 426 // for a valid significand. 427 static uint_type incrementSignificand(uint_type significand, 428 uint_type to_increment, bool* carry) { 429 significand = static_cast<uint_type>(significand + to_increment); 430 *carry = false; 431 if (significand & first_exponent_bit) { 432 *carry = true; 433 // The implicit 1-bit will have carried, so we should zero-out the 434 // top bit and shift back. 435 significand = static_cast<uint_type>(significand & ~first_exponent_bit); 436 significand = static_cast<uint_type>(significand >> 1); 437 } 438 return significand; 439 } 440 441 // These exist because MSVC throws warnings on negative right-shifts 442 // even if they are not going to be executed. Eg: 443 // constant_number < 0? 0: constant_number 444 // These convert the negative left-shifts into right shifts. 445 446 template <typename int_type> 447 uint_type negatable_left_shift(int_type N, uint_type val) 448 { 449 if(N >= 0) 450 return val << N; 451 452 return val >> -N; 453 } 454 455 template <typename int_type> 456 uint_type negatable_right_shift(int_type N, uint_type val) 457 { 458 if(N >= 0) 459 return val >> N; 460 461 return val << -N; 462 } 463 464 // Returns the significand, rounded to fit in a significand in 465 // other_T. This is shifted so that the most significant 466 // bit of the rounded number lines up with the most significant bit 467 // of the returned significand. 468 template <typename other_T> 469 typename other_T::uint_type getRoundedNormalizedSignificand( 470 round_direction dir, bool* carry_bit) { 471 typedef typename other_T::uint_type other_uint_type; 472 static const int_type num_throwaway_bits = 473 static_cast<int_type>(num_fraction_bits) - 474 static_cast<int_type>(other_T::num_fraction_bits); 475 476 static const uint_type last_significant_bit = 477 (num_throwaway_bits < 0) 478 ? 0 479 : negatable_left_shift(num_throwaway_bits, 1u); 480 static const uint_type first_rounded_bit = 481 (num_throwaway_bits < 1) 482 ? 0 483 : negatable_left_shift(num_throwaway_bits - 1, 1u); 484 485 static const uint_type throwaway_mask_bits = 486 num_throwaway_bits > 0 ? num_throwaway_bits : 0; 487 static const uint_type throwaway_mask = 488 spvutils::SetBits<uint_type, 0, throwaway_mask_bits>::get; 489 490 *carry_bit = false; 491 other_uint_type out_val = 0; 492 uint_type significand = getNormalizedSignificand(); 493 // If we are up-casting, then we just have to shift to the right location. 494 if (num_throwaway_bits <= 0) { 495 out_val = static_cast<other_uint_type>(significand); 496 uint_type shift_amount = static_cast<uint_type>(-num_throwaway_bits); 497 out_val = static_cast<other_uint_type>(out_val << shift_amount); 498 return out_val; 499 } 500 501 // If every non-representable bit is 0, then we don't have any casting to 502 // do. 503 if ((significand & throwaway_mask) == 0) { 504 return static_cast<other_uint_type>( 505 negatable_right_shift(num_throwaway_bits, significand)); 506 } 507 508 bool round_away_from_zero = false; 509 // We actually have to narrow the significand here, so we have to follow the 510 // rounding rules. 511 switch (dir) { 512 case kRoundToZero: 513 break; 514 case kRoundToPositiveInfinity: 515 round_away_from_zero = !isNegative(); 516 break; 517 case kRoundToNegativeInfinity: 518 round_away_from_zero = isNegative(); 519 break; 520 case kRoundToNearestEven: 521 // Have to round down, round bit is 0 522 if ((first_rounded_bit & significand) == 0) { 523 break; 524 } 525 if (((significand & throwaway_mask) & ~first_rounded_bit) != 0) { 526 // If any subsequent bit of the rounded portion is non-0 then we round 527 // up. 528 round_away_from_zero = true; 529 break; 530 } 531 // We are exactly half-way between 2 numbers, pick even. 532 if ((significand & last_significant_bit) != 0) { 533 // 1 for our last bit, round up. 534 round_away_from_zero = true; 535 break; 536 } 537 break; 538 } 539 540 if (round_away_from_zero) { 541 return static_cast<other_uint_type>( 542 negatable_right_shift(num_throwaway_bits, incrementSignificand( 543 significand, last_significant_bit, carry_bit))); 544 } else { 545 return static_cast<other_uint_type>( 546 negatable_right_shift(num_throwaway_bits, significand)); 547 } 548 } 549 550 // Casts this value to another HexFloat. If the cast is widening, 551 // then round_dir is ignored. If the cast is narrowing, then 552 // the result is rounded in the direction specified. 553 // This number will retain Nan and Inf values. 554 // It will also saturate to Inf if the number overflows, and 555 // underflow to (0 or min depending on rounding) if the number underflows. 556 template <typename other_T> 557 void castTo(other_T& other, round_direction round_dir) { 558 other = other_T(static_cast<typename other_T::native_type>(0)); 559 bool negate = isNegative(); 560 if (getUnsignedBits() == 0) { 561 if (negate) { 562 other.set_value(-other.value()); 563 } 564 return; 565 } 566 uint_type significand = getSignificandBits(); 567 bool carried = false; 568 typename other_T::uint_type rounded_significand = 569 getRoundedNormalizedSignificand<other_T>(round_dir, &carried); 570 571 int_type exponent = getUnbiasedExponent(); 572 if (exponent == min_exponent) { 573 // If we are denormal, normalize the exponent, so that we can encode 574 // easily. 575 exponent = static_cast<int_type>(exponent + 1); 576 for (uint_type check_bit = first_exponent_bit >> 1; check_bit != 0; 577 check_bit = static_cast<uint_type>(check_bit >> 1)) { 578 exponent = static_cast<int_type>(exponent - 1); 579 if (check_bit & significand) break; 580 } 581 } 582 583 bool is_nan = 584 (getBits() & exponent_mask) == exponent_mask && significand != 0; 585 bool is_inf = 586 !is_nan && 587 ((exponent + carried) > static_cast<int_type>(other_T::exponent_bias) || 588 (significand == 0 && (getBits() & exponent_mask) == exponent_mask)); 589 590 // If we are Nan or Inf we should pass that through. 591 if (is_inf) { 592 other.set_value(BitwiseCast<typename other_T::underlying_type>( 593 static_cast<typename other_T::uint_type>( 594 (negate ? other_T::sign_mask : 0) | other_T::exponent_mask))); 595 return; 596 } 597 if (is_nan) { 598 typename other_T::uint_type shifted_significand; 599 shifted_significand = static_cast<typename other_T::uint_type>( 600 negatable_left_shift( 601 static_cast<int_type>(other_T::num_fraction_bits) - 602 static_cast<int_type>(num_fraction_bits), significand)); 603 604 // We are some sort of Nan. We try to keep the bit-pattern of the Nan 605 // as close as possible. If we had to shift off bits so we are 0, then we 606 // just set the last bit. 607 other.set_value(BitwiseCast<typename other_T::underlying_type>( 608 static_cast<typename other_T::uint_type>( 609 (negate ? other_T::sign_mask : 0) | other_T::exponent_mask | 610 (shifted_significand == 0 ? 0x1 : shifted_significand)))); 611 return; 612 } 613 614 bool round_underflow_up = 615 isNegative() ? round_dir == kRoundToNegativeInfinity 616 : round_dir == kRoundToPositiveInfinity; 617 typedef typename other_T::int_type other_int_type; 618 // setFromSignUnbiasedExponentAndNormalizedSignificand will 619 // zero out any underflowing value (but retain the sign). 620 other.setFromSignUnbiasedExponentAndNormalizedSignificand( 621 negate, static_cast<other_int_type>(exponent), rounded_significand, 622 round_underflow_up); 623 return; 624 } 625 626 private: 627 T value_; 628 629 static_assert(num_used_bits == 630 Traits::num_exponent_bits + Traits::num_fraction_bits + 1, 631 "The number of bits do not fit"); 632 static_assert(sizeof(T) == sizeof(uint_type), "The type sizes do not match"); 633 }; 634 635 // Returns 4 bits represented by the hex character. 636 inline uint8_t get_nibble_from_character(int character) { 637 const char* dec = "0123456789"; 638 const char* lower = "abcdef"; 639 const char* upper = "ABCDEF"; 640 const char* p = nullptr; 641 if ((p = strchr(dec, character))) { 642 return static_cast<uint8_t>(p - dec); 643 } else if ((p = strchr(lower, character))) { 644 return static_cast<uint8_t>(p - lower + 0xa); 645 } else if ((p = strchr(upper, character))) { 646 return static_cast<uint8_t>(p - upper + 0xa); 647 } 648 649 assert(false && "This was called with a non-hex character"); 650 return 0; 651 } 652 653 // Outputs the given HexFloat to the stream. 654 template <typename T, typename Traits> 655 std::ostream& operator<<(std::ostream& os, const HexFloat<T, Traits>& value) { 656 typedef HexFloat<T, Traits> HF; 657 typedef typename HF::uint_type uint_type; 658 typedef typename HF::int_type int_type; 659 660 static_assert(HF::num_used_bits != 0, 661 "num_used_bits must be non-zero for a valid float"); 662 static_assert(HF::num_exponent_bits != 0, 663 "num_exponent_bits must be non-zero for a valid float"); 664 static_assert(HF::num_fraction_bits != 0, 665 "num_fractin_bits must be non-zero for a valid float"); 666 667 const uint_type bits = spvutils::BitwiseCast<uint_type>(value.value()); 668 const char* const sign = (bits & HF::sign_mask) ? "-" : ""; 669 const uint_type exponent = static_cast<uint_type>( 670 (bits & HF::exponent_mask) >> HF::num_fraction_bits); 671 672 uint_type fraction = static_cast<uint_type>((bits & HF::fraction_encode_mask) 673 << HF::num_overflow_bits); 674 675 const bool is_zero = exponent == 0 && fraction == 0; 676 const bool is_denorm = exponent == 0 && !is_zero; 677 678 // exponent contains the biased exponent we have to convert it back into 679 // the normal range. 680 int_type int_exponent = static_cast<int_type>(exponent - HF::exponent_bias); 681 // If the number is all zeros, then we actually have to NOT shift the 682 // exponent. 683 int_exponent = is_zero ? 0 : int_exponent; 684 685 // If we are denorm, then start shifting, and decreasing the exponent until 686 // our leading bit is 1. 687 688 if (is_denorm) { 689 while ((fraction & HF::fraction_top_bit) == 0) { 690 fraction = static_cast<uint_type>(fraction << 1); 691 int_exponent = static_cast<int_type>(int_exponent - 1); 692 } 693 // Since this is denormalized, we have to consume the leading 1 since it 694 // will end up being implicit. 695 fraction = static_cast<uint_type>(fraction << 1); // eat the leading 1 696 fraction &= HF::fraction_represent_mask; 697 } 698 699 uint_type fraction_nibbles = HF::fraction_nibbles; 700 // We do not have to display any trailing 0s, since this represents the 701 // fractional part. 702 while (fraction_nibbles > 0 && (fraction & 0xF) == 0) { 703 // Shift off any trailing values; 704 fraction = static_cast<uint_type>(fraction >> 4); 705 --fraction_nibbles; 706 } 707 708 const auto saved_flags = os.flags(); 709 const auto saved_fill = os.fill(); 710 711 os << sign << "0x" << (is_zero ? '0' : '1'); 712 if (fraction_nibbles) { 713 // Make sure to keep the leading 0s in place, since this is the fractional 714 // part. 715 os << "." << std::setw(static_cast<int>(fraction_nibbles)) 716 << std::setfill('0') << std::hex << fraction; 717 } 718 os << "p" << std::dec << (int_exponent >= 0 ? "+" : "") << int_exponent; 719 720 os.flags(saved_flags); 721 os.fill(saved_fill); 722 723 return os; 724 } 725 726 // Returns true if negate_value is true and the next character on the 727 // input stream is a plus or minus sign. In that case we also set the fail bit 728 // on the stream and set the value to the zero value for its type. 729 template <typename T, typename Traits> 730 inline bool RejectParseDueToLeadingSign(std::istream& is, bool negate_value, 731 HexFloat<T, Traits>& value) { 732 if (negate_value) { 733 auto next_char = is.peek(); 734 if (next_char == '-' || next_char == '+') { 735 // Fail the parse. Emulate standard behaviour by setting the value to 736 // the zero value, and set the fail bit on the stream. 737 value = HexFloat<T, Traits>(typename HexFloat<T, Traits>::uint_type(0)); 738 is.setstate(std::ios_base::failbit); 739 return true; 740 } 741 } 742 return false; 743 } 744 745 // Parses a floating point number from the given stream and stores it into the 746 // value parameter. 747 // If negate_value is true then the number may not have a leading minus or 748 // plus, and if it successfully parses, then the number is negated before 749 // being stored into the value parameter. 750 // If the value cannot be correctly parsed or overflows the target floating 751 // point type, then set the fail bit on the stream. 752 // TODO(dneto): Promise C++11 standard behavior in how the value is set in 753 // the error case, but only after all target platforms implement it correctly. 754 // In particular, the Microsoft C++ runtime appears to be out of spec. 755 template <typename T, typename Traits> 756 inline std::istream& ParseNormalFloat(std::istream& is, bool negate_value, 757 HexFloat<T, Traits>& value) { 758 if (RejectParseDueToLeadingSign(is, negate_value, value)) { 759 return is; 760 } 761 T val; 762 is >> val; 763 if (negate_value) { 764 val = -val; 765 } 766 value.set_value(val); 767 // In the failure case, map -0.0 to 0.0. 768 if (is.fail() && value.getUnsignedBits() == 0u) { 769 value = HexFloat<T, Traits>(typename HexFloat<T, Traits>::uint_type(0)); 770 } 771 if (val.isInfinity()) { 772 // Fail the parse. Emulate standard behaviour by setting the value to 773 // the closest normal value, and set the fail bit on the stream. 774 value.set_value((value.isNegative() || negate_value) ? T::lowest() 775 : T::max()); 776 is.setstate(std::ios_base::failbit); 777 } 778 return is; 779 } 780 781 // Specialization of ParseNormalFloat for FloatProxy<Float16> values. 782 // This will parse the float as it were a 32-bit floating point number, 783 // and then round it down to fit into a Float16 value. 784 // The number is rounded towards zero. 785 // If negate_value is true then the number may not have a leading minus or 786 // plus, and if it successfully parses, then the number is negated before 787 // being stored into the value parameter. 788 // If the value cannot be correctly parsed or overflows the target floating 789 // point type, then set the fail bit on the stream. 790 // TODO(dneto): Promise C++11 standard behavior in how the value is set in 791 // the error case, but only after all target platforms implement it correctly. 792 // In particular, the Microsoft C++ runtime appears to be out of spec. 793 template <> 794 inline std::istream& 795 ParseNormalFloat<FloatProxy<Float16>, HexFloatTraits<FloatProxy<Float16>>>( 796 std::istream& is, bool negate_value, 797 HexFloat<FloatProxy<Float16>, HexFloatTraits<FloatProxy<Float16>>>& value) { 798 // First parse as a 32-bit float. 799 HexFloat<FloatProxy<float>> float_val(0.0f); 800 ParseNormalFloat(is, negate_value, float_val); 801 802 // Then convert to 16-bit float, saturating at infinities, and 803 // rounding toward zero. 804 float_val.castTo(value, kRoundToZero); 805 806 // Overflow on 16-bit behaves the same as for 32- and 64-bit: set the 807 // fail bit and set the lowest or highest value. 808 if (Float16::isInfinity(value.value().getAsFloat())) { 809 value.set_value(value.isNegative() ? Float16::lowest() : Float16::max()); 810 is.setstate(std::ios_base::failbit); 811 } 812 return is; 813 } 814 815 // Reads a HexFloat from the given stream. 816 // If the float is not encoded as a hex-float then it will be parsed 817 // as a regular float. 818 // This may fail if your stream does not support at least one unget. 819 // Nan values can be encoded with "0x1.<not zero>p+exponent_bias". 820 // This would normally overflow a float and round to 821 // infinity but this special pattern is the exact representation for a NaN, 822 // and therefore is actually encoded as the correct NaN. To encode inf, 823 // either 0x0p+exponent_bias can be specified or any exponent greater than 824 // exponent_bias. 825 // Examples using IEEE 32-bit float encoding. 826 // 0x1.0p+128 (+inf) 827 // -0x1.0p-128 (-inf) 828 // 829 // 0x1.1p+128 (+Nan) 830 // -0x1.1p+128 (-Nan) 831 // 832 // 0x1p+129 (+inf) 833 // -0x1p+129 (-inf) 834 template <typename T, typename Traits> 835 std::istream& operator>>(std::istream& is, HexFloat<T, Traits>& value) { 836 using HF = HexFloat<T, Traits>; 837 using uint_type = typename HF::uint_type; 838 using int_type = typename HF::int_type; 839 840 value.set_value(static_cast<typename HF::native_type>(0.f)); 841 842 if (is.flags() & std::ios::skipws) { 843 // If the user wants to skip whitespace , then we should obey that. 844 while (std::isspace(is.peek())) { 845 is.get(); 846 } 847 } 848 849 auto next_char = is.peek(); 850 bool negate_value = false; 851 852 if (next_char != '-' && next_char != '0') { 853 return ParseNormalFloat(is, negate_value, value); 854 } 855 856 if (next_char == '-') { 857 negate_value = true; 858 is.get(); 859 next_char = is.peek(); 860 } 861 862 if (next_char == '0') { 863 is.get(); // We may have to unget this. 864 auto maybe_hex_start = is.peek(); 865 if (maybe_hex_start != 'x' && maybe_hex_start != 'X') { 866 is.unget(); 867 return ParseNormalFloat(is, negate_value, value); 868 } else { 869 is.get(); // Throw away the 'x'; 870 } 871 } else { 872 return ParseNormalFloat(is, negate_value, value); 873 } 874 875 // This "looks" like a hex-float so treat it as one. 876 bool seen_p = false; 877 bool seen_dot = false; 878 uint_type fraction_index = 0; 879 880 uint_type fraction = 0; 881 int_type exponent = HF::exponent_bias; 882 883 // Strip off leading zeros so we don't have to special-case them later. 884 while ((next_char = is.peek()) == '0') { 885 is.get(); 886 } 887 888 bool is_denorm = 889 true; // Assume denorm "representation" until we hear otherwise. 890 // NB: This does not mean the value is actually denorm, 891 // it just means that it was written 0. 892 bool bits_written = false; // Stays false until we write a bit. 893 while (!seen_p && !seen_dot) { 894 // Handle characters that are left of the fractional part. 895 if (next_char == '.') { 896 seen_dot = true; 897 } else if (next_char == 'p') { 898 seen_p = true; 899 } else if (::isxdigit(next_char)) { 900 // We know this is not denormalized since we have stripped all leading 901 // zeroes and we are not a ".". 902 is_denorm = false; 903 int number = get_nibble_from_character(next_char); 904 for (int i = 0; i < 4; ++i, number <<= 1) { 905 uint_type write_bit = (number & 0x8) ? 0x1 : 0x0; 906 if (bits_written) { 907 // If we are here the bits represented belong in the fractional 908 // part of the float, and we have to adjust the exponent accordingly. 909 fraction = static_cast<uint_type>( 910 fraction | 911 static_cast<uint_type>( 912 write_bit << (HF::top_bit_left_shift - fraction_index++))); 913 exponent = static_cast<int_type>(exponent + 1); 914 } 915 bits_written |= write_bit != 0; 916 } 917 } else { 918 // We have not found our exponent yet, so we have to fail. 919 is.setstate(std::ios::failbit); 920 return is; 921 } 922 is.get(); 923 next_char = is.peek(); 924 } 925 bits_written = false; 926 while (seen_dot && !seen_p) { 927 // Handle only fractional parts now. 928 if (next_char == 'p') { 929 seen_p = true; 930 } else if (::isxdigit(next_char)) { 931 int number = get_nibble_from_character(next_char); 932 for (int i = 0; i < 4; ++i, number <<= 1) { 933 uint_type write_bit = (number & 0x8) ? 0x01 : 0x00; 934 bits_written |= write_bit != 0; 935 if (is_denorm && !bits_written) { 936 // Handle modifying the exponent here this way we can handle 937 // an arbitrary number of hex values without overflowing our 938 // integer. 939 exponent = static_cast<int_type>(exponent - 1); 940 } else { 941 fraction = static_cast<uint_type>( 942 fraction | 943 static_cast<uint_type>( 944 write_bit << (HF::top_bit_left_shift - fraction_index++))); 945 } 946 } 947 } else { 948 // We still have not found our 'p' exponent yet, so this is not a valid 949 // hex-float. 950 is.setstate(std::ios::failbit); 951 return is; 952 } 953 is.get(); 954 next_char = is.peek(); 955 } 956 957 bool seen_sign = false; 958 int8_t exponent_sign = 1; 959 int_type written_exponent = 0; 960 while (true) { 961 if ((next_char == '-' || next_char == '+')) { 962 if (seen_sign) { 963 is.setstate(std::ios::failbit); 964 return is; 965 } 966 seen_sign = true; 967 exponent_sign = (next_char == '-') ? -1 : 1; 968 } else if (::isdigit(next_char)) { 969 // Hex-floats express their exponent as decimal. 970 written_exponent = static_cast<int_type>(written_exponent * 10); 971 written_exponent = 972 static_cast<int_type>(written_exponent + (next_char - '0')); 973 } else { 974 break; 975 } 976 is.get(); 977 next_char = is.peek(); 978 } 979 980 written_exponent = static_cast<int_type>(written_exponent * exponent_sign); 981 exponent = static_cast<int_type>(exponent + written_exponent); 982 983 bool is_zero = is_denorm && (fraction == 0); 984 if (is_denorm && !is_zero) { 985 fraction = static_cast<uint_type>(fraction << 1); 986 exponent = static_cast<int_type>(exponent - 1); 987 } else if (is_zero) { 988 exponent = 0; 989 } 990 991 if (exponent <= 0 && !is_zero) { 992 fraction = static_cast<uint_type>(fraction >> 1); 993 fraction |= static_cast<uint_type>(1) << HF::top_bit_left_shift; 994 } 995 996 fraction = (fraction >> HF::fraction_right_shift) & HF::fraction_encode_mask; 997 998 const int_type max_exponent = 999 SetBits<uint_type, 0, HF::num_exponent_bits>::get; 1000 1001 // Handle actual denorm numbers 1002 while (exponent < 0 && !is_zero) { 1003 fraction = static_cast<uint_type>(fraction >> 1); 1004 exponent = static_cast<int_type>(exponent + 1); 1005 1006 fraction &= HF::fraction_encode_mask; 1007 if (fraction == 0) { 1008 // We have underflowed our fraction. We should clamp to zero. 1009 is_zero = true; 1010 exponent = 0; 1011 } 1012 } 1013 1014 // We have overflowed so we should be inf/-inf. 1015 if (exponent > max_exponent) { 1016 exponent = max_exponent; 1017 fraction = 0; 1018 } 1019 1020 uint_type output_bits = static_cast<uint_type>( 1021 static_cast<uint_type>(negate_value ? 1 : 0) << HF::top_bit_left_shift); 1022 output_bits |= fraction; 1023 1024 uint_type shifted_exponent = static_cast<uint_type>( 1025 static_cast<uint_type>(exponent << HF::exponent_left_shift) & 1026 HF::exponent_mask); 1027 output_bits |= shifted_exponent; 1028 1029 T output_float = spvutils::BitwiseCast<T>(output_bits); 1030 value.set_value(output_float); 1031 1032 return is; 1033 } 1034 1035 // Writes a FloatProxy value to a stream. 1036 // Zero and normal numbers are printed in the usual notation, but with 1037 // enough digits to fully reproduce the value. Other values (subnormal, 1038 // NaN, and infinity) are printed as a hex float. 1039 template <typename T> 1040 std::ostream& operator<<(std::ostream& os, const FloatProxy<T>& value) { 1041 auto float_val = value.getAsFloat(); 1042 switch (std::fpclassify(float_val)) { 1043 case FP_ZERO: 1044 case FP_NORMAL: { 1045 auto saved_precision = os.precision(); 1046 os.precision(std::numeric_limits<T>::digits10); 1047 os << float_val; 1048 os.precision(saved_precision); 1049 } break; 1050 default: 1051 os << HexFloat<FloatProxy<T>>(value); 1052 break; 1053 } 1054 return os; 1055 } 1056 1057 template <> 1058 inline std::ostream& operator<<<Float16>(std::ostream& os, 1059 const FloatProxy<Float16>& value) { 1060 os << HexFloat<FloatProxy<Float16>>(value); 1061 return os; 1062 } 1063 } 1064 1065 #endif // LIBSPIRV_UTIL_HEX_FLOAT_H_ 1066