1 //===-- Abstract class for bit manipulation of float numbers. ---*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 // ----------------------------------------------------------------------------- 10 // **** WARNING **** 11 // This file is shared with libc++. You should also be careful when adding 12 // dependencies to this file, since it needs to build for all libc++ targets. 13 // ----------------------------------------------------------------------------- 14 15 #ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_FPBITS_H 16 #define LLVM_LIBC_SRC___SUPPORT_FPUTIL_FPBITS_H 17 18 #include "src/__support/CPP/bit.h" 19 #include "src/__support/CPP/type_traits.h" 20 #include "src/__support/common.h" 21 #include "src/__support/libc_assert.h" // LIBC_ASSERT 22 #include "src/__support/macros/attributes.h" // LIBC_INLINE, LIBC_INLINE_VAR 23 #include "src/__support/macros/config.h" 24 #include "src/__support/macros/properties/types.h" // LIBC_TYPES_HAS_FLOAT128 25 #include "src/__support/math_extras.h" // mask_trailing_ones 26 #include "src/__support/sign.h" // Sign 27 #include "src/__support/uint128.h" 28 29 #include <stdint.h> 30 31 namespace LIBC_NAMESPACE_DECL { 32 namespace fputil { 33 34 // The supported floating point types. 
enum class FPType {
  // IEEE 754 binary16: 1 sign bit, 5 exponent bits, 10 significand bits.
  IEEE754_Binary16,
  // IEEE 754 binary32: 1 sign bit, 8 exponent bits, 23 significand bits.
  IEEE754_Binary32,
  // IEEE 754 binary64: 1 sign bit, 11 exponent bits, 52 significand bits.
  IEEE754_Binary64,
  // IEEE 754 binary128: 1 sign bit, 15 exponent bits, 112 significand bits.
  IEEE754_Binary128,
  // x86 extended precision: 1 sign bit, 15 exponent bits, 64 significand bits.
  X86_Binary80,
};

// The classes hierarchy is as follows:
//
//             ┌───────────────────┐
//             │ FPLayout<FPType>  │
//             └─────────▲─────────┘
//                       │
//             ┌─────────┴─────────┐
//             │ FPStorage<FPType> │
//             └─────────▲─────────┘
//                       │
//          ┌────────────┴─────────────┐
//          │                          │
// ┌────────┴─────────┐ ┌──────────────┴──────────────────┐
// │ FPRepSem<FPType> │ │ FPRepSem<FPType::X86_Binary80>  │
// └────────▲─────────┘ └──────────────▲──────────────────┘
//          │                          │
//          └────────────┬─────────────┘
//                       │
//               ┌───────┴───────┐
//               │  FPRepImpl<T> │
//               └───────▲───────┘
//                       │
//              ┌────────┴────────┐
//        ┌─────┴─────┐     ┌─────┴─────┐
//        │  FPRep<T> │     │ FPBits<T> │
//        └───────────┘     └───────────┘
//
// - 'FPLayout' defines only a few constants, namely the 'StorageType' and
//   length of the sign, the exponent, fraction and significand parts.
// - 'FPStorage' builds more constants on top of those from 'FPLayout' like
//   exponent bias and masks. It also holds the bit representation of the
//   floating point as a 'StorageType' type and defines tools to assemble or
//   test these parts.
// - 'FPRepSem' defines functions to interact semantically with the floating
//   point representation. The default implementation is the one for 'IEEE754',
//   a specialization is provided for X86 Extended Precision.
// - 'FPRepImpl' derives from 'FPRepSem' and adds functions that are common to
//   all implementations or build on the ones in 'FPRepSem'.
// - 'FPRep' exposes all functions from 'FPRepImpl' and returns 'FPRep'
//   instances when using Builders (static functions to create values).
// - 'FPBits' exposes all the functions from 'FPRepImpl' but operates on the
//   native C++ floating point type instead of 'FPType'.
An additional 'get_val' 85 // function allows getting the C++ floating point type value back. Builders 86 // called from 'FPBits' return 'FPBits' instances. 87 88 namespace internal { 89 90 // Defines the layout (sign, exponent, significand) of a floating point type in 91 // memory. It also defines its associated StorageType, i.e., the unsigned 92 // integer type used to manipulate its representation. 93 // Additionally we provide the fractional part length, i.e., the number of bits 94 // after the decimal dot when the number is in normal form. 95 template <FPType> struct FPLayout {}; 96 97 template <> struct FPLayout<FPType::IEEE754_Binary16> { 98 using StorageType = uint16_t; 99 LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1; 100 LIBC_INLINE_VAR static constexpr int EXP_LEN = 5; 101 LIBC_INLINE_VAR static constexpr int SIG_LEN = 10; 102 LIBC_INLINE_VAR static constexpr int FRACTION_LEN = SIG_LEN; 103 }; 104 105 template <> struct FPLayout<FPType::IEEE754_Binary32> { 106 using StorageType = uint32_t; 107 LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1; 108 LIBC_INLINE_VAR static constexpr int EXP_LEN = 8; 109 LIBC_INLINE_VAR static constexpr int SIG_LEN = 23; 110 LIBC_INLINE_VAR static constexpr int FRACTION_LEN = SIG_LEN; 111 }; 112 113 template <> struct FPLayout<FPType::IEEE754_Binary64> { 114 using StorageType = uint64_t; 115 LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1; 116 LIBC_INLINE_VAR static constexpr int EXP_LEN = 11; 117 LIBC_INLINE_VAR static constexpr int SIG_LEN = 52; 118 LIBC_INLINE_VAR static constexpr int FRACTION_LEN = SIG_LEN; 119 }; 120 121 template <> struct FPLayout<FPType::IEEE754_Binary128> { 122 using StorageType = UInt128; 123 LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1; 124 LIBC_INLINE_VAR static constexpr int EXP_LEN = 15; 125 LIBC_INLINE_VAR static constexpr int SIG_LEN = 112; 126 LIBC_INLINE_VAR static constexpr int FRACTION_LEN = SIG_LEN; 127 }; 128 129 template <> struct FPLayout<FPType::X86_Binary80> { 130 #if 
__SIZEOF_LONG_DOUBLE__ == 12 131 using StorageType = UInt<__SIZEOF_LONG_DOUBLE__ * CHAR_BIT>; 132 #else 133 using StorageType = UInt128; 134 #endif 135 LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1; 136 LIBC_INLINE_VAR static constexpr int EXP_LEN = 15; 137 LIBC_INLINE_VAR static constexpr int SIG_LEN = 64; 138 LIBC_INLINE_VAR static constexpr int FRACTION_LEN = SIG_LEN - 1; 139 }; 140 141 // FPStorage derives useful constants from the FPLayout above. 142 template <FPType fp_type> struct FPStorage : public FPLayout<fp_type> { 143 using UP = FPLayout<fp_type>; 144 145 using UP::EXP_LEN; // The number of bits for the *exponent* part 146 using UP::SIG_LEN; // The number of bits for the *significand* part 147 using UP::SIGN_LEN; // The number of bits for the *sign* part 148 // For convenience, the sum of `SIG_LEN`, `EXP_LEN`, and `SIGN_LEN`. 149 LIBC_INLINE_VAR static constexpr int TOTAL_LEN = SIGN_LEN + EXP_LEN + SIG_LEN; 150 151 // The number of bits after the decimal dot when the number is in normal form. 152 using UP::FRACTION_LEN; 153 154 // An unsigned integer that is wide enough to contain all of the floating 155 // point bits. 156 using StorageType = typename UP::StorageType; 157 158 // The number of bits in StorageType. 159 LIBC_INLINE_VAR static constexpr int STORAGE_LEN = 160 sizeof(StorageType) * CHAR_BIT; 161 static_assert(STORAGE_LEN >= TOTAL_LEN); 162 163 // The exponent bias. Always positive. 164 LIBC_INLINE_VAR static constexpr int32_t EXP_BIAS = 165 (1U << (EXP_LEN - 1U)) - 1U; 166 static_assert(EXP_BIAS > 0); 167 168 // The bit pattern that keeps only the *significand* part. 169 LIBC_INLINE_VAR static constexpr StorageType SIG_MASK = 170 mask_trailing_ones<StorageType, SIG_LEN>(); 171 // The bit pattern that keeps only the *exponent* part. 172 LIBC_INLINE_VAR static constexpr StorageType EXP_MASK = 173 mask_trailing_ones<StorageType, EXP_LEN>() << SIG_LEN; 174 // The bit pattern that keeps only the *sign* part. 
175 LIBC_INLINE_VAR static constexpr StorageType SIGN_MASK = 176 mask_trailing_ones<StorageType, SIGN_LEN>() << (EXP_LEN + SIG_LEN); 177 // The bit pattern that keeps only the *exponent + significand* part. 178 LIBC_INLINE_VAR static constexpr StorageType EXP_SIG_MASK = 179 mask_trailing_ones<StorageType, EXP_LEN + SIG_LEN>(); 180 // The bit pattern that keeps only the *sign + exponent + significand* part. 181 LIBC_INLINE_VAR static constexpr StorageType FP_MASK = 182 mask_trailing_ones<StorageType, TOTAL_LEN>(); 183 // The bit pattern that keeps only the *fraction* part. 184 // i.e., the *significand* without the leading one. 185 LIBC_INLINE_VAR static constexpr StorageType FRACTION_MASK = 186 mask_trailing_ones<StorageType, FRACTION_LEN>(); 187 188 static_assert((SIG_MASK & EXP_MASK & SIGN_MASK) == 0, "masks disjoint"); 189 static_assert((SIG_MASK | EXP_MASK | SIGN_MASK) == FP_MASK, "masks cover"); 190 191 protected: 192 // Merge bits from 'a' and 'b' values according to 'mask'. 193 // Use 'a' bits when corresponding 'mask' bits are zeroes and 'b' bits when 194 // corresponding bits are ones. 195 LIBC_INLINE static constexpr StorageType merge(StorageType a, StorageType b, 196 StorageType mask) { 197 // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge 198 return a ^ ((a ^ b) & mask); 199 } 200 201 // A stongly typed integer that prevents mixing and matching integers with 202 // different semantics. 
203 template <typename T> struct TypedInt { 204 using value_type = T; 205 LIBC_INLINE constexpr explicit TypedInt(T value) : value(value) {} 206 LIBC_INLINE constexpr TypedInt(const TypedInt &value) = default; 207 LIBC_INLINE constexpr TypedInt &operator=(const TypedInt &value) = default; 208 209 LIBC_INLINE constexpr explicit operator T() const { return value; } 210 211 LIBC_INLINE constexpr StorageType to_storage_type() const { 212 return StorageType(value); 213 } 214 215 LIBC_INLINE friend constexpr bool operator==(TypedInt a, TypedInt b) { 216 return a.value == b.value; 217 } 218 LIBC_INLINE friend constexpr bool operator!=(TypedInt a, TypedInt b) { 219 return a.value != b.value; 220 } 221 222 protected: 223 T value; 224 }; 225 226 // An opaque type to store a floating point exponent. 227 // We define special values but it is valid to create arbitrary values as long 228 // as they are in the range [min, max]. 229 struct Exponent : public TypedInt<int32_t> { 230 using UP = TypedInt<int32_t>; 231 using UP::UP; 232 LIBC_INLINE static constexpr auto subnormal() { 233 return Exponent(-EXP_BIAS); 234 } 235 LIBC_INLINE static constexpr auto min() { return Exponent(1 - EXP_BIAS); } 236 LIBC_INLINE static constexpr auto zero() { return Exponent(0); } 237 LIBC_INLINE static constexpr auto max() { return Exponent(EXP_BIAS); } 238 LIBC_INLINE static constexpr auto inf() { return Exponent(EXP_BIAS + 1); } 239 }; 240 241 // An opaque type to store a floating point biased exponent. 242 // We define special values but it is valid to create arbitrary values as long 243 // as they are in the range [zero, bits_all_ones]. 244 // Values greater than bits_all_ones are truncated. 245 struct BiasedExponent : public TypedInt<uint32_t> { 246 using UP = TypedInt<uint32_t>; 247 using UP::UP; 248 249 LIBC_INLINE constexpr BiasedExponent(Exponent exp) 250 : UP(static_cast<int32_t>(exp) + EXP_BIAS) {} 251 252 // Cast operator to get convert from BiasedExponent to Exponent. 
253 LIBC_INLINE constexpr operator Exponent() const { 254 return Exponent(UP::value - EXP_BIAS); 255 } 256 257 LIBC_INLINE constexpr BiasedExponent &operator++() { 258 LIBC_ASSERT(*this != BiasedExponent(Exponent::inf())); 259 ++UP::value; 260 return *this; 261 } 262 263 LIBC_INLINE constexpr BiasedExponent &operator--() { 264 LIBC_ASSERT(*this != BiasedExponent(Exponent::subnormal())); 265 --UP::value; 266 return *this; 267 } 268 }; 269 270 // An opaque type to store a floating point significand. 271 // We define special values but it is valid to create arbitrary values as long 272 // as they are in the range [zero, bits_all_ones]. 273 // Note that the semantics of the Significand are implementation dependent. 274 // Values greater than bits_all_ones are truncated. 275 struct Significand : public TypedInt<StorageType> { 276 using UP = TypedInt<StorageType>; 277 using UP::UP; 278 279 LIBC_INLINE friend constexpr Significand operator|(const Significand a, 280 const Significand b) { 281 return Significand( 282 StorageType(a.to_storage_type() | b.to_storage_type())); 283 } 284 LIBC_INLINE friend constexpr Significand operator^(const Significand a, 285 const Significand b) { 286 return Significand( 287 StorageType(a.to_storage_type() ^ b.to_storage_type())); 288 } 289 LIBC_INLINE friend constexpr Significand operator>>(const Significand a, 290 int shift) { 291 return Significand(StorageType(a.to_storage_type() >> shift)); 292 } 293 294 LIBC_INLINE static constexpr auto zero() { 295 return Significand(StorageType(0)); 296 } 297 LIBC_INLINE static constexpr auto lsb() { 298 return Significand(StorageType(1)); 299 } 300 LIBC_INLINE static constexpr auto msb() { 301 return Significand(StorageType(1) << (SIG_LEN - 1)); 302 } 303 LIBC_INLINE static constexpr auto bits_all_ones() { 304 return Significand(SIG_MASK); 305 } 306 }; 307 308 LIBC_INLINE static constexpr StorageType encode(BiasedExponent exp) { 309 return (exp.to_storage_type() << SIG_LEN) & EXP_MASK; 310 } 311 312 
LIBC_INLINE static constexpr StorageType encode(Significand value) { 313 return value.to_storage_type() & SIG_MASK; 314 } 315 316 LIBC_INLINE static constexpr StorageType encode(BiasedExponent exp, 317 Significand sig) { 318 return encode(exp) | encode(sig); 319 } 320 321 LIBC_INLINE static constexpr StorageType encode(Sign sign, BiasedExponent exp, 322 Significand sig) { 323 if (sign.is_neg()) 324 return SIGN_MASK | encode(exp, sig); 325 return encode(exp, sig); 326 } 327 328 // The floating point number representation as an unsigned integer. 329 StorageType bits{}; 330 331 LIBC_INLINE constexpr FPStorage() : bits(0) {} 332 LIBC_INLINE constexpr FPStorage(StorageType value) : bits(value) {} 333 334 // Observers 335 LIBC_INLINE constexpr StorageType exp_bits() const { return bits & EXP_MASK; } 336 LIBC_INLINE constexpr StorageType sig_bits() const { return bits & SIG_MASK; } 337 LIBC_INLINE constexpr StorageType exp_sig_bits() const { 338 return bits & EXP_SIG_MASK; 339 } 340 341 // Parts 342 LIBC_INLINE constexpr BiasedExponent biased_exponent() const { 343 return BiasedExponent(static_cast<uint32_t>(exp_bits() >> SIG_LEN)); 344 } 345 LIBC_INLINE constexpr void set_biased_exponent(BiasedExponent biased) { 346 bits = merge(bits, encode(biased), EXP_MASK); 347 } 348 349 public: 350 LIBC_INLINE constexpr Sign sign() const { 351 return (bits & SIGN_MASK) ? Sign::NEG : Sign::POS; 352 } 353 LIBC_INLINE constexpr void set_sign(Sign signVal) { 354 if (sign() != signVal) 355 bits ^= SIGN_MASK; 356 } 357 }; 358 359 // This layer defines all functions that are specific to how the the floating 360 // point type is encoded. It enables constructions, modification and observation 361 // of values manipulated as 'StorageType'. 
362 template <FPType fp_type, typename RetT> 363 struct FPRepSem : public FPStorage<fp_type> { 364 using UP = FPStorage<fp_type>; 365 using typename UP::StorageType; 366 using UP::FRACTION_LEN; 367 using UP::FRACTION_MASK; 368 369 protected: 370 using typename UP::Exponent; 371 using typename UP::Significand; 372 using UP::bits; 373 using UP::encode; 374 using UP::exp_bits; 375 using UP::exp_sig_bits; 376 using UP::sig_bits; 377 using UP::UP; 378 379 public: 380 // Builders 381 LIBC_INLINE static constexpr RetT zero(Sign sign = Sign::POS) { 382 return RetT(encode(sign, Exponent::subnormal(), Significand::zero())); 383 } 384 LIBC_INLINE static constexpr RetT one(Sign sign = Sign::POS) { 385 return RetT(encode(sign, Exponent::zero(), Significand::zero())); 386 } 387 LIBC_INLINE static constexpr RetT min_subnormal(Sign sign = Sign::POS) { 388 return RetT(encode(sign, Exponent::subnormal(), Significand::lsb())); 389 } 390 LIBC_INLINE static constexpr RetT max_subnormal(Sign sign = Sign::POS) { 391 return RetT( 392 encode(sign, Exponent::subnormal(), Significand::bits_all_ones())); 393 } 394 LIBC_INLINE static constexpr RetT min_normal(Sign sign = Sign::POS) { 395 return RetT(encode(sign, Exponent::min(), Significand::zero())); 396 } 397 LIBC_INLINE static constexpr RetT max_normal(Sign sign = Sign::POS) { 398 return RetT(encode(sign, Exponent::max(), Significand::bits_all_ones())); 399 } 400 LIBC_INLINE static constexpr RetT inf(Sign sign = Sign::POS) { 401 return RetT(encode(sign, Exponent::inf(), Significand::zero())); 402 } 403 LIBC_INLINE static constexpr RetT signaling_nan(Sign sign = Sign::POS, 404 StorageType v = 0) { 405 return RetT(encode(sign, Exponent::inf(), 406 (v ? 
Significand(v) : (Significand::msb() >> 1)))); 407 } 408 LIBC_INLINE static constexpr RetT quiet_nan(Sign sign = Sign::POS, 409 StorageType v = 0) { 410 return RetT( 411 encode(sign, Exponent::inf(), Significand::msb() | Significand(v))); 412 } 413 414 // Observers 415 LIBC_INLINE constexpr bool is_zero() const { return exp_sig_bits() == 0; } 416 LIBC_INLINE constexpr bool is_nan() const { 417 return exp_sig_bits() > encode(Exponent::inf(), Significand::zero()); 418 } 419 LIBC_INLINE constexpr bool is_quiet_nan() const { 420 return exp_sig_bits() >= encode(Exponent::inf(), Significand::msb()); 421 } 422 LIBC_INLINE constexpr bool is_signaling_nan() const { 423 return is_nan() && !is_quiet_nan(); 424 } 425 LIBC_INLINE constexpr bool is_inf() const { 426 return exp_sig_bits() == encode(Exponent::inf(), Significand::zero()); 427 } 428 LIBC_INLINE constexpr bool is_finite() const { 429 return exp_bits() != encode(Exponent::inf()); 430 } 431 LIBC_INLINE 432 constexpr bool is_subnormal() const { 433 return exp_bits() == encode(Exponent::subnormal()); 434 } 435 LIBC_INLINE constexpr bool is_normal() const { 436 return is_finite() && !is_subnormal(); 437 } 438 LIBC_INLINE constexpr RetT next_toward_inf() const { 439 if (is_finite()) 440 return RetT(bits + StorageType(1)); 441 return RetT(bits); 442 } 443 444 // Returns the mantissa with the implicit bit set iff the current 445 // value is a valid normal number. 446 LIBC_INLINE constexpr StorageType get_explicit_mantissa() const { 447 if (is_subnormal()) 448 return sig_bits(); 449 return (StorageType(1) << UP::SIG_LEN) | sig_bits(); 450 } 451 }; 452 453 // Specialization for the X86 Extended Precision type. 
454 template <typename RetT> 455 struct FPRepSem<FPType::X86_Binary80, RetT> 456 : public FPStorage<FPType::X86_Binary80> { 457 using UP = FPStorage<FPType::X86_Binary80>; 458 using typename UP::StorageType; 459 using UP::FRACTION_LEN; 460 using UP::FRACTION_MASK; 461 462 // The x86 80 bit float represents the leading digit of the mantissa 463 // explicitly. This is the mask for that bit. 464 static constexpr StorageType EXPLICIT_BIT_MASK = StorageType(1) 465 << FRACTION_LEN; 466 // The X80 significand is made of an explicit bit and the fractional part. 467 static_assert((EXPLICIT_BIT_MASK & FRACTION_MASK) == 0, 468 "the explicit bit and the fractional part should not overlap"); 469 static_assert((EXPLICIT_BIT_MASK | FRACTION_MASK) == SIG_MASK, 470 "the explicit bit and the fractional part should cover the " 471 "whole significand"); 472 473 protected: 474 using typename UP::Exponent; 475 using typename UP::Significand; 476 using UP::encode; 477 using UP::UP; 478 479 public: 480 // Builders 481 LIBC_INLINE static constexpr RetT zero(Sign sign = Sign::POS) { 482 return RetT(encode(sign, Exponent::subnormal(), Significand::zero())); 483 } 484 LIBC_INLINE static constexpr RetT one(Sign sign = Sign::POS) { 485 return RetT(encode(sign, Exponent::zero(), Significand::msb())); 486 } 487 LIBC_INLINE static constexpr RetT min_subnormal(Sign sign = Sign::POS) { 488 return RetT(encode(sign, Exponent::subnormal(), Significand::lsb())); 489 } 490 LIBC_INLINE static constexpr RetT max_subnormal(Sign sign = Sign::POS) { 491 return RetT(encode(sign, Exponent::subnormal(), 492 Significand::bits_all_ones() ^ Significand::msb())); 493 } 494 LIBC_INLINE static constexpr RetT min_normal(Sign sign = Sign::POS) { 495 return RetT(encode(sign, Exponent::min(), Significand::msb())); 496 } 497 LIBC_INLINE static constexpr RetT max_normal(Sign sign = Sign::POS) { 498 return RetT(encode(sign, Exponent::max(), Significand::bits_all_ones())); 499 } 500 LIBC_INLINE static constexpr RetT inf(Sign 
sign = Sign::POS) { 501 return RetT(encode(sign, Exponent::inf(), Significand::msb())); 502 } 503 LIBC_INLINE static constexpr RetT signaling_nan(Sign sign = Sign::POS, 504 StorageType v = 0) { 505 return RetT(encode(sign, Exponent::inf(), 506 Significand::msb() | 507 (v ? Significand(v) : (Significand::msb() >> 2)))); 508 } 509 LIBC_INLINE static constexpr RetT quiet_nan(Sign sign = Sign::POS, 510 StorageType v = 0) { 511 return RetT(encode(sign, Exponent::inf(), 512 Significand::msb() | (Significand::msb() >> 1) | 513 Significand(v))); 514 } 515 516 // Observers 517 LIBC_INLINE constexpr bool is_zero() const { return exp_sig_bits() == 0; } 518 LIBC_INLINE constexpr bool is_nan() const { 519 // Most encoding forms from the table found in 520 // https://en.wikipedia.org/wiki/Extended_precision#x86_extended_precision_format 521 // are interpreted as NaN. 522 // More precisely : 523 // - Pseudo-Infinity 524 // - Pseudo Not a Number 525 // - Signalling Not a Number 526 // - Floating-point Indefinite 527 // - Quiet Not a Number 528 // - Unnormal 529 // This can be reduced to the following logic: 530 if (exp_bits() == encode(Exponent::inf())) 531 return !is_inf(); 532 if (exp_bits() != encode(Exponent::subnormal())) 533 return (sig_bits() & encode(Significand::msb())) == 0; 534 return false; 535 } 536 LIBC_INLINE constexpr bool is_quiet_nan() const { 537 return exp_sig_bits() >= 538 encode(Exponent::inf(), 539 Significand::msb() | (Significand::msb() >> 1)); 540 } 541 LIBC_INLINE constexpr bool is_signaling_nan() const { 542 return is_nan() && !is_quiet_nan(); 543 } 544 LIBC_INLINE constexpr bool is_inf() const { 545 return exp_sig_bits() == encode(Exponent::inf(), Significand::msb()); 546 } 547 LIBC_INLINE constexpr bool is_finite() const { 548 return !is_inf() && !is_nan(); 549 } 550 LIBC_INLINE 551 constexpr bool is_subnormal() const { 552 return exp_bits() == encode(Exponent::subnormal()); 553 } 554 LIBC_INLINE constexpr bool is_normal() const { 555 const auto exp = 
exp_bits(); 556 if (exp == encode(Exponent::subnormal()) || exp == encode(Exponent::inf())) 557 return false; 558 return get_implicit_bit(); 559 } 560 LIBC_INLINE constexpr RetT next_toward_inf() const { 561 if (is_finite()) { 562 if (exp_sig_bits() == max_normal().uintval()) { 563 return inf(sign()); 564 } else if (exp_sig_bits() == max_subnormal().uintval()) { 565 return min_normal(sign()); 566 } else if (sig_bits() == SIG_MASK) { 567 return RetT(encode(sign(), ++biased_exponent(), Significand::zero())); 568 } else { 569 return RetT(bits + StorageType(1)); 570 } 571 } 572 return RetT(bits); 573 } 574 575 LIBC_INLINE constexpr StorageType get_explicit_mantissa() const { 576 return sig_bits(); 577 } 578 579 // This functions is specific to FPRepSem<FPType::X86_Binary80>. 580 // TODO: Remove if possible. 581 LIBC_INLINE constexpr bool get_implicit_bit() const { 582 return static_cast<bool>(bits & EXPLICIT_BIT_MASK); 583 } 584 585 // This functions is specific to FPRepSem<FPType::X86_Binary80>. 586 // TODO: Remove if possible. 587 LIBC_INLINE constexpr void set_implicit_bit(bool implicitVal) { 588 if (get_implicit_bit() != implicitVal) 589 bits ^= EXPLICIT_BIT_MASK; 590 } 591 }; 592 593 // 'FPRepImpl' is the bottom of the class hierarchy that only deals with 594 // 'FPType'. The operations dealing with specific float semantics are 595 // implemented by 'FPRepSem' above and specialized when needed. 596 // 597 // The 'RetT' type is being propagated up to 'FPRepSem' so that the functions 598 // creating new values (Builders) can return the appropriate type. That is, when 599 // creating a value through 'FPBits' below the builder will return an 'FPBits' 600 // value. 601 // FPBits<float>::zero(); // returns an FPBits<> 602 // 603 // When we don't care about specific C++ floating point type we can use 604 // 'FPRep' and specify the 'FPType' directly. 
605 // FPRep<FPType::IEEE754_Binary32:>::zero() // returns an FPRep<> 606 template <FPType fp_type, typename RetT> 607 struct FPRepImpl : public FPRepSem<fp_type, RetT> { 608 using UP = FPRepSem<fp_type, RetT>; 609 using StorageType = typename UP::StorageType; 610 611 protected: 612 using UP::bits; 613 using UP::encode; 614 using UP::exp_bits; 615 using UP::exp_sig_bits; 616 617 using typename UP::BiasedExponent; 618 using typename UP::Exponent; 619 using typename UP::Significand; 620 621 using UP::FP_MASK; 622 623 public: 624 // Constants. 625 using UP::EXP_BIAS; 626 using UP::EXP_MASK; 627 using UP::FRACTION_MASK; 628 using UP::SIG_LEN; 629 using UP::SIG_MASK; 630 using UP::SIGN_MASK; 631 LIBC_INLINE_VAR static constexpr int MAX_BIASED_EXPONENT = 632 (1 << UP::EXP_LEN) - 1; 633 634 // CTors 635 LIBC_INLINE constexpr FPRepImpl() = default; 636 LIBC_INLINE constexpr explicit FPRepImpl(StorageType x) : UP(x) {} 637 638 // Comparison 639 LIBC_INLINE constexpr friend bool operator==(FPRepImpl a, FPRepImpl b) { 640 return a.uintval() == b.uintval(); 641 } 642 LIBC_INLINE constexpr friend bool operator!=(FPRepImpl a, FPRepImpl b) { 643 return a.uintval() != b.uintval(); 644 } 645 646 // Representation 647 LIBC_INLINE constexpr StorageType uintval() const { return bits & FP_MASK; } 648 LIBC_INLINE constexpr void set_uintval(StorageType value) { 649 bits = (value & FP_MASK); 650 } 651 652 // Builders 653 using UP::inf; 654 using UP::max_normal; 655 using UP::max_subnormal; 656 using UP::min_normal; 657 using UP::min_subnormal; 658 using UP::one; 659 using UP::quiet_nan; 660 using UP::signaling_nan; 661 using UP::zero; 662 663 // Modifiers 664 LIBC_INLINE constexpr RetT abs() const { 665 return RetT(static_cast<StorageType>(bits & UP::EXP_SIG_MASK)); 666 } 667 668 // Observers 669 using UP::get_explicit_mantissa; 670 using UP::is_finite; 671 using UP::is_inf; 672 using UP::is_nan; 673 using UP::is_normal; 674 using UP::is_quiet_nan; 675 using UP::is_signaling_nan; 676 
using UP::is_subnormal; 677 using UP::is_zero; 678 using UP::next_toward_inf; 679 using UP::sign; 680 LIBC_INLINE constexpr bool is_inf_or_nan() const { return !is_finite(); } 681 LIBC_INLINE constexpr bool is_neg() const { return sign().is_neg(); } 682 LIBC_INLINE constexpr bool is_pos() const { return sign().is_pos(); } 683 684 LIBC_INLINE constexpr uint16_t get_biased_exponent() const { 685 return static_cast<uint16_t>(static_cast<uint32_t>(UP::biased_exponent())); 686 } 687 688 LIBC_INLINE constexpr void set_biased_exponent(StorageType biased) { 689 UP::set_biased_exponent(BiasedExponent((int32_t)biased)); 690 } 691 692 LIBC_INLINE constexpr int get_exponent() const { 693 return static_cast<int32_t>(Exponent(UP::biased_exponent())); 694 } 695 696 // If the number is subnormal, the exponent is treated as if it were the 697 // minimum exponent for a normal number. This is to keep continuity between 698 // the normal and subnormal ranges, but it causes problems for functions where 699 // values are calculated from the exponent, since just subtracting the bias 700 // will give a slightly incorrect result. Additionally, zero has an exponent 701 // of zero, and that should actually be treated as zero. 702 LIBC_INLINE constexpr int get_explicit_exponent() const { 703 Exponent exponent(UP::biased_exponent()); 704 if (is_zero()) 705 exponent = Exponent::zero(); 706 if (exponent == Exponent::subnormal()) 707 exponent = Exponent::min(); 708 return static_cast<int32_t>(exponent); 709 } 710 711 LIBC_INLINE constexpr StorageType get_mantissa() const { 712 return bits & FRACTION_MASK; 713 } 714 715 LIBC_INLINE constexpr void set_mantissa(StorageType mantVal) { 716 bits = UP::merge(bits, mantVal, FRACTION_MASK); 717 } 718 719 LIBC_INLINE constexpr void set_significand(StorageType sigVal) { 720 bits = UP::merge(bits, sigVal, SIG_MASK); 721 } 722 // Unsafe function to create a floating point representation. 
723 // It simply packs the sign, biased exponent and mantissa values without 724 // checking bound nor normalization. 725 // 726 // WARNING: For X86 Extended Precision, implicit bit needs to be set correctly 727 // in the 'mantissa' by the caller. This function will not check for its 728 // validity. 729 // 730 // FIXME: Use an uint32_t for 'biased_exp'. 731 LIBC_INLINE static constexpr RetT 732 create_value(Sign sign, StorageType biased_exp, StorageType mantissa) { 733 return RetT(encode(sign, BiasedExponent(static_cast<uint32_t>(biased_exp)), 734 Significand(mantissa))); 735 } 736 737 // The function converts integer number and unbiased exponent to proper 738 // float T type: 739 // Result = number * 2^(ep+1 - exponent_bias) 740 // Be careful! 741 // 1) "ep" is the raw exponent value. 742 // 2) The function adds +1 to ep for seamless normalized to denormalized 743 // transition. 744 // 3) The function does not check exponent high limit. 745 // 4) "number" zero value is not processed correctly. 746 // 5) Number is unsigned, so the result can be only positive. 747 LIBC_INLINE static constexpr RetT make_value(StorageType number, int ep) { 748 FPRepImpl result(0); 749 int lz = 750 UP::FRACTION_LEN + 1 - (UP::STORAGE_LEN - cpp::countl_zero(number)); 751 752 number <<= lz; 753 ep -= lz; 754 755 if (LIBC_LIKELY(ep >= 0)) { 756 // Implicit number bit will be removed by mask 757 result.set_significand(number); 758 result.set_biased_exponent(static_cast<StorageType>(ep + 1)); 759 } else { 760 result.set_significand(number >> -ep); 761 } 762 return RetT(result.uintval()); 763 } 764 }; 765 766 // A generic class to manipulate floating point formats. 767 // It derives its functionality to FPRepImpl above. 
768 template <FPType fp_type> 769 struct FPRep : public FPRepImpl<fp_type, FPRep<fp_type>> { 770 using UP = FPRepImpl<fp_type, FPRep<fp_type>>; 771 using StorageType = typename UP::StorageType; 772 using UP::UP; 773 774 LIBC_INLINE constexpr explicit operator StorageType() const { 775 return UP::uintval(); 776 } 777 }; 778 779 } // namespace internal 780 781 // Returns the FPType corresponding to C++ type T on the host. 782 template <typename T> LIBC_INLINE static constexpr FPType get_fp_type() { 783 using UnqualT = cpp::remove_cv_t<T>; 784 if constexpr (cpp::is_same_v<UnqualT, float> && __FLT_MANT_DIG__ == 24) 785 return FPType::IEEE754_Binary32; 786 else if constexpr (cpp::is_same_v<UnqualT, double> && __DBL_MANT_DIG__ == 53) 787 return FPType::IEEE754_Binary64; 788 else if constexpr (cpp::is_same_v<UnqualT, long double>) { 789 if constexpr (__LDBL_MANT_DIG__ == 53) 790 return FPType::IEEE754_Binary64; 791 else if constexpr (__LDBL_MANT_DIG__ == 64) 792 return FPType::X86_Binary80; 793 else if constexpr (__LDBL_MANT_DIG__ == 113) 794 return FPType::IEEE754_Binary128; 795 } 796 #if defined(LIBC_TYPES_HAS_FLOAT16) 797 else if constexpr (cpp::is_same_v<UnqualT, float16>) 798 return FPType::IEEE754_Binary16; 799 #endif 800 #if defined(LIBC_TYPES_HAS_FLOAT128) 801 else if constexpr (cpp::is_same_v<UnqualT, float128>) 802 return FPType::IEEE754_Binary128; 803 #endif 804 else 805 static_assert(cpp::always_false<UnqualT>, "Unsupported type"); 806 } 807 808 // ----------------------------------------------------------------------------- 809 // **** WARNING **** 810 // This interface is shared with libc++, if you change this interface you need 811 // to update it in both libc and libc++. You should also be careful when adding 812 // dependencies to this file, since it needs to build for all libc++ targets. 813 // ----------------------------------------------------------------------------- 814 // A generic class to manipulate C++ floating point formats. 
815 // It derives its functionality to FPRepImpl above. 816 template <typename T> 817 struct FPBits final : public internal::FPRepImpl<get_fp_type<T>(), FPBits<T>> { 818 static_assert(cpp::is_floating_point_v<T>, 819 "FPBits instantiated with invalid type."); 820 using UP = internal::FPRepImpl<get_fp_type<T>(), FPBits<T>>; 821 using StorageType = typename UP::StorageType; 822 823 // Constructors. 824 LIBC_INLINE constexpr FPBits() = default; 825 826 template <typename XType> LIBC_INLINE constexpr explicit FPBits(XType x) { 827 using Unqual = typename cpp::remove_cv_t<XType>; 828 if constexpr (cpp::is_same_v<Unqual, T>) { 829 UP::bits = cpp::bit_cast<StorageType>(x); 830 } else if constexpr (cpp::is_same_v<Unqual, StorageType>) { 831 UP::bits = x; 832 } else { 833 // We don't want accidental type promotions/conversions, so we require 834 // exact type match. 835 static_assert(cpp::always_false<XType>); 836 } 837 } 838 839 // Floating-point conversions. 840 LIBC_INLINE constexpr T get_val() const { return cpp::bit_cast<T>(UP::bits); } 841 }; 842 843 } // namespace fputil 844 } // namespace LIBC_NAMESPACE_DECL 845 846 #endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_FPBITS_H 847