xref: /aosp_15_r20/external/armnn/src/armnnUtils/BFloat16.hpp (revision 89c4ff92f2867872bb9e2354d150bf0c8c502810)
//
// Copyright © 2020 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
5*89c4ff92SAndroid Build Coastguard Worker 
6*89c4ff92SAndroid Build Coastguard Worker #pragma once
7*89c4ff92SAndroid Build Coastguard Worker 
8*89c4ff92SAndroid Build Coastguard Worker #include <ostream>
9*89c4ff92SAndroid Build Coastguard Worker #include <cmath>
10*89c4ff92SAndroid Build Coastguard Worker #include <cstring>
11*89c4ff92SAndroid Build Coastguard Worker #include <stdint.h>
12*89c4ff92SAndroid Build Coastguard Worker 
13*89c4ff92SAndroid Build Coastguard Worker namespace armnn
14*89c4ff92SAndroid Build Coastguard Worker {
/// 16-bit "brain floating point" value stored as a raw bit pattern.
///
/// The stored 16 bits are the upper half of a 32-bit float
/// (1 sign bit, 8 exponent bits, 7 mantissa bits). Conversions to and from
/// float are done by copying bits, so the class assumes a 32-bit float.
class BFloat16
{
public:
    /// Constructs a value whose bit pattern is all zeros.
    BFloat16()
    : m_Value(0)
    {}

    BFloat16(const BFloat16& v) = default;

    /// Constructs from a raw BFloat16 bit pattern (no numeric conversion).
    explicit BFloat16(uint16_t v)
    : m_Value(v)
    {}

    /// Constructs from a float, rounding to nearest even (see Float32ToBFloat16).
    explicit BFloat16(float v)
    : m_Value(Float32ToBFloat16(v).Val())
    {}

    /// Implicit widening back to float; equivalent to ToFloat32().
    operator float() const
    {
        return ToFloat32();
    }

    BFloat16& operator=(const BFloat16& other) = default;

    /// Assigns from a float, rounding to nearest even (see Float32ToBFloat16).
    BFloat16& operator=(float v)
    {
        m_Value = Float32ToBFloat16(v).Val();
        return *this;
    }

    /// Compares the raw bit patterns.
    /// Note this is not a floating point comparison: two values holding the
    /// same NaN pattern compare equal, and +0 (0x0000) differs from -0 (0x8000).
    bool operator==(const BFloat16& r) const
    {
        return m_Value == r.Val();
    }

    /// Converts a float to BFloat16 using round-to-nearest-even.
    /// Any NaN input is mapped to the single pattern returned by Nan().
    static BFloat16 Float32ToBFloat16(const float v)
    {
        if (std::isnan(v))
        {
            return Nan();
        }

        // Round value to the nearest even
        // Float32
        // S EEEEEEEE MMMMMMLRMMMMMMMMMMMMMMM
        // BFloat16
        // S EEEEEEEE MMMMMML
        // LSB (L): Least significant bit of BFloat16 (last bit of the Mantissa of BFloat16)
        // R: Rounding bit (most significant of the 16 discarded bits)
        // R = 0                               -> round down
        // R = 1, remaining discarded bits != 0 -> round up
        // R = 1, remaining discarded bits == 0 -> tie: round up only if LSB = 1

        // Copy the bits out with memcpy: reading a float object through a
        // uint32_t pointer would violate the strict aliasing rules.
        uint32_t bits;
        static_assert(sizeof bits == sizeof v, "float is expected to be 32 bits wide");
        std::memcpy(&bits, &v, sizeof bits);

        uint16_t upper = static_cast<uint16_t>(bits >> 16u);
        // Mark the LSB
        const uint16_t lsb = static_cast<uint16_t>(upper & 0x0001u);
        // The part that is going to be truncated (the low 16 bits of the FP32)
        const uint16_t discarded = static_cast<uint16_t>(bits & 0x0000FFFFu);
        if (discarded > 0x8000 || (discarded == 0x8000 && lsb == 1))
        {
            // A carry out of the mantissa moves into the exponent, which is
            // how the largest finite values round up to infinity.
            ++upper;
        }
        return BFloat16(upper);
    }

    /// Widens to float by placing the stored 16 bits in the upper half of a
    /// 32-bit word and filling the lower half with zeros.
    float ToFloat32() const
    {
        // Widen before shifting: m_Value would otherwise be promoted to a
        // signed int and patterns >= 0x8000 would be shifted into its sign bit.
        const uint32_t u32 = static_cast<uint32_t>(m_Value) << 16u;
        float f32;
        static_assert(sizeof u32 == sizeof f32, "float is expected to be 32 bits wide");
        std::memcpy(&f32, &u32, sizeof u32);
        return f32;
    }

    /// Returns the raw 16-bit pattern.
    uint16_t Val() const
    {
        return m_Value;
    }

    /// Bit pattern 0x7F7F: exponent 0xFE with all mantissa bits set.
    static BFloat16 Max()
    {
        return BFloat16(static_cast<uint16_t>(0x7F7F));
    }

    /// Bit pattern 0x7FC0: the NaN encoding produced for every NaN input.
    static BFloat16 Nan()
    {
        return BFloat16(static_cast<uint16_t>(0x7FC0));
    }

    /// Bit pattern 0x7F80: exponent all ones, mantissa zero (positive infinity).
    static BFloat16 Inf()
    {
        return BFloat16(static_cast<uint16_t>(0x7F80));
    }

private:
    uint16_t m_Value;
};
121*89c4ff92SAndroid Build Coastguard Worker 
operator <<(std::ostream & os,const BFloat16 & b)122*89c4ff92SAndroid Build Coastguard Worker inline std::ostream& operator<<(std::ostream& os, const BFloat16& b)
123*89c4ff92SAndroid Build Coastguard Worker {
124*89c4ff92SAndroid Build Coastguard Worker     os << b.ToFloat32() << "(0x" << std::hex << b.Val() << ")";
125*89c4ff92SAndroid Build Coastguard Worker     return os;
126*89c4ff92SAndroid Build Coastguard Worker }
127*89c4ff92SAndroid Build Coastguard Worker 
128*89c4ff92SAndroid Build Coastguard Worker } //namespace armnn
129