xref: /aosp_15_r20/external/ComputeLibrary/support/Bfloat16.h (revision c217d954acce2dbc11938adb493fc0abd69584f3)
1 /*
2  * Copyright (c) 2020-2022 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef ARM_COMPUTE_BFLOAT16_H
25 #define ARM_COMPUTE_BFLOAT16_H
26 
27 #include <cstdint>
28 #include <cstring>
29 
30 namespace arm_compute
31 {
32 namespace
33 {
/** Convert float to bfloat16
 *
 * The software fallback keeps the upper 16 bits of the IEEE-754 single-precision
 * encoding and rounds the discarded lower 16 bits to nearest, ties to even.
 * NaN inputs are returned as a quiet NaN that keeps the input's sign and upper
 * payload bits, so a NaN can never be rounded into an infinity or a zero.
 *
 * @param[in] v Floating-point value to convert to bfloat
 *
 * @return Converted value (raw bfloat16 bit pattern)
 */
inline uint16_t float_to_bf16(const float v)
{
#if defined(ARM_COMPUTE_ENABLE_BF16)
    // The assembly loads the float directly from memory, so no integer view of it is needed.
    const float *fromptr = &v;
    uint16_t     res;

    __asm __volatile(
        "ldr    s0, [%[fromptr]]\n"
        ".inst    0x1e634000\n" // BFCVT h0, s0
        "str    h0, [%[toptr]]\n"
        :
        : [fromptr] "r"(fromptr), [toptr] "r"(&res)
        : "v0", "memory");
    return res;
#else  /* defined(ARM_COMPUTE_ENABLE_BF16) */
    static_assert(sizeof(float) == sizeof(uint32_t), "float is expected to be 32 bits wide");

    // Copy the bit pattern out with memcpy: reading a float through a uint32_t pointer
    // breaks the strict-aliasing rules.
    uint32_t bits;
    std::memcpy(&bits, &v, sizeof(bits));

    uint16_t res = static_cast<uint16_t>(bits >> 16);

    // NaN: exponent all ones and a non-zero mantissa. Rounding must be skipped, otherwise
    // the mantissa can either be truncated to zero (giving infinity) or carry into the
    // exponent/sign (giving zero). Setting the top mantissa bit guarantees a quiet NaN.
    if((bits & 0x7fffffffU) > 0x7f800000U)
    {
        return static_cast<uint16_t>(res | 0x0040U);
    }

    // Round to nearest, ties to even, based on the 16 bits that were dropped.
    const uint16_t error = static_cast<uint16_t>(bits & 0x0000ffffU);
    const uint16_t bf_l  = res & 0x0001;
    if((error > 0x8000) || ((error == 0x8000) && (bf_l != 0)))
    {
        res += 1;
    }
    return res;
#endif /* defined(ARM_COMPUTE_ENABLE_BF16) */
}
64 
/** Convert bfloat16 to float
 *
 * The 16 bfloat16 bits become the upper half of the single-precision encoding and
 * the lower 16 bits are zero, so the conversion is exact.
 *
 * @param[in] v Bfloat16 value (raw bit pattern) to convert to float
 *
 * @return Converted value
 */
inline float bf16_to_float(const uint16_t &v)
{
    static_assert(sizeof(float) == sizeof(uint32_t), "float is expected to be 32 bits wide");

    // Widen before shifting: a uint16_t is promoted to signed int, and shifting a value
    // with the top bit set by 16 would move a one into the sign bit of that int.
    const uint32_t lv = static_cast<uint32_t>(v) << 16;

    float fp;
    std::memcpy(&fp, &lv, sizeof(fp));
    return fp;
}
78 }
79 
80 /** Brain floating point representation class */
81 class bfloat16 final
82 {
83 public:
84     /** Default Constructor */
bfloat16()85     bfloat16()
86         : value(0)
87     {
88     }
89     /** Constructor
90      *
91      * @param[in] v Floating-point value
92      */
bfloat16(float v)93     bfloat16(float v)
94         : value(float_to_bf16(v))
95     {
96     }
97     /** Assignment operator
98      *
99      * @param[in] v Floating point value to assign
100      *
101      * @return The updated object
102      */
103     bfloat16 &operator=(float v)
104     {
105         value = float_to_bf16(v);
106         return *this;
107     }
108     /** Floating point conversion operator
109      *
110      * @return Floating point representation of the value
111      */
112     operator float() const
113     {
114         return bf16_to_float(value);
115     }
116     /** Lowest representative value
117      *
118      * @return Returns the lowest finite value representable by bfloat16
119      */
lowest()120     static bfloat16 lowest()
121     {
122         bfloat16 val;
123         val.value = 0xFF7F;
124         return val;
125     }
126     /** Largest representative value
127      *
128      * @return Returns the largest finite value representable by bfloat16
129      */
max()130     static bfloat16 max()
131     {
132         bfloat16 val;
133         val.value = 0x7F7F;
134         return val;
135     }
136 
137 private:
138     uint16_t value;
139 };
140 } // namespace arm_compute
141 #endif /* ARM_COMPUTE_BFLOAT16_H */
142