// xref: /aosp_15_r20/external/tensorflow/tensorflow/compiler/xla/service/cpu/runtime_fp16.h (revision b6fb3261f9314811a0f4371741dbb8839866f948)
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_FP16_H_
#define TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_FP16_H_

#include <stdint.h>

// XlaF16ABIType is the C++ type used to pass and return F16 values in the
// conversion routines declared at the bottom of this header. It is selected
// at preprocessing time, in order of preference:
//   1. _Float16  - when the compiler advertises it (see the caveat below);
//   2. float     - on x86_64 when _Float16 is not used;
//   3. uint16_t  - on every other configuration.

// _Float16 always gets us the correct ABI type, so use that if available.
// AArch64 GCC defines __FLT16_MANT_DIG__ even when _Float16 is not available,
// so the macro alone is not trusted for the GCC + AArch64 combination; with
// Clang, the macro being defined is sufficient.
#if defined(__FLT16_MANT_DIG__) && \
    (defined(__clang__) || !(defined(__GNUC__) && defined(__aarch64__)))
using XlaF16ABIType = _Float16;
#elif defined(__x86_64__)
// Older versions of Clang don't have _Float16. Since both float and _Float16
// are passed in the same register we can use the wider type and careful casting
// to conform to x86_64 psABI. This only works with the assumption that we're
// dealing with little-endian values passed in wider registers.
// NOTE(review): in this configuration sizeof(XlaF16ABIType) is that of float,
// not 2 bytes; presumably the implementation moves the 16 payload bits in and
// out of the float's storage rather than converting the value - confirm
// against the definitions of the routines below.
using XlaF16ABIType = float;
#else
// Default to uint16_t if we have nothing else.
using XlaF16ABIType = uint16_t;
#endif

// The routines below are declared with C linkage (unmangled symbol names).
// This header only declares them; their definitions are not part of it.

// Converts an F32 value to an F16.
// Takes the F32 input as a float and returns the F16 result as XlaF16ABIType.
extern "C" XlaF16ABIType __gnu_f2h_ieee(float);

// Converts an F16 value to an F32.
// Takes the F16 input as XlaF16ABIType and returns the F32 result as a float.
extern "C" float __gnu_h2f_ieee(XlaF16ABIType);

// Converts an F64 value to an F16.
// Takes the F64 input as a double and returns the F16 result as
// XlaF16ABIType.
extern "C" XlaF16ABIType __truncdfhf2(double);

#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_FP16_H_