1*6467f958SSadaf Ebrahimi //
2*6467f958SSadaf Ebrahimi // Copyright (c) 2017 The Khronos Group Inc.
3*6467f958SSadaf Ebrahimi //
4*6467f958SSadaf Ebrahimi // Licensed under the Apache License, Version 2.0 (the "License");
5*6467f958SSadaf Ebrahimi // you may not use this file except in compliance with the License.
6*6467f958SSadaf Ebrahimi // You may obtain a copy of the License at
7*6467f958SSadaf Ebrahimi //
8*6467f958SSadaf Ebrahimi // http://www.apache.org/licenses/LICENSE-2.0
9*6467f958SSadaf Ebrahimi //
10*6467f958SSadaf Ebrahimi // Unless required by applicable law or agreed to in writing, software
11*6467f958SSadaf Ebrahimi // distributed under the License is distributed on an "AS IS" BASIS,
12*6467f958SSadaf Ebrahimi // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13*6467f958SSadaf Ebrahimi // See the License for the specific language governing permissions and
14*6467f958SSadaf Ebrahimi // limitations under the License.
15*6467f958SSadaf Ebrahimi //
#include <math.h>
#include <stdint.h>
#include <string.h>
#include "fplib.h"
19*6467f958SSadaf Ebrahimi
// Fallback for translation units where <float.h> has not already supplied
// FLT_MANT_DIG. The conversion routines in this file use (FLT_MANT_DIG - 1)
// as the bit position of the binary32 exponent field.
#if !defined(FLT_MANT_DIG)
#define FLT_MANT_DIG 24
#endif

// Reinterpret the storage of the lvalue `x` as a float / int64_t.
// `x` must be an addressable object; the argument is parenthesized so that
// compound expressions (e.g. a conditional yielding an lvalue) expand safely.
// NOTE(review): this is a pointer-cast type pun. Reading an object through an
// incompatible pointer type is not sanctioned by C's aliasing rules; prefer
// memcpy in new code.
#define as_float(x) (*((float *)(&(x))))
#define as_long(x) (*((int64_t *)(&(x))))
25*6467f958SSadaf Ebrahimi
// Counts the leading zero bits of a 64-bit value by scanning from the most
// significant bit downward. Returns 64 when no bit is set.
static uint32_t clz(uint64_t value)
{
    const uint32_t bit_width = (uint32_t)(sizeof(uint64_t) * 8);
    uint32_t count = 0;

    while (count < bit_width)
    {
        // Bring bit (63 - count) up to the top position and test it there.
        volatile uint64_t top_bit = (value << count) & 0x8000000000000000ull;
        if (top_bit != 0)
        {
            return count;
        }
        ++count;
    }
    return count;
}
37*6467f958SSadaf Ebrahimi
// Reinterprets a 32-bit pattern as a float. memcpy is used instead of a
// pointer cast so the read does not violate the aliasing rules.
static float s64_f32_from_bits(uint32_t bits)
{
    float value;
    memcpy(&value, &bits, sizeof value);
    return value;
}

// Builds the binary32 bit pattern (sign bit clear) for a NON-ZERO magnitude,
// discarding any low-order bits that do not fit in the 24-bit significand
// (i.e. rounding toward zero). *inexact is set to 1 when non-zero bits were
// discarded and to 0 otherwise.
static uint32_t s64_truncate_to_f32_bits(uint64_t magnitude, int *inexact)
{
    const int lead = (int)clz(magnitude);

    // Biased exponent: 127 + index of the highest set bit (63 - lead).
    const uint32_t exponent = (uint32_t)(127 + 64 - 1 - lead)
        << (FLT_MANT_DIG - 1);

    // Shift that moves the leading one to bit 23 (64 - 24 = 40).
    const int mantShift = 40 - lead;
    uint64_t significand;

    *inexact = 0;
    if (mantShift >= 0)
    {
        significand = magnitude >> mantShift;
        if ((significand << mantShift) != magnitude)
        {
            *inexact = 1; // low-order bits were dropped
        }
    }
    else
    {
        // Small values fit entirely; left-align them instead.
        significand = magnitude << -mantShift;
    }

    // Mask off the leading one: it is implicit in normalized binary32.
    return exponent | ((uint32_t)significand & 0x7fffff);
}

// Converts a signed 64-bit integer to float using the requested rounding mode.
//
//   data - value to convert.
//   sat  - accepted for interface compatibility; not consulted here.
//   rnd  - qcomRTZ (toward zero), qcomRTE (plain C cast; presumably
//          round-to-nearest-even under the default FP environment - confirm),
//          qcomRTP (toward +infinity) or qcomRTN (toward -infinity).
//
// Returns the converted value; 0.0f for a zero input or for any other rnd.
float qcom_s64_2_f32(int64_t data, bool sat, roundingMode rnd)
{
    (void)sat;

    switch (rnd)
    {
        case qcomRTE: return (float)(data);
        case qcomRTZ:
        case qcomRTP:
        case qcomRTN: break;
        case qcomRoundingModeCount:
        default: return 0.0f;
    }

    if (data == 0)
    {
        return 0.0f;
    }

    const int negative = (data < 0);

    // Negate in unsigned arithmetic: "-data" on the signed type overflows
    // (undefined behaviour) for INT64_MIN, whereas the unsigned form yields
    // the correct magnitude 2^63.
    const uint64_t magnitude =
        negative ? (uint64_t)0 - (uint64_t)data : (uint64_t)data;

    int inexact = 0;
    uint32_t bits = s64_truncate_to_f32_bits(magnitude, &inexact);
    if (negative)
    {
        bits |= 0x80000000u;
    }
    const float truncated = s64_f32_from_bits(bits);

    if (inexact)
    {
        // Truncation already rounds toward zero, so only the direction that
        // points away from zero needs a one-ulp adjustment.
        if (rnd == qcomRTP && !negative)
        {
            return nextafterf(truncated, s64_f32_from_bits(0x7f7fffffu));
        }
        if (rnd == qcomRTN && negative)
        {
            return nextafterf(truncated, s64_f32_from_bits(0xff7fffffu));
        }
    }
    return truncated;
}
154*6467f958SSadaf Ebrahimi
// Reinterprets a 32-bit pattern as a float. memcpy is used instead of a
// pointer cast so the read does not violate the aliasing rules.
static float u64_f32_from_bits(uint32_t bits)
{
    float value;
    memcpy(&value, &bits, sizeof value);
    return value;
}

// Builds the binary32 bit pattern for a NON-ZERO unsigned value, discarding
// any low-order bits that do not fit in the 24-bit significand (rounding
// toward zero). *inexact is set to 1 when non-zero bits were discarded and
// to 0 otherwise.
static uint32_t u64_truncate_to_f32_bits(uint64_t value, int *inexact)
{
    const int lead = (int)clz(value);

    // Biased exponent: 127 + index of the highest set bit (63 - lead).
    const uint32_t exponent = (uint32_t)(127 + 64 - 1 - lead)
        << (FLT_MANT_DIG - 1);

    // Shift that moves the leading one to bit 23 (64 - 24 = 40).
    const int mantShift = 40 - lead;
    uint64_t significand;

    *inexact = 0;
    if (mantShift >= 0)
    {
        significand = value >> mantShift;
        if ((significand << mantShift) != value)
        {
            *inexact = 1; // low-order bits were dropped
        }
    }
    else
    {
        // Small values fit entirely; left-align them instead.
        significand = value << -mantShift;
    }

    // Mask off the leading one: it is implicit in normalized binary32.
    return exponent | ((uint32_t)significand & 0x7fffff);
}

// Converts an unsigned 64-bit integer to float using the requested rounding
// mode.
//
//   data - value to convert.
//   sat  - accepted for interface compatibility; not consulted here.
//   rnd  - qcomRTZ (toward zero), qcomRTE (plain C cast; presumably
//          round-to-nearest-even under the default FP environment - confirm),
//          qcomRTP (toward +infinity) or qcomRTN (toward -infinity, which for
//          an unsigned input is the same as truncation).
//
// Returns the converted value; 0.0f for a zero input or for any other rnd.
float qcom_u64_2_f32(uint64_t data, bool sat, roundingMode rnd)
{
    (void)sat;

    switch (rnd)
    {
        case qcomRTE: return (float)(data);
        case qcomRTZ:
        case qcomRTP:
        case qcomRTN: break;
        case qcomRoundingModeCount:
        default: return 0.0f;
    }

    if (data == 0)
    {
        return 0.0f;
    }

    int inexact = 0;
    const float truncated =
        u64_f32_from_bits(u64_truncate_to_f32_bits(data, &inexact));

    // Only round-toward-positive differs from truncation for unsigned input:
    // step up to the next representable float when bits were discarded.
    if (rnd == qcomRTP && inexact)
    {
        return nextafterf(truncated, u64_f32_from_bits(0x7f7fffffu));
    }
    return truncated;
}
226