xref: /aosp_15_r20/external/OpenCL-CTS/test_conformance/conversions/fplib.cpp (revision 6467f958c7de8070b317fc65bcb0f6472e388d82)
1*6467f958SSadaf Ebrahimi //
2*6467f958SSadaf Ebrahimi // Copyright (c) 2017 The Khronos Group Inc.
3*6467f958SSadaf Ebrahimi //
4*6467f958SSadaf Ebrahimi // Licensed under the Apache License, Version 2.0 (the "License");
5*6467f958SSadaf Ebrahimi // you may not use this file except in compliance with the License.
6*6467f958SSadaf Ebrahimi // You may obtain a copy of the License at
7*6467f958SSadaf Ebrahimi //
8*6467f958SSadaf Ebrahimi //    http://www.apache.org/licenses/LICENSE-2.0
9*6467f958SSadaf Ebrahimi //
10*6467f958SSadaf Ebrahimi // Unless required by applicable law or agreed to in writing, software
11*6467f958SSadaf Ebrahimi // distributed under the License is distributed on an "AS IS" BASIS,
12*6467f958SSadaf Ebrahimi // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13*6467f958SSadaf Ebrahimi // See the License for the specific language governing permissions and
14*6467f958SSadaf Ebrahimi // limitations under the License.
15*6467f958SSadaf Ebrahimi //
16*6467f958SSadaf Ebrahimi #include <stdint.h>
17*6467f958SSadaf Ebrahimi #include <math.h>
18*6467f958SSadaf Ebrahimi #include "fplib.h"
19*6467f958SSadaf Ebrahimi 
// Fallback for the number of significand bits in a float (counting the
// implicit leading 1), used only when no header has defined it already.
#if !defined(FLT_MANT_DIG)
#define FLT_MANT_DIG    24
#endif
// Reinterpret the storage of the lvalue x as a float / int64_t.
// The argument must be an addressable object; it is parenthesized so that
// any lvalue expression can be passed safely.
// NOTE(review): this puns through a pointer cast, so it relies on the
// compiler tolerating the aliasing - confirm the build flags allow it.
#define as_float(x)     (*((float *)(&(x))))
#define as_long(x)      (*((int64_t *)(&(x))))
25*6467f958SSadaf Ebrahimi 
// Count the leading zero bits of a 64-bit value.
// Returns 0 when the most significant bit is set and 64 when value is 0.
static uint32_t clz(uint64_t value)
{
    const uint64_t msb = 0x8000000000000000ull;
    const uint32_t bit_width = (uint32_t)(sizeof(uint64_t) * 8);
    uint32_t count = 0;

    // Slide the value left one position at a time until its top bit is set.
    while (count < bit_width)
    {
        // volatile is retained from the original implementation
        // (presumably a guard against an over-eager optimizer - TODO confirm).
        volatile uint64_t top_bit = (value << count) & msb;
        if (top_bit != 0)
            return count;
        count++;
    }
    return count;
}
37*6467f958SSadaf Ebrahimi 
qcom_s64_2_f32(int64_t data,bool sat,roundingMode rnd)38*6467f958SSadaf Ebrahimi float qcom_s64_2_f32(int64_t data, bool sat, roundingMode rnd)
39*6467f958SSadaf Ebrahimi {
40*6467f958SSadaf Ebrahimi     switch (rnd) {
41*6467f958SSadaf Ebrahimi         case qcomRTZ: {
42*6467f958SSadaf Ebrahimi             int sign = 0;
43*6467f958SSadaf Ebrahimi             if (!data)
44*6467f958SSadaf Ebrahimi                 return 0.0f;
45*6467f958SSadaf Ebrahimi             if (data < 0){
46*6467f958SSadaf Ebrahimi                 data = - data;
47*6467f958SSadaf Ebrahimi                 sign = 1;
48*6467f958SSadaf Ebrahimi             }
49*6467f958SSadaf Ebrahimi             uint32_t    exponent   = (127 + 64 - clz(data) - 1) << (FLT_MANT_DIG - 1); //add 1 for the implied 1.0 in normalized fp32 numbers
50*6467f958SSadaf Ebrahimi             int         mantShift  = 40 - clz(data);
51*6467f958SSadaf Ebrahimi             uint32_t    mantissa;
52*6467f958SSadaf Ebrahimi             if (mantShift >= 0)
53*6467f958SSadaf Ebrahimi                 mantissa = (uint32_t)((uint64_t)data >> mantShift);
54*6467f958SSadaf Ebrahimi             else
55*6467f958SSadaf Ebrahimi                 mantissa = (uint32_t)((uint64_t)data << -mantShift);
56*6467f958SSadaf Ebrahimi             mantissa &= 0x7fffff;//mask off the leading 1
57*6467f958SSadaf Ebrahimi 
58*6467f958SSadaf Ebrahimi             uint32_t result = exponent | mantissa;
59*6467f958SSadaf Ebrahimi             if (sign)
60*6467f958SSadaf Ebrahimi                 result |= 0x80000000;
61*6467f958SSadaf Ebrahimi             return as_float(result);
62*6467f958SSadaf Ebrahimi             break;
63*6467f958SSadaf Ebrahimi         }
64*6467f958SSadaf Ebrahimi         case qcomRTE: return (float)(data); break;
65*6467f958SSadaf Ebrahimi         case qcomRTP: {
66*6467f958SSadaf Ebrahimi             int         sign    = 0;
67*6467f958SSadaf Ebrahimi             int         inExact = 0;
68*6467f958SSadaf Ebrahimi             uint32_t    f       = 0xdf000000;
69*6467f958SSadaf Ebrahimi             if (!data)
70*6467f958SSadaf Ebrahimi                 return 0.0f;
71*6467f958SSadaf Ebrahimi             if (data == 0x8000000000000000)
72*6467f958SSadaf Ebrahimi                 return as_float(f);
73*6467f958SSadaf Ebrahimi             if (data < 0){
74*6467f958SSadaf Ebrahimi                 data = - data;
75*6467f958SSadaf Ebrahimi                 sign = 1;
76*6467f958SSadaf Ebrahimi             }
77*6467f958SSadaf Ebrahimi             uint32_t    exponent    = (127 + 64 - clz(data) - 1) << (FLT_MANT_DIG - 1); //add 1 for the implied 1.0 in normalized fp32 numbers
78*6467f958SSadaf Ebrahimi             int         mantShift   = 40 - clz(data);
79*6467f958SSadaf Ebrahimi             uint32_t mantissa;
80*6467f958SSadaf Ebrahimi             if (mantShift >= 0){
81*6467f958SSadaf Ebrahimi                 uint64_t temp = (uint64_t)data >> mantShift;
82*6467f958SSadaf Ebrahimi                 if ((temp << mantShift) != data)
83*6467f958SSadaf Ebrahimi                     inExact = 1;
84*6467f958SSadaf Ebrahimi                 mantissa = (uint32_t)temp;
85*6467f958SSadaf Ebrahimi             }
86*6467f958SSadaf Ebrahimi             else
87*6467f958SSadaf Ebrahimi             {
88*6467f958SSadaf Ebrahimi                 mantissa = (uint32_t)((uint64_t)data << -mantShift);
89*6467f958SSadaf Ebrahimi             }
90*6467f958SSadaf Ebrahimi             mantissa &= 0x7fffff;//mask off the leading 1
91*6467f958SSadaf Ebrahimi 
92*6467f958SSadaf Ebrahimi             uint32_t result = exponent | mantissa;
93*6467f958SSadaf Ebrahimi             if (sign)
94*6467f958SSadaf Ebrahimi                 result |= 0x80000000;
95*6467f958SSadaf Ebrahimi             if (sign)
96*6467f958SSadaf Ebrahimi                 return as_float(result); // for negative inputs return rtz results
97*6467f958SSadaf Ebrahimi             else
98*6467f958SSadaf Ebrahimi             {
99*6467f958SSadaf Ebrahimi                 if(inExact)
100*6467f958SSadaf Ebrahimi                 { // for positive inputs return higher next fp
101*6467f958SSadaf Ebrahimi                     uint32_t high_float = 0x7f7fffff;
102*6467f958SSadaf Ebrahimi                     return nextafterf(as_float(result), as_float(high_float)); // could be simplified with some inc and carry operation
103*6467f958SSadaf Ebrahimi                 }
104*6467f958SSadaf Ebrahimi                 else
105*6467f958SSadaf Ebrahimi                     return as_float(result);
106*6467f958SSadaf Ebrahimi             }
107*6467f958SSadaf Ebrahimi         }
108*6467f958SSadaf Ebrahimi         break;
109*6467f958SSadaf Ebrahimi         case qcomRTN: {
110*6467f958SSadaf Ebrahimi             int sign = 0;
111*6467f958SSadaf Ebrahimi             int inExact = 0;
112*6467f958SSadaf Ebrahimi             uint32_t f = 0xdf000000;
113*6467f958SSadaf Ebrahimi             if (!data)
114*6467f958SSadaf Ebrahimi                 return 0.0f;
115*6467f958SSadaf Ebrahimi             if (data == 0x8000000000000000)
116*6467f958SSadaf Ebrahimi                 return as_float(f);
117*6467f958SSadaf Ebrahimi             if (data < 0){
118*6467f958SSadaf Ebrahimi                 data = - data;
119*6467f958SSadaf Ebrahimi                 sign = 1;
120*6467f958SSadaf Ebrahimi             }
121*6467f958SSadaf Ebrahimi             uint32_t    exponent    = (127 + 64 - clz(data) - 1) << (FLT_MANT_DIG - 1); //add 1 for the implied 1.0 in normalized fp32 numbers
122*6467f958SSadaf Ebrahimi             int         mantShift   = 40 - clz(data);
123*6467f958SSadaf Ebrahimi             uint32_t    mantissa;
124*6467f958SSadaf Ebrahimi             if (mantShift >= 0){
125*6467f958SSadaf Ebrahimi                 uint64_t temp = (uint64_t)data >> mantShift;
126*6467f958SSadaf Ebrahimi                 if (temp << mantShift != data)
127*6467f958SSadaf Ebrahimi                     inExact = 1;
128*6467f958SSadaf Ebrahimi                 mantissa = (uint32_t)temp;
129*6467f958SSadaf Ebrahimi             }
130*6467f958SSadaf Ebrahimi             else
131*6467f958SSadaf Ebrahimi                 mantissa = (uint32_t)((uint64_t)data << -mantShift);
132*6467f958SSadaf Ebrahimi             mantissa &= 0x7fffff;//mask off the leading 1
133*6467f958SSadaf Ebrahimi 
134*6467f958SSadaf Ebrahimi             uint32_t result = exponent | mantissa;
135*6467f958SSadaf Ebrahimi             if (sign)
136*6467f958SSadaf Ebrahimi                 result |= 0x80000000;
137*6467f958SSadaf Ebrahimi             if (!sign)
138*6467f958SSadaf Ebrahimi                 return as_float(result); // for positive inputs return RTZ result
139*6467f958SSadaf Ebrahimi             else{
140*6467f958SSadaf Ebrahimi                 if(inExact){ // for negative inputs find the lower next fp number
141*6467f958SSadaf Ebrahimi                     uint32_t low_float = 0xff7fffff;
142*6467f958SSadaf Ebrahimi                     return nextafterf(as_float(result), as_float(low_float)); // could be simplified with some inc and carry operation
143*6467f958SSadaf Ebrahimi                 }
144*6467f958SSadaf Ebrahimi                 else
145*6467f958SSadaf Ebrahimi                     return as_float(result);
146*6467f958SSadaf Ebrahimi             }
147*6467f958SSadaf Ebrahimi         }
148*6467f958SSadaf Ebrahimi         case qcomRoundingModeCount: {
149*6467f958SSadaf Ebrahimi             break; // Avoid build error for unhandled enum value
150*6467f958SSadaf Ebrahimi         }
151*6467f958SSadaf Ebrahimi     }
152*6467f958SSadaf Ebrahimi     return 0.0f;
153*6467f958SSadaf Ebrahimi }
154*6467f958SSadaf Ebrahimi 
qcom_u64_2_f32(uint64_t data,bool sat,roundingMode rnd)155*6467f958SSadaf Ebrahimi float qcom_u64_2_f32(uint64_t data, bool sat, roundingMode rnd)
156*6467f958SSadaf Ebrahimi {
157*6467f958SSadaf Ebrahimi     switch (rnd) {
158*6467f958SSadaf Ebrahimi         case qcomRTZ: {
159*6467f958SSadaf Ebrahimi             if (!data)
160*6467f958SSadaf Ebrahimi                 return 0.0f;
161*6467f958SSadaf Ebrahimi             uint32_t    exponent    = (127 + 64 - clz(data) - 1) << (FLT_MANT_DIG - 1); //add 1 for the implied 1.0 in normalized fp32 numbers
162*6467f958SSadaf Ebrahimi             int         mantShift   = 40 - clz(data);
163*6467f958SSadaf Ebrahimi             uint32_t    mantissa;
164*6467f958SSadaf Ebrahimi             if (mantShift >= 0)
165*6467f958SSadaf Ebrahimi                 mantissa = (uint32_t)(data >> mantShift);
166*6467f958SSadaf Ebrahimi             else
167*6467f958SSadaf Ebrahimi                 mantissa = (uint32_t)(data << -mantShift);
168*6467f958SSadaf Ebrahimi             mantissa &= 0x7fffff;//mask off the leading 1
169*6467f958SSadaf Ebrahimi 
170*6467f958SSadaf Ebrahimi             uint32_t result = exponent | mantissa;
171*6467f958SSadaf Ebrahimi             return as_float(result);
172*6467f958SSadaf Ebrahimi             break;
173*6467f958SSadaf Ebrahimi         }
174*6467f958SSadaf Ebrahimi         case qcomRTE: return (float)(data); break;
175*6467f958SSadaf Ebrahimi         case qcomRTP: {
176*6467f958SSadaf Ebrahimi             int inExact = 0;
177*6467f958SSadaf Ebrahimi             if (!data)
178*6467f958SSadaf Ebrahimi                 return 0.0f;
179*6467f958SSadaf Ebrahimi             uint32_t    exponent    = (127 + 64 - clz(data) - 1) << (FLT_MANT_DIG - 1); //add 1 for the implied 1.0 in normalized fp32 numbers
180*6467f958SSadaf Ebrahimi             int         mantShift   = 40 - clz(data);
181*6467f958SSadaf Ebrahimi             uint32_t    mantissa;
182*6467f958SSadaf Ebrahimi             if (mantShift >= 0){
183*6467f958SSadaf Ebrahimi                 uint64_t temp = data >> mantShift;
184*6467f958SSadaf Ebrahimi                 if (temp << mantShift != data)
185*6467f958SSadaf Ebrahimi                     inExact = 1;
186*6467f958SSadaf Ebrahimi                 mantissa = (uint32_t)temp;
187*6467f958SSadaf Ebrahimi             }
188*6467f958SSadaf Ebrahimi             else
189*6467f958SSadaf Ebrahimi                 mantissa = (uint32_t)(data << -mantShift);
190*6467f958SSadaf Ebrahimi             mantissa &= 0x7fffff;//mask off the leading 1
191*6467f958SSadaf Ebrahimi 
192*6467f958SSadaf Ebrahimi             uint32_t result = exponent | mantissa;
193*6467f958SSadaf Ebrahimi             if(inExact){ // for positive inputs return higher next fp
194*6467f958SSadaf Ebrahimi                 uint32_t high_float = 0x7f7fffff;
195*6467f958SSadaf Ebrahimi                 return nextafterf(as_float(result), as_float(high_float)); // could be simplified with some inc and carry operation
196*6467f958SSadaf Ebrahimi             }
197*6467f958SSadaf Ebrahimi             else
198*6467f958SSadaf Ebrahimi                 return as_float(result);
199*6467f958SSadaf Ebrahimi         }
200*6467f958SSadaf Ebrahimi         case qcomRTN: {
201*6467f958SSadaf Ebrahimi             int inExact = 0;
202*6467f958SSadaf Ebrahimi             if (!data)
203*6467f958SSadaf Ebrahimi                 return 0.0f;
204*6467f958SSadaf Ebrahimi             uint32_t  exponent    = (127 + 64 - clz(data) - 1) << (FLT_MANT_DIG - 1); //add 1 for the implied 1.0 in normalized fp32 numbers
205*6467f958SSadaf Ebrahimi             int       mantShift   = 40 - clz(data);
206*6467f958SSadaf Ebrahimi             uint32_t  mantissa;
207*6467f958SSadaf Ebrahimi             if (mantShift >= 0){
208*6467f958SSadaf Ebrahimi                 uint64_t temp = (uint64_t)data >> mantShift;
209*6467f958SSadaf Ebrahimi                 if (temp << mantShift != data)
210*6467f958SSadaf Ebrahimi                     inExact = 1;
211*6467f958SSadaf Ebrahimi                 mantissa = (uint32_t)temp;
212*6467f958SSadaf Ebrahimi             }
213*6467f958SSadaf Ebrahimi             else
214*6467f958SSadaf Ebrahimi                 mantissa = (uint32_t)((uint64_t)data << -mantShift);
215*6467f958SSadaf Ebrahimi             mantissa &= 0x7fffff;//mask off the leading 1
216*6467f958SSadaf Ebrahimi 
217*6467f958SSadaf Ebrahimi             uint32_t result = exponent | mantissa;
218*6467f958SSadaf Ebrahimi             return as_float(result); // for positive inputs return RTZ result
219*6467f958SSadaf Ebrahimi         }
220*6467f958SSadaf Ebrahimi         case qcomRoundingModeCount: {
221*6467f958SSadaf Ebrahimi             break; // Avoid build error for unhandled enum value
222*6467f958SSadaf Ebrahimi         }
223*6467f958SSadaf Ebrahimi     }
224*6467f958SSadaf Ebrahimi     return 0.0f;
225*6467f958SSadaf Ebrahimi }
226