xref: /aosp_15_r20/external/vixl/src/utils-vixl.cc (revision f5c631da2f1efdd72b5fd1e20510e4042af13d77)
1*f5c631daSSadaf Ebrahimi // Copyright 2015, VIXL authors
2*f5c631daSSadaf Ebrahimi // All rights reserved.
3*f5c631daSSadaf Ebrahimi //
4*f5c631daSSadaf Ebrahimi // Redistribution and use in source and binary forms, with or without
5*f5c631daSSadaf Ebrahimi // modification, are permitted provided that the following conditions are met:
6*f5c631daSSadaf Ebrahimi //
7*f5c631daSSadaf Ebrahimi //   * Redistributions of source code must retain the above copyright notice,
8*f5c631daSSadaf Ebrahimi //     this list of conditions and the following disclaimer.
9*f5c631daSSadaf Ebrahimi //   * Redistributions in binary form must reproduce the above copyright notice,
10*f5c631daSSadaf Ebrahimi //     this list of conditions and the following disclaimer in the documentation
11*f5c631daSSadaf Ebrahimi //     and/or other materials provided with the distribution.
12*f5c631daSSadaf Ebrahimi //   * Neither the name of ARM Limited nor the names of its contributors may be
13*f5c631daSSadaf Ebrahimi //     used to endorse or promote products derived from this software without
14*f5c631daSSadaf Ebrahimi //     specific prior written permission.
15*f5c631daSSadaf Ebrahimi //
16*f5c631daSSadaf Ebrahimi // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17*f5c631daSSadaf Ebrahimi // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18*f5c631daSSadaf Ebrahimi // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19*f5c631daSSadaf Ebrahimi // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20*f5c631daSSadaf Ebrahimi // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21*f5c631daSSadaf Ebrahimi // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22*f5c631daSSadaf Ebrahimi // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23*f5c631daSSadaf Ebrahimi // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24*f5c631daSSadaf Ebrahimi // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25*f5c631daSSadaf Ebrahimi // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26*f5c631daSSadaf Ebrahimi 
27*f5c631daSSadaf Ebrahimi #include <cstdio>
28*f5c631daSSadaf Ebrahimi 
29*f5c631daSSadaf Ebrahimi #include "utils-vixl.h"
30*f5c631daSSadaf Ebrahimi 
31*f5c631daSSadaf Ebrahimi namespace vixl {
32*f5c631daSSadaf Ebrahimi 
33*f5c631daSSadaf Ebrahimi // The default NaN values (for FPCR.DN=1).
34*f5c631daSSadaf Ebrahimi const double kFP64DefaultNaN = RawbitsToDouble(UINT64_C(0x7ff8000000000000));
35*f5c631daSSadaf Ebrahimi const float kFP32DefaultNaN = RawbitsToFloat(0x7fc00000);
36*f5c631daSSadaf Ebrahimi const Float16 kFP16DefaultNaN = RawbitsToFloat16(0x7e00);
37*f5c631daSSadaf Ebrahimi 
38*f5c631daSSadaf Ebrahimi // Floating-point zero values.
39*f5c631daSSadaf Ebrahimi const Float16 kFP16PositiveZero = RawbitsToFloat16(0x0);
40*f5c631daSSadaf Ebrahimi const Float16 kFP16NegativeZero = RawbitsToFloat16(0x8000);
41*f5c631daSSadaf Ebrahimi 
42*f5c631daSSadaf Ebrahimi // Floating-point infinity values.
43*f5c631daSSadaf Ebrahimi const Float16 kFP16PositiveInfinity = RawbitsToFloat16(0x7c00);
44*f5c631daSSadaf Ebrahimi const Float16 kFP16NegativeInfinity = RawbitsToFloat16(0xfc00);
45*f5c631daSSadaf Ebrahimi const float kFP32PositiveInfinity = RawbitsToFloat(0x7f800000);
46*f5c631daSSadaf Ebrahimi const float kFP32NegativeInfinity = RawbitsToFloat(0xff800000);
47*f5c631daSSadaf Ebrahimi const double kFP64PositiveInfinity =
48*f5c631daSSadaf Ebrahimi     RawbitsToDouble(UINT64_C(0x7ff0000000000000));
49*f5c631daSSadaf Ebrahimi const double kFP64NegativeInfinity =
50*f5c631daSSadaf Ebrahimi     RawbitsToDouble(UINT64_C(0xfff0000000000000));
51*f5c631daSSadaf Ebrahimi 
IsZero(Float16 value)52*f5c631daSSadaf Ebrahimi bool IsZero(Float16 value) {
53*f5c631daSSadaf Ebrahimi   uint16_t bits = Float16ToRawbits(value);
54*f5c631daSSadaf Ebrahimi   return (bits == Float16ToRawbits(kFP16PositiveZero) ||
55*f5c631daSSadaf Ebrahimi           bits == Float16ToRawbits(kFP16NegativeZero));
56*f5c631daSSadaf Ebrahimi }
57*f5c631daSSadaf Ebrahimi 
Float16ToRawbits(Float16 value)58*f5c631daSSadaf Ebrahimi uint16_t Float16ToRawbits(Float16 value) { return value.rawbits_; }
59*f5c631daSSadaf Ebrahimi 
// Returns the object representation of `value` as a 32-bit integer.
// The bytes are copied with memcpy so that no aliasing rules are violated.
uint32_t FloatToRawbits(float value) {
  uint32_t bits = 0;
  // The copy length is derived from the destination; refuse to build if the
  // source type does not have the same size.
  static_assert(sizeof(bits) == sizeof(value),
                "float must be exactly 32 bits wide");
  memcpy(&bits, &value, sizeof(bits));
  return bits;
}
65*f5c631daSSadaf Ebrahimi 
66*f5c631daSSadaf Ebrahimi 
// Returns the object representation of `value` as a 64-bit integer.
// The bytes are copied with memcpy so that no aliasing rules are violated.
uint64_t DoubleToRawbits(double value) {
  uint64_t bits = 0;
  // The copy length is derived from the destination; refuse to build if the
  // source type does not have the same size.
  static_assert(sizeof(bits) == sizeof(value),
                "double must be exactly 64 bits wide");
  memcpy(&bits, &value, sizeof(bits));
  return bits;
}
72*f5c631daSSadaf Ebrahimi 
73*f5c631daSSadaf Ebrahimi 
RawbitsToFloat16(uint16_t bits)74*f5c631daSSadaf Ebrahimi Float16 RawbitsToFloat16(uint16_t bits) {
75*f5c631daSSadaf Ebrahimi   Float16 f;
76*f5c631daSSadaf Ebrahimi   f.rawbits_ = bits;
77*f5c631daSSadaf Ebrahimi   return f;
78*f5c631daSSadaf Ebrahimi }
79*f5c631daSSadaf Ebrahimi 
80*f5c631daSSadaf Ebrahimi 
// Returns the float whose object representation is `bits`.
// The bytes are copied with memcpy so that no aliasing rules are violated.
float RawbitsToFloat(uint32_t bits) {
  float value = 0.0;
  // The copy length is derived from the destination; refuse to build if the
  // source type does not have the same size.
  static_assert(sizeof(value) == sizeof(bits),
                "float must be exactly 32 bits wide");
  memcpy(&value, &bits, sizeof(value));
  return value;
}
86*f5c631daSSadaf Ebrahimi 
87*f5c631daSSadaf Ebrahimi 
// Returns the double whose object representation is `bits`.
// The bytes are copied with memcpy so that no aliasing rules are violated.
double RawbitsToDouble(uint64_t bits) {
  double value = 0.0;
  // The copy length is derived from the destination; refuse to build if the
  // source type does not have the same size.
  static_assert(sizeof(value) == sizeof(bits),
                "double must be exactly 64 bits wide");
  memcpy(&value, &bits, sizeof(value));
  return value;
}
93*f5c631daSSadaf Ebrahimi 
94*f5c631daSSadaf Ebrahimi 
Float16Sign(internal::SimFloat16 val)95*f5c631daSSadaf Ebrahimi uint32_t Float16Sign(internal::SimFloat16 val) {
96*f5c631daSSadaf Ebrahimi   uint16_t rawbits = Float16ToRawbits(val);
97*f5c631daSSadaf Ebrahimi   return ExtractUnsignedBitfield32(15, 15, rawbits);
98*f5c631daSSadaf Ebrahimi }
99*f5c631daSSadaf Ebrahimi 
100*f5c631daSSadaf Ebrahimi 
Float16Exp(internal::SimFloat16 val)101*f5c631daSSadaf Ebrahimi uint32_t Float16Exp(internal::SimFloat16 val) {
102*f5c631daSSadaf Ebrahimi   uint16_t rawbits = Float16ToRawbits(val);
103*f5c631daSSadaf Ebrahimi   return ExtractUnsignedBitfield32(14, 10, rawbits);
104*f5c631daSSadaf Ebrahimi }
105*f5c631daSSadaf Ebrahimi 
Float16Mantissa(internal::SimFloat16 val)106*f5c631daSSadaf Ebrahimi uint32_t Float16Mantissa(internal::SimFloat16 val) {
107*f5c631daSSadaf Ebrahimi   uint16_t rawbits = Float16ToRawbits(val);
108*f5c631daSSadaf Ebrahimi   return ExtractUnsignedBitfield32(9, 0, rawbits);
109*f5c631daSSadaf Ebrahimi }
110*f5c631daSSadaf Ebrahimi 
111*f5c631daSSadaf Ebrahimi 
FloatSign(float val)112*f5c631daSSadaf Ebrahimi uint32_t FloatSign(float val) {
113*f5c631daSSadaf Ebrahimi   uint32_t rawbits = FloatToRawbits(val);
114*f5c631daSSadaf Ebrahimi   return ExtractUnsignedBitfield32(31, 31, rawbits);
115*f5c631daSSadaf Ebrahimi }
116*f5c631daSSadaf Ebrahimi 
117*f5c631daSSadaf Ebrahimi 
FloatExp(float val)118*f5c631daSSadaf Ebrahimi uint32_t FloatExp(float val) {
119*f5c631daSSadaf Ebrahimi   uint32_t rawbits = FloatToRawbits(val);
120*f5c631daSSadaf Ebrahimi   return ExtractUnsignedBitfield32(30, 23, rawbits);
121*f5c631daSSadaf Ebrahimi }
122*f5c631daSSadaf Ebrahimi 
123*f5c631daSSadaf Ebrahimi 
FloatMantissa(float val)124*f5c631daSSadaf Ebrahimi uint32_t FloatMantissa(float val) {
125*f5c631daSSadaf Ebrahimi   uint32_t rawbits = FloatToRawbits(val);
126*f5c631daSSadaf Ebrahimi   return ExtractUnsignedBitfield32(22, 0, rawbits);
127*f5c631daSSadaf Ebrahimi }
128*f5c631daSSadaf Ebrahimi 
129*f5c631daSSadaf Ebrahimi 
DoubleSign(double val)130*f5c631daSSadaf Ebrahimi uint32_t DoubleSign(double val) {
131*f5c631daSSadaf Ebrahimi   uint64_t rawbits = DoubleToRawbits(val);
132*f5c631daSSadaf Ebrahimi   return static_cast<uint32_t>(ExtractUnsignedBitfield64(63, 63, rawbits));
133*f5c631daSSadaf Ebrahimi }
134*f5c631daSSadaf Ebrahimi 
135*f5c631daSSadaf Ebrahimi 
DoubleExp(double val)136*f5c631daSSadaf Ebrahimi uint32_t DoubleExp(double val) {
137*f5c631daSSadaf Ebrahimi   uint64_t rawbits = DoubleToRawbits(val);
138*f5c631daSSadaf Ebrahimi   return static_cast<uint32_t>(ExtractUnsignedBitfield64(62, 52, rawbits));
139*f5c631daSSadaf Ebrahimi }
140*f5c631daSSadaf Ebrahimi 
141*f5c631daSSadaf Ebrahimi 
DoubleMantissa(double val)142*f5c631daSSadaf Ebrahimi uint64_t DoubleMantissa(double val) {
143*f5c631daSSadaf Ebrahimi   uint64_t rawbits = DoubleToRawbits(val);
144*f5c631daSSadaf Ebrahimi   return ExtractUnsignedBitfield64(51, 0, rawbits);
145*f5c631daSSadaf Ebrahimi }
146*f5c631daSSadaf Ebrahimi 
147*f5c631daSSadaf Ebrahimi 
Float16Pack(uint16_t sign,uint16_t exp,uint16_t mantissa)148*f5c631daSSadaf Ebrahimi internal::SimFloat16 Float16Pack(uint16_t sign,
149*f5c631daSSadaf Ebrahimi                                  uint16_t exp,
150*f5c631daSSadaf Ebrahimi                                  uint16_t mantissa) {
151*f5c631daSSadaf Ebrahimi   uint16_t bits = (sign << 15) | (exp << 10) | mantissa;
152*f5c631daSSadaf Ebrahimi   return RawbitsToFloat16(bits);
153*f5c631daSSadaf Ebrahimi }
154*f5c631daSSadaf Ebrahimi 
155*f5c631daSSadaf Ebrahimi 
FloatPack(uint32_t sign,uint32_t exp,uint32_t mantissa)156*f5c631daSSadaf Ebrahimi float FloatPack(uint32_t sign, uint32_t exp, uint32_t mantissa) {
157*f5c631daSSadaf Ebrahimi   uint32_t bits = (sign << 31) | (exp << 23) | mantissa;
158*f5c631daSSadaf Ebrahimi   return RawbitsToFloat(bits);
159*f5c631daSSadaf Ebrahimi }
160*f5c631daSSadaf Ebrahimi 
161*f5c631daSSadaf Ebrahimi 
DoublePack(uint64_t sign,uint64_t exp,uint64_t mantissa)162*f5c631daSSadaf Ebrahimi double DoublePack(uint64_t sign, uint64_t exp, uint64_t mantissa) {
163*f5c631daSSadaf Ebrahimi   uint64_t bits = (sign << 63) | (exp << 52) | mantissa;
164*f5c631daSSadaf Ebrahimi   return RawbitsToDouble(bits);
165*f5c631daSSadaf Ebrahimi }
166*f5c631daSSadaf Ebrahimi 
167*f5c631daSSadaf Ebrahimi 
Float16Classify(Float16 value)168*f5c631daSSadaf Ebrahimi int Float16Classify(Float16 value) {
169*f5c631daSSadaf Ebrahimi   uint16_t bits = Float16ToRawbits(value);
170*f5c631daSSadaf Ebrahimi   uint16_t exponent_max = (1 << 5) - 1;
171*f5c631daSSadaf Ebrahimi   uint16_t exponent_mask = exponent_max << 10;
172*f5c631daSSadaf Ebrahimi   uint16_t mantissa_mask = (1 << 10) - 1;
173*f5c631daSSadaf Ebrahimi 
174*f5c631daSSadaf Ebrahimi   uint16_t exponent = (bits & exponent_mask) >> 10;
175*f5c631daSSadaf Ebrahimi   uint16_t mantissa = bits & mantissa_mask;
176*f5c631daSSadaf Ebrahimi   if (exponent == 0) {
177*f5c631daSSadaf Ebrahimi     if (mantissa == 0) {
178*f5c631daSSadaf Ebrahimi       return FP_ZERO;
179*f5c631daSSadaf Ebrahimi     }
180*f5c631daSSadaf Ebrahimi     return FP_SUBNORMAL;
181*f5c631daSSadaf Ebrahimi   } else if (exponent == exponent_max) {
182*f5c631daSSadaf Ebrahimi     if (mantissa == 0) {
183*f5c631daSSadaf Ebrahimi       return FP_INFINITE;
184*f5c631daSSadaf Ebrahimi     }
185*f5c631daSSadaf Ebrahimi     return FP_NAN;
186*f5c631daSSadaf Ebrahimi   }
187*f5c631daSSadaf Ebrahimi   return FP_NORMAL;
188*f5c631daSSadaf Ebrahimi }
189*f5c631daSSadaf Ebrahimi 
190*f5c631daSSadaf Ebrahimi 
CountClearHalfWords(uint64_t imm,unsigned reg_size)191*f5c631daSSadaf Ebrahimi unsigned CountClearHalfWords(uint64_t imm, unsigned reg_size) {
192*f5c631daSSadaf Ebrahimi   VIXL_ASSERT((reg_size % 8) == 0);
193*f5c631daSSadaf Ebrahimi   int count = 0;
194*f5c631daSSadaf Ebrahimi   for (unsigned i = 0; i < (reg_size / 16); i++) {
195*f5c631daSSadaf Ebrahimi     if ((imm & 0xffff) == 0) {
196*f5c631daSSadaf Ebrahimi       count++;
197*f5c631daSSadaf Ebrahimi     }
198*f5c631daSSadaf Ebrahimi     imm >>= 16;
199*f5c631daSSadaf Ebrahimi   }
200*f5c631daSSadaf Ebrahimi   return count;
201*f5c631daSSadaf Ebrahimi }
202*f5c631daSSadaf Ebrahimi 
203*f5c631daSSadaf Ebrahimi 
BitCount(uint64_t value)204*f5c631daSSadaf Ebrahimi int BitCount(uint64_t value) { return CountSetBits(value); }
205*f5c631daSSadaf Ebrahimi 
206*f5c631daSSadaf Ebrahimi // Float16 definitions.
207*f5c631daSSadaf Ebrahimi 
Float16(double dvalue)208*f5c631daSSadaf Ebrahimi Float16::Float16(double dvalue) {
209*f5c631daSSadaf Ebrahimi   rawbits_ =
210*f5c631daSSadaf Ebrahimi       Float16ToRawbits(FPToFloat16(dvalue, FPTieEven, kIgnoreDefaultNaN));
211*f5c631daSSadaf Ebrahimi }
212*f5c631daSSadaf Ebrahimi 
213*f5c631daSSadaf Ebrahimi namespace internal {
214*f5c631daSSadaf Ebrahimi 
operator -() const215*f5c631daSSadaf Ebrahimi SimFloat16 SimFloat16::operator-() const {
216*f5c631daSSadaf Ebrahimi   return RawbitsToFloat16(rawbits_ ^ 0x8000);
217*f5c631daSSadaf Ebrahimi }
218*f5c631daSSadaf Ebrahimi 
219*f5c631daSSadaf Ebrahimi // SimFloat16 definitions.
operator +(SimFloat16 rhs) const220*f5c631daSSadaf Ebrahimi SimFloat16 SimFloat16::operator+(SimFloat16 rhs) const {
221*f5c631daSSadaf Ebrahimi   return static_cast<double>(*this) + static_cast<double>(rhs);
222*f5c631daSSadaf Ebrahimi }
223*f5c631daSSadaf Ebrahimi 
operator -(SimFloat16 rhs) const224*f5c631daSSadaf Ebrahimi SimFloat16 SimFloat16::operator-(SimFloat16 rhs) const {
225*f5c631daSSadaf Ebrahimi   return static_cast<double>(*this) - static_cast<double>(rhs);
226*f5c631daSSadaf Ebrahimi }
227*f5c631daSSadaf Ebrahimi 
operator *(SimFloat16 rhs) const228*f5c631daSSadaf Ebrahimi SimFloat16 SimFloat16::operator*(SimFloat16 rhs) const {
229*f5c631daSSadaf Ebrahimi   return static_cast<double>(*this) * static_cast<double>(rhs);
230*f5c631daSSadaf Ebrahimi }
231*f5c631daSSadaf Ebrahimi 
operator /(SimFloat16 rhs) const232*f5c631daSSadaf Ebrahimi SimFloat16 SimFloat16::operator/(SimFloat16 rhs) const {
233*f5c631daSSadaf Ebrahimi   return static_cast<double>(*this) / static_cast<double>(rhs);
234*f5c631daSSadaf Ebrahimi }
235*f5c631daSSadaf Ebrahimi 
operator <(SimFloat16 rhs) const236*f5c631daSSadaf Ebrahimi bool SimFloat16::operator<(SimFloat16 rhs) const {
237*f5c631daSSadaf Ebrahimi   return static_cast<double>(*this) < static_cast<double>(rhs);
238*f5c631daSSadaf Ebrahimi }
239*f5c631daSSadaf Ebrahimi 
operator >(SimFloat16 rhs) const240*f5c631daSSadaf Ebrahimi bool SimFloat16::operator>(SimFloat16 rhs) const {
241*f5c631daSSadaf Ebrahimi   return static_cast<double>(*this) > static_cast<double>(rhs);
242*f5c631daSSadaf Ebrahimi }
243*f5c631daSSadaf Ebrahimi 
operator ==(SimFloat16 rhs) const244*f5c631daSSadaf Ebrahimi bool SimFloat16::operator==(SimFloat16 rhs) const {
245*f5c631daSSadaf Ebrahimi   if (IsNaN(*this) || IsNaN(rhs)) {
246*f5c631daSSadaf Ebrahimi     return false;
247*f5c631daSSadaf Ebrahimi   } else if (IsZero(rhs) && IsZero(*this)) {
248*f5c631daSSadaf Ebrahimi     // +0 and -0 should be treated as equal.
249*f5c631daSSadaf Ebrahimi     return true;
250*f5c631daSSadaf Ebrahimi   }
251*f5c631daSSadaf Ebrahimi   return this->rawbits_ == rhs.rawbits_;
252*f5c631daSSadaf Ebrahimi }
253*f5c631daSSadaf Ebrahimi 
operator !=(SimFloat16 rhs) const254*f5c631daSSadaf Ebrahimi bool SimFloat16::operator!=(SimFloat16 rhs) const { return !(*this == rhs); }
255*f5c631daSSadaf Ebrahimi 
operator ==(double rhs) const256*f5c631daSSadaf Ebrahimi bool SimFloat16::operator==(double rhs) const {
257*f5c631daSSadaf Ebrahimi   return static_cast<double>(*this) == static_cast<double>(rhs);
258*f5c631daSSadaf Ebrahimi }
259*f5c631daSSadaf Ebrahimi 
operator double() const260*f5c631daSSadaf Ebrahimi SimFloat16::operator double() const {
261*f5c631daSSadaf Ebrahimi   return FPToDouble(*this, kIgnoreDefaultNaN);
262*f5c631daSSadaf Ebrahimi }
263*f5c631daSSadaf Ebrahimi 
BitCount(Uint32 value)264*f5c631daSSadaf Ebrahimi Int64 BitCount(Uint32 value) { return CountSetBits(value.Get()); }
265*f5c631daSSadaf Ebrahimi 
266*f5c631daSSadaf Ebrahimi }  // namespace internal
267*f5c631daSSadaf Ebrahimi 
FPToFloat(Float16 value,UseDefaultNaN DN,bool * exception)268*f5c631daSSadaf Ebrahimi float FPToFloat(Float16 value, UseDefaultNaN DN, bool* exception) {
269*f5c631daSSadaf Ebrahimi   uint16_t bits = Float16ToRawbits(value);
270*f5c631daSSadaf Ebrahimi   uint32_t sign = bits >> 15;
271*f5c631daSSadaf Ebrahimi   uint32_t exponent =
272*f5c631daSSadaf Ebrahimi       ExtractUnsignedBitfield32(kFloat16MantissaBits + kFloat16ExponentBits - 1,
273*f5c631daSSadaf Ebrahimi                                 kFloat16MantissaBits,
274*f5c631daSSadaf Ebrahimi                                 bits);
275*f5c631daSSadaf Ebrahimi   uint32_t mantissa =
276*f5c631daSSadaf Ebrahimi       ExtractUnsignedBitfield32(kFloat16MantissaBits - 1, 0, bits);
277*f5c631daSSadaf Ebrahimi 
278*f5c631daSSadaf Ebrahimi   switch (Float16Classify(value)) {
279*f5c631daSSadaf Ebrahimi     case FP_ZERO:
280*f5c631daSSadaf Ebrahimi       return (sign == 0) ? 0.0f : -0.0f;
281*f5c631daSSadaf Ebrahimi 
282*f5c631daSSadaf Ebrahimi     case FP_INFINITE:
283*f5c631daSSadaf Ebrahimi       return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity;
284*f5c631daSSadaf Ebrahimi 
285*f5c631daSSadaf Ebrahimi     case FP_SUBNORMAL: {
286*f5c631daSSadaf Ebrahimi       // Calculate shift required to put mantissa into the most-significant bits
287*f5c631daSSadaf Ebrahimi       // of the destination mantissa.
288*f5c631daSSadaf Ebrahimi       int shift = CountLeadingZeros(mantissa << (32 - 10));
289*f5c631daSSadaf Ebrahimi 
290*f5c631daSSadaf Ebrahimi       // Shift mantissa and discard implicit '1'.
291*f5c631daSSadaf Ebrahimi       mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1;
292*f5c631daSSadaf Ebrahimi       mantissa &= (1 << kFloatMantissaBits) - 1;
293*f5c631daSSadaf Ebrahimi 
294*f5c631daSSadaf Ebrahimi       // Adjust the exponent for the shift applied, and rebias.
295*f5c631daSSadaf Ebrahimi       exponent = exponent - shift + (-15 + 127);
296*f5c631daSSadaf Ebrahimi       break;
297*f5c631daSSadaf Ebrahimi     }
298*f5c631daSSadaf Ebrahimi 
299*f5c631daSSadaf Ebrahimi     case FP_NAN:
300*f5c631daSSadaf Ebrahimi       if (IsSignallingNaN(value)) {
301*f5c631daSSadaf Ebrahimi         if (exception != NULL) {
302*f5c631daSSadaf Ebrahimi           *exception = true;
303*f5c631daSSadaf Ebrahimi         }
304*f5c631daSSadaf Ebrahimi       }
305*f5c631daSSadaf Ebrahimi       if (DN == kUseDefaultNaN) return kFP32DefaultNaN;
306*f5c631daSSadaf Ebrahimi 
307*f5c631daSSadaf Ebrahimi       // Convert NaNs as the processor would:
308*f5c631daSSadaf Ebrahimi       //  - The sign is propagated.
309*f5c631daSSadaf Ebrahimi       //  - The payload (mantissa) is transferred entirely, except that the top
310*f5c631daSSadaf Ebrahimi       //    bit is forced to '1', making the result a quiet NaN. The unused
311*f5c631daSSadaf Ebrahimi       //    (low-order) payload bits are set to 0.
312*f5c631daSSadaf Ebrahimi       exponent = (1 << kFloatExponentBits) - 1;
313*f5c631daSSadaf Ebrahimi 
314*f5c631daSSadaf Ebrahimi       // Increase bits in mantissa, making low-order bits 0.
315*f5c631daSSadaf Ebrahimi       mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
316*f5c631daSSadaf Ebrahimi       mantissa |= 1 << 22;  // Force a quiet NaN.
317*f5c631daSSadaf Ebrahimi       break;
318*f5c631daSSadaf Ebrahimi 
319*f5c631daSSadaf Ebrahimi     case FP_NORMAL:
320*f5c631daSSadaf Ebrahimi       // Increase bits in mantissa, making low-order bits 0.
321*f5c631daSSadaf Ebrahimi       mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
322*f5c631daSSadaf Ebrahimi 
323*f5c631daSSadaf Ebrahimi       // Change exponent bias.
324*f5c631daSSadaf Ebrahimi       exponent += (-15 + 127);
325*f5c631daSSadaf Ebrahimi       break;
326*f5c631daSSadaf Ebrahimi 
327*f5c631daSSadaf Ebrahimi     default:
328*f5c631daSSadaf Ebrahimi       VIXL_UNREACHABLE();
329*f5c631daSSadaf Ebrahimi   }
330*f5c631daSSadaf Ebrahimi   return RawbitsToFloat((sign << 31) | (exponent << kFloatMantissaBits) |
331*f5c631daSSadaf Ebrahimi                         mantissa);
332*f5c631daSSadaf Ebrahimi }
333*f5c631daSSadaf Ebrahimi 
334*f5c631daSSadaf Ebrahimi 
FPToFloat(double value,FPRounding round_mode,UseDefaultNaN DN,bool * exception)335*f5c631daSSadaf Ebrahimi float FPToFloat(double value,
336*f5c631daSSadaf Ebrahimi                 FPRounding round_mode,
337*f5c631daSSadaf Ebrahimi                 UseDefaultNaN DN,
338*f5c631daSSadaf Ebrahimi                 bool* exception) {
339*f5c631daSSadaf Ebrahimi   // Only the FPTieEven rounding mode is implemented.
340*f5c631daSSadaf Ebrahimi   VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
341*f5c631daSSadaf Ebrahimi   USE(round_mode);
342*f5c631daSSadaf Ebrahimi 
343*f5c631daSSadaf Ebrahimi   switch (std::fpclassify(value)) {
344*f5c631daSSadaf Ebrahimi     case FP_NAN: {
345*f5c631daSSadaf Ebrahimi       if (IsSignallingNaN(value)) {
346*f5c631daSSadaf Ebrahimi         if (exception != NULL) {
347*f5c631daSSadaf Ebrahimi           *exception = true;
348*f5c631daSSadaf Ebrahimi         }
349*f5c631daSSadaf Ebrahimi       }
350*f5c631daSSadaf Ebrahimi       if (DN == kUseDefaultNaN) return kFP32DefaultNaN;
351*f5c631daSSadaf Ebrahimi 
352*f5c631daSSadaf Ebrahimi       // Convert NaNs as the processor would:
353*f5c631daSSadaf Ebrahimi       //  - The sign is propagated.
354*f5c631daSSadaf Ebrahimi       //  - The payload (mantissa) is transferred as much as possible, except
355*f5c631daSSadaf Ebrahimi       //    that the top bit is forced to '1', making the result a quiet NaN.
356*f5c631daSSadaf Ebrahimi       uint64_t raw = DoubleToRawbits(value);
357*f5c631daSSadaf Ebrahimi 
358*f5c631daSSadaf Ebrahimi       uint32_t sign = raw >> 63;
359*f5c631daSSadaf Ebrahimi       uint32_t exponent = (1 << 8) - 1;
360*f5c631daSSadaf Ebrahimi       uint32_t payload =
361*f5c631daSSadaf Ebrahimi           static_cast<uint32_t>(ExtractUnsignedBitfield64(50, 52 - 23, raw));
362*f5c631daSSadaf Ebrahimi       payload |= (1 << 22);  // Force a quiet NaN.
363*f5c631daSSadaf Ebrahimi 
364*f5c631daSSadaf Ebrahimi       return RawbitsToFloat((sign << 31) | (exponent << 23) | payload);
365*f5c631daSSadaf Ebrahimi     }
366*f5c631daSSadaf Ebrahimi 
367*f5c631daSSadaf Ebrahimi     case FP_ZERO:
368*f5c631daSSadaf Ebrahimi     case FP_INFINITE: {
369*f5c631daSSadaf Ebrahimi       // In a C++ cast, any value representable in the target type will be
370*f5c631daSSadaf Ebrahimi       // unchanged. This is always the case for +/-0.0 and infinities.
371*f5c631daSSadaf Ebrahimi       return static_cast<float>(value);
372*f5c631daSSadaf Ebrahimi     }
373*f5c631daSSadaf Ebrahimi 
374*f5c631daSSadaf Ebrahimi     case FP_NORMAL:
375*f5c631daSSadaf Ebrahimi     case FP_SUBNORMAL: {
376*f5c631daSSadaf Ebrahimi       // Convert double-to-float as the processor would, assuming that FPCR.FZ
377*f5c631daSSadaf Ebrahimi       // (flush-to-zero) is not set.
378*f5c631daSSadaf Ebrahimi       uint64_t raw = DoubleToRawbits(value);
379*f5c631daSSadaf Ebrahimi       // Extract the IEEE-754 double components.
380*f5c631daSSadaf Ebrahimi       uint32_t sign = raw >> 63;
381*f5c631daSSadaf Ebrahimi       // Extract the exponent and remove the IEEE-754 encoding bias.
382*f5c631daSSadaf Ebrahimi       int32_t exponent =
383*f5c631daSSadaf Ebrahimi           static_cast<int32_t>(ExtractUnsignedBitfield64(62, 52, raw)) - 1023;
384*f5c631daSSadaf Ebrahimi       // Extract the mantissa and add the implicit '1' bit.
385*f5c631daSSadaf Ebrahimi       uint64_t mantissa = ExtractUnsignedBitfield64(51, 0, raw);
386*f5c631daSSadaf Ebrahimi       if (std::fpclassify(value) == FP_NORMAL) {
387*f5c631daSSadaf Ebrahimi         mantissa |= (UINT64_C(1) << 52);
388*f5c631daSSadaf Ebrahimi       }
389*f5c631daSSadaf Ebrahimi       return FPRoundToFloat(sign, exponent, mantissa, round_mode);
390*f5c631daSSadaf Ebrahimi     }
391*f5c631daSSadaf Ebrahimi   }
392*f5c631daSSadaf Ebrahimi 
393*f5c631daSSadaf Ebrahimi   VIXL_UNREACHABLE();
394*f5c631daSSadaf Ebrahimi   return value;
395*f5c631daSSadaf Ebrahimi }
396*f5c631daSSadaf Ebrahimi 
397*f5c631daSSadaf Ebrahimi // TODO: We should consider implementing a full FPToDouble(Float16)
398*f5c631daSSadaf Ebrahimi // conversion function (for performance reasons).
FPToDouble(Float16 value,UseDefaultNaN DN,bool * exception)399*f5c631daSSadaf Ebrahimi double FPToDouble(Float16 value, UseDefaultNaN DN, bool* exception) {
400*f5c631daSSadaf Ebrahimi   // We can rely on implicit float to double conversion here.
401*f5c631daSSadaf Ebrahimi   return FPToFloat(value, DN, exception);
402*f5c631daSSadaf Ebrahimi }
403*f5c631daSSadaf Ebrahimi 
404*f5c631daSSadaf Ebrahimi 
FPToDouble(float value,UseDefaultNaN DN,bool * exception)405*f5c631daSSadaf Ebrahimi double FPToDouble(float value, UseDefaultNaN DN, bool* exception) {
406*f5c631daSSadaf Ebrahimi   switch (std::fpclassify(value)) {
407*f5c631daSSadaf Ebrahimi     case FP_NAN: {
408*f5c631daSSadaf Ebrahimi       if (IsSignallingNaN(value)) {
409*f5c631daSSadaf Ebrahimi         if (exception != NULL) {
410*f5c631daSSadaf Ebrahimi           *exception = true;
411*f5c631daSSadaf Ebrahimi         }
412*f5c631daSSadaf Ebrahimi       }
413*f5c631daSSadaf Ebrahimi       if (DN == kUseDefaultNaN) return kFP64DefaultNaN;
414*f5c631daSSadaf Ebrahimi 
415*f5c631daSSadaf Ebrahimi       // Convert NaNs as the processor would:
416*f5c631daSSadaf Ebrahimi       //  - The sign is propagated.
417*f5c631daSSadaf Ebrahimi       //  - The payload (mantissa) is transferred entirely, except that the top
418*f5c631daSSadaf Ebrahimi       //    bit is forced to '1', making the result a quiet NaN. The unused
419*f5c631daSSadaf Ebrahimi       //    (low-order) payload bits are set to 0.
420*f5c631daSSadaf Ebrahimi       uint32_t raw = FloatToRawbits(value);
421*f5c631daSSadaf Ebrahimi 
422*f5c631daSSadaf Ebrahimi       uint64_t sign = raw >> 31;
423*f5c631daSSadaf Ebrahimi       uint64_t exponent = (1 << 11) - 1;
424*f5c631daSSadaf Ebrahimi       uint64_t payload = ExtractUnsignedBitfield64(21, 0, raw);
425*f5c631daSSadaf Ebrahimi       payload <<= (52 - 23);           // The unused low-order bits should be 0.
426*f5c631daSSadaf Ebrahimi       payload |= (UINT64_C(1) << 51);  // Force a quiet NaN.
427*f5c631daSSadaf Ebrahimi 
428*f5c631daSSadaf Ebrahimi       return RawbitsToDouble((sign << 63) | (exponent << 52) | payload);
429*f5c631daSSadaf Ebrahimi     }
430*f5c631daSSadaf Ebrahimi 
431*f5c631daSSadaf Ebrahimi     case FP_ZERO:
432*f5c631daSSadaf Ebrahimi     case FP_NORMAL:
433*f5c631daSSadaf Ebrahimi     case FP_SUBNORMAL:
434*f5c631daSSadaf Ebrahimi     case FP_INFINITE: {
435*f5c631daSSadaf Ebrahimi       // All other inputs are preserved in a standard cast, because every value
436*f5c631daSSadaf Ebrahimi       // representable using an IEEE-754 float is also representable using an
437*f5c631daSSadaf Ebrahimi       // IEEE-754 double.
438*f5c631daSSadaf Ebrahimi       return static_cast<double>(value);
439*f5c631daSSadaf Ebrahimi     }
440*f5c631daSSadaf Ebrahimi   }
441*f5c631daSSadaf Ebrahimi 
442*f5c631daSSadaf Ebrahimi   VIXL_UNREACHABLE();
443*f5c631daSSadaf Ebrahimi   return static_cast<double>(value);
444*f5c631daSSadaf Ebrahimi }
445*f5c631daSSadaf Ebrahimi 
446*f5c631daSSadaf Ebrahimi 
FPToFloat16(float value,FPRounding round_mode,UseDefaultNaN DN,bool * exception)447*f5c631daSSadaf Ebrahimi Float16 FPToFloat16(float value,
448*f5c631daSSadaf Ebrahimi                     FPRounding round_mode,
449*f5c631daSSadaf Ebrahimi                     UseDefaultNaN DN,
450*f5c631daSSadaf Ebrahimi                     bool* exception) {
451*f5c631daSSadaf Ebrahimi   // Only the FPTieEven rounding mode is implemented.
452*f5c631daSSadaf Ebrahimi   VIXL_ASSERT(round_mode == FPTieEven);
453*f5c631daSSadaf Ebrahimi   USE(round_mode);
454*f5c631daSSadaf Ebrahimi 
455*f5c631daSSadaf Ebrahimi   uint32_t raw = FloatToRawbits(value);
456*f5c631daSSadaf Ebrahimi   int32_t sign = raw >> 31;
457*f5c631daSSadaf Ebrahimi   int32_t exponent = ExtractUnsignedBitfield32(30, 23, raw) - 127;
458*f5c631daSSadaf Ebrahimi   uint32_t mantissa = ExtractUnsignedBitfield32(22, 0, raw);
459*f5c631daSSadaf Ebrahimi 
460*f5c631daSSadaf Ebrahimi   switch (std::fpclassify(value)) {
461*f5c631daSSadaf Ebrahimi     case FP_NAN: {
462*f5c631daSSadaf Ebrahimi       if (IsSignallingNaN(value)) {
463*f5c631daSSadaf Ebrahimi         if (exception != NULL) {
464*f5c631daSSadaf Ebrahimi           *exception = true;
465*f5c631daSSadaf Ebrahimi         }
466*f5c631daSSadaf Ebrahimi       }
467*f5c631daSSadaf Ebrahimi       if (DN == kUseDefaultNaN) return kFP16DefaultNaN;
468*f5c631daSSadaf Ebrahimi 
469*f5c631daSSadaf Ebrahimi       // Convert NaNs as the processor would:
470*f5c631daSSadaf Ebrahimi       //  - The sign is propagated.
471*f5c631daSSadaf Ebrahimi       //  - The payload (mantissa) is transferred as much as possible, except
472*f5c631daSSadaf Ebrahimi       //    that the top bit is forced to '1', making the result a quiet NaN.
473*f5c631daSSadaf Ebrahimi       uint16_t result = (sign == 0) ? Float16ToRawbits(kFP16PositiveInfinity)
474*f5c631daSSadaf Ebrahimi                                     : Float16ToRawbits(kFP16NegativeInfinity);
475*f5c631daSSadaf Ebrahimi       result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits);
476*f5c631daSSadaf Ebrahimi       result |= (1 << 9);  // Force a quiet NaN;
477*f5c631daSSadaf Ebrahimi       return RawbitsToFloat16(result);
478*f5c631daSSadaf Ebrahimi     }
479*f5c631daSSadaf Ebrahimi 
480*f5c631daSSadaf Ebrahimi     case FP_ZERO:
481*f5c631daSSadaf Ebrahimi       return (sign == 0) ? kFP16PositiveZero : kFP16NegativeZero;
482*f5c631daSSadaf Ebrahimi 
483*f5c631daSSadaf Ebrahimi     case FP_INFINITE:
484*f5c631daSSadaf Ebrahimi       return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
485*f5c631daSSadaf Ebrahimi 
486*f5c631daSSadaf Ebrahimi     case FP_NORMAL:
487*f5c631daSSadaf Ebrahimi     case FP_SUBNORMAL: {
488*f5c631daSSadaf Ebrahimi       // Convert float-to-half as the processor would, assuming that FPCR.FZ
489*f5c631daSSadaf Ebrahimi       // (flush-to-zero) is not set.
490*f5c631daSSadaf Ebrahimi 
491*f5c631daSSadaf Ebrahimi       // Add the implicit '1' bit to the mantissa.
492*f5c631daSSadaf Ebrahimi       mantissa += (1 << 23);
493*f5c631daSSadaf Ebrahimi       return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
494*f5c631daSSadaf Ebrahimi     }
495*f5c631daSSadaf Ebrahimi   }
496*f5c631daSSadaf Ebrahimi 
497*f5c631daSSadaf Ebrahimi   VIXL_UNREACHABLE();
498*f5c631daSSadaf Ebrahimi   return kFP16PositiveZero;
499*f5c631daSSadaf Ebrahimi }
500*f5c631daSSadaf Ebrahimi 
501*f5c631daSSadaf Ebrahimi 
FPToFloat16(double value,FPRounding round_mode,UseDefaultNaN DN,bool * exception)502*f5c631daSSadaf Ebrahimi Float16 FPToFloat16(double value,
503*f5c631daSSadaf Ebrahimi                     FPRounding round_mode,
504*f5c631daSSadaf Ebrahimi                     UseDefaultNaN DN,
505*f5c631daSSadaf Ebrahimi                     bool* exception) {
506*f5c631daSSadaf Ebrahimi   // Only the FPTieEven rounding mode is implemented.
507*f5c631daSSadaf Ebrahimi   VIXL_ASSERT(round_mode == FPTieEven);
508*f5c631daSSadaf Ebrahimi   USE(round_mode);
509*f5c631daSSadaf Ebrahimi 
510*f5c631daSSadaf Ebrahimi   uint64_t raw = DoubleToRawbits(value);
511*f5c631daSSadaf Ebrahimi   int32_t sign = raw >> 63;
512*f5c631daSSadaf Ebrahimi   int64_t exponent = ExtractUnsignedBitfield64(62, 52, raw) - 1023;
513*f5c631daSSadaf Ebrahimi   uint64_t mantissa = ExtractUnsignedBitfield64(51, 0, raw);
514*f5c631daSSadaf Ebrahimi 
515*f5c631daSSadaf Ebrahimi   switch (std::fpclassify(value)) {
516*f5c631daSSadaf Ebrahimi     case FP_NAN: {
517*f5c631daSSadaf Ebrahimi       if (IsSignallingNaN(value)) {
518*f5c631daSSadaf Ebrahimi         if (exception != NULL) {
519*f5c631daSSadaf Ebrahimi           *exception = true;
520*f5c631daSSadaf Ebrahimi         }
521*f5c631daSSadaf Ebrahimi       }
522*f5c631daSSadaf Ebrahimi       if (DN == kUseDefaultNaN) return kFP16DefaultNaN;
523*f5c631daSSadaf Ebrahimi 
524*f5c631daSSadaf Ebrahimi       // Convert NaNs as the processor would:
525*f5c631daSSadaf Ebrahimi       //  - The sign is propagated.
526*f5c631daSSadaf Ebrahimi       //  - The payload (mantissa) is transferred as much as possible, except
527*f5c631daSSadaf Ebrahimi       //    that the top bit is forced to '1', making the result a quiet NaN.
528*f5c631daSSadaf Ebrahimi       uint16_t result = (sign == 0) ? Float16ToRawbits(kFP16PositiveInfinity)
529*f5c631daSSadaf Ebrahimi                                     : Float16ToRawbits(kFP16NegativeInfinity);
530*f5c631daSSadaf Ebrahimi       result |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits);
531*f5c631daSSadaf Ebrahimi       result |= (1 << 9);  // Force a quiet NaN;
532*f5c631daSSadaf Ebrahimi       return RawbitsToFloat16(result);
533*f5c631daSSadaf Ebrahimi     }
534*f5c631daSSadaf Ebrahimi 
535*f5c631daSSadaf Ebrahimi     case FP_ZERO:
536*f5c631daSSadaf Ebrahimi       return (sign == 0) ? kFP16PositiveZero : kFP16NegativeZero;
537*f5c631daSSadaf Ebrahimi 
538*f5c631daSSadaf Ebrahimi     case FP_INFINITE:
539*f5c631daSSadaf Ebrahimi       return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
540*f5c631daSSadaf Ebrahimi     case FP_NORMAL:
541*f5c631daSSadaf Ebrahimi     case FP_SUBNORMAL: {
542*f5c631daSSadaf Ebrahimi       // Convert double-to-half as the processor would, assuming that FPCR.FZ
543*f5c631daSSadaf Ebrahimi       // (flush-to-zero) is not set.
544*f5c631daSSadaf Ebrahimi 
545*f5c631daSSadaf Ebrahimi       // Add the implicit '1' bit to the mantissa.
546*f5c631daSSadaf Ebrahimi       mantissa += (UINT64_C(1) << 52);
547*f5c631daSSadaf Ebrahimi       return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
548*f5c631daSSadaf Ebrahimi     }
549*f5c631daSSadaf Ebrahimi   }
550*f5c631daSSadaf Ebrahimi 
551*f5c631daSSadaf Ebrahimi   VIXL_UNREACHABLE();
552*f5c631daSSadaf Ebrahimi   return kFP16PositiveZero;
553*f5c631daSSadaf Ebrahimi }
554*f5c631daSSadaf Ebrahimi 
555*f5c631daSSadaf Ebrahimi }  // namespace vixl
556