1*f5c631daSSadaf Ebrahimi // Copyright 2015, VIXL authors
2*f5c631daSSadaf Ebrahimi // All rights reserved.
3*f5c631daSSadaf Ebrahimi //
4*f5c631daSSadaf Ebrahimi // Redistribution and use in source and binary forms, with or without
5*f5c631daSSadaf Ebrahimi // modification, are permitted provided that the following conditions are met:
6*f5c631daSSadaf Ebrahimi //
7*f5c631daSSadaf Ebrahimi // * Redistributions of source code must retain the above copyright notice,
8*f5c631daSSadaf Ebrahimi // this list of conditions and the following disclaimer.
9*f5c631daSSadaf Ebrahimi // * Redistributions in binary form must reproduce the above copyright notice,
10*f5c631daSSadaf Ebrahimi // this list of conditions and the following disclaimer in the documentation
11*f5c631daSSadaf Ebrahimi // and/or other materials provided with the distribution.
12*f5c631daSSadaf Ebrahimi // * Neither the name of ARM Limited nor the names of its contributors may be
13*f5c631daSSadaf Ebrahimi // used to endorse or promote products derived from this software without
14*f5c631daSSadaf Ebrahimi // specific prior written permission.
15*f5c631daSSadaf Ebrahimi //
16*f5c631daSSadaf Ebrahimi // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17*f5c631daSSadaf Ebrahimi // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18*f5c631daSSadaf Ebrahimi // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19*f5c631daSSadaf Ebrahimi // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20*f5c631daSSadaf Ebrahimi // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21*f5c631daSSadaf Ebrahimi // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22*f5c631daSSadaf Ebrahimi // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23*f5c631daSSadaf Ebrahimi // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24*f5c631daSSadaf Ebrahimi // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25*f5c631daSSadaf Ebrahimi // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26*f5c631daSSadaf Ebrahimi
27*f5c631daSSadaf Ebrahimi #include <cstdio>
28*f5c631daSSadaf Ebrahimi
29*f5c631daSSadaf Ebrahimi #include "utils-vixl.h"
30*f5c631daSSadaf Ebrahimi
31*f5c631daSSadaf Ebrahimi namespace vixl {
32*f5c631daSSadaf Ebrahimi
33*f5c631daSSadaf Ebrahimi // The default NaN values (for FPCR.DN=1).
34*f5c631daSSadaf Ebrahimi const double kFP64DefaultNaN = RawbitsToDouble(UINT64_C(0x7ff8000000000000));
35*f5c631daSSadaf Ebrahimi const float kFP32DefaultNaN = RawbitsToFloat(0x7fc00000);
36*f5c631daSSadaf Ebrahimi const Float16 kFP16DefaultNaN = RawbitsToFloat16(0x7e00);
37*f5c631daSSadaf Ebrahimi
38*f5c631daSSadaf Ebrahimi // Floating-point zero values.
39*f5c631daSSadaf Ebrahimi const Float16 kFP16PositiveZero = RawbitsToFloat16(0x0);
40*f5c631daSSadaf Ebrahimi const Float16 kFP16NegativeZero = RawbitsToFloat16(0x8000);
41*f5c631daSSadaf Ebrahimi
42*f5c631daSSadaf Ebrahimi // Floating-point infinity values.
43*f5c631daSSadaf Ebrahimi const Float16 kFP16PositiveInfinity = RawbitsToFloat16(0x7c00);
44*f5c631daSSadaf Ebrahimi const Float16 kFP16NegativeInfinity = RawbitsToFloat16(0xfc00);
45*f5c631daSSadaf Ebrahimi const float kFP32PositiveInfinity = RawbitsToFloat(0x7f800000);
46*f5c631daSSadaf Ebrahimi const float kFP32NegativeInfinity = RawbitsToFloat(0xff800000);
47*f5c631daSSadaf Ebrahimi const double kFP64PositiveInfinity =
48*f5c631daSSadaf Ebrahimi RawbitsToDouble(UINT64_C(0x7ff0000000000000));
49*f5c631daSSadaf Ebrahimi const double kFP64NegativeInfinity =
50*f5c631daSSadaf Ebrahimi RawbitsToDouble(UINT64_C(0xfff0000000000000));
51*f5c631daSSadaf Ebrahimi
IsZero(Float16 value)52*f5c631daSSadaf Ebrahimi bool IsZero(Float16 value) {
53*f5c631daSSadaf Ebrahimi uint16_t bits = Float16ToRawbits(value);
54*f5c631daSSadaf Ebrahimi return (bits == Float16ToRawbits(kFP16PositiveZero) ||
55*f5c631daSSadaf Ebrahimi bits == Float16ToRawbits(kFP16NegativeZero));
56*f5c631daSSadaf Ebrahimi }
57*f5c631daSSadaf Ebrahimi
Float16ToRawbits(Float16 value)58*f5c631daSSadaf Ebrahimi uint16_t Float16ToRawbits(Float16 value) { return value.rawbits_; }
59*f5c631daSSadaf Ebrahimi
// Returns the object representation of `value` as a 32-bit unsigned integer.
// The bytes are copied with memcpy so that no aliasing rules are violated.
uint32_t FloatToRawbits(float value) {
  // The copy below is only meaningful if both types have the same width.
  static_assert(sizeof(float) == sizeof(uint32_t),
                "float must be exactly 32 bits wide");
  uint32_t bits = 0;
  memcpy(&bits, &value, sizeof(bits));
  return bits;
}
65*f5c631daSSadaf Ebrahimi
66*f5c631daSSadaf Ebrahimi
// Returns the object representation of `value` as a 64-bit unsigned integer.
// The bytes are copied with memcpy so that no aliasing rules are violated.
uint64_t DoubleToRawbits(double value) {
  // The copy below is only meaningful if both types have the same width.
  static_assert(sizeof(double) == sizeof(uint64_t),
                "double must be exactly 64 bits wide");
  uint64_t bits = 0;
  memcpy(&bits, &value, sizeof(bits));
  return bits;
}
72*f5c631daSSadaf Ebrahimi
73*f5c631daSSadaf Ebrahimi
RawbitsToFloat16(uint16_t bits)74*f5c631daSSadaf Ebrahimi Float16 RawbitsToFloat16(uint16_t bits) {
75*f5c631daSSadaf Ebrahimi Float16 f;
76*f5c631daSSadaf Ebrahimi f.rawbits_ = bits;
77*f5c631daSSadaf Ebrahimi return f;
78*f5c631daSSadaf Ebrahimi }
79*f5c631daSSadaf Ebrahimi
80*f5c631daSSadaf Ebrahimi
// Returns the float whose object representation is `bits`. The bytes are
// copied with memcpy so that no aliasing rules are violated.
float RawbitsToFloat(uint32_t bits) {
  // The copy below is only meaningful if both types have the same width.
  static_assert(sizeof(float) == sizeof(uint32_t),
                "float must be exactly 32 bits wide");
  float value = 0.0f;
  memcpy(&value, &bits, sizeof(value));
  return value;
}
86*f5c631daSSadaf Ebrahimi
87*f5c631daSSadaf Ebrahimi
// Returns the double whose object representation is `bits`. The bytes are
// copied with memcpy so that no aliasing rules are violated.
double RawbitsToDouble(uint64_t bits) {
  // The copy below is only meaningful if both types have the same width.
  static_assert(sizeof(double) == sizeof(uint64_t),
                "double must be exactly 64 bits wide");
  double value = 0.0;
  memcpy(&value, &bits, sizeof(value));
  return value;
}
93*f5c631daSSadaf Ebrahimi
94*f5c631daSSadaf Ebrahimi
Float16Sign(internal::SimFloat16 val)95*f5c631daSSadaf Ebrahimi uint32_t Float16Sign(internal::SimFloat16 val) {
96*f5c631daSSadaf Ebrahimi uint16_t rawbits = Float16ToRawbits(val);
97*f5c631daSSadaf Ebrahimi return ExtractUnsignedBitfield32(15, 15, rawbits);
98*f5c631daSSadaf Ebrahimi }
99*f5c631daSSadaf Ebrahimi
100*f5c631daSSadaf Ebrahimi
Float16Exp(internal::SimFloat16 val)101*f5c631daSSadaf Ebrahimi uint32_t Float16Exp(internal::SimFloat16 val) {
102*f5c631daSSadaf Ebrahimi uint16_t rawbits = Float16ToRawbits(val);
103*f5c631daSSadaf Ebrahimi return ExtractUnsignedBitfield32(14, 10, rawbits);
104*f5c631daSSadaf Ebrahimi }
105*f5c631daSSadaf Ebrahimi
Float16Mantissa(internal::SimFloat16 val)106*f5c631daSSadaf Ebrahimi uint32_t Float16Mantissa(internal::SimFloat16 val) {
107*f5c631daSSadaf Ebrahimi uint16_t rawbits = Float16ToRawbits(val);
108*f5c631daSSadaf Ebrahimi return ExtractUnsignedBitfield32(9, 0, rawbits);
109*f5c631daSSadaf Ebrahimi }
110*f5c631daSSadaf Ebrahimi
111*f5c631daSSadaf Ebrahimi
FloatSign(float val)112*f5c631daSSadaf Ebrahimi uint32_t FloatSign(float val) {
113*f5c631daSSadaf Ebrahimi uint32_t rawbits = FloatToRawbits(val);
114*f5c631daSSadaf Ebrahimi return ExtractUnsignedBitfield32(31, 31, rawbits);
115*f5c631daSSadaf Ebrahimi }
116*f5c631daSSadaf Ebrahimi
117*f5c631daSSadaf Ebrahimi
FloatExp(float val)118*f5c631daSSadaf Ebrahimi uint32_t FloatExp(float val) {
119*f5c631daSSadaf Ebrahimi uint32_t rawbits = FloatToRawbits(val);
120*f5c631daSSadaf Ebrahimi return ExtractUnsignedBitfield32(30, 23, rawbits);
121*f5c631daSSadaf Ebrahimi }
122*f5c631daSSadaf Ebrahimi
123*f5c631daSSadaf Ebrahimi
FloatMantissa(float val)124*f5c631daSSadaf Ebrahimi uint32_t FloatMantissa(float val) {
125*f5c631daSSadaf Ebrahimi uint32_t rawbits = FloatToRawbits(val);
126*f5c631daSSadaf Ebrahimi return ExtractUnsignedBitfield32(22, 0, rawbits);
127*f5c631daSSadaf Ebrahimi }
128*f5c631daSSadaf Ebrahimi
129*f5c631daSSadaf Ebrahimi
DoubleSign(double val)130*f5c631daSSadaf Ebrahimi uint32_t DoubleSign(double val) {
131*f5c631daSSadaf Ebrahimi uint64_t rawbits = DoubleToRawbits(val);
132*f5c631daSSadaf Ebrahimi return static_cast<uint32_t>(ExtractUnsignedBitfield64(63, 63, rawbits));
133*f5c631daSSadaf Ebrahimi }
134*f5c631daSSadaf Ebrahimi
135*f5c631daSSadaf Ebrahimi
DoubleExp(double val)136*f5c631daSSadaf Ebrahimi uint32_t DoubleExp(double val) {
137*f5c631daSSadaf Ebrahimi uint64_t rawbits = DoubleToRawbits(val);
138*f5c631daSSadaf Ebrahimi return static_cast<uint32_t>(ExtractUnsignedBitfield64(62, 52, rawbits));
139*f5c631daSSadaf Ebrahimi }
140*f5c631daSSadaf Ebrahimi
141*f5c631daSSadaf Ebrahimi
DoubleMantissa(double val)142*f5c631daSSadaf Ebrahimi uint64_t DoubleMantissa(double val) {
143*f5c631daSSadaf Ebrahimi uint64_t rawbits = DoubleToRawbits(val);
144*f5c631daSSadaf Ebrahimi return ExtractUnsignedBitfield64(51, 0, rawbits);
145*f5c631daSSadaf Ebrahimi }
146*f5c631daSSadaf Ebrahimi
147*f5c631daSSadaf Ebrahimi
Float16Pack(uint16_t sign,uint16_t exp,uint16_t mantissa)148*f5c631daSSadaf Ebrahimi internal::SimFloat16 Float16Pack(uint16_t sign,
149*f5c631daSSadaf Ebrahimi uint16_t exp,
150*f5c631daSSadaf Ebrahimi uint16_t mantissa) {
151*f5c631daSSadaf Ebrahimi uint16_t bits = (sign << 15) | (exp << 10) | mantissa;
152*f5c631daSSadaf Ebrahimi return RawbitsToFloat16(bits);
153*f5c631daSSadaf Ebrahimi }
154*f5c631daSSadaf Ebrahimi
155*f5c631daSSadaf Ebrahimi
FloatPack(uint32_t sign,uint32_t exp,uint32_t mantissa)156*f5c631daSSadaf Ebrahimi float FloatPack(uint32_t sign, uint32_t exp, uint32_t mantissa) {
157*f5c631daSSadaf Ebrahimi uint32_t bits = (sign << 31) | (exp << 23) | mantissa;
158*f5c631daSSadaf Ebrahimi return RawbitsToFloat(bits);
159*f5c631daSSadaf Ebrahimi }
160*f5c631daSSadaf Ebrahimi
161*f5c631daSSadaf Ebrahimi
DoublePack(uint64_t sign,uint64_t exp,uint64_t mantissa)162*f5c631daSSadaf Ebrahimi double DoublePack(uint64_t sign, uint64_t exp, uint64_t mantissa) {
163*f5c631daSSadaf Ebrahimi uint64_t bits = (sign << 63) | (exp << 52) | mantissa;
164*f5c631daSSadaf Ebrahimi return RawbitsToDouble(bits);
165*f5c631daSSadaf Ebrahimi }
166*f5c631daSSadaf Ebrahimi
167*f5c631daSSadaf Ebrahimi
Float16Classify(Float16 value)168*f5c631daSSadaf Ebrahimi int Float16Classify(Float16 value) {
169*f5c631daSSadaf Ebrahimi uint16_t bits = Float16ToRawbits(value);
170*f5c631daSSadaf Ebrahimi uint16_t exponent_max = (1 << 5) - 1;
171*f5c631daSSadaf Ebrahimi uint16_t exponent_mask = exponent_max << 10;
172*f5c631daSSadaf Ebrahimi uint16_t mantissa_mask = (1 << 10) - 1;
173*f5c631daSSadaf Ebrahimi
174*f5c631daSSadaf Ebrahimi uint16_t exponent = (bits & exponent_mask) >> 10;
175*f5c631daSSadaf Ebrahimi uint16_t mantissa = bits & mantissa_mask;
176*f5c631daSSadaf Ebrahimi if (exponent == 0) {
177*f5c631daSSadaf Ebrahimi if (mantissa == 0) {
178*f5c631daSSadaf Ebrahimi return FP_ZERO;
179*f5c631daSSadaf Ebrahimi }
180*f5c631daSSadaf Ebrahimi return FP_SUBNORMAL;
181*f5c631daSSadaf Ebrahimi } else if (exponent == exponent_max) {
182*f5c631daSSadaf Ebrahimi if (mantissa == 0) {
183*f5c631daSSadaf Ebrahimi return FP_INFINITE;
184*f5c631daSSadaf Ebrahimi }
185*f5c631daSSadaf Ebrahimi return FP_NAN;
186*f5c631daSSadaf Ebrahimi }
187*f5c631daSSadaf Ebrahimi return FP_NORMAL;
188*f5c631daSSadaf Ebrahimi }
189*f5c631daSSadaf Ebrahimi
190*f5c631daSSadaf Ebrahimi
CountClearHalfWords(uint64_t imm,unsigned reg_size)191*f5c631daSSadaf Ebrahimi unsigned CountClearHalfWords(uint64_t imm, unsigned reg_size) {
192*f5c631daSSadaf Ebrahimi VIXL_ASSERT((reg_size % 8) == 0);
193*f5c631daSSadaf Ebrahimi int count = 0;
194*f5c631daSSadaf Ebrahimi for (unsigned i = 0; i < (reg_size / 16); i++) {
195*f5c631daSSadaf Ebrahimi if ((imm & 0xffff) == 0) {
196*f5c631daSSadaf Ebrahimi count++;
197*f5c631daSSadaf Ebrahimi }
198*f5c631daSSadaf Ebrahimi imm >>= 16;
199*f5c631daSSadaf Ebrahimi }
200*f5c631daSSadaf Ebrahimi return count;
201*f5c631daSSadaf Ebrahimi }
202*f5c631daSSadaf Ebrahimi
203*f5c631daSSadaf Ebrahimi
BitCount(uint64_t value)204*f5c631daSSadaf Ebrahimi int BitCount(uint64_t value) { return CountSetBits(value); }
205*f5c631daSSadaf Ebrahimi
206*f5c631daSSadaf Ebrahimi // Float16 definitions.
207*f5c631daSSadaf Ebrahimi
Float16(double dvalue)208*f5c631daSSadaf Ebrahimi Float16::Float16(double dvalue) {
209*f5c631daSSadaf Ebrahimi rawbits_ =
210*f5c631daSSadaf Ebrahimi Float16ToRawbits(FPToFloat16(dvalue, FPTieEven, kIgnoreDefaultNaN));
211*f5c631daSSadaf Ebrahimi }
212*f5c631daSSadaf Ebrahimi
213*f5c631daSSadaf Ebrahimi namespace internal {
214*f5c631daSSadaf Ebrahimi
operator -() const215*f5c631daSSadaf Ebrahimi SimFloat16 SimFloat16::operator-() const {
216*f5c631daSSadaf Ebrahimi return RawbitsToFloat16(rawbits_ ^ 0x8000);
217*f5c631daSSadaf Ebrahimi }
218*f5c631daSSadaf Ebrahimi
// SimFloat16 binary operators and conversions.
operator +(SimFloat16 rhs) const220*f5c631daSSadaf Ebrahimi SimFloat16 SimFloat16::operator+(SimFloat16 rhs) const {
221*f5c631daSSadaf Ebrahimi return static_cast<double>(*this) + static_cast<double>(rhs);
222*f5c631daSSadaf Ebrahimi }
223*f5c631daSSadaf Ebrahimi
operator -(SimFloat16 rhs) const224*f5c631daSSadaf Ebrahimi SimFloat16 SimFloat16::operator-(SimFloat16 rhs) const {
225*f5c631daSSadaf Ebrahimi return static_cast<double>(*this) - static_cast<double>(rhs);
226*f5c631daSSadaf Ebrahimi }
227*f5c631daSSadaf Ebrahimi
operator *(SimFloat16 rhs) const228*f5c631daSSadaf Ebrahimi SimFloat16 SimFloat16::operator*(SimFloat16 rhs) const {
229*f5c631daSSadaf Ebrahimi return static_cast<double>(*this) * static_cast<double>(rhs);
230*f5c631daSSadaf Ebrahimi }
231*f5c631daSSadaf Ebrahimi
operator /(SimFloat16 rhs) const232*f5c631daSSadaf Ebrahimi SimFloat16 SimFloat16::operator/(SimFloat16 rhs) const {
233*f5c631daSSadaf Ebrahimi return static_cast<double>(*this) / static_cast<double>(rhs);
234*f5c631daSSadaf Ebrahimi }
235*f5c631daSSadaf Ebrahimi
operator <(SimFloat16 rhs) const236*f5c631daSSadaf Ebrahimi bool SimFloat16::operator<(SimFloat16 rhs) const {
237*f5c631daSSadaf Ebrahimi return static_cast<double>(*this) < static_cast<double>(rhs);
238*f5c631daSSadaf Ebrahimi }
239*f5c631daSSadaf Ebrahimi
operator >(SimFloat16 rhs) const240*f5c631daSSadaf Ebrahimi bool SimFloat16::operator>(SimFloat16 rhs) const {
241*f5c631daSSadaf Ebrahimi return static_cast<double>(*this) > static_cast<double>(rhs);
242*f5c631daSSadaf Ebrahimi }
243*f5c631daSSadaf Ebrahimi
operator ==(SimFloat16 rhs) const244*f5c631daSSadaf Ebrahimi bool SimFloat16::operator==(SimFloat16 rhs) const {
245*f5c631daSSadaf Ebrahimi if (IsNaN(*this) || IsNaN(rhs)) {
246*f5c631daSSadaf Ebrahimi return false;
247*f5c631daSSadaf Ebrahimi } else if (IsZero(rhs) && IsZero(*this)) {
248*f5c631daSSadaf Ebrahimi // +0 and -0 should be treated as equal.
249*f5c631daSSadaf Ebrahimi return true;
250*f5c631daSSadaf Ebrahimi }
251*f5c631daSSadaf Ebrahimi return this->rawbits_ == rhs.rawbits_;
252*f5c631daSSadaf Ebrahimi }
253*f5c631daSSadaf Ebrahimi
operator !=(SimFloat16 rhs) const254*f5c631daSSadaf Ebrahimi bool SimFloat16::operator!=(SimFloat16 rhs) const { return !(*this == rhs); }
255*f5c631daSSadaf Ebrahimi
operator ==(double rhs) const256*f5c631daSSadaf Ebrahimi bool SimFloat16::operator==(double rhs) const {
257*f5c631daSSadaf Ebrahimi return static_cast<double>(*this) == static_cast<double>(rhs);
258*f5c631daSSadaf Ebrahimi }
259*f5c631daSSadaf Ebrahimi
operator double() const260*f5c631daSSadaf Ebrahimi SimFloat16::operator double() const {
261*f5c631daSSadaf Ebrahimi return FPToDouble(*this, kIgnoreDefaultNaN);
262*f5c631daSSadaf Ebrahimi }
263*f5c631daSSadaf Ebrahimi
BitCount(Uint32 value)264*f5c631daSSadaf Ebrahimi Int64 BitCount(Uint32 value) { return CountSetBits(value.Get()); }
265*f5c631daSSadaf Ebrahimi
266*f5c631daSSadaf Ebrahimi } // namespace internal
267*f5c631daSSadaf Ebrahimi
FPToFloat(Float16 value,UseDefaultNaN DN,bool * exception)268*f5c631daSSadaf Ebrahimi float FPToFloat(Float16 value, UseDefaultNaN DN, bool* exception) {
269*f5c631daSSadaf Ebrahimi uint16_t bits = Float16ToRawbits(value);
270*f5c631daSSadaf Ebrahimi uint32_t sign = bits >> 15;
271*f5c631daSSadaf Ebrahimi uint32_t exponent =
272*f5c631daSSadaf Ebrahimi ExtractUnsignedBitfield32(kFloat16MantissaBits + kFloat16ExponentBits - 1,
273*f5c631daSSadaf Ebrahimi kFloat16MantissaBits,
274*f5c631daSSadaf Ebrahimi bits);
275*f5c631daSSadaf Ebrahimi uint32_t mantissa =
276*f5c631daSSadaf Ebrahimi ExtractUnsignedBitfield32(kFloat16MantissaBits - 1, 0, bits);
277*f5c631daSSadaf Ebrahimi
278*f5c631daSSadaf Ebrahimi switch (Float16Classify(value)) {
279*f5c631daSSadaf Ebrahimi case FP_ZERO:
280*f5c631daSSadaf Ebrahimi return (sign == 0) ? 0.0f : -0.0f;
281*f5c631daSSadaf Ebrahimi
282*f5c631daSSadaf Ebrahimi case FP_INFINITE:
283*f5c631daSSadaf Ebrahimi return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity;
284*f5c631daSSadaf Ebrahimi
285*f5c631daSSadaf Ebrahimi case FP_SUBNORMAL: {
286*f5c631daSSadaf Ebrahimi // Calculate shift required to put mantissa into the most-significant bits
287*f5c631daSSadaf Ebrahimi // of the destination mantissa.
288*f5c631daSSadaf Ebrahimi int shift = CountLeadingZeros(mantissa << (32 - 10));
289*f5c631daSSadaf Ebrahimi
290*f5c631daSSadaf Ebrahimi // Shift mantissa and discard implicit '1'.
291*f5c631daSSadaf Ebrahimi mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1;
292*f5c631daSSadaf Ebrahimi mantissa &= (1 << kFloatMantissaBits) - 1;
293*f5c631daSSadaf Ebrahimi
294*f5c631daSSadaf Ebrahimi // Adjust the exponent for the shift applied, and rebias.
295*f5c631daSSadaf Ebrahimi exponent = exponent - shift + (-15 + 127);
296*f5c631daSSadaf Ebrahimi break;
297*f5c631daSSadaf Ebrahimi }
298*f5c631daSSadaf Ebrahimi
299*f5c631daSSadaf Ebrahimi case FP_NAN:
300*f5c631daSSadaf Ebrahimi if (IsSignallingNaN(value)) {
301*f5c631daSSadaf Ebrahimi if (exception != NULL) {
302*f5c631daSSadaf Ebrahimi *exception = true;
303*f5c631daSSadaf Ebrahimi }
304*f5c631daSSadaf Ebrahimi }
305*f5c631daSSadaf Ebrahimi if (DN == kUseDefaultNaN) return kFP32DefaultNaN;
306*f5c631daSSadaf Ebrahimi
307*f5c631daSSadaf Ebrahimi // Convert NaNs as the processor would:
308*f5c631daSSadaf Ebrahimi // - The sign is propagated.
309*f5c631daSSadaf Ebrahimi // - The payload (mantissa) is transferred entirely, except that the top
310*f5c631daSSadaf Ebrahimi // bit is forced to '1', making the result a quiet NaN. The unused
311*f5c631daSSadaf Ebrahimi // (low-order) payload bits are set to 0.
312*f5c631daSSadaf Ebrahimi exponent = (1 << kFloatExponentBits) - 1;
313*f5c631daSSadaf Ebrahimi
314*f5c631daSSadaf Ebrahimi // Increase bits in mantissa, making low-order bits 0.
315*f5c631daSSadaf Ebrahimi mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
316*f5c631daSSadaf Ebrahimi mantissa |= 1 << 22; // Force a quiet NaN.
317*f5c631daSSadaf Ebrahimi break;
318*f5c631daSSadaf Ebrahimi
319*f5c631daSSadaf Ebrahimi case FP_NORMAL:
320*f5c631daSSadaf Ebrahimi // Increase bits in mantissa, making low-order bits 0.
321*f5c631daSSadaf Ebrahimi mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
322*f5c631daSSadaf Ebrahimi
323*f5c631daSSadaf Ebrahimi // Change exponent bias.
324*f5c631daSSadaf Ebrahimi exponent += (-15 + 127);
325*f5c631daSSadaf Ebrahimi break;
326*f5c631daSSadaf Ebrahimi
327*f5c631daSSadaf Ebrahimi default:
328*f5c631daSSadaf Ebrahimi VIXL_UNREACHABLE();
329*f5c631daSSadaf Ebrahimi }
330*f5c631daSSadaf Ebrahimi return RawbitsToFloat((sign << 31) | (exponent << kFloatMantissaBits) |
331*f5c631daSSadaf Ebrahimi mantissa);
332*f5c631daSSadaf Ebrahimi }
333*f5c631daSSadaf Ebrahimi
334*f5c631daSSadaf Ebrahimi
FPToFloat(double value,FPRounding round_mode,UseDefaultNaN DN,bool * exception)335*f5c631daSSadaf Ebrahimi float FPToFloat(double value,
336*f5c631daSSadaf Ebrahimi FPRounding round_mode,
337*f5c631daSSadaf Ebrahimi UseDefaultNaN DN,
338*f5c631daSSadaf Ebrahimi bool* exception) {
339*f5c631daSSadaf Ebrahimi // Only the FPTieEven rounding mode is implemented.
340*f5c631daSSadaf Ebrahimi VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
341*f5c631daSSadaf Ebrahimi USE(round_mode);
342*f5c631daSSadaf Ebrahimi
343*f5c631daSSadaf Ebrahimi switch (std::fpclassify(value)) {
344*f5c631daSSadaf Ebrahimi case FP_NAN: {
345*f5c631daSSadaf Ebrahimi if (IsSignallingNaN(value)) {
346*f5c631daSSadaf Ebrahimi if (exception != NULL) {
347*f5c631daSSadaf Ebrahimi *exception = true;
348*f5c631daSSadaf Ebrahimi }
349*f5c631daSSadaf Ebrahimi }
350*f5c631daSSadaf Ebrahimi if (DN == kUseDefaultNaN) return kFP32DefaultNaN;
351*f5c631daSSadaf Ebrahimi
352*f5c631daSSadaf Ebrahimi // Convert NaNs as the processor would:
353*f5c631daSSadaf Ebrahimi // - The sign is propagated.
354*f5c631daSSadaf Ebrahimi // - The payload (mantissa) is transferred as much as possible, except
355*f5c631daSSadaf Ebrahimi // that the top bit is forced to '1', making the result a quiet NaN.
356*f5c631daSSadaf Ebrahimi uint64_t raw = DoubleToRawbits(value);
357*f5c631daSSadaf Ebrahimi
358*f5c631daSSadaf Ebrahimi uint32_t sign = raw >> 63;
359*f5c631daSSadaf Ebrahimi uint32_t exponent = (1 << 8) - 1;
360*f5c631daSSadaf Ebrahimi uint32_t payload =
361*f5c631daSSadaf Ebrahimi static_cast<uint32_t>(ExtractUnsignedBitfield64(50, 52 - 23, raw));
362*f5c631daSSadaf Ebrahimi payload |= (1 << 22); // Force a quiet NaN.
363*f5c631daSSadaf Ebrahimi
364*f5c631daSSadaf Ebrahimi return RawbitsToFloat((sign << 31) | (exponent << 23) | payload);
365*f5c631daSSadaf Ebrahimi }
366*f5c631daSSadaf Ebrahimi
367*f5c631daSSadaf Ebrahimi case FP_ZERO:
368*f5c631daSSadaf Ebrahimi case FP_INFINITE: {
369*f5c631daSSadaf Ebrahimi // In a C++ cast, any value representable in the target type will be
370*f5c631daSSadaf Ebrahimi // unchanged. This is always the case for +/-0.0 and infinities.
371*f5c631daSSadaf Ebrahimi return static_cast<float>(value);
372*f5c631daSSadaf Ebrahimi }
373*f5c631daSSadaf Ebrahimi
374*f5c631daSSadaf Ebrahimi case FP_NORMAL:
375*f5c631daSSadaf Ebrahimi case FP_SUBNORMAL: {
376*f5c631daSSadaf Ebrahimi // Convert double-to-float as the processor would, assuming that FPCR.FZ
377*f5c631daSSadaf Ebrahimi // (flush-to-zero) is not set.
378*f5c631daSSadaf Ebrahimi uint64_t raw = DoubleToRawbits(value);
379*f5c631daSSadaf Ebrahimi // Extract the IEEE-754 double components.
380*f5c631daSSadaf Ebrahimi uint32_t sign = raw >> 63;
381*f5c631daSSadaf Ebrahimi // Extract the exponent and remove the IEEE-754 encoding bias.
382*f5c631daSSadaf Ebrahimi int32_t exponent =
383*f5c631daSSadaf Ebrahimi static_cast<int32_t>(ExtractUnsignedBitfield64(62, 52, raw)) - 1023;
384*f5c631daSSadaf Ebrahimi // Extract the mantissa and add the implicit '1' bit.
385*f5c631daSSadaf Ebrahimi uint64_t mantissa = ExtractUnsignedBitfield64(51, 0, raw);
386*f5c631daSSadaf Ebrahimi if (std::fpclassify(value) == FP_NORMAL) {
387*f5c631daSSadaf Ebrahimi mantissa |= (UINT64_C(1) << 52);
388*f5c631daSSadaf Ebrahimi }
389*f5c631daSSadaf Ebrahimi return FPRoundToFloat(sign, exponent, mantissa, round_mode);
390*f5c631daSSadaf Ebrahimi }
391*f5c631daSSadaf Ebrahimi }
392*f5c631daSSadaf Ebrahimi
393*f5c631daSSadaf Ebrahimi VIXL_UNREACHABLE();
394*f5c631daSSadaf Ebrahimi return value;
395*f5c631daSSadaf Ebrahimi }
396*f5c631daSSadaf Ebrahimi
397*f5c631daSSadaf Ebrahimi // TODO: We should consider implementing a full FPToDouble(Float16)
398*f5c631daSSadaf Ebrahimi // conversion function (for performance reasons).
FPToDouble(Float16 value,UseDefaultNaN DN,bool * exception)399*f5c631daSSadaf Ebrahimi double FPToDouble(Float16 value, UseDefaultNaN DN, bool* exception) {
400*f5c631daSSadaf Ebrahimi // We can rely on implicit float to double conversion here.
401*f5c631daSSadaf Ebrahimi return FPToFloat(value, DN, exception);
402*f5c631daSSadaf Ebrahimi }
403*f5c631daSSadaf Ebrahimi
404*f5c631daSSadaf Ebrahimi
FPToDouble(float value,UseDefaultNaN DN,bool * exception)405*f5c631daSSadaf Ebrahimi double FPToDouble(float value, UseDefaultNaN DN, bool* exception) {
406*f5c631daSSadaf Ebrahimi switch (std::fpclassify(value)) {
407*f5c631daSSadaf Ebrahimi case FP_NAN: {
408*f5c631daSSadaf Ebrahimi if (IsSignallingNaN(value)) {
409*f5c631daSSadaf Ebrahimi if (exception != NULL) {
410*f5c631daSSadaf Ebrahimi *exception = true;
411*f5c631daSSadaf Ebrahimi }
412*f5c631daSSadaf Ebrahimi }
413*f5c631daSSadaf Ebrahimi if (DN == kUseDefaultNaN) return kFP64DefaultNaN;
414*f5c631daSSadaf Ebrahimi
415*f5c631daSSadaf Ebrahimi // Convert NaNs as the processor would:
416*f5c631daSSadaf Ebrahimi // - The sign is propagated.
417*f5c631daSSadaf Ebrahimi // - The payload (mantissa) is transferred entirely, except that the top
418*f5c631daSSadaf Ebrahimi // bit is forced to '1', making the result a quiet NaN. The unused
419*f5c631daSSadaf Ebrahimi // (low-order) payload bits are set to 0.
420*f5c631daSSadaf Ebrahimi uint32_t raw = FloatToRawbits(value);
421*f5c631daSSadaf Ebrahimi
422*f5c631daSSadaf Ebrahimi uint64_t sign = raw >> 31;
423*f5c631daSSadaf Ebrahimi uint64_t exponent = (1 << 11) - 1;
424*f5c631daSSadaf Ebrahimi uint64_t payload = ExtractUnsignedBitfield64(21, 0, raw);
425*f5c631daSSadaf Ebrahimi payload <<= (52 - 23); // The unused low-order bits should be 0.
426*f5c631daSSadaf Ebrahimi payload |= (UINT64_C(1) << 51); // Force a quiet NaN.
427*f5c631daSSadaf Ebrahimi
428*f5c631daSSadaf Ebrahimi return RawbitsToDouble((sign << 63) | (exponent << 52) | payload);
429*f5c631daSSadaf Ebrahimi }
430*f5c631daSSadaf Ebrahimi
431*f5c631daSSadaf Ebrahimi case FP_ZERO:
432*f5c631daSSadaf Ebrahimi case FP_NORMAL:
433*f5c631daSSadaf Ebrahimi case FP_SUBNORMAL:
434*f5c631daSSadaf Ebrahimi case FP_INFINITE: {
435*f5c631daSSadaf Ebrahimi // All other inputs are preserved in a standard cast, because every value
436*f5c631daSSadaf Ebrahimi // representable using an IEEE-754 float is also representable using an
437*f5c631daSSadaf Ebrahimi // IEEE-754 double.
438*f5c631daSSadaf Ebrahimi return static_cast<double>(value);
439*f5c631daSSadaf Ebrahimi }
440*f5c631daSSadaf Ebrahimi }
441*f5c631daSSadaf Ebrahimi
442*f5c631daSSadaf Ebrahimi VIXL_UNREACHABLE();
443*f5c631daSSadaf Ebrahimi return static_cast<double>(value);
444*f5c631daSSadaf Ebrahimi }
445*f5c631daSSadaf Ebrahimi
446*f5c631daSSadaf Ebrahimi
FPToFloat16(float value,FPRounding round_mode,UseDefaultNaN DN,bool * exception)447*f5c631daSSadaf Ebrahimi Float16 FPToFloat16(float value,
448*f5c631daSSadaf Ebrahimi FPRounding round_mode,
449*f5c631daSSadaf Ebrahimi UseDefaultNaN DN,
450*f5c631daSSadaf Ebrahimi bool* exception) {
451*f5c631daSSadaf Ebrahimi // Only the FPTieEven rounding mode is implemented.
452*f5c631daSSadaf Ebrahimi VIXL_ASSERT(round_mode == FPTieEven);
453*f5c631daSSadaf Ebrahimi USE(round_mode);
454*f5c631daSSadaf Ebrahimi
455*f5c631daSSadaf Ebrahimi uint32_t raw = FloatToRawbits(value);
456*f5c631daSSadaf Ebrahimi int32_t sign = raw >> 31;
457*f5c631daSSadaf Ebrahimi int32_t exponent = ExtractUnsignedBitfield32(30, 23, raw) - 127;
458*f5c631daSSadaf Ebrahimi uint32_t mantissa = ExtractUnsignedBitfield32(22, 0, raw);
459*f5c631daSSadaf Ebrahimi
460*f5c631daSSadaf Ebrahimi switch (std::fpclassify(value)) {
461*f5c631daSSadaf Ebrahimi case FP_NAN: {
462*f5c631daSSadaf Ebrahimi if (IsSignallingNaN(value)) {
463*f5c631daSSadaf Ebrahimi if (exception != NULL) {
464*f5c631daSSadaf Ebrahimi *exception = true;
465*f5c631daSSadaf Ebrahimi }
466*f5c631daSSadaf Ebrahimi }
467*f5c631daSSadaf Ebrahimi if (DN == kUseDefaultNaN) return kFP16DefaultNaN;
468*f5c631daSSadaf Ebrahimi
469*f5c631daSSadaf Ebrahimi // Convert NaNs as the processor would:
470*f5c631daSSadaf Ebrahimi // - The sign is propagated.
471*f5c631daSSadaf Ebrahimi // - The payload (mantissa) is transferred as much as possible, except
472*f5c631daSSadaf Ebrahimi // that the top bit is forced to '1', making the result a quiet NaN.
473*f5c631daSSadaf Ebrahimi uint16_t result = (sign == 0) ? Float16ToRawbits(kFP16PositiveInfinity)
474*f5c631daSSadaf Ebrahimi : Float16ToRawbits(kFP16NegativeInfinity);
475*f5c631daSSadaf Ebrahimi result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits);
476*f5c631daSSadaf Ebrahimi result |= (1 << 9); // Force a quiet NaN;
477*f5c631daSSadaf Ebrahimi return RawbitsToFloat16(result);
478*f5c631daSSadaf Ebrahimi }
479*f5c631daSSadaf Ebrahimi
480*f5c631daSSadaf Ebrahimi case FP_ZERO:
481*f5c631daSSadaf Ebrahimi return (sign == 0) ? kFP16PositiveZero : kFP16NegativeZero;
482*f5c631daSSadaf Ebrahimi
483*f5c631daSSadaf Ebrahimi case FP_INFINITE:
484*f5c631daSSadaf Ebrahimi return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
485*f5c631daSSadaf Ebrahimi
486*f5c631daSSadaf Ebrahimi case FP_NORMAL:
487*f5c631daSSadaf Ebrahimi case FP_SUBNORMAL: {
488*f5c631daSSadaf Ebrahimi // Convert float-to-half as the processor would, assuming that FPCR.FZ
489*f5c631daSSadaf Ebrahimi // (flush-to-zero) is not set.
490*f5c631daSSadaf Ebrahimi
491*f5c631daSSadaf Ebrahimi // Add the implicit '1' bit to the mantissa.
492*f5c631daSSadaf Ebrahimi mantissa += (1 << 23);
493*f5c631daSSadaf Ebrahimi return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
494*f5c631daSSadaf Ebrahimi }
495*f5c631daSSadaf Ebrahimi }
496*f5c631daSSadaf Ebrahimi
497*f5c631daSSadaf Ebrahimi VIXL_UNREACHABLE();
498*f5c631daSSadaf Ebrahimi return kFP16PositiveZero;
499*f5c631daSSadaf Ebrahimi }
500*f5c631daSSadaf Ebrahimi
501*f5c631daSSadaf Ebrahimi
FPToFloat16(double value,FPRounding round_mode,UseDefaultNaN DN,bool * exception)502*f5c631daSSadaf Ebrahimi Float16 FPToFloat16(double value,
503*f5c631daSSadaf Ebrahimi FPRounding round_mode,
504*f5c631daSSadaf Ebrahimi UseDefaultNaN DN,
505*f5c631daSSadaf Ebrahimi bool* exception) {
506*f5c631daSSadaf Ebrahimi // Only the FPTieEven rounding mode is implemented.
507*f5c631daSSadaf Ebrahimi VIXL_ASSERT(round_mode == FPTieEven);
508*f5c631daSSadaf Ebrahimi USE(round_mode);
509*f5c631daSSadaf Ebrahimi
510*f5c631daSSadaf Ebrahimi uint64_t raw = DoubleToRawbits(value);
511*f5c631daSSadaf Ebrahimi int32_t sign = raw >> 63;
512*f5c631daSSadaf Ebrahimi int64_t exponent = ExtractUnsignedBitfield64(62, 52, raw) - 1023;
513*f5c631daSSadaf Ebrahimi uint64_t mantissa = ExtractUnsignedBitfield64(51, 0, raw);
514*f5c631daSSadaf Ebrahimi
515*f5c631daSSadaf Ebrahimi switch (std::fpclassify(value)) {
516*f5c631daSSadaf Ebrahimi case FP_NAN: {
517*f5c631daSSadaf Ebrahimi if (IsSignallingNaN(value)) {
518*f5c631daSSadaf Ebrahimi if (exception != NULL) {
519*f5c631daSSadaf Ebrahimi *exception = true;
520*f5c631daSSadaf Ebrahimi }
521*f5c631daSSadaf Ebrahimi }
522*f5c631daSSadaf Ebrahimi if (DN == kUseDefaultNaN) return kFP16DefaultNaN;
523*f5c631daSSadaf Ebrahimi
524*f5c631daSSadaf Ebrahimi // Convert NaNs as the processor would:
525*f5c631daSSadaf Ebrahimi // - The sign is propagated.
526*f5c631daSSadaf Ebrahimi // - The payload (mantissa) is transferred as much as possible, except
527*f5c631daSSadaf Ebrahimi // that the top bit is forced to '1', making the result a quiet NaN.
528*f5c631daSSadaf Ebrahimi uint16_t result = (sign == 0) ? Float16ToRawbits(kFP16PositiveInfinity)
529*f5c631daSSadaf Ebrahimi : Float16ToRawbits(kFP16NegativeInfinity);
530*f5c631daSSadaf Ebrahimi result |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits);
531*f5c631daSSadaf Ebrahimi result |= (1 << 9); // Force a quiet NaN;
532*f5c631daSSadaf Ebrahimi return RawbitsToFloat16(result);
533*f5c631daSSadaf Ebrahimi }
534*f5c631daSSadaf Ebrahimi
535*f5c631daSSadaf Ebrahimi case FP_ZERO:
536*f5c631daSSadaf Ebrahimi return (sign == 0) ? kFP16PositiveZero : kFP16NegativeZero;
537*f5c631daSSadaf Ebrahimi
538*f5c631daSSadaf Ebrahimi case FP_INFINITE:
539*f5c631daSSadaf Ebrahimi return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
540*f5c631daSSadaf Ebrahimi case FP_NORMAL:
541*f5c631daSSadaf Ebrahimi case FP_SUBNORMAL: {
542*f5c631daSSadaf Ebrahimi // Convert double-to-half as the processor would, assuming that FPCR.FZ
543*f5c631daSSadaf Ebrahimi // (flush-to-zero) is not set.
544*f5c631daSSadaf Ebrahimi
545*f5c631daSSadaf Ebrahimi // Add the implicit '1' bit to the mantissa.
546*f5c631daSSadaf Ebrahimi mantissa += (UINT64_C(1) << 52);
547*f5c631daSSadaf Ebrahimi return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
548*f5c631daSSadaf Ebrahimi }
549*f5c631daSSadaf Ebrahimi }
550*f5c631daSSadaf Ebrahimi
551*f5c631daSSadaf Ebrahimi VIXL_UNREACHABLE();
552*f5c631daSSadaf Ebrahimi return kFP16PositiveZero;
553*f5c631daSSadaf Ebrahimi }
554*f5c631daSSadaf Ebrahimi
555*f5c631daSSadaf Ebrahimi } // namespace vixl
556