xref: /aosp_15_r20/external/XNNPACK/src/f32-f16-vcvt/scalar-bitcast.c.in (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
1*4bdc9457SAndroid Build Coastguard Worker// Copyright 2021 Google LLC
2*4bdc9457SAndroid Build Coastguard Worker//
3*4bdc9457SAndroid Build Coastguard Worker// This source code is licensed under the BSD-style license found in the
4*4bdc9457SAndroid Build Coastguard Worker// LICENSE file in the root directory of this source tree.
5*4bdc9457SAndroid Build Coastguard Worker
6*4bdc9457SAndroid Build Coastguard Worker$assert BATCH_TILE >= 1
7*4bdc9457SAndroid Build Coastguard Worker#include <assert.h>
8*4bdc9457SAndroid Build Coastguard Worker
9*4bdc9457SAndroid Build Coastguard Worker#include <xnnpack/common.h>
10*4bdc9457SAndroid Build Coastguard Worker#include <xnnpack/math.h>
11*4bdc9457SAndroid Build Coastguard Worker#include <xnnpack/vcvt.h>
12*4bdc9457SAndroid Build Coastguard Worker
13*4bdc9457SAndroid Build Coastguard Worker
// Converts a batch of 32-bit float bit patterns to 16-bit outputs (f32 -> f16
// per the kernel name) using scalar integer/float arithmetic and bit casts.
//
// This is a template: lines starting with `$` are generator directives, and
// BATCH_TILE is the number of elements handled per main-loop iteration.
//
//   n      - size of the input in BYTES; must be non-zero and a multiple of
//            sizeof(float) (both asserted below).
//   input  - source floats; read as raw uint32_t words.
//   output - destination; written as one uint16_t per input element.
//   params - conversion constants, read from the scalar_bitcast member.
14*4bdc9457SAndroid Build Coastguard Workervoid xnn_f32_f16_vcvt_ukernel__scalar_bitcast_x${BATCH_TILE}(
15*4bdc9457SAndroid Build Coastguard Worker    size_t n,
16*4bdc9457SAndroid Build Coastguard Worker    const float* input,
17*4bdc9457SAndroid Build Coastguard Worker    void* output,
18*4bdc9457SAndroid Build Coastguard Worker    const union xnn_f32_f16_cvt_params params[restrict XNN_MIN_ELEMENTS(1)])
19*4bdc9457SAndroid Build Coastguard Worker{
20*4bdc9457SAndroid Build Coastguard Worker  assert(n != 0);
21*4bdc9457SAndroid Build Coastguard Worker  assert(n % sizeof(float) == 0);
22*4bdc9457SAndroid Build Coastguard Worker  assert(input != NULL);
23*4bdc9457SAndroid Build Coastguard Worker  assert(output != NULL);
24*4bdc9457SAndroid Build Coastguard Worker
  // Read every conversion constant once, before any loop.
  // NOTE(review): the numeric values are defined where params is initialized,
  // which is not visible here; comments below describe only how each is used.
25*4bdc9457SAndroid Build Coastguard Worker  const uint32_t vnonsign_mask = params->scalar_bitcast.nonsign_mask;
26*4bdc9457SAndroid Build Coastguard Worker  const uint32_t vexp_bias = params->scalar_bitcast.exp_bias;
27*4bdc9457SAndroid Build Coastguard Worker  const float vscale_to_inf = params->scalar_bitcast.scale_to_inf;
28*4bdc9457SAndroid Build Coastguard Worker  const uint32_t vexpw_max = params->scalar_bitcast.expw_max;
29*4bdc9457SAndroid Build Coastguard Worker  const float vscale_to_zero = params->scalar_bitcast.scale_to_zero;
30*4bdc9457SAndroid Build Coastguard Worker  const uint32_t vbias_min = params->scalar_bitcast.bias_min;
31*4bdc9457SAndroid Build Coastguard Worker  const uint16_t vexph_mask = params->scalar_bitcast.exph_mask;
32*4bdc9457SAndroid Build Coastguard Worker  const uint16_t vmanth_mask = params->scalar_bitcast.manth_mask;
33*4bdc9457SAndroid Build Coastguard Worker  const uint16_t vnanh = params->scalar_bitcast.nanh;
34*4bdc9457SAndroid Build Coastguard Worker
  // The input is walked as 32-bit words and the output as 16-bit words.
35*4bdc9457SAndroid Build Coastguard Worker  const uint32_t* i = (const uint32_t*) input;
36*4bdc9457SAndroid Build Coastguard Worker  uint16_t* o = (uint16_t*) output;
  // Main loop, generated only when BATCH_TILE > 1: converts BATCH_TILE
  // elements per iteration. Each step is emitted for all elements of the tile
  // before the next step begins.
37*4bdc9457SAndroid Build Coastguard Worker  $if BATCH_TILE > 1:
38*4bdc9457SAndroid Build Coastguard Worker    for (; n >= ${BATCH_TILE} * sizeof(float); n -= ${BATCH_TILE} * sizeof(float)) {
      // Load the raw input words for this tile.
39*4bdc9457SAndroid Build Coastguard Worker      $for N in range(BATCH_TILE):
40*4bdc9457SAndroid Build Coastguard Worker        const uint32_t vw${N} = i[${N}];
41*4bdc9457SAndroid Build Coastguard Worker      i += ${BATCH_TILE};
42*4bdc9457SAndroid Build Coastguard Worker
      // Keep only the bits selected by nonsign_mask (presumably everything
      // except the sign bit - confirm against the params initializer).
43*4bdc9457SAndroid Build Coastguard Worker      $for N in range(BATCH_TILE):
44*4bdc9457SAndroid Build Coastguard Worker        const uint32_t vnonsignw${N} = vw${N} & vnonsign_mask;
45*4bdc9457SAndroid Build Coastguard Worker
      // vf: the masked word reinterpreted as a float.
      // vsignw: exactly the bits of vw that the mask removed.
      // vbias: the masked word plus exp_bias, as an integer.
46*4bdc9457SAndroid Build Coastguard Worker      $for N in range(BATCH_TILE):
47*4bdc9457SAndroid Build Coastguard Worker        float vf${N} = uint32_as_float(vnonsignw${N});
48*4bdc9457SAndroid Build Coastguard Worker      $for N in range(BATCH_TILE):
49*4bdc9457SAndroid Build Coastguard Worker        const uint32_t vsignw${N} = vw${N} ^ vnonsignw${N};
50*4bdc9457SAndroid Build Coastguard Worker      $for N in range(BATCH_TILE):
51*4bdc9457SAndroid Build Coastguard Worker        uint32_t vbias${N} = vnonsignw${N} + vexp_bias;
52*4bdc9457SAndroid Build Coastguard Worker
      // First of two separate float multiplies (scale_to_inf here,
      // scale_to_zero below); the integer bias is masked with expw_max.
      // NOTE(review): presumably the first multiply pushes out-of-range
      // magnitudes to infinity before the second rescales - confirm with the
      // constant values. They are kept as two distinct multiplies.
53*4bdc9457SAndroid Build Coastguard Worker      $for N in range(BATCH_TILE):
54*4bdc9457SAndroid Build Coastguard Worker        vf${N} *= vscale_to_inf;
55*4bdc9457SAndroid Build Coastguard Worker      $for N in range(BATCH_TILE):
56*4bdc9457SAndroid Build Coastguard Worker        vbias${N} &= vexpw_max;
57*4bdc9457SAndroid Build Coastguard Worker
      // Second multiply; the bias is clamped from below to bias_min.
58*4bdc9457SAndroid Build Coastguard Worker      $for N in range(BATCH_TILE):
59*4bdc9457SAndroid Build Coastguard Worker        vf${N} *= vscale_to_zero;
60*4bdc9457SAndroid Build Coastguard Worker      $for N in range(BATCH_TILE):
61*4bdc9457SAndroid Build Coastguard Worker        vbias${N} = math_max_u32(vbias${N}, vbias_min);
62*4bdc9457SAndroid Build Coastguard Worker
      // Add the bias word, reinterpreted as a float, to the scaled value.
      // NOTE(review): presumably this float addition is what rounds the value
      // to the narrower mantissa - confirm.
63*4bdc9457SAndroid Build Coastguard Worker      $for N in range(BATCH_TILE):
64*4bdc9457SAndroid Build Coastguard Worker        vf${N} += uint32_as_float(vbias${N});
65*4bdc9457SAndroid Build Coastguard Worker
      // Back to integer bits to extract the output fields.
66*4bdc9457SAndroid Build Coastguard Worker      $for N in range(BATCH_TILE):
67*4bdc9457SAndroid Build Coastguard Worker        const uint32_t vbits${N} = float_as_uint32(vf${N});
68*4bdc9457SAndroid Build Coastguard Worker
      // vexph: bits 13 and up of the sum, truncated to 16 bits, then masked.
      // vmanth: low 16 bits of the sum, masked (the cast applies before the &).
      // vsignh: the removed bits shifted down by 16, so bit 31 lands on bit 15.
69*4bdc9457SAndroid Build Coastguard Worker      $for N in range(BATCH_TILE):
70*4bdc9457SAndroid Build Coastguard Worker        const uint16_t vexph${N} = (uint16_t) (vbits${N} >> 13) & vexph_mask;
71*4bdc9457SAndroid Build Coastguard Worker      $for N in range(BATCH_TILE):
72*4bdc9457SAndroid Build Coastguard Worker        const uint16_t vmanth${N} = (uint16_t) vbits${N} & vmanth_mask;
73*4bdc9457SAndroid Build Coastguard Worker      $for N in range(BATCH_TILE):
74*4bdc9457SAndroid Build Coastguard Worker        const uint16_t vsignh${N} = (uint16_t) (vsignw${N} >> 16);
75*4bdc9457SAndroid Build Coastguard Worker
      // Combine the two fields by addition (not OR), replace the result with
      // nanh when the masked input word exceeds expw_max (presumably a NaN
      // input - confirm), then OR in the sign bits last so they survive the
      // replacement.
76*4bdc9457SAndroid Build Coastguard Worker      $for N in range(BATCH_TILE):
77*4bdc9457SAndroid Build Coastguard Worker        uint16_t vh${N} = vexph${N} + vmanth${N};
78*4bdc9457SAndroid Build Coastguard Worker      $for N in range(BATCH_TILE):
79*4bdc9457SAndroid Build Coastguard Worker        if XNN_UNPREDICTABLE(vnonsignw${N} > vexpw_max) {
80*4bdc9457SAndroid Build Coastguard Worker          vh${N} = vnanh;
81*4bdc9457SAndroid Build Coastguard Worker        }
82*4bdc9457SAndroid Build Coastguard Worker      $for N in range(BATCH_TILE):
83*4bdc9457SAndroid Build Coastguard Worker        vh${N} |= vsignh${N};
84*4bdc9457SAndroid Build Coastguard Worker
      // Store the tile of 16-bit results.
85*4bdc9457SAndroid Build Coastguard Worker      $for N in range(BATCH_TILE):
86*4bdc9457SAndroid Build Coastguard Worker        o[${N}] = vh${N};
87*4bdc9457SAndroid Build Coastguard Worker      o += ${BATCH_TILE};
88*4bdc9457SAndroid Build Coastguard Worker    }
  // BATCH_TILE == 1: no unrolled loop is generated above, so this do/while
  // converts every element. n != 0 is asserted on entry, so running the body
  // before testing the condition is safe.
89*4bdc9457SAndroid Build Coastguard Worker  $if BATCH_TILE == 1:
90*4bdc9457SAndroid Build Coastguard Worker    do {
91*4bdc9457SAndroid Build Coastguard Worker      const uint32_t vw = *i++;
92*4bdc9457SAndroid Build Coastguard Worker
      // Bits selected by nonsign_mask (presumably all but the sign - confirm).
93*4bdc9457SAndroid Build Coastguard Worker      const uint32_t vnonsignw = vw & vnonsign_mask;
94*4bdc9457SAndroid Build Coastguard Worker
      // Float view of the masked word, the bits the mask removed, and the
      // integer bias (masked word plus exp_bias).
95*4bdc9457SAndroid Build Coastguard Worker      float vf = uint32_as_float(vnonsignw);
96*4bdc9457SAndroid Build Coastguard Worker      const uint32_t vsignw = vw ^ vnonsignw;
97*4bdc9457SAndroid Build Coastguard Worker      uint32_t vbias = vnonsignw + vexp_bias;
98*4bdc9457SAndroid Build Coastguard Worker
      // Two separate float multiplies, interleaved with masking and clamping
      // of the integer bias.
99*4bdc9457SAndroid Build Coastguard Worker      vf *= vscale_to_inf;
100*4bdc9457SAndroid Build Coastguard Worker      vbias &= vexpw_max;
101*4bdc9457SAndroid Build Coastguard Worker
102*4bdc9457SAndroid Build Coastguard Worker      vf *= vscale_to_zero;
103*4bdc9457SAndroid Build Coastguard Worker      vbias = math_max_u32(vbias, vbias_min);
104*4bdc9457SAndroid Build Coastguard Worker
      // Add the bias word reinterpreted as a float, then go back to bits.
105*4bdc9457SAndroid Build Coastguard Worker      vf += uint32_as_float(vbias);
106*4bdc9457SAndroid Build Coastguard Worker
107*4bdc9457SAndroid Build Coastguard Worker      const uint32_t vbits = float_as_uint32(vf);
108*4bdc9457SAndroid Build Coastguard Worker
      // Extract the masked fields; each cast to uint16_t applies before the &.
109*4bdc9457SAndroid Build Coastguard Worker      const uint16_t vexph = (uint16_t) (vbits >> 13) & vexph_mask;
110*4bdc9457SAndroid Build Coastguard Worker      const uint16_t vmanth = (uint16_t) vbits & vmanth_mask;
111*4bdc9457SAndroid Build Coastguard Worker      const uint16_t vsignh = (uint16_t) (vsignw >> 16);
112*4bdc9457SAndroid Build Coastguard Worker
      // Sum the fields, substitute nanh when the masked input exceeds
      // expw_max, and OR in the sign bits last.
113*4bdc9457SAndroid Build Coastguard Worker      uint16_t vh = vexph + vmanth;
114*4bdc9457SAndroid Build Coastguard Worker      if XNN_UNPREDICTABLE(vnonsignw > vexpw_max) {
115*4bdc9457SAndroid Build Coastguard Worker        vh = vnanh;
116*4bdc9457SAndroid Build Coastguard Worker      }
117*4bdc9457SAndroid Build Coastguard Worker      vh |= vsignh;
118*4bdc9457SAndroid Build Coastguard Worker
119*4bdc9457SAndroid Build Coastguard Worker      *o++ = vh;
120*4bdc9457SAndroid Build Coastguard Worker
121*4bdc9457SAndroid Build Coastguard Worker      n -= sizeof(float);
122*4bdc9457SAndroid Build Coastguard Worker    } while (n != 0);
  // BATCH_TILE == 2: after the 2-wide loop n is below 2 * sizeof(float) and
  // still a multiple of sizeof(float), so at most one element remains. It is
  // converted with the same per-element sequence as the main loop; the
  // pointers are not advanced because nothing follows.
123*4bdc9457SAndroid Build Coastguard Worker  $elif BATCH_TILE == 2:
124*4bdc9457SAndroid Build Coastguard Worker    if XNN_UNLIKELY(n != 0) {
125*4bdc9457SAndroid Build Coastguard Worker      const uint32_t vw = *i;
126*4bdc9457SAndroid Build Coastguard Worker
127*4bdc9457SAndroid Build Coastguard Worker      const uint32_t vnonsignw = vw & vnonsign_mask;
128*4bdc9457SAndroid Build Coastguard Worker
129*4bdc9457SAndroid Build Coastguard Worker      float vf = uint32_as_float(vnonsignw);
130*4bdc9457SAndroid Build Coastguard Worker      const uint32_t vsignw = vw ^ vnonsignw;
131*4bdc9457SAndroid Build Coastguard Worker      uint32_t vbias = vnonsignw + vexp_bias;
132*4bdc9457SAndroid Build Coastguard Worker
133*4bdc9457SAndroid Build Coastguard Worker      vf *= vscale_to_inf;
134*4bdc9457SAndroid Build Coastguard Worker      vbias &= vexpw_max;
135*4bdc9457SAndroid Build Coastguard Worker
136*4bdc9457SAndroid Build Coastguard Worker      vf *= vscale_to_zero;
137*4bdc9457SAndroid Build Coastguard Worker      vbias = math_max_u32(vbias, vbias_min);
138*4bdc9457SAndroid Build Coastguard Worker
139*4bdc9457SAndroid Build Coastguard Worker      vf += uint32_as_float(vbias);
140*4bdc9457SAndroid Build Coastguard Worker
141*4bdc9457SAndroid Build Coastguard Worker      const uint32_t vbits = float_as_uint32(vf);
142*4bdc9457SAndroid Build Coastguard Worker
143*4bdc9457SAndroid Build Coastguard Worker      const uint16_t vexph = (uint16_t) (vbits >> 13) & vexph_mask;
144*4bdc9457SAndroid Build Coastguard Worker      const uint16_t vmanth = (uint16_t) vbits & vmanth_mask;
145*4bdc9457SAndroid Build Coastguard Worker      const uint16_t vsignh = (uint16_t) (vsignw >> 16);
146*4bdc9457SAndroid Build Coastguard Worker
147*4bdc9457SAndroid Build Coastguard Worker      uint16_t vh = vexph + vmanth;
148*4bdc9457SAndroid Build Coastguard Worker      if XNN_UNPREDICTABLE(vnonsignw > vexpw_max) {
149*4bdc9457SAndroid Build Coastguard Worker        vh = vnanh;
150*4bdc9457SAndroid Build Coastguard Worker      }
151*4bdc9457SAndroid Build Coastguard Worker      vh |= vsignh;
152*4bdc9457SAndroid Build Coastguard Worker
153*4bdc9457SAndroid Build Coastguard Worker      *o = vh;
154*4bdc9457SAndroid Build Coastguard Worker    }
  // BATCH_TILE > 2: the elements left over by the main loop (fewer than
  // BATCH_TILE) are converted one at a time with the same per-element
  // sequence as the main loop.
155*4bdc9457SAndroid Build Coastguard Worker  $else:
156*4bdc9457SAndroid Build Coastguard Worker    if XNN_UNLIKELY(n != 0) {
157*4bdc9457SAndroid Build Coastguard Worker      do {
158*4bdc9457SAndroid Build Coastguard Worker        const uint32_t vw = *i++;
159*4bdc9457SAndroid Build Coastguard Worker
160*4bdc9457SAndroid Build Coastguard Worker        const uint32_t vnonsignw = vw & vnonsign_mask;
161*4bdc9457SAndroid Build Coastguard Worker
162*4bdc9457SAndroid Build Coastguard Worker        float vf = uint32_as_float(vnonsignw);
163*4bdc9457SAndroid Build Coastguard Worker        const uint32_t vsignw = vw ^ vnonsignw;
164*4bdc9457SAndroid Build Coastguard Worker        uint32_t vbias = vnonsignw + vexp_bias;
165*4bdc9457SAndroid Build Coastguard Worker
166*4bdc9457SAndroid Build Coastguard Worker        vf *= vscale_to_inf;
167*4bdc9457SAndroid Build Coastguard Worker        vbias &= vexpw_max;
168*4bdc9457SAndroid Build Coastguard Worker
169*4bdc9457SAndroid Build Coastguard Worker        vf *= vscale_to_zero;
170*4bdc9457SAndroid Build Coastguard Worker        vbias = math_max_u32(vbias, vbias_min);
171*4bdc9457SAndroid Build Coastguard Worker
172*4bdc9457SAndroid Build Coastguard Worker        vf += uint32_as_float(vbias);
173*4bdc9457SAndroid Build Coastguard Worker
174*4bdc9457SAndroid Build Coastguard Worker        const uint32_t vbits = float_as_uint32(vf);
175*4bdc9457SAndroid Build Coastguard Worker
176*4bdc9457SAndroid Build Coastguard Worker        const uint16_t vexph = (uint16_t) (vbits >> 13) & vexph_mask;
177*4bdc9457SAndroid Build Coastguard Worker        const uint16_t vmanth = (uint16_t) vbits & vmanth_mask;
178*4bdc9457SAndroid Build Coastguard Worker        const uint16_t vsignh = (uint16_t) (vsignw >> 16);
179*4bdc9457SAndroid Build Coastguard Worker
180*4bdc9457SAndroid Build Coastguard Worker        uint16_t vh = vexph + vmanth;
181*4bdc9457SAndroid Build Coastguard Worker        if XNN_UNPREDICTABLE(vnonsignw > vexpw_max) {
182*4bdc9457SAndroid Build Coastguard Worker          vh = vnanh;
183*4bdc9457SAndroid Build Coastguard Worker        }
184*4bdc9457SAndroid Build Coastguard Worker        vh |= vsignh;
185*4bdc9457SAndroid Build Coastguard Worker
186*4bdc9457SAndroid Build Coastguard Worker        *o++ = vh;
187*4bdc9457SAndroid Build Coastguard Worker
188*4bdc9457SAndroid Build Coastguard Worker        n -= sizeof(float);
189*4bdc9457SAndroid Build Coastguard Worker      } while (n != 0);
190*4bdc9457SAndroid Build Coastguard Worker    }
191*4bdc9457SAndroid Build Coastguard Worker}
192