xref: /aosp_15_r20/external/XNNPACK/src/f32-f16-vcvt/scalar-fabsf.c.in (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
1*4bdc9457SAndroid Build Coastguard Worker// Copyright 2021 Google LLC
2*4bdc9457SAndroid Build Coastguard Worker//
3*4bdc9457SAndroid Build Coastguard Worker// This source code is licensed under the BSD-style license found in the
4*4bdc9457SAndroid Build Coastguard Worker// LICENSE file in the root directory of this source tree.
5*4bdc9457SAndroid Build Coastguard Worker
6*4bdc9457SAndroid Build Coastguard Worker$assert BATCH_TILE >= 1
7*4bdc9457SAndroid Build Coastguard Worker#include <assert.h>
8*4bdc9457SAndroid Build Coastguard Worker#include <math.h>
9*4bdc9457SAndroid Build Coastguard Worker
10*4bdc9457SAndroid Build Coastguard Worker#include <xnnpack/common.h>
11*4bdc9457SAndroid Build Coastguard Worker#include <xnnpack/math.h>
12*4bdc9457SAndroid Build Coastguard Worker#include <xnnpack/vcvt.h>
13*4bdc9457SAndroid Build Coastguard Worker
14*4bdc9457SAndroid Build Coastguard Worker
// Converts a batch of 32-bit floats into 16-bit floating-point (f16) bit
// patterns using only scalar float and integer arithmetic. This is a template:
// one kernel is generated per BATCH_TILE value, which controls how many
// elements the main loop handles per iteration.
//
//   n      - size of the input in BYTES (not elements); must be non-zero and a
//            multiple of sizeof(float), both asserted below.
//   input  - source floats; must not be NULL.
//   output - destination; written as consecutive uint16_t values, one per input
//            float; must not be NULL.
//   params - supplies the scalar_fabsf constants loaded below. Their numeric
//            values are defined outside this file.
15*4bdc9457SAndroid Build Coastguard Workervoid xnn_f32_f16_vcvt_ukernel__scalar_fabsf_x${BATCH_TILE}(
16*4bdc9457SAndroid Build Coastguard Worker    size_t n,
17*4bdc9457SAndroid Build Coastguard Worker    const float* input,
18*4bdc9457SAndroid Build Coastguard Worker    void* output,
19*4bdc9457SAndroid Build Coastguard Worker    const union xnn_f32_f16_cvt_params params[restrict XNN_MIN_ELEMENTS(1)])
20*4bdc9457SAndroid Build Coastguard Worker{
21*4bdc9457SAndroid Build Coastguard Worker  assert(n != 0);
22*4bdc9457SAndroid Build Coastguard Worker  assert(n % sizeof(float) == 0);
23*4bdc9457SAndroid Build Coastguard Worker  assert(input != NULL);
24*4bdc9457SAndroid Build Coastguard Worker  assert(output != NULL);
25*4bdc9457SAndroid Build Coastguard Worker
  // Load every conversion constant once, outside the loops.
26*4bdc9457SAndroid Build Coastguard Worker  const float vscale_to_inf = params->scalar_fabsf.scale_to_inf;
27*4bdc9457SAndroid Build Coastguard Worker  const uint32_t vexp_bias = params->scalar_fabsf.exp_bias;
28*4bdc9457SAndroid Build Coastguard Worker  const float vscale_to_zero = params->scalar_fabsf.scale_to_zero;
29*4bdc9457SAndroid Build Coastguard Worker  const uint32_t vexpw_max = params->scalar_fabsf.expw_max;
30*4bdc9457SAndroid Build Coastguard Worker  const uint32_t vbias_min = params->scalar_fabsf.bias_min;
31*4bdc9457SAndroid Build Coastguard Worker  const uint16_t vexph_mask = params->scalar_fabsf.exph_mask;
32*4bdc9457SAndroid Build Coastguard Worker  const uint16_t vmanth_mask = params->scalar_fabsf.manth_mask;
33*4bdc9457SAndroid Build Coastguard Worker  const uint16_t vnanh = params->scalar_fabsf.nanh;
34*4bdc9457SAndroid Build Coastguard Worker
  // Results are stored as raw 16-bit words.
35*4bdc9457SAndroid Build Coastguard Worker  uint16_t* o = (uint16_t*) output;
  // Main loop (only generated when BATCH_TILE > 1): handles BATCH_TILE elements
  // per iteration. Each step is emitted for all lanes before the next step
  // begins, so independent lanes are interleaved in the generated code.
36*4bdc9457SAndroid Build Coastguard Worker  $if BATCH_TILE > 1:
37*4bdc9457SAndroid Build Coastguard Worker    for (; n >= ${BATCH_TILE} * sizeof(float); n -= ${BATCH_TILE} * sizeof(float)) {
38*4bdc9457SAndroid Build Coastguard Worker      $for N in range(BATCH_TILE):
39*4bdc9457SAndroid Build Coastguard Worker        const float vx${N} = input[${N}];
40*4bdc9457SAndroid Build Coastguard Worker      input += ${BATCH_TILE};
41*4bdc9457SAndroid Build Coastguard Worker
      // Magnitude of the input, plus the raw bits of the signed input.
42*4bdc9457SAndroid Build Coastguard Worker      $for N in range(BATCH_TILE):
43*4bdc9457SAndroid Build Coastguard Worker        const float vabsx${N} = fabsf(vx${N});
44*4bdc9457SAndroid Build Coastguard Worker      $for N in range(BATCH_TILE):
45*4bdc9457SAndroid Build Coastguard Worker        uint32_t vsignw${N} = float_as_uint32(vx${N});
46*4bdc9457SAndroid Build Coastguard Worker
      // Raw bits of the magnitude, and the first of two scaling multiplications.
      // NOTE(review): presumably scale_to_inf pushes values too large for f16 to
      // infinity - confirm against the params initialization.
47*4bdc9457SAndroid Build Coastguard Worker      $for N in range(BATCH_TILE):
48*4bdc9457SAndroid Build Coastguard Worker        const uint32_t vnonsignw${N} = float_as_uint32(vabsx${N});
49*4bdc9457SAndroid Build Coastguard Worker      $for N in range(BATCH_TILE):
50*4bdc9457SAndroid Build Coastguard Worker        float vf${N} = vabsx${N} * vscale_to_inf;
51*4bdc9457SAndroid Build Coastguard Worker
      // Start building the integer bias word from the magnitude bits. XOR-ing
      // the signed bits with the magnitude bits leaves only the bits that fabsf
      // changed (the sign) in vsignw.
52*4bdc9457SAndroid Build Coastguard Worker      $for N in range(BATCH_TILE):
53*4bdc9457SAndroid Build Coastguard Worker        uint32_t vbias${N} = vnonsignw${N} + vexp_bias;
54*4bdc9457SAndroid Build Coastguard Worker      $for N in range(BATCH_TILE):
55*4bdc9457SAndroid Build Coastguard Worker        vsignw${N} ^= vnonsignw${N};
56*4bdc9457SAndroid Build Coastguard Worker
      // Second scaling multiplication; mask the bias word with expw_max.
57*4bdc9457SAndroid Build Coastguard Worker      $for N in range(BATCH_TILE):
58*4bdc9457SAndroid Build Coastguard Worker        vf${N} *= vscale_to_zero;
59*4bdc9457SAndroid Build Coastguard Worker      $for N in range(BATCH_TILE):
60*4bdc9457SAndroid Build Coastguard Worker        vbias${N} &= vexpw_max;
61*4bdc9457SAndroid Build Coastguard Worker
      // Clamp the bias word from below to bias_min.
62*4bdc9457SAndroid Build Coastguard Worker      $for N in range(BATCH_TILE):
63*4bdc9457SAndroid Build Coastguard Worker        vbias${N} = math_max_u32(vbias${N}, vbias_min);
64*4bdc9457SAndroid Build Coastguard Worker
      // Reinterpret the bias word as a float and add it to the scaled value.
      // NOTE(review): presumably this addition is what rounds the mantissa to
      // f16 precision - confirm against the params initialization.
65*4bdc9457SAndroid Build Coastguard Worker      $for N in range(BATCH_TILE):
66*4bdc9457SAndroid Build Coastguard Worker        vf${N} += uint32_as_float(vbias${N});
67*4bdc9457SAndroid Build Coastguard Worker
68*4bdc9457SAndroid Build Coastguard Worker      $for N in range(BATCH_TILE):
69*4bdc9457SAndroid Build Coastguard Worker        const uint32_t vbits${N} = float_as_uint32(vf${N});
70*4bdc9457SAndroid Build Coastguard Worker
      // Extract the f16 fields from the bits of the sum: exponent from
      // (vbits >> 13) masked by exph_mask, mantissa from the low bits masked by
      // manth_mask. The sign comes from vsignw; shifting right by 16 moves
      // bit 31 down to bit 15.
71*4bdc9457SAndroid Build Coastguard Worker      $for N in range(BATCH_TILE):
72*4bdc9457SAndroid Build Coastguard Worker        const uint16_t vexph${N} = (uint16_t) (vbits${N} >> 13) & vexph_mask;
73*4bdc9457SAndroid Build Coastguard Worker      $for N in range(BATCH_TILE):
74*4bdc9457SAndroid Build Coastguard Worker        const uint16_t vmanth${N} = (uint16_t) vbits${N} & vmanth_mask;
75*4bdc9457SAndroid Build Coastguard Worker      $for N in range(BATCH_TILE):
76*4bdc9457SAndroid Build Coastguard Worker        const uint16_t vsignh${N} = (uint16_t) (vsignw${N} >> 16);
77*4bdc9457SAndroid Build Coastguard Worker
      // Exponent and mantissa are combined with an addition rather than an OR.
      // NOTE(review): presumably so a carry out of the mantissa field can bump
      // the exponent - depends on the width of manth_mask; confirm.
      // Inputs whose magnitude bits exceed expw_max get the nanh pattern
      // instead (presumably the NaN case, assuming expw_max is the +Inf bit
      // pattern - confirm). The sign bit is merged in last, so it is kept in
      // both cases.
78*4bdc9457SAndroid Build Coastguard Worker      $for N in range(BATCH_TILE):
79*4bdc9457SAndroid Build Coastguard Worker        uint16_t vh${N} = vexph${N} + vmanth${N};
80*4bdc9457SAndroid Build Coastguard Worker      $for N in range(BATCH_TILE):
81*4bdc9457SAndroid Build Coastguard Worker        if XNN_UNPREDICTABLE(vnonsignw${N} > vexpw_max) {
82*4bdc9457SAndroid Build Coastguard Worker          vh${N} = vnanh;
83*4bdc9457SAndroid Build Coastguard Worker        }
84*4bdc9457SAndroid Build Coastguard Worker      $for N in range(BATCH_TILE):
85*4bdc9457SAndroid Build Coastguard Worker        vh${N} |= vsignh${N};
86*4bdc9457SAndroid Build Coastguard Worker
87*4bdc9457SAndroid Build Coastguard Worker      $for N in range(BATCH_TILE):
88*4bdc9457SAndroid Build Coastguard Worker        o[${N}] = vh${N};
89*4bdc9457SAndroid Build Coastguard Worker      o += ${BATCH_TILE};
90*4bdc9457SAndroid Build Coastguard Worker    }
91*4bdc9457SAndroid Build Coastguard Worker  $if BATCH_TILE == 1:
    // BATCH_TILE == 1: there is no main loop, so this do-while converts every
    // element. Entering it unconditionally is safe because n != 0 is asserted
    // at the top of the function.
92*4bdc9457SAndroid Build Coastguard Worker    do {
93*4bdc9457SAndroid Build Coastguard Worker      const float vx = *input++;
94*4bdc9457SAndroid Build Coastguard Worker
      // Same per-element sequence as the unrolled main loop above.
95*4bdc9457SAndroid Build Coastguard Worker      const float vabsx = fabsf(vx);
96*4bdc9457SAndroid Build Coastguard Worker      uint32_t vsignw = float_as_uint32(vx);
97*4bdc9457SAndroid Build Coastguard Worker
98*4bdc9457SAndroid Build Coastguard Worker      const uint32_t vnonsignw = float_as_uint32(vabsx);
99*4bdc9457SAndroid Build Coastguard Worker      float vf = vabsx * vscale_to_inf;
100*4bdc9457SAndroid Build Coastguard Worker
101*4bdc9457SAndroid Build Coastguard Worker      uint32_t vbias = vnonsignw + vexp_bias;
      // Keep only the bits that differ between x and |x| (the sign).
102*4bdc9457SAndroid Build Coastguard Worker      vsignw ^= vnonsignw;
103*4bdc9457SAndroid Build Coastguard Worker
104*4bdc9457SAndroid Build Coastguard Worker      vf *= vscale_to_zero;
105*4bdc9457SAndroid Build Coastguard Worker      vbias &= vexpw_max;
106*4bdc9457SAndroid Build Coastguard Worker
107*4bdc9457SAndroid Build Coastguard Worker      vbias = math_max_u32(vbias, vbias_min);
108*4bdc9457SAndroid Build Coastguard Worker
109*4bdc9457SAndroid Build Coastguard Worker      vf += uint32_as_float(vbias);
110*4bdc9457SAndroid Build Coastguard Worker
111*4bdc9457SAndroid Build Coastguard Worker      const uint32_t vbits = float_as_uint32(vf);
112*4bdc9457SAndroid Build Coastguard Worker
113*4bdc9457SAndroid Build Coastguard Worker      const uint16_t vexph = (uint16_t) (vbits >> 13) & vexph_mask;
114*4bdc9457SAndroid Build Coastguard Worker      const uint16_t vmanth = (uint16_t) vbits & vmanth_mask;
115*4bdc9457SAndroid Build Coastguard Worker      const uint16_t vsignh = (uint16_t) (vsignw >> 16);
116*4bdc9457SAndroid Build Coastguard Worker
117*4bdc9457SAndroid Build Coastguard Worker      uint16_t vh = vexph + vmanth;
      // Magnitude bits above expw_max select the nanh pattern (presumably the
      // NaN case - confirm); the sign is OR-ed in afterwards either way.
118*4bdc9457SAndroid Build Coastguard Worker      if XNN_UNPREDICTABLE(vnonsignw > vexpw_max) {
119*4bdc9457SAndroid Build Coastguard Worker        vh = vnanh;
120*4bdc9457SAndroid Build Coastguard Worker      }
121*4bdc9457SAndroid Build Coastguard Worker      vh |= vsignh;
122*4bdc9457SAndroid Build Coastguard Worker
123*4bdc9457SAndroid Build Coastguard Worker      *o++ = vh;
124*4bdc9457SAndroid Build Coastguard Worker
125*4bdc9457SAndroid Build Coastguard Worker      n -= sizeof(float);
126*4bdc9457SAndroid Build Coastguard Worker    } while (n != 0);
127*4bdc9457SAndroid Build Coastguard Worker  $elif BATCH_TILE == 2:
    // BATCH_TILE == 2: after the main loop n is below 2 * sizeof(float) and is
    // still a multiple of sizeof(float), so at most one element remains. No
    // loop is needed and the pointers are not advanced.
128*4bdc9457SAndroid Build Coastguard Worker    if XNN_UNLIKELY(n != 0) {
129*4bdc9457SAndroid Build Coastguard Worker      const float vx = *input;
130*4bdc9457SAndroid Build Coastguard Worker
      // Same per-element sequence as the unrolled main loop above.
131*4bdc9457SAndroid Build Coastguard Worker      const float vabsx = fabsf(vx);
132*4bdc9457SAndroid Build Coastguard Worker      uint32_t vsignw = float_as_uint32(vx);
133*4bdc9457SAndroid Build Coastguard Worker
134*4bdc9457SAndroid Build Coastguard Worker      const uint32_t vnonsignw = float_as_uint32(vabsx);
135*4bdc9457SAndroid Build Coastguard Worker      float vf = vabsx * vscale_to_inf;
136*4bdc9457SAndroid Build Coastguard Worker
137*4bdc9457SAndroid Build Coastguard Worker      uint32_t vbias = vnonsignw + vexp_bias;
      // Keep only the bits that differ between x and |x| (the sign).
138*4bdc9457SAndroid Build Coastguard Worker      vsignw ^= vnonsignw;
139*4bdc9457SAndroid Build Coastguard Worker
140*4bdc9457SAndroid Build Coastguard Worker      vf *= vscale_to_zero;
141*4bdc9457SAndroid Build Coastguard Worker      vbias &= vexpw_max;
142*4bdc9457SAndroid Build Coastguard Worker
143*4bdc9457SAndroid Build Coastguard Worker      vbias = math_max_u32(vbias, vbias_min);
144*4bdc9457SAndroid Build Coastguard Worker
145*4bdc9457SAndroid Build Coastguard Worker      vf += uint32_as_float(vbias);
146*4bdc9457SAndroid Build Coastguard Worker
147*4bdc9457SAndroid Build Coastguard Worker      const uint32_t vbits = float_as_uint32(vf);
148*4bdc9457SAndroid Build Coastguard Worker
149*4bdc9457SAndroid Build Coastguard Worker      const uint16_t vexph = (uint16_t) (vbits >> 13) & vexph_mask;
150*4bdc9457SAndroid Build Coastguard Worker      const uint16_t vmanth = (uint16_t) vbits & vmanth_mask;
151*4bdc9457SAndroid Build Coastguard Worker      const uint16_t vsignh = (uint16_t) (vsignw >> 16);
152*4bdc9457SAndroid Build Coastguard Worker
153*4bdc9457SAndroid Build Coastguard Worker      uint16_t vh = vexph + vmanth;
      // Magnitude bits above expw_max select the nanh pattern (presumably the
      // NaN case - confirm); the sign is OR-ed in afterwards either way.
154*4bdc9457SAndroid Build Coastguard Worker      if XNN_UNPREDICTABLE(vnonsignw > vexpw_max) {
155*4bdc9457SAndroid Build Coastguard Worker        vh = vnanh;
156*4bdc9457SAndroid Build Coastguard Worker      }
157*4bdc9457SAndroid Build Coastguard Worker      vh |= vsignh;
158*4bdc9457SAndroid Build Coastguard Worker
159*4bdc9457SAndroid Build Coastguard Worker      *o = vh;
160*4bdc9457SAndroid Build Coastguard Worker    }
161*4bdc9457SAndroid Build Coastguard Worker  $else:
    // BATCH_TILE > 2: more than one element may be left over after the main
    // loop, so the remainder is converted one element at a time until n
    // reaches zero.
162*4bdc9457SAndroid Build Coastguard Worker    if XNN_UNLIKELY(n != 0) {
163*4bdc9457SAndroid Build Coastguard Worker      do {
164*4bdc9457SAndroid Build Coastguard Worker        const float vx = *input++;
165*4bdc9457SAndroid Build Coastguard Worker
        // Same per-element sequence as the unrolled main loop above.
166*4bdc9457SAndroid Build Coastguard Worker        const float vabsx = fabsf(vx);
167*4bdc9457SAndroid Build Coastguard Worker        uint32_t vsignw = float_as_uint32(vx);
168*4bdc9457SAndroid Build Coastguard Worker
169*4bdc9457SAndroid Build Coastguard Worker        const uint32_t vnonsignw = float_as_uint32(vabsx);
170*4bdc9457SAndroid Build Coastguard Worker        float vf = vabsx * vscale_to_inf;
171*4bdc9457SAndroid Build Coastguard Worker
172*4bdc9457SAndroid Build Coastguard Worker        uint32_t vbias = vnonsignw + vexp_bias;
        // Keep only the bits that differ between x and |x| (the sign).
173*4bdc9457SAndroid Build Coastguard Worker        vsignw ^= vnonsignw;
174*4bdc9457SAndroid Build Coastguard Worker
175*4bdc9457SAndroid Build Coastguard Worker        vf *= vscale_to_zero;
176*4bdc9457SAndroid Build Coastguard Worker        vbias &= vexpw_max;
177*4bdc9457SAndroid Build Coastguard Worker
178*4bdc9457SAndroid Build Coastguard Worker        vbias = math_max_u32(vbias, vbias_min);
179*4bdc9457SAndroid Build Coastguard Worker
180*4bdc9457SAndroid Build Coastguard Worker        vf += uint32_as_float(vbias);
181*4bdc9457SAndroid Build Coastguard Worker
182*4bdc9457SAndroid Build Coastguard Worker        const uint32_t vbits = float_as_uint32(vf);
183*4bdc9457SAndroid Build Coastguard Worker
184*4bdc9457SAndroid Build Coastguard Worker        const uint16_t vexph = (uint16_t) (vbits >> 13) & vexph_mask;
185*4bdc9457SAndroid Build Coastguard Worker        const uint16_t vmanth = (uint16_t) vbits & vmanth_mask;
186*4bdc9457SAndroid Build Coastguard Worker        const uint16_t vsignh = (uint16_t) (vsignw >> 16);
187*4bdc9457SAndroid Build Coastguard Worker
188*4bdc9457SAndroid Build Coastguard Worker        uint16_t vh = vexph + vmanth;
        // Magnitude bits above expw_max select the nanh pattern (presumably
        // the NaN case - confirm); the sign is OR-ed in afterwards either way.
189*4bdc9457SAndroid Build Coastguard Worker        if XNN_UNPREDICTABLE(vnonsignw > vexpw_max) {
190*4bdc9457SAndroid Build Coastguard Worker          vh = vnanh;
191*4bdc9457SAndroid Build Coastguard Worker        }
192*4bdc9457SAndroid Build Coastguard Worker        vh |= vsignh;
193*4bdc9457SAndroid Build Coastguard Worker
194*4bdc9457SAndroid Build Coastguard Worker        *o++ = vh;
195*4bdc9457SAndroid Build Coastguard Worker
196*4bdc9457SAndroid Build Coastguard Worker        n -= sizeof(float);
197*4bdc9457SAndroid Build Coastguard Worker      } while (n != 0);
198*4bdc9457SAndroid Build Coastguard Worker    }
199*4bdc9457SAndroid Build Coastguard Worker}
200