xref: /aosp_15_r20/external/XNNPACK/src/f32-f16-vcvt/scalar-fabsf.c.in (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
1// Copyright 2021 Google LLC
2//
3// This source code is licensed under the BSD-style license found in the
4// LICENSE file in the root directory of this source tree.
5
6$assert BATCH_TILE >= 1
7#include <assert.h>
8#include <math.h>
9
10#include <xnnpack/common.h>
11#include <xnnpack/math.h>
12#include <xnnpack/vcvt.h>
13
14
// Scalar micro-kernel template: reads 32-bit floats from `input` and writes one
// 16-bit result per element to `output`, processing up to ${BATCH_TILE}
// elements per main-loop iteration.
//
// Parameters:
//   n      - size of the input in BYTES (not elements); must be non-zero and a
//            multiple of sizeof(float) (both are asserted below).
//   input  - source floats; must not be NULL.
//   output - destination buffer, written as consecutive uint16_t values; must
//            not be NULL.
//   params - conversion constants, read from the `scalar_fabsf` member.
//
// NOTE(review): judging by the kernel name, the 16-bit results are IEEE
// half-precision bit patterns; the exact values of the scale/mask/NaN constants
// live in `params` and are not visible in this file - confirm against the
// params initializer.
15void xnn_f32_f16_vcvt_ukernel__scalar_fabsf_x${BATCH_TILE}(
16    size_t n,
17    const float* input,
18    void* output,
19    const union xnn_f32_f16_cvt_params params[restrict XNN_MIN_ELEMENTS(1)])
20{
21  assert(n != 0);
22  assert(n % sizeof(float) == 0);
23  assert(input != NULL);
24  assert(output != NULL);
25
  // Load all conversion constants once, outside the loops.
26  const float vscale_to_inf = params->scalar_fabsf.scale_to_inf;
27  const uint32_t vexp_bias = params->scalar_fabsf.exp_bias;
28  const float vscale_to_zero = params->scalar_fabsf.scale_to_zero;
29  const uint32_t vexpw_max = params->scalar_fabsf.expw_max;
30  const uint32_t vbias_min = params->scalar_fabsf.bias_min;
31  const uint16_t vexph_mask = params->scalar_fabsf.exph_mask;
32  const uint16_t vmanth_mask = params->scalar_fabsf.manth_mask;
33  const uint16_t vnanh = params->scalar_fabsf.nanh;
34
35  uint16_t* o = (uint16_t*) output;
  // Main loop (generated only for BATCH_TILE > 1): each step of the algorithm
  // is emitted for all ${BATCH_TILE} lanes before moving to the next step.
36  $if BATCH_TILE > 1:
37    for (; n >= ${BATCH_TILE} * sizeof(float); n -= ${BATCH_TILE} * sizeof(float)) {
38      $for N in range(BATCH_TILE):
39        const float vx${N} = input[${N}];
40      input += ${BATCH_TILE};
41
      // vabsx is the magnitude of the input; vsignw starts as the raw bits of
      // the input and is reduced to just the sign bit further down.
42      $for N in range(BATCH_TILE):
43        const float vabsx${N} = fabsf(vx${N});
44      $for N in range(BATCH_TILE):
45        uint32_t vsignw${N} = float_as_uint32(vx${N});
46
      // NOTE(review): presumably scale_to_inf pushes magnitudes that are too
      // large for the 16-bit format to infinity, and scale_to_zero (applied
      // below) scales the value back down - confirm against the params values.
47      $for N in range(BATCH_TILE):
48        const uint32_t vnonsignw${N} = float_as_uint32(vabsx${N});
49      $for N in range(BATCH_TILE):
50        float vf${N} = vabsx${N} * vscale_to_inf;
51
      // XOR of the raw bits with the bits of the magnitude leaves only the
      // bits that fabsf() changed, i.e. the sign bit.
52      $for N in range(BATCH_TILE):
53        uint32_t vbias${N} = vnonsignw${N} + vexp_bias;
54      $for N in range(BATCH_TILE):
55        vsignw${N} ^= vnonsignw${N};
56
57      $for N in range(BATCH_TILE):
58        vf${N} *= vscale_to_zero;
59      $for N in range(BATCH_TILE):
60        vbias${N} &= vexpw_max;
61
      // Clamp the bias bit pattern from below so it never drops under bias_min.
62      $for N in range(BATCH_TILE):
63        vbias${N} = math_max_u32(vbias${N}, vbias_min);
64
      // The bias bit pattern is reinterpreted as a float and added to the
      // scaled magnitude; the result bits are then sliced into the output
      // exponent and mantissa fields.
65      $for N in range(BATCH_TILE):
66        vf${N} += uint32_as_float(vbias${N});
67
68      $for N in range(BATCH_TILE):
69        const uint32_t vbits${N} = float_as_uint32(vf${N});
70
      // The sign moves from bit 31 of the 32-bit word to bit 15 of the output.
71      $for N in range(BATCH_TILE):
72        const uint16_t vexph${N} = (uint16_t) (vbits${N} >> 13) & vexph_mask;
73      $for N in range(BATCH_TILE):
74        const uint16_t vmanth${N} = (uint16_t) vbits${N} & vmanth_mask;
75      $for N in range(BATCH_TILE):
76        const uint16_t vsignh${N} = (uint16_t) (vsignw${N} >> 16);
77
      // Inputs whose magnitude bits exceed expw_max get the fixed `nanh`
      // pattern instead of the computed value (presumably these are NaN
      // inputs - confirm against the expw_max value); the sign is OR-ed in
      // afterwards in either case.
78      $for N in range(BATCH_TILE):
79        uint16_t vh${N} = vexph${N} + vmanth${N};
80      $for N in range(BATCH_TILE):
81        if XNN_UNPREDICTABLE(vnonsignw${N} > vexpw_max) {
82          vh${N} = vnanh;
83        }
84      $for N in range(BATCH_TILE):
85        vh${N} |= vsignh${N};
86
87      $for N in range(BATCH_TILE):
88        o[${N}] = vh${N};
89      o += ${BATCH_TILE};
90    }
91  $if BATCH_TILE == 1:
    // BATCH_TILE == 1: no unrolled main loop is generated; this loop converts
    // every element, one per iteration. n != 0 is asserted on entry, so the
    // do-while body always runs at least once.
92    do {
93      const float vx = *input++;
94
95      const float vabsx = fabsf(vx);
96      uint32_t vsignw = float_as_uint32(vx);
97
98      const uint32_t vnonsignw = float_as_uint32(vabsx);
99      float vf = vabsx * vscale_to_inf;
100
101      uint32_t vbias = vnonsignw + vexp_bias;
102      vsignw ^= vnonsignw;
103
104      vf *= vscale_to_zero;
105      vbias &= vexpw_max;
106
107      vbias = math_max_u32(vbias, vbias_min);
108
109      vf += uint32_as_float(vbias);
110
111      const uint32_t vbits = float_as_uint32(vf);
112
113      const uint16_t vexph = (uint16_t) (vbits >> 13) & vexph_mask;
114      const uint16_t vmanth = (uint16_t) vbits & vmanth_mask;
115      const uint16_t vsignh = (uint16_t) (vsignw >> 16);
116
117      uint16_t vh = vexph + vmanth;
118      if XNN_UNPREDICTABLE(vnonsignw > vexpw_max) {
119        vh = vnanh;
120      }
121      vh |= vsignh;
122
123      *o++ = vh;
124
125      n -= sizeof(float);
126    } while (n != 0);
127  $elif BATCH_TILE == 2:
    // BATCH_TILE == 2: after the main loop n is either 0 or sizeof(float)
    // (n is a multiple of sizeof(float) and less than 2 * sizeof(float)), so
    // at most one element remains and no loop or pointer advance is needed.
128    if XNN_UNLIKELY(n != 0) {
129      const float vx = *input;
130
131      const float vabsx = fabsf(vx);
132      uint32_t vsignw = float_as_uint32(vx);
133
134      const uint32_t vnonsignw = float_as_uint32(vabsx);
135      float vf = vabsx * vscale_to_inf;
136
137      uint32_t vbias = vnonsignw + vexp_bias;
138      vsignw ^= vnonsignw;
139
140      vf *= vscale_to_zero;
141      vbias &= vexpw_max;
142
143      vbias = math_max_u32(vbias, vbias_min);
144
145      vf += uint32_as_float(vbias);
146
147      const uint32_t vbits = float_as_uint32(vf);
148
149      const uint16_t vexph = (uint16_t) (vbits >> 13) & vexph_mask;
150      const uint16_t vmanth = (uint16_t) vbits & vmanth_mask;
151      const uint16_t vsignh = (uint16_t) (vsignw >> 16);
152
153      uint16_t vh = vexph + vmanth;
154      if XNN_UNPREDICTABLE(vnonsignw > vexpw_max) {
155        vh = vnanh;
156      }
157      vh |= vsignh;
158
159      *o = vh;
160    }
161  $else:
    // BATCH_TILE > 2: convert the elements left over after the main loop,
    // one per iteration, using the same steps as a single main-loop lane.
162    if XNN_UNLIKELY(n != 0) {
163      do {
164        const float vx = *input++;
165
166        const float vabsx = fabsf(vx);
167        uint32_t vsignw = float_as_uint32(vx);
168
169        const uint32_t vnonsignw = float_as_uint32(vabsx);
170        float vf = vabsx * vscale_to_inf;
171
172        uint32_t vbias = vnonsignw + vexp_bias;
173        vsignw ^= vnonsignw;
174
175        vf *= vscale_to_zero;
176        vbias &= vexpw_max;
177
178        vbias = math_max_u32(vbias, vbias_min);
179
180        vf += uint32_as_float(vbias);
181
182        const uint32_t vbits = float_as_uint32(vf);
183
184        const uint16_t vexph = (uint16_t) (vbits >> 13) & vexph_mask;
185        const uint16_t vmanth = (uint16_t) vbits & vmanth_mask;
186        const uint16_t vsignh = (uint16_t) (vsignw >> 16);
187
188        uint16_t vh = vexph + vmanth;
189        if XNN_UNPREDICTABLE(vnonsignw > vexpw_max) {
190          vh = vnanh;
191        }
192        vh |= vsignh;
193
194        *o++ = vh;
195
196        n -= sizeof(float);
197      } while (n != 0);
198    }
199}
200