xref: /aosp_15_r20/external/XNNPACK/src/f32-velu/scalar-rr2-lut16-p3.c.in (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
1*4bdc9457SAndroid Build Coastguard Worker// Copyright 2020 Google LLC
2*4bdc9457SAndroid Build Coastguard Worker//
3*4bdc9457SAndroid Build Coastguard Worker// This source code is licensed under the BSD-style license found in the
4*4bdc9457SAndroid Build Coastguard Worker// LICENSE file in the root directory of this source tree.
5*4bdc9457SAndroid Build Coastguard Worker
6*4bdc9457SAndroid Build Coastguard Worker$assert BATCH_TILE >= 1
7*4bdc9457SAndroid Build Coastguard Worker#include <assert.h>
8*4bdc9457SAndroid Build Coastguard Worker#include <math.h>
9*4bdc9457SAndroid Build Coastguard Worker
10*4bdc9457SAndroid Build Coastguard Worker#include <xnnpack/common.h>
11*4bdc9457SAndroid Build Coastguard Worker#include <xnnpack/math.h>
12*4bdc9457SAndroid Build Coastguard Worker#include <xnnpack/vunary.h>
13*4bdc9457SAndroid Build Coastguard Worker
14*4bdc9457SAndroid Build Coastguard Worker
// 16-entry lookup table, defined elsewhere in the project. The kernel below
// indexes it with the low 4 bits of a biased float's bit pattern and adds
// exponent bits to the selected entry before reinterpreting it as a float.
// NOTE(review): presumably holds the binary32 bit patterns of 2**(-k/16) for
// k = 0..15, as the name suggests -- confirm against the table's definition.
15*4bdc9457SAndroid Build Coastguard Workerextern XNN_INTERNAL const uint32_t xnn_table_exp2minus_k_over_16[16];
16*4bdc9457SAndroid Build Coastguard Worker
// ELU micro-kernel template: expands to a "scalar" or "wasm" variant that
// processes BATCH_TILE elements per main-loop iteration.
//
// For every input element x, with z = x * prescale, the code computes
//   n = (z * log2e + magic_bias) - magic_bias
//   t = z + n * minus_ln2_hi + n * minus_ln2_lo
//   s = float built from a table entry plus shifted bits of the biased n
//   e = alpha * (s * (1 + t + c2*t^2 + c3*t^3) - one)
// and stores y = e when x < 0, otherwise y = x * beta.
// NOTE(review): with the constants their names suggest, e approximates
// alpha * (exp(z) - 1); the actual constant values live in params and are
// not visible here -- confirm against the params initializer.
//
// Parameters:
//   n      - size of the input in BYTES; must be a multiple of sizeof(float)
//            (asserted). The BATCH_TILE == 1 variant uses a do/while loop and
//            therefore always processes at least one element; n != 0 is not
//            asserted here.
//   x      - input array of n / sizeof(float) floats.
//   y      - output array of the same length.
//   params - constants, read from the scalar_rr2_lut16_p3 member.
17*4bdc9457SAndroid Build Coastguard Workervoid xnn_f32_velu_ukernel__${"wasm" if WASM else "scalar"}_rr2_lut16_p3_x${BATCH_TILE}(
18*4bdc9457SAndroid Build Coastguard Worker    size_t n,
19*4bdc9457SAndroid Build Coastguard Worker    const float* x,
20*4bdc9457SAndroid Build Coastguard Worker    float* y,
21*4bdc9457SAndroid Build Coastguard Worker    const union xnn_f32_elu_params params[restrict XNN_MIN_ELEMENTS(1)])
22*4bdc9457SAndroid Build Coastguard Worker{
23*4bdc9457SAndroid Build Coastguard Worker  assert(n % sizeof(float) == 0);  // n is a byte count covering whole floats
24*4bdc9457SAndroid Build Coastguard Worker
25*4bdc9457SAndroid Build Coastguard Worker  const float vprescale = params->scalar_rr2_lut16_p3.prescale;  // multiplies x to form z
26*4bdc9457SAndroid Build Coastguard Worker  const float valpha = params->scalar_rr2_lut16_p3.alpha;  // scales the x < 0 result
27*4bdc9457SAndroid Build Coastguard Worker  const float vbeta = params->scalar_rr2_lut16_p3.beta;  // scales the x >= 0 result
28*4bdc9457SAndroid Build Coastguard Worker  const float vmagic_bias = params->scalar_rr2_lut16_p3.magic_bias;  // added, then subtracted, around the bit extraction
29*4bdc9457SAndroid Build Coastguard Worker  const float vlog2e = params->scalar_rr2_lut16_p3.log2e;
30*4bdc9457SAndroid Build Coastguard Worker  const uint32_t vindex_mask = UINT32_C(0xF);  // low 4 bits select one of the 16 table entries
31*4bdc9457SAndroid Build Coastguard Worker  const float vsat_cutoff = params->scalar_rr2_lut16_p3.sat_cutoff;  // z at or below this yields -one * alpha
32*4bdc9457SAndroid Build Coastguard Worker  const float vminus_ln2_hi = params->scalar_rr2_lut16_p3.minus_ln2_hi;  // first step of the two-step reduction of z
33*4bdc9457SAndroid Build Coastguard Worker  const float vminus_ln2_lo = params->scalar_rr2_lut16_p3.minus_ln2_lo;  // second step of the two-step reduction of z
34*4bdc9457SAndroid Build Coastguard Worker  const float vc3 = params->scalar_rr2_lut16_p3.c3;  // cubic polynomial coefficient
35*4bdc9457SAndroid Build Coastguard Worker  const float vc2 = params->scalar_rr2_lut16_p3.c2;  // quadratic polynomial coefficient
36*4bdc9457SAndroid Build Coastguard Worker  const float vone = params->scalar_rr2_lut16_p3.one;
37*4bdc9457SAndroid Build Coastguard Worker
38*4bdc9457SAndroid Build Coastguard Worker  $if BATCH_TILE > 1:
39*4bdc9457SAndroid Build Coastguard Worker    for (; n >= ${BATCH_TILE} * sizeof(float); n -= ${BATCH_TILE} * sizeof(float)) {  // main loop: one full tile per iteration
40*4bdc9457SAndroid Build Coastguard Worker      $for N in range(BATCH_TILE):
41*4bdc9457SAndroid Build Coastguard Worker        float vx${N} = x[${N}];
42*4bdc9457SAndroid Build Coastguard Worker      x += ${BATCH_TILE};
43*4bdc9457SAndroid Build Coastguard Worker
44*4bdc9457SAndroid Build Coastguard Worker      $for N in range(BATCH_TILE):
45*4bdc9457SAndroid Build Coastguard Worker        $if WASM:
46*4bdc9457SAndroid Build Coastguard Worker          const float vz${N} = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx${N} * vprescale, vsat_cutoff), 0.0f);  // WASM: bound z below by sat_cutoff and above by 0
47*4bdc9457SAndroid Build Coastguard Worker        $else:
48*4bdc9457SAndroid Build Coastguard Worker          const float vz${N} = vx${N} * vprescale;  // scalar: saturation is handled by the branch further down
49*4bdc9457SAndroid Build Coastguard Worker
50*4bdc9457SAndroid Build Coastguard Worker      $for N in range(BATCH_TILE):
51*4bdc9457SAndroid Build Coastguard Worker        float vn${N} = vz${N} * vlog2e + vmagic_bias;
52*4bdc9457SAndroid Build Coastguard Worker
53*4bdc9457SAndroid Build Coastguard Worker      $for N in range(BATCH_TILE):
54*4bdc9457SAndroid Build Coastguard Worker        const uint32_t ven${N} = float_as_uint32(vn${N}) << 19;  // moves bit 4 of the pattern to bit 23 (lowest binary32 exponent bit)
55*4bdc9457SAndroid Build Coastguard Worker        const uint32_t vidx${N} = float_as_uint32(vn${N}) & vindex_mask;  // table index from the low 4 bits
56*4bdc9457SAndroid Build Coastguard Worker        vn${N} -= vmagic_bias;  // remove the bias only after the bits were read
57*4bdc9457SAndroid Build Coastguard Worker
58*4bdc9457SAndroid Build Coastguard Worker      $for N in range(BATCH_TILE):
59*4bdc9457SAndroid Build Coastguard Worker        float vt${N} = vn${N} * vminus_ln2_hi + vz${N};
60*4bdc9457SAndroid Build Coastguard Worker        float vs${N} = uint32_as_float(xnn_table_exp2minus_k_over_16[vidx${N}] + ven${N});  // table entry with exponent bits added (integer add)
61*4bdc9457SAndroid Build Coastguard Worker
62*4bdc9457SAndroid Build Coastguard Worker      $for N in range(BATCH_TILE):
63*4bdc9457SAndroid Build Coastguard Worker        vt${N} = vn${N} * vminus_ln2_lo + vt${N};
64*4bdc9457SAndroid Build Coastguard Worker        $if not WASM:
65*4bdc9457SAndroid Build Coastguard Worker          if XNN_UNPREDICTABLE(vz${N} <= vsat_cutoff) {  // saturated: s = t = 0 makes the result -one * alpha
66*4bdc9457SAndroid Build Coastguard Worker            vs${N} = 0.0f;
67*4bdc9457SAndroid Build Coastguard Worker            vt${N} = 0.0f;
68*4bdc9457SAndroid Build Coastguard Worker          }
69*4bdc9457SAndroid Build Coastguard Worker
70*4bdc9457SAndroid Build Coastguard Worker      $for N in range(BATCH_TILE):
71*4bdc9457SAndroid Build Coastguard Worker        float vp${N} = vc3 * vt${N} + vc2;
72*4bdc9457SAndroid Build Coastguard Worker
73*4bdc9457SAndroid Build Coastguard Worker      $for N in range(BATCH_TILE):
74*4bdc9457SAndroid Build Coastguard Worker        vp${N} *= vt${N};  // p = c2*t + c3*t^2
75*4bdc9457SAndroid Build Coastguard Worker
76*4bdc9457SAndroid Build Coastguard Worker      $for N in range(BATCH_TILE):
77*4bdc9457SAndroid Build Coastguard Worker        vt${N} *= vs${N};  // t becomes t*s
78*4bdc9457SAndroid Build Coastguard Worker        vs${N} -= vone;  // s becomes s - one
79*4bdc9457SAndroid Build Coastguard Worker
80*4bdc9457SAndroid Build Coastguard Worker      $for N in range(BATCH_TILE):
81*4bdc9457SAndroid Build Coastguard Worker        vp${N} = vp${N} * vt${N} + vt${N};  // p = s * (t + c2*t^2 + c3*t^3), with the original s and t
82*4bdc9457SAndroid Build Coastguard Worker
83*4bdc9457SAndroid Build Coastguard Worker      $for N in range(BATCH_TILE):
84*4bdc9457SAndroid Build Coastguard Worker        const float ve${N} = (vp${N} + vs${N}) * valpha;  // result used for x < 0
85*4bdc9457SAndroid Build Coastguard Worker        $if WASM:
86*4bdc9457SAndroid Build Coastguard Worker          float vy${N} = __builtin_wasm_max_f32(vx${N} * vbeta, 0.0f);
87*4bdc9457SAndroid Build Coastguard Worker        $else:
88*4bdc9457SAndroid Build Coastguard Worker          float vy${N} = vx${N} * vbeta;  // result used when x < 0 does not hold
89*4bdc9457SAndroid Build Coastguard Worker
90*4bdc9457SAndroid Build Coastguard Worker      $if WASM:
91*4bdc9457SAndroid Build Coastguard Worker        $for N in range(BATCH_TILE):
92*4bdc9457SAndroid Build Coastguard Worker          vy${N} += __builtin_wasm_min_f32(ve${N}, 0.0f);  // WASM: sum of the two bounded halves instead of a branch
93*4bdc9457SAndroid Build Coastguard Worker      $else:
94*4bdc9457SAndroid Build Coastguard Worker        $for N in range(BATCH_TILE):
95*4bdc9457SAndroid Build Coastguard Worker          if XNN_UNPREDICTABLE(vx${N} < 0.0f) {
96*4bdc9457SAndroid Build Coastguard Worker            vy${N} = ve${N};
97*4bdc9457SAndroid Build Coastguard Worker          }
98*4bdc9457SAndroid Build Coastguard Worker
99*4bdc9457SAndroid Build Coastguard Worker      $for N in range(BATCH_TILE):
100*4bdc9457SAndroid Build Coastguard Worker        y[${N}] = vy${N};
101*4bdc9457SAndroid Build Coastguard Worker      y += ${BATCH_TILE};
102*4bdc9457SAndroid Build Coastguard Worker    }
103*4bdc9457SAndroid Build Coastguard Worker  $if BATCH_TILE == 1:
104*4bdc9457SAndroid Build Coastguard Worker    do {  // untiled variant: body runs before n is tested, so n must be non-zero on entry
105*4bdc9457SAndroid Build Coastguard Worker      float vx = *x++;
106*4bdc9457SAndroid Build Coastguard Worker
107*4bdc9457SAndroid Build Coastguard Worker      $if WASM:
108*4bdc9457SAndroid Build Coastguard Worker        const float vz = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx * vprescale, vsat_cutoff), 0.0f);
109*4bdc9457SAndroid Build Coastguard Worker      $else:
110*4bdc9457SAndroid Build Coastguard Worker        const float vz = vx * vprescale;
111*4bdc9457SAndroid Build Coastguard Worker
112*4bdc9457SAndroid Build Coastguard Worker      float vn = vz * vlog2e + vmagic_bias;
113*4bdc9457SAndroid Build Coastguard Worker      const uint32_t ven = float_as_uint32(vn) << 19;  // bits above the index, moved up to the exponent field
114*4bdc9457SAndroid Build Coastguard Worker      const uint32_t vidx = float_as_uint32(vn) & vindex_mask;  // table index from the low 4 bits
115*4bdc9457SAndroid Build Coastguard Worker      vn -= vmagic_bias;
116*4bdc9457SAndroid Build Coastguard Worker
117*4bdc9457SAndroid Build Coastguard Worker      float vt = vn * vminus_ln2_hi + vz;
118*4bdc9457SAndroid Build Coastguard Worker      float vs = uint32_as_float(xnn_table_exp2minus_k_over_16[vidx] + ven);
119*4bdc9457SAndroid Build Coastguard Worker
120*4bdc9457SAndroid Build Coastguard Worker      vt = vn * vminus_ln2_lo + vt;
121*4bdc9457SAndroid Build Coastguard Worker      $if not WASM:
122*4bdc9457SAndroid Build Coastguard Worker        if XNN_UNPREDICTABLE(vz <= vsat_cutoff) {  // saturated: forces ve to -one * alpha
123*4bdc9457SAndroid Build Coastguard Worker          vs = 0.0f;
124*4bdc9457SAndroid Build Coastguard Worker          vt = 0.0f;
125*4bdc9457SAndroid Build Coastguard Worker        }
126*4bdc9457SAndroid Build Coastguard Worker
127*4bdc9457SAndroid Build Coastguard Worker      float vp = vc3 * vt + vc2;
128*4bdc9457SAndroid Build Coastguard Worker      vp *= vt;
129*4bdc9457SAndroid Build Coastguard Worker
130*4bdc9457SAndroid Build Coastguard Worker      vt *= vs;
131*4bdc9457SAndroid Build Coastguard Worker      vs -= vone;
132*4bdc9457SAndroid Build Coastguard Worker      vp = vp * vt + vt;  // p = s * (t + c2*t^2 + c3*t^3), with the original s and t
133*4bdc9457SAndroid Build Coastguard Worker      const float ve = (vp + vs) * valpha;
134*4bdc9457SAndroid Build Coastguard Worker
135*4bdc9457SAndroid Build Coastguard Worker      $if WASM:
136*4bdc9457SAndroid Build Coastguard Worker        float vy = __builtin_wasm_max_f32(vx * vbeta, 0.0f);
137*4bdc9457SAndroid Build Coastguard Worker        vy += __builtin_wasm_min_f32(ve, 0.0f);
138*4bdc9457SAndroid Build Coastguard Worker      $else:
139*4bdc9457SAndroid Build Coastguard Worker        float vy = vx * vbeta;
140*4bdc9457SAndroid Build Coastguard Worker        if XNN_UNPREDICTABLE(vx < 0.0f) {
141*4bdc9457SAndroid Build Coastguard Worker          vy = ve;
142*4bdc9457SAndroid Build Coastguard Worker        }
143*4bdc9457SAndroid Build Coastguard Worker
144*4bdc9457SAndroid Build Coastguard Worker      *y++ = vy;
145*4bdc9457SAndroid Build Coastguard Worker
146*4bdc9457SAndroid Build Coastguard Worker      n -= sizeof(float);
147*4bdc9457SAndroid Build Coastguard Worker    } while (n != 0);
148*4bdc9457SAndroid Build Coastguard Worker  $elif BATCH_TILE == 2:
149*4bdc9457SAndroid Build Coastguard Worker    if XNN_UNLIKELY(n != 0) {  // after the 2-wide main loop at most one element remains, so no loop is needed
150*4bdc9457SAndroid Build Coastguard Worker      float vx = *x;
151*4bdc9457SAndroid Build Coastguard Worker
152*4bdc9457SAndroid Build Coastguard Worker      $if WASM:
153*4bdc9457SAndroid Build Coastguard Worker        const float vz = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx * vprescale, vsat_cutoff), 0.0f);
154*4bdc9457SAndroid Build Coastguard Worker      $else:
155*4bdc9457SAndroid Build Coastguard Worker        const float vz = vx * vprescale;
156*4bdc9457SAndroid Build Coastguard Worker
157*4bdc9457SAndroid Build Coastguard Worker      float vn = vz * vlog2e + vmagic_bias;
158*4bdc9457SAndroid Build Coastguard Worker      const uint32_t ven = float_as_uint32(vn) << 19;
159*4bdc9457SAndroid Build Coastguard Worker      const uint32_t vidx = float_as_uint32(vn) & vindex_mask;
160*4bdc9457SAndroid Build Coastguard Worker      vn -= vmagic_bias;
161*4bdc9457SAndroid Build Coastguard Worker
162*4bdc9457SAndroid Build Coastguard Worker      float vt = vn * vminus_ln2_hi + vz;
163*4bdc9457SAndroid Build Coastguard Worker      float vs = uint32_as_float(xnn_table_exp2minus_k_over_16[vidx] + ven);
164*4bdc9457SAndroid Build Coastguard Worker
165*4bdc9457SAndroid Build Coastguard Worker      vt = vn * vminus_ln2_lo + vt;
166*4bdc9457SAndroid Build Coastguard Worker      $if not WASM:
167*4bdc9457SAndroid Build Coastguard Worker        if XNN_UNPREDICTABLE(vz <= vsat_cutoff) {
168*4bdc9457SAndroid Build Coastguard Worker          vs = 0.0f;
169*4bdc9457SAndroid Build Coastguard Worker          vt = 0.0f;
170*4bdc9457SAndroid Build Coastguard Worker        }
171*4bdc9457SAndroid Build Coastguard Worker
172*4bdc9457SAndroid Build Coastguard Worker      float vp = vc3 * vt + vc2;
173*4bdc9457SAndroid Build Coastguard Worker      vp *= vt;
174*4bdc9457SAndroid Build Coastguard Worker
175*4bdc9457SAndroid Build Coastguard Worker      vt *= vs;
176*4bdc9457SAndroid Build Coastguard Worker      vs -= vone;
177*4bdc9457SAndroid Build Coastguard Worker      vp = vp * vt + vt;
178*4bdc9457SAndroid Build Coastguard Worker      const float ve = (vp + vs) * valpha;
179*4bdc9457SAndroid Build Coastguard Worker
180*4bdc9457SAndroid Build Coastguard Worker      $if WASM:
181*4bdc9457SAndroid Build Coastguard Worker        float vy = __builtin_wasm_max_f32(vx * vbeta, 0.0f);
182*4bdc9457SAndroid Build Coastguard Worker        vy += __builtin_wasm_min_f32(ve, 0.0f);
183*4bdc9457SAndroid Build Coastguard Worker      $else:
184*4bdc9457SAndroid Build Coastguard Worker        float vy = vx * vbeta;
185*4bdc9457SAndroid Build Coastguard Worker        if XNN_UNPREDICTABLE(vx < 0.0f) {
186*4bdc9457SAndroid Build Coastguard Worker          vy = ve;
187*4bdc9457SAndroid Build Coastguard Worker        }
188*4bdc9457SAndroid Build Coastguard Worker
189*4bdc9457SAndroid Build Coastguard Worker      *y = vy;
190*4bdc9457SAndroid Build Coastguard Worker    }
191*4bdc9457SAndroid Build Coastguard Worker  $else:
192*4bdc9457SAndroid Build Coastguard Worker    if XNN_UNLIKELY(n != 0) {  // wider tiles: process the leftover elements one at a time
193*4bdc9457SAndroid Build Coastguard Worker      do {
194*4bdc9457SAndroid Build Coastguard Worker        float vx = *x++;
195*4bdc9457SAndroid Build Coastguard Worker
196*4bdc9457SAndroid Build Coastguard Worker        $if WASM:
197*4bdc9457SAndroid Build Coastguard Worker          const float vz = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx * vprescale, vsat_cutoff), 0.0f);
198*4bdc9457SAndroid Build Coastguard Worker        $else:
199*4bdc9457SAndroid Build Coastguard Worker          const float vz = vx * vprescale;
200*4bdc9457SAndroid Build Coastguard Worker
201*4bdc9457SAndroid Build Coastguard Worker        float vn = vz * vlog2e + vmagic_bias;
202*4bdc9457SAndroid Build Coastguard Worker        const uint32_t ven = float_as_uint32(vn) << 19;
203*4bdc9457SAndroid Build Coastguard Worker        const uint32_t vidx = float_as_uint32(vn) & vindex_mask;
204*4bdc9457SAndroid Build Coastguard Worker        vn -= vmagic_bias;
205*4bdc9457SAndroid Build Coastguard Worker
206*4bdc9457SAndroid Build Coastguard Worker        float vt = vn * vminus_ln2_hi + vz;
207*4bdc9457SAndroid Build Coastguard Worker        float vs = uint32_as_float(xnn_table_exp2minus_k_over_16[vidx] + ven);
208*4bdc9457SAndroid Build Coastguard Worker
209*4bdc9457SAndroid Build Coastguard Worker        vt = vn * vminus_ln2_lo + vt;
210*4bdc9457SAndroid Build Coastguard Worker        $if not WASM:
211*4bdc9457SAndroid Build Coastguard Worker          if XNN_UNPREDICTABLE(vz <= vsat_cutoff) {
212*4bdc9457SAndroid Build Coastguard Worker            vs = 0.0f;
213*4bdc9457SAndroid Build Coastguard Worker            vt = 0.0f;
214*4bdc9457SAndroid Build Coastguard Worker          }
215*4bdc9457SAndroid Build Coastguard Worker
216*4bdc9457SAndroid Build Coastguard Worker        float vp = vc3 * vt + vc2;
217*4bdc9457SAndroid Build Coastguard Worker        vp *= vt;
218*4bdc9457SAndroid Build Coastguard Worker
219*4bdc9457SAndroid Build Coastguard Worker        vt *= vs;
220*4bdc9457SAndroid Build Coastguard Worker        vs -= vone;
221*4bdc9457SAndroid Build Coastguard Worker        vp = vp * vt + vt;
222*4bdc9457SAndroid Build Coastguard Worker        const float ve = (vp + vs) * valpha;
223*4bdc9457SAndroid Build Coastguard Worker
224*4bdc9457SAndroid Build Coastguard Worker        $if WASM:
225*4bdc9457SAndroid Build Coastguard Worker          float vy = __builtin_wasm_max_f32(vx * vbeta, 0.0f);
226*4bdc9457SAndroid Build Coastguard Worker          vy += __builtin_wasm_min_f32(ve, 0.0f);
227*4bdc9457SAndroid Build Coastguard Worker        $else:
228*4bdc9457SAndroid Build Coastguard Worker          float vy = vx * vbeta;
229*4bdc9457SAndroid Build Coastguard Worker          if XNN_UNPREDICTABLE(vx < 0.0f) {
230*4bdc9457SAndroid Build Coastguard Worker            vy = ve;
231*4bdc9457SAndroid Build Coastguard Worker          }
232*4bdc9457SAndroid Build Coastguard Worker
233*4bdc9457SAndroid Build Coastguard Worker        *y++ = vy;
234*4bdc9457SAndroid Build Coastguard Worker
235*4bdc9457SAndroid Build Coastguard Worker        n -= sizeof(float);
236*4bdc9457SAndroid Build Coastguard Worker      } while (n != 0);
237*4bdc9457SAndroid Build Coastguard Worker    }
238*4bdc9457SAndroid Build Coastguard Worker}
239