1*4bdc9457SAndroid Build Coastguard Worker// Copyright 2020 Google LLC 2*4bdc9457SAndroid Build Coastguard Worker// 3*4bdc9457SAndroid Build Coastguard Worker// This source code is licensed under the BSD-style license found in the 4*4bdc9457SAndroid Build Coastguard Worker// LICENSE file in the root directory of this source tree. 5*4bdc9457SAndroid Build Coastguard Worker 6*4bdc9457SAndroid Build Coastguard Worker$assert BATCH_TILE >= 1 7*4bdc9457SAndroid Build Coastguard Worker#include <assert.h> 8*4bdc9457SAndroid Build Coastguard Worker#include <math.h> 9*4bdc9457SAndroid Build Coastguard Worker 10*4bdc9457SAndroid Build Coastguard Worker#include <xnnpack/common.h> 11*4bdc9457SAndroid Build Coastguard Worker#include <xnnpack/math.h> 12*4bdc9457SAndroid Build Coastguard Worker#include <xnnpack/vunary.h> 13*4bdc9457SAndroid Build Coastguard Worker 14*4bdc9457SAndroid Build Coastguard Worker 15*4bdc9457SAndroid Build Coastguard Workerextern XNN_INTERNAL const uint32_t xnn_table_exp2minus_k_over_16[16]; 16*4bdc9457SAndroid Build Coastguard Worker 17*4bdc9457SAndroid Build Coastguard Workervoid xnn_f32_velu_ukernel__${"wasm" if WASM else "scalar"}_rr2_lut16_p3_x${BATCH_TILE}( 18*4bdc9457SAndroid Build Coastguard Worker size_t n, 19*4bdc9457SAndroid Build Coastguard Worker const float* x, 20*4bdc9457SAndroid Build Coastguard Worker float* y, 21*4bdc9457SAndroid Build Coastguard Worker const union xnn_f32_elu_params params[restrict XNN_MIN_ELEMENTS(1)]) 22*4bdc9457SAndroid Build Coastguard Worker{ 23*4bdc9457SAndroid Build Coastguard Worker assert(n % sizeof(float) == 0); 24*4bdc9457SAndroid Build Coastguard Worker 25*4bdc9457SAndroid Build Coastguard Worker const float vprescale = params->scalar_rr2_lut16_p3.prescale; 26*4bdc9457SAndroid Build Coastguard Worker const float valpha = params->scalar_rr2_lut16_p3.alpha; 27*4bdc9457SAndroid Build Coastguard Worker const float vbeta = params->scalar_rr2_lut16_p3.beta; 28*4bdc9457SAndroid Build Coastguard Worker const float vmagic_bias = params->scalar_rr2_lut16_p3.magic_bias; 29*4bdc9457SAndroid Build Coastguard Worker const float vlog2e = params->scalar_rr2_lut16_p3.log2e; 30*4bdc9457SAndroid Build Coastguard Worker const uint32_t vindex_mask = UINT32_C(0xF); 31*4bdc9457SAndroid Build Coastguard Worker const float vsat_cutoff = params->scalar_rr2_lut16_p3.sat_cutoff; 32*4bdc9457SAndroid Build Coastguard Worker const float vminus_ln2_hi = params->scalar_rr2_lut16_p3.minus_ln2_hi; 33*4bdc9457SAndroid Build Coastguard Worker const float vminus_ln2_lo = params->scalar_rr2_lut16_p3.minus_ln2_lo; 34*4bdc9457SAndroid Build Coastguard Worker const float vc3 = params->scalar_rr2_lut16_p3.c3; 35*4bdc9457SAndroid Build Coastguard Worker const float vc2 = params->scalar_rr2_lut16_p3.c2; 36*4bdc9457SAndroid Build Coastguard Worker const float vone = params->scalar_rr2_lut16_p3.one; 37*4bdc9457SAndroid Build Coastguard Worker 38*4bdc9457SAndroid Build Coastguard Worker $if BATCH_TILE > 1: 39*4bdc9457SAndroid Build Coastguard Worker for (; n >= ${BATCH_TILE} * sizeof(float); n -= ${BATCH_TILE} * sizeof(float)) { 40*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 41*4bdc9457SAndroid Build Coastguard Worker float vx${N} = x[${N}]; 42*4bdc9457SAndroid Build Coastguard Worker x += ${BATCH_TILE}; 43*4bdc9457SAndroid Build Coastguard Worker 44*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 45*4bdc9457SAndroid Build Coastguard Worker $if WASM: 46*4bdc9457SAndroid Build Coastguard Worker const float vz${N} = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx${N} * vprescale, vsat_cutoff), 0.0f); 47*4bdc9457SAndroid Build Coastguard Worker $else: 48*4bdc9457SAndroid Build Coastguard Worker const float vz${N} = vx${N} * vprescale; 49*4bdc9457SAndroid Build Coastguard Worker 50*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 51*4bdc9457SAndroid Build Coastguard Worker float vn${N} = vz${N} * vlog2e + vmagic_bias; 52*4bdc9457SAndroid Build Coastguard Worker 53*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 54*4bdc9457SAndroid Build Coastguard Worker const uint32_t ven${N} = float_as_uint32(vn${N}) << 19; 55*4bdc9457SAndroid Build Coastguard Worker const uint32_t vidx${N} = float_as_uint32(vn${N}) & vindex_mask; 56*4bdc9457SAndroid Build Coastguard Worker vn${N} -= vmagic_bias; 57*4bdc9457SAndroid Build Coastguard Worker 58*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 59*4bdc9457SAndroid Build Coastguard Worker float vt${N} = vn${N} * vminus_ln2_hi + vz${N}; 60*4bdc9457SAndroid Build Coastguard Worker float vs${N} = uint32_as_float(xnn_table_exp2minus_k_over_16[vidx${N}] + ven${N}); 61*4bdc9457SAndroid Build Coastguard Worker 62*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 63*4bdc9457SAndroid Build Coastguard Worker vt${N} = vn${N} * vminus_ln2_lo + vt${N}; 64*4bdc9457SAndroid Build Coastguard Worker $if not WASM: 65*4bdc9457SAndroid Build Coastguard Worker if XNN_UNPREDICTABLE(vz${N} <= vsat_cutoff) { 66*4bdc9457SAndroid Build Coastguard Worker vs${N} = 0.0f; 67*4bdc9457SAndroid Build Coastguard Worker vt${N} = 0.0f; 68*4bdc9457SAndroid Build Coastguard Worker } 69*4bdc9457SAndroid Build Coastguard Worker 70*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 71*4bdc9457SAndroid Build Coastguard Worker float vp${N} = vc3 * vt${N} + vc2; 72*4bdc9457SAndroid Build Coastguard Worker 73*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 74*4bdc9457SAndroid Build Coastguard Worker vp${N} *= vt${N}; 75*4bdc9457SAndroid Build Coastguard Worker 76*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 77*4bdc9457SAndroid Build Coastguard Worker vt${N} *= vs${N}; 78*4bdc9457SAndroid Build Coastguard Worker vs${N} -= vone; 79*4bdc9457SAndroid Build Coastguard Worker 80*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 81*4bdc9457SAndroid Build Coastguard Worker vp${N} = vp${N} * vt${N} + vt${N}; 82*4bdc9457SAndroid Build Coastguard Worker 83*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 84*4bdc9457SAndroid Build Coastguard Worker const float ve${N} = (vp${N} + vs${N}) * valpha; 85*4bdc9457SAndroid Build Coastguard Worker $if WASM: 86*4bdc9457SAndroid Build Coastguard Worker float vy${N} = __builtin_wasm_max_f32(vx${N} * vbeta, 0.0f); 87*4bdc9457SAndroid Build Coastguard Worker $else: 88*4bdc9457SAndroid Build Coastguard Worker float vy${N} = vx${N} * vbeta; 89*4bdc9457SAndroid Build Coastguard Worker 90*4bdc9457SAndroid Build Coastguard Worker $if WASM: 91*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 92*4bdc9457SAndroid Build Coastguard Worker vy${N} += __builtin_wasm_min_f32(ve${N}, 0.0f); 93*4bdc9457SAndroid Build Coastguard Worker $else: 94*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 95*4bdc9457SAndroid Build Coastguard Worker if XNN_UNPREDICTABLE(vx${N} < 0.0f) { 96*4bdc9457SAndroid Build Coastguard Worker vy${N} = ve${N}; 97*4bdc9457SAndroid Build Coastguard Worker } 98*4bdc9457SAndroid Build Coastguard Worker 99*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 100*4bdc9457SAndroid Build Coastguard Worker y[${N}] = vy${N}; 101*4bdc9457SAndroid Build Coastguard Worker y += ${BATCH_TILE}; 102*4bdc9457SAndroid Build Coastguard Worker } 103*4bdc9457SAndroid Build Coastguard Worker $if BATCH_TILE == 1: 104*4bdc9457SAndroid Build Coastguard Worker do { 105*4bdc9457SAndroid Build Coastguard Worker float vx = *x++; 106*4bdc9457SAndroid Build Coastguard Worker 107*4bdc9457SAndroid Build Coastguard Worker $if WASM: 108*4bdc9457SAndroid Build Coastguard Worker const float vz = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx * vprescale, vsat_cutoff), 0.0f); 109*4bdc9457SAndroid Build Coastguard Worker $else: 110*4bdc9457SAndroid Build Coastguard Worker const float vz = vx * vprescale; 111*4bdc9457SAndroid Build Coastguard Worker 112*4bdc9457SAndroid Build Coastguard Worker float vn = vz * vlog2e + vmagic_bias; 113*4bdc9457SAndroid Build Coastguard Worker const uint32_t ven = float_as_uint32(vn) << 19; 114*4bdc9457SAndroid Build Coastguard Worker const uint32_t vidx = float_as_uint32(vn) & vindex_mask; 115*4bdc9457SAndroid Build Coastguard Worker vn -= vmagic_bias; 116*4bdc9457SAndroid Build Coastguard Worker 117*4bdc9457SAndroid Build Coastguard Worker float vt = vn * vminus_ln2_hi + vz; 118*4bdc9457SAndroid Build Coastguard Worker float vs = uint32_as_float(xnn_table_exp2minus_k_over_16[vidx] + ven); 119*4bdc9457SAndroid Build Coastguard Worker 120*4bdc9457SAndroid Build Coastguard Worker vt = vn * vminus_ln2_lo + vt; 121*4bdc9457SAndroid Build Coastguard Worker $if not WASM: 122*4bdc9457SAndroid Build Coastguard Worker if XNN_UNPREDICTABLE(vz <= vsat_cutoff) { 123*4bdc9457SAndroid Build Coastguard Worker vs = 0.0f; 124*4bdc9457SAndroid Build Coastguard Worker vt = 0.0f; 125*4bdc9457SAndroid Build Coastguard Worker } 126*4bdc9457SAndroid Build Coastguard Worker 127*4bdc9457SAndroid Build Coastguard Worker float vp = vc3 * vt + vc2; 128*4bdc9457SAndroid Build Coastguard Worker vp *= vt; 129*4bdc9457SAndroid Build Coastguard Worker 130*4bdc9457SAndroid Build Coastguard Worker vt *= vs; 131*4bdc9457SAndroid Build Coastguard Worker vs -= vone; 132*4bdc9457SAndroid Build Coastguard Worker vp = vp * vt + vt; 133*4bdc9457SAndroid Build Coastguard Worker const float ve = (vp + vs) * valpha; 134*4bdc9457SAndroid Build Coastguard Worker 135*4bdc9457SAndroid Build Coastguard Worker $if WASM: 136*4bdc9457SAndroid Build Coastguard Worker float vy = __builtin_wasm_max_f32(vx * vbeta, 0.0f); 137*4bdc9457SAndroid Build Coastguard Worker vy += __builtin_wasm_min_f32(ve, 0.0f); 138*4bdc9457SAndroid Build Coastguard Worker $else: 139*4bdc9457SAndroid Build Coastguard Worker float vy = vx * vbeta; 140*4bdc9457SAndroid Build Coastguard Worker if XNN_UNPREDICTABLE(vx < 0.0f) { 141*4bdc9457SAndroid Build Coastguard Worker vy = ve; 142*4bdc9457SAndroid Build Coastguard Worker } 143*4bdc9457SAndroid Build Coastguard Worker 144*4bdc9457SAndroid Build Coastguard Worker *y++ = vy; 145*4bdc9457SAndroid Build Coastguard Worker 146*4bdc9457SAndroid Build Coastguard Worker n -= sizeof(float); 147*4bdc9457SAndroid Build Coastguard Worker } while (n != 0); 148*4bdc9457SAndroid Build Coastguard Worker $elif BATCH_TILE == 2: 149*4bdc9457SAndroid Build Coastguard Worker if XNN_UNLIKELY(n != 0) { 150*4bdc9457SAndroid Build Coastguard Worker float vx = *x; 151*4bdc9457SAndroid Build Coastguard Worker 152*4bdc9457SAndroid Build Coastguard Worker $if WASM: 153*4bdc9457SAndroid Build Coastguard Worker const float vz = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx * vprescale, vsat_cutoff), 0.0f); 154*4bdc9457SAndroid Build Coastguard Worker $else: 155*4bdc9457SAndroid Build Coastguard Worker const float vz = vx * vprescale; 156*4bdc9457SAndroid Build Coastguard Worker 157*4bdc9457SAndroid Build Coastguard Worker float vn = vz * vlog2e + vmagic_bias; 158*4bdc9457SAndroid Build Coastguard Worker const uint32_t ven = float_as_uint32(vn) << 19; 159*4bdc9457SAndroid Build Coastguard Worker const uint32_t vidx = float_as_uint32(vn) & vindex_mask; 160*4bdc9457SAndroid Build Coastguard Worker vn -= vmagic_bias; 161*4bdc9457SAndroid Build Coastguard Worker 162*4bdc9457SAndroid Build Coastguard Worker float vt = vn * vminus_ln2_hi + vz; 163*4bdc9457SAndroid Build Coastguard Worker float vs = uint32_as_float(xnn_table_exp2minus_k_over_16[vidx] + ven); 164*4bdc9457SAndroid Build Coastguard Worker 165*4bdc9457SAndroid Build Coastguard Worker vt = vn * vminus_ln2_lo + vt; 166*4bdc9457SAndroid Build Coastguard Worker $if not WASM: 167*4bdc9457SAndroid Build Coastguard Worker if XNN_UNPREDICTABLE(vz <= vsat_cutoff) { 168*4bdc9457SAndroid Build Coastguard Worker vs = 0.0f; 169*4bdc9457SAndroid Build Coastguard Worker vt = 0.0f; 170*4bdc9457SAndroid Build Coastguard Worker } 171*4bdc9457SAndroid Build Coastguard Worker 172*4bdc9457SAndroid Build Coastguard Worker float vp = vc3 * vt + vc2; 173*4bdc9457SAndroid Build Coastguard Worker vp *= vt; 174*4bdc9457SAndroid Build Coastguard Worker 175*4bdc9457SAndroid Build Coastguard Worker vt *= vs; 176*4bdc9457SAndroid Build Coastguard Worker vs -= vone; 177*4bdc9457SAndroid Build Coastguard Worker vp = vp * vt + vt; 178*4bdc9457SAndroid Build Coastguard Worker const float ve = (vp + vs) * valpha; 179*4bdc9457SAndroid Build Coastguard Worker 180*4bdc9457SAndroid Build Coastguard Worker $if WASM: 181*4bdc9457SAndroid Build Coastguard Worker float vy = __builtin_wasm_max_f32(vx * vbeta, 0.0f); 182*4bdc9457SAndroid Build Coastguard Worker vy += __builtin_wasm_min_f32(ve, 0.0f); 183*4bdc9457SAndroid Build Coastguard Worker $else: 184*4bdc9457SAndroid Build Coastguard Worker float vy = vx * vbeta; 185*4bdc9457SAndroid Build Coastguard Worker if XNN_UNPREDICTABLE(vx < 0.0f) { 186*4bdc9457SAndroid Build Coastguard Worker vy = ve; 187*4bdc9457SAndroid Build Coastguard Worker } 188*4bdc9457SAndroid Build Coastguard Worker 189*4bdc9457SAndroid Build Coastguard Worker *y = vy; 190*4bdc9457SAndroid Build Coastguard Worker } 191*4bdc9457SAndroid Build Coastguard Worker $else: 192*4bdc9457SAndroid Build Coastguard Worker if XNN_UNLIKELY(n != 0) { 193*4bdc9457SAndroid Build Coastguard Worker do { 194*4bdc9457SAndroid Build Coastguard Worker float vx = *x++; 195*4bdc9457SAndroid Build Coastguard Worker 196*4bdc9457SAndroid Build Coastguard Worker $if WASM: 197*4bdc9457SAndroid Build Coastguard Worker const float vz = __builtin_wasm_min_f32(__builtin_wasm_max_f32(vx * vprescale, vsat_cutoff), 0.0f); 198*4bdc9457SAndroid Build Coastguard Worker $else: 199*4bdc9457SAndroid Build Coastguard Worker const float vz = vx * vprescale; 200*4bdc9457SAndroid Build Coastguard Worker 201*4bdc9457SAndroid Build Coastguard Worker float vn = vz * vlog2e + vmagic_bias; 202*4bdc9457SAndroid Build Coastguard Worker const uint32_t ven = float_as_uint32(vn) << 19; 203*4bdc9457SAndroid Build Coastguard Worker const uint32_t vidx = float_as_uint32(vn) & vindex_mask; 204*4bdc9457SAndroid Build Coastguard Worker vn -= vmagic_bias; 205*4bdc9457SAndroid Build Coastguard Worker 206*4bdc9457SAndroid Build Coastguard Worker float vt = vn * vminus_ln2_hi + vz; 207*4bdc9457SAndroid Build Coastguard Worker float vs = uint32_as_float(xnn_table_exp2minus_k_over_16[vidx] + ven); 208*4bdc9457SAndroid Build Coastguard Worker 209*4bdc9457SAndroid Build Coastguard Worker vt = vn * vminus_ln2_lo + vt; 210*4bdc9457SAndroid Build Coastguard Worker $if not WASM: 211*4bdc9457SAndroid Build Coastguard Worker if XNN_UNPREDICTABLE(vz <= vsat_cutoff) { 212*4bdc9457SAndroid Build Coastguard Worker vs = 0.0f; 213*4bdc9457SAndroid Build Coastguard Worker vt = 0.0f; 214*4bdc9457SAndroid Build Coastguard Worker } 215*4bdc9457SAndroid Build Coastguard Worker 216*4bdc9457SAndroid Build Coastguard Worker float vp = vc3 * vt + vc2; 217*4bdc9457SAndroid Build Coastguard Worker vp *= vt; 218*4bdc9457SAndroid Build Coastguard Worker 219*4bdc9457SAndroid Build Coastguard Worker vt *= vs; 220*4bdc9457SAndroid Build Coastguard Worker vs -= vone; 221*4bdc9457SAndroid Build Coastguard Worker vp = vp * vt + vt; 222*4bdc9457SAndroid Build Coastguard Worker const float ve = (vp + vs) * valpha; 223*4bdc9457SAndroid Build Coastguard Worker 224*4bdc9457SAndroid Build Coastguard Worker $if WASM: 225*4bdc9457SAndroid Build Coastguard Worker float vy = __builtin_wasm_max_f32(vx * vbeta, 0.0f); 226*4bdc9457SAndroid Build Coastguard Worker vy += __builtin_wasm_min_f32(ve, 0.0f); 227*4bdc9457SAndroid Build Coastguard Worker $else: 228*4bdc9457SAndroid Build Coastguard Worker float vy = vx * vbeta; 229*4bdc9457SAndroid Build Coastguard Worker if XNN_UNPREDICTABLE(vx < 0.0f) { 230*4bdc9457SAndroid Build Coastguard Worker vy = ve; 231*4bdc9457SAndroid Build Coastguard Worker } 232*4bdc9457SAndroid Build Coastguard Worker 233*4bdc9457SAndroid Build Coastguard Worker *y++ = vy; 234*4bdc9457SAndroid Build Coastguard Worker 235*4bdc9457SAndroid Build Coastguard Worker n -= sizeof(float); 236*4bdc9457SAndroid Build Coastguard Worker } while (n != 0); 237*4bdc9457SAndroid Build Coastguard Worker } 238*4bdc9457SAndroid Build Coastguard Worker} 239