1*4bdc9457SAndroid Build Coastguard Worker// Copyright 2021 Google LLC 2*4bdc9457SAndroid Build Coastguard Worker// 3*4bdc9457SAndroid Build Coastguard Worker// This source code is licensed under the BSD-style license found in the 4*4bdc9457SAndroid Build Coastguard Worker// LICENSE file in the root directory of this source tree. 5*4bdc9457SAndroid Build Coastguard Worker 6*4bdc9457SAndroid Build Coastguard Worker$assert BATCH_TILE >= 1 7*4bdc9457SAndroid Build Coastguard Worker#include <assert.h> 8*4bdc9457SAndroid Build Coastguard Worker#include <math.h> 9*4bdc9457SAndroid Build Coastguard Worker 10*4bdc9457SAndroid Build Coastguard Worker#include <xnnpack/common.h> 11*4bdc9457SAndroid Build Coastguard Worker#include <xnnpack/math.h> 12*4bdc9457SAndroid Build Coastguard Worker#include <xnnpack/vcvt.h> 13*4bdc9457SAndroid Build Coastguard Worker 14*4bdc9457SAndroid Build Coastguard Worker 15*4bdc9457SAndroid Build Coastguard Workervoid xnn_f32_f16_vcvt_ukernel__scalar_fabsf_x${BATCH_TILE}( 16*4bdc9457SAndroid Build Coastguard Worker size_t n, 17*4bdc9457SAndroid Build Coastguard Worker const float* input, 18*4bdc9457SAndroid Build Coastguard Worker void* output, 19*4bdc9457SAndroid Build Coastguard Worker const union xnn_f32_f16_cvt_params params[restrict XNN_MIN_ELEMENTS(1)]) 20*4bdc9457SAndroid Build Coastguard Worker{ 21*4bdc9457SAndroid Build Coastguard Worker assert(n != 0); 22*4bdc9457SAndroid Build Coastguard Worker assert(n % sizeof(float) == 0); 23*4bdc9457SAndroid Build Coastguard Worker assert(input != NULL); 24*4bdc9457SAndroid Build Coastguard Worker assert(output != NULL); 25*4bdc9457SAndroid Build Coastguard Worker 26*4bdc9457SAndroid Build Coastguard Worker const float vscale_to_inf = params->scalar_fabsf.scale_to_inf; 27*4bdc9457SAndroid Build Coastguard Worker const uint32_t vexp_bias = params->scalar_fabsf.exp_bias; 28*4bdc9457SAndroid Build Coastguard Worker const float vscale_to_zero = params->scalar_fabsf.scale_to_zero; 29*4bdc9457SAndroid Build Coastguard Worker const uint32_t vexpw_max = params->scalar_fabsf.expw_max; 30*4bdc9457SAndroid Build Coastguard Worker const uint32_t vbias_min = params->scalar_fabsf.bias_min; 31*4bdc9457SAndroid Build Coastguard Worker const uint16_t vexph_mask = params->scalar_fabsf.exph_mask; 32*4bdc9457SAndroid Build Coastguard Worker const uint16_t vmanth_mask = params->scalar_fabsf.manth_mask; 33*4bdc9457SAndroid Build Coastguard Worker const uint16_t vnanh = params->scalar_fabsf.nanh; 34*4bdc9457SAndroid Build Coastguard Worker 35*4bdc9457SAndroid Build Coastguard Worker uint16_t* o = (uint16_t*) output; 36*4bdc9457SAndroid Build Coastguard Worker $if BATCH_TILE > 1: 37*4bdc9457SAndroid Build Coastguard Worker for (; n >= ${BATCH_TILE} * sizeof(float); n -= ${BATCH_TILE} * sizeof(float)) { 38*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 39*4bdc9457SAndroid Build Coastguard Worker const float vx${N} = input[${N}]; 40*4bdc9457SAndroid Build Coastguard Worker input += ${BATCH_TILE}; 41*4bdc9457SAndroid Build Coastguard Worker 42*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 43*4bdc9457SAndroid Build Coastguard Worker const float vabsx${N} = fabsf(vx${N}); 44*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 45*4bdc9457SAndroid Build Coastguard Worker uint32_t vsignw${N} = float_as_uint32(vx${N}); 46*4bdc9457SAndroid Build Coastguard Worker 47*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 48*4bdc9457SAndroid Build Coastguard Worker const uint32_t vnonsignw${N} = float_as_uint32(vabsx${N}); 49*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 50*4bdc9457SAndroid Build Coastguard Worker float vf${N} = vabsx${N} * vscale_to_inf; 51*4bdc9457SAndroid Build Coastguard Worker 52*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 53*4bdc9457SAndroid Build Coastguard Worker uint32_t vbias${N} = vnonsignw${N} + vexp_bias; 54*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 55*4bdc9457SAndroid Build Coastguard Worker vsignw${N} ^= vnonsignw${N}; 56*4bdc9457SAndroid Build Coastguard Worker 57*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 58*4bdc9457SAndroid Build Coastguard Worker vf${N} *= vscale_to_zero; 59*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 60*4bdc9457SAndroid Build Coastguard Worker vbias${N} &= vexpw_max; 61*4bdc9457SAndroid Build Coastguard Worker 62*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 63*4bdc9457SAndroid Build Coastguard Worker vbias${N} = math_max_u32(vbias${N}, vbias_min); 64*4bdc9457SAndroid Build Coastguard Worker 65*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 66*4bdc9457SAndroid Build Coastguard Worker vf${N} += uint32_as_float(vbias${N}); 67*4bdc9457SAndroid Build Coastguard Worker 68*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 69*4bdc9457SAndroid Build Coastguard Worker const uint32_t vbits${N} = float_as_uint32(vf${N}); 70*4bdc9457SAndroid Build Coastguard Worker 71*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 72*4bdc9457SAndroid Build Coastguard Worker const uint16_t vexph${N} = (uint16_t) (vbits${N} >> 13) & vexph_mask; 73*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 74*4bdc9457SAndroid Build Coastguard Worker const uint16_t vmanth${N} = (uint16_t) vbits${N} & vmanth_mask; 75*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 76*4bdc9457SAndroid Build Coastguard Worker const uint16_t vsignh${N} = (uint16_t) (vsignw${N} >> 16); 77*4bdc9457SAndroid Build Coastguard Worker 78*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 79*4bdc9457SAndroid Build Coastguard Worker uint16_t vh${N} = vexph${N} + vmanth${N}; 80*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 81*4bdc9457SAndroid Build Coastguard Worker if XNN_UNPREDICTABLE(vnonsignw${N} > vexpw_max) { 82*4bdc9457SAndroid Build Coastguard Worker vh${N} = vnanh; 83*4bdc9457SAndroid Build Coastguard Worker } 84*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 85*4bdc9457SAndroid Build Coastguard Worker vh${N} |= vsignh${N}; 86*4bdc9457SAndroid Build Coastguard Worker 87*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 88*4bdc9457SAndroid Build Coastguard Worker o[${N}] = vh${N}; 89*4bdc9457SAndroid Build Coastguard Worker o += ${BATCH_TILE}; 90*4bdc9457SAndroid Build Coastguard Worker } 91*4bdc9457SAndroid Build Coastguard Worker $if BATCH_TILE == 1: 92*4bdc9457SAndroid Build Coastguard Worker do { 93*4bdc9457SAndroid Build Coastguard Worker const float vx = *input++; 94*4bdc9457SAndroid Build Coastguard Worker 95*4bdc9457SAndroid Build Coastguard Worker const float vabsx = fabsf(vx); 96*4bdc9457SAndroid Build Coastguard Worker uint32_t vsignw = float_as_uint32(vx); 97*4bdc9457SAndroid Build Coastguard Worker 98*4bdc9457SAndroid Build Coastguard Worker const uint32_t vnonsignw = float_as_uint32(vabsx); 99*4bdc9457SAndroid Build Coastguard Worker float vf = vabsx * vscale_to_inf; 100*4bdc9457SAndroid Build Coastguard Worker 101*4bdc9457SAndroid Build Coastguard Worker uint32_t vbias = vnonsignw + vexp_bias; 102*4bdc9457SAndroid Build Coastguard Worker vsignw ^= vnonsignw; 103*4bdc9457SAndroid Build Coastguard Worker 104*4bdc9457SAndroid Build Coastguard Worker vf *= vscale_to_zero; 105*4bdc9457SAndroid Build Coastguard Worker vbias &= vexpw_max; 106*4bdc9457SAndroid Build Coastguard Worker 107*4bdc9457SAndroid Build Coastguard Worker vbias = math_max_u32(vbias, vbias_min); 108*4bdc9457SAndroid Build Coastguard Worker 109*4bdc9457SAndroid Build Coastguard Worker vf += uint32_as_float(vbias); 110*4bdc9457SAndroid Build Coastguard Worker 111*4bdc9457SAndroid Build Coastguard Worker const uint32_t vbits = float_as_uint32(vf); 112*4bdc9457SAndroid Build Coastguard Worker 113*4bdc9457SAndroid Build Coastguard Worker const uint16_t vexph = (uint16_t) (vbits >> 13) & vexph_mask; 114*4bdc9457SAndroid Build Coastguard Worker const uint16_t vmanth = (uint16_t) vbits & vmanth_mask; 115*4bdc9457SAndroid Build Coastguard Worker const uint16_t vsignh = (uint16_t) (vsignw >> 16); 116*4bdc9457SAndroid Build Coastguard Worker 117*4bdc9457SAndroid Build Coastguard Worker uint16_t vh = vexph + vmanth; 118*4bdc9457SAndroid Build Coastguard Worker if XNN_UNPREDICTABLE(vnonsignw > vexpw_max) { 119*4bdc9457SAndroid Build Coastguard Worker vh = vnanh; 120*4bdc9457SAndroid Build Coastguard Worker } 121*4bdc9457SAndroid Build Coastguard Worker vh |= vsignh; 122*4bdc9457SAndroid Build Coastguard Worker 123*4bdc9457SAndroid Build Coastguard Worker *o++ = vh; 124*4bdc9457SAndroid Build Coastguard Worker 125*4bdc9457SAndroid Build Coastguard Worker n -= sizeof(float); 126*4bdc9457SAndroid Build Coastguard Worker } while (n != 0); 127*4bdc9457SAndroid Build Coastguard Worker $elif BATCH_TILE == 2: 128*4bdc9457SAndroid Build Coastguard Worker if XNN_UNLIKELY(n != 0) { 129*4bdc9457SAndroid Build Coastguard Worker const float vx = *input; 130*4bdc9457SAndroid Build Coastguard Worker 131*4bdc9457SAndroid Build Coastguard Worker const float vabsx = fabsf(vx); 132*4bdc9457SAndroid Build Coastguard Worker uint32_t vsignw = float_as_uint32(vx); 133*4bdc9457SAndroid Build Coastguard Worker 134*4bdc9457SAndroid Build Coastguard Worker const uint32_t vnonsignw = float_as_uint32(vabsx); 135*4bdc9457SAndroid Build Coastguard Worker float vf = vabsx * vscale_to_inf; 136*4bdc9457SAndroid Build Coastguard Worker 137*4bdc9457SAndroid Build Coastguard Worker uint32_t vbias = vnonsignw + vexp_bias; 138*4bdc9457SAndroid Build Coastguard Worker vsignw ^= vnonsignw; 139*4bdc9457SAndroid Build Coastguard Worker 140*4bdc9457SAndroid Build Coastguard Worker vf *= vscale_to_zero; 141*4bdc9457SAndroid Build Coastguard Worker vbias &= vexpw_max; 142*4bdc9457SAndroid Build Coastguard Worker 143*4bdc9457SAndroid Build Coastguard Worker vbias = math_max_u32(vbias, vbias_min); 144*4bdc9457SAndroid Build Coastguard Worker 145*4bdc9457SAndroid Build Coastguard Worker vf += uint32_as_float(vbias); 146*4bdc9457SAndroid Build Coastguard Worker 147*4bdc9457SAndroid Build Coastguard Worker const uint32_t vbits = float_as_uint32(vf); 148*4bdc9457SAndroid Build Coastguard Worker 149*4bdc9457SAndroid Build Coastguard Worker const uint16_t vexph = (uint16_t) (vbits >> 13) & vexph_mask; 150*4bdc9457SAndroid Build Coastguard Worker const uint16_t vmanth = (uint16_t) vbits & vmanth_mask; 151*4bdc9457SAndroid Build Coastguard Worker const uint16_t vsignh = (uint16_t) (vsignw >> 16); 152*4bdc9457SAndroid Build Coastguard Worker 153*4bdc9457SAndroid Build Coastguard Worker uint16_t vh = vexph + vmanth; 154*4bdc9457SAndroid Build Coastguard Worker if XNN_UNPREDICTABLE(vnonsignw > vexpw_max) { 155*4bdc9457SAndroid Build Coastguard Worker vh = vnanh; 156*4bdc9457SAndroid Build Coastguard Worker } 157*4bdc9457SAndroid Build Coastguard Worker vh |= vsignh; 158*4bdc9457SAndroid Build Coastguard Worker 159*4bdc9457SAndroid Build Coastguard Worker *o = vh; 160*4bdc9457SAndroid Build Coastguard Worker } 161*4bdc9457SAndroid Build Coastguard Worker $else: 162*4bdc9457SAndroid Build Coastguard Worker if XNN_UNLIKELY(n != 0) { 163*4bdc9457SAndroid Build Coastguard Worker do { 164*4bdc9457SAndroid Build Coastguard Worker const float vx = *input++; 165*4bdc9457SAndroid Build Coastguard Worker 166*4bdc9457SAndroid Build Coastguard Worker const float vabsx = fabsf(vx); 167*4bdc9457SAndroid Build Coastguard Worker uint32_t vsignw = float_as_uint32(vx); 168*4bdc9457SAndroid Build Coastguard Worker 169*4bdc9457SAndroid Build Coastguard Worker const uint32_t vnonsignw = float_as_uint32(vabsx); 170*4bdc9457SAndroid Build Coastguard Worker float vf = vabsx * vscale_to_inf; 171*4bdc9457SAndroid Build Coastguard Worker 172*4bdc9457SAndroid Build Coastguard Worker uint32_t vbias = vnonsignw + vexp_bias; 173*4bdc9457SAndroid Build Coastguard Worker vsignw ^= vnonsignw; 174*4bdc9457SAndroid Build Coastguard Worker 175*4bdc9457SAndroid Build Coastguard Worker vf *= vscale_to_zero; 176*4bdc9457SAndroid Build Coastguard Worker vbias &= vexpw_max; 177*4bdc9457SAndroid Build Coastguard Worker 178*4bdc9457SAndroid Build Coastguard Worker vbias = math_max_u32(vbias, vbias_min); 179*4bdc9457SAndroid Build Coastguard Worker 180*4bdc9457SAndroid Build Coastguard Worker vf += uint32_as_float(vbias); 181*4bdc9457SAndroid Build Coastguard Worker 182*4bdc9457SAndroid Build Coastguard Worker const uint32_t vbits = float_as_uint32(vf); 183*4bdc9457SAndroid Build Coastguard Worker 184*4bdc9457SAndroid Build Coastguard Worker const uint16_t vexph = (uint16_t) (vbits >> 13) & vexph_mask; 185*4bdc9457SAndroid Build Coastguard Worker const uint16_t vmanth = (uint16_t) vbits & vmanth_mask; 186*4bdc9457SAndroid Build Coastguard Worker const uint16_t vsignh = (uint16_t) (vsignw >> 16); 187*4bdc9457SAndroid Build Coastguard Worker 188*4bdc9457SAndroid Build Coastguard Worker uint16_t vh = vexph + vmanth; 189*4bdc9457SAndroid Build Coastguard Worker if XNN_UNPREDICTABLE(vnonsignw > vexpw_max) { 190*4bdc9457SAndroid Build Coastguard Worker vh = vnanh; 191*4bdc9457SAndroid Build Coastguard Worker } 192*4bdc9457SAndroid Build Coastguard Worker vh |= vsignh; 193*4bdc9457SAndroid Build Coastguard Worker 194*4bdc9457SAndroid Build Coastguard Worker *o++ = vh; 195*4bdc9457SAndroid Build Coastguard Worker 196*4bdc9457SAndroid Build Coastguard Worker n -= sizeof(float); 197*4bdc9457SAndroid Build Coastguard Worker } while (n != 0); 198*4bdc9457SAndroid Build Coastguard Worker } 199*4bdc9457SAndroid Build Coastguard Worker} 200