1*4bdc9457SAndroid Build Coastguard Worker// Copyright 2021 Google LLC 2*4bdc9457SAndroid Build Coastguard Worker// 3*4bdc9457SAndroid Build Coastguard Worker// This source code is licensed under the BSD-style license found in the 4*4bdc9457SAndroid Build Coastguard Worker// LICENSE file in the root directory of this source tree. 5*4bdc9457SAndroid Build Coastguard Worker 6*4bdc9457SAndroid Build Coastguard Worker$assert BATCH_TILE >= 1 7*4bdc9457SAndroid Build Coastguard Worker#include <assert.h> 8*4bdc9457SAndroid Build Coastguard Worker 9*4bdc9457SAndroid Build Coastguard Worker#include <xnnpack/common.h> 10*4bdc9457SAndroid Build Coastguard Worker#include <xnnpack/math.h> 11*4bdc9457SAndroid Build Coastguard Worker#include <xnnpack/vcvt.h> 12*4bdc9457SAndroid Build Coastguard Worker 13*4bdc9457SAndroid Build Coastguard Worker 14*4bdc9457SAndroid Build Coastguard Workervoid xnn_f32_f16_vcvt_ukernel__scalar_bitcast_x${BATCH_TILE}( 15*4bdc9457SAndroid Build Coastguard Worker size_t n, 16*4bdc9457SAndroid Build Coastguard Worker const float* input, 17*4bdc9457SAndroid Build Coastguard Worker void* output, 18*4bdc9457SAndroid Build Coastguard Worker const union xnn_f32_f16_cvt_params params[restrict XNN_MIN_ELEMENTS(1)]) 19*4bdc9457SAndroid Build Coastguard Worker{ 20*4bdc9457SAndroid Build Coastguard Worker assert(n != 0); 21*4bdc9457SAndroid Build Coastguard Worker assert(n % sizeof(float) == 0); 22*4bdc9457SAndroid Build Coastguard Worker assert(input != NULL); 23*4bdc9457SAndroid Build Coastguard Worker assert(output != NULL); 24*4bdc9457SAndroid Build Coastguard Worker 25*4bdc9457SAndroid Build Coastguard Worker const uint32_t vnonsign_mask = params->scalar_bitcast.nonsign_mask; 26*4bdc9457SAndroid Build Coastguard Worker const uint32_t vexp_bias = params->scalar_bitcast.exp_bias; 27*4bdc9457SAndroid Build Coastguard Worker const float vscale_to_inf = params->scalar_bitcast.scale_to_inf; 28*4bdc9457SAndroid Build Coastguard Worker const uint32_t vexpw_max = params->scalar_bitcast.expw_max; 29*4bdc9457SAndroid Build Coastguard Worker const float vscale_to_zero = params->scalar_bitcast.scale_to_zero; 30*4bdc9457SAndroid Build Coastguard Worker const uint32_t vbias_min = params->scalar_bitcast.bias_min; 31*4bdc9457SAndroid Build Coastguard Worker const uint16_t vexph_mask = params->scalar_bitcast.exph_mask; 32*4bdc9457SAndroid Build Coastguard Worker const uint16_t vmanth_mask = params->scalar_bitcast.manth_mask; 33*4bdc9457SAndroid Build Coastguard Worker const uint16_t vnanh = params->scalar_bitcast.nanh; 34*4bdc9457SAndroid Build Coastguard Worker 35*4bdc9457SAndroid Build Coastguard Worker const uint32_t* i = (const uint32_t*) input; 36*4bdc9457SAndroid Build Coastguard Worker uint16_t* o = (uint16_t*) output; 37*4bdc9457SAndroid Build Coastguard Worker $if BATCH_TILE > 1: 38*4bdc9457SAndroid Build Coastguard Worker for (; n >= ${BATCH_TILE} * sizeof(float); n -= ${BATCH_TILE} * sizeof(float)) { 39*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 40*4bdc9457SAndroid Build Coastguard Worker const uint32_t vw${N} = i[${N}]; 41*4bdc9457SAndroid Build Coastguard Worker i += ${BATCH_TILE}; 42*4bdc9457SAndroid Build Coastguard Worker 43*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 44*4bdc9457SAndroid Build Coastguard Worker const uint32_t vnonsignw${N} = vw${N} & vnonsign_mask; 45*4bdc9457SAndroid Build Coastguard Worker 46*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 47*4bdc9457SAndroid Build Coastguard Worker float vf${N} = uint32_as_float(vnonsignw${N}); 48*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 49*4bdc9457SAndroid Build Coastguard Worker const uint32_t vsignw${N} = vw${N} ^ vnonsignw${N}; 50*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 51*4bdc9457SAndroid Build Coastguard Worker uint32_t vbias${N} = vnonsignw${N} + vexp_bias; 52*4bdc9457SAndroid Build Coastguard Worker 53*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 54*4bdc9457SAndroid Build Coastguard Worker vf${N} *= vscale_to_inf; 55*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 56*4bdc9457SAndroid Build Coastguard Worker vbias${N} &= vexpw_max; 57*4bdc9457SAndroid Build Coastguard Worker 58*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 59*4bdc9457SAndroid Build Coastguard Worker vf${N} *= vscale_to_zero; 60*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 61*4bdc9457SAndroid Build Coastguard Worker vbias${N} = math_max_u32(vbias${N}, vbias_min); 62*4bdc9457SAndroid Build Coastguard Worker 63*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 64*4bdc9457SAndroid Build Coastguard Worker vf${N} += uint32_as_float(vbias${N}); 65*4bdc9457SAndroid Build Coastguard Worker 66*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 67*4bdc9457SAndroid Build Coastguard Worker const uint32_t vbits${N} = float_as_uint32(vf${N}); 68*4bdc9457SAndroid Build Coastguard Worker 69*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 70*4bdc9457SAndroid Build Coastguard Worker const uint16_t vexph${N} = (uint16_t) (vbits${N} >> 13) & vexph_mask; 71*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 72*4bdc9457SAndroid Build Coastguard Worker const uint16_t vmanth${N} = (uint16_t) vbits${N} & vmanth_mask; 73*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 74*4bdc9457SAndroid Build Coastguard Worker const uint16_t vsignh${N} = (uint16_t) (vsignw${N} >> 16); 75*4bdc9457SAndroid Build Coastguard Worker 76*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 77*4bdc9457SAndroid Build Coastguard Worker uint16_t vh${N} = vexph${N} + vmanth${N}; 78*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 79*4bdc9457SAndroid Build Coastguard Worker if XNN_UNPREDICTABLE(vnonsignw${N} > vexpw_max) { 80*4bdc9457SAndroid Build Coastguard Worker vh${N} = vnanh; 81*4bdc9457SAndroid Build Coastguard Worker } 82*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 83*4bdc9457SAndroid Build Coastguard Worker vh${N} |= vsignh${N}; 84*4bdc9457SAndroid Build Coastguard Worker 85*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 86*4bdc9457SAndroid Build Coastguard Worker o[${N}] = vh${N}; 87*4bdc9457SAndroid Build Coastguard Worker o += ${BATCH_TILE}; 88*4bdc9457SAndroid Build Coastguard Worker } 89*4bdc9457SAndroid Build Coastguard Worker $if BATCH_TILE == 1: 90*4bdc9457SAndroid Build Coastguard Worker do { 91*4bdc9457SAndroid Build Coastguard Worker const uint32_t vw = *i++; 92*4bdc9457SAndroid Build Coastguard Worker 93*4bdc9457SAndroid Build Coastguard Worker const uint32_t vnonsignw = vw & vnonsign_mask; 94*4bdc9457SAndroid Build Coastguard Worker 95*4bdc9457SAndroid Build Coastguard Worker float vf = uint32_as_float(vnonsignw); 96*4bdc9457SAndroid Build Coastguard Worker const uint32_t vsignw = vw ^ vnonsignw; 97*4bdc9457SAndroid Build Coastguard Worker uint32_t vbias = vnonsignw + vexp_bias; 98*4bdc9457SAndroid Build Coastguard Worker 99*4bdc9457SAndroid Build Coastguard Worker vf *= vscale_to_inf; 100*4bdc9457SAndroid Build Coastguard Worker vbias &= vexpw_max; 101*4bdc9457SAndroid Build Coastguard Worker 102*4bdc9457SAndroid Build Coastguard Worker vf *= vscale_to_zero; 103*4bdc9457SAndroid Build Coastguard Worker vbias = math_max_u32(vbias, vbias_min); 104*4bdc9457SAndroid Build Coastguard Worker 105*4bdc9457SAndroid Build Coastguard Worker vf += uint32_as_float(vbias); 106*4bdc9457SAndroid Build Coastguard Worker 107*4bdc9457SAndroid Build Coastguard Worker const uint32_t vbits = float_as_uint32(vf); 108*4bdc9457SAndroid Build Coastguard Worker 109*4bdc9457SAndroid Build Coastguard Worker const uint16_t vexph = (uint16_t) (vbits >> 13) & vexph_mask; 110*4bdc9457SAndroid Build Coastguard Worker const uint16_t vmanth = (uint16_t) vbits & vmanth_mask; 111*4bdc9457SAndroid Build Coastguard Worker const uint16_t vsignh = (uint16_t) (vsignw >> 16); 112*4bdc9457SAndroid Build Coastguard Worker 113*4bdc9457SAndroid Build Coastguard Worker uint16_t vh = vexph + vmanth; 114*4bdc9457SAndroid Build Coastguard Worker if XNN_UNPREDICTABLE(vnonsignw > vexpw_max) { 115*4bdc9457SAndroid Build Coastguard Worker vh = vnanh; 116*4bdc9457SAndroid Build Coastguard Worker } 117*4bdc9457SAndroid Build Coastguard Worker vh |= vsignh; 118*4bdc9457SAndroid Build Coastguard Worker 119*4bdc9457SAndroid Build Coastguard Worker *o++ = vh; 120*4bdc9457SAndroid Build Coastguard Worker 121*4bdc9457SAndroid Build Coastguard Worker n -= sizeof(float); 122*4bdc9457SAndroid Build Coastguard Worker } while (n != 0); 123*4bdc9457SAndroid Build Coastguard Worker $elif BATCH_TILE == 2: 124*4bdc9457SAndroid Build Coastguard Worker if XNN_UNLIKELY(n != 0) { 125*4bdc9457SAndroid Build Coastguard Worker const uint32_t vw = *i; 126*4bdc9457SAndroid Build Coastguard Worker 127*4bdc9457SAndroid Build Coastguard Worker const uint32_t vnonsignw = vw & vnonsign_mask; 128*4bdc9457SAndroid Build Coastguard Worker 129*4bdc9457SAndroid Build Coastguard Worker float vf = uint32_as_float(vnonsignw); 130*4bdc9457SAndroid Build Coastguard Worker const uint32_t vsignw = vw ^ vnonsignw; 131*4bdc9457SAndroid Build Coastguard Worker uint32_t vbias = vnonsignw + vexp_bias; 132*4bdc9457SAndroid Build Coastguard Worker 133*4bdc9457SAndroid Build Coastguard Worker vf *= vscale_to_inf; 134*4bdc9457SAndroid Build Coastguard Worker vbias &= vexpw_max; 135*4bdc9457SAndroid Build Coastguard Worker 136*4bdc9457SAndroid Build Coastguard Worker vf *= vscale_to_zero; 137*4bdc9457SAndroid Build Coastguard Worker vbias = math_max_u32(vbias, vbias_min); 138*4bdc9457SAndroid Build Coastguard Worker 139*4bdc9457SAndroid Build Coastguard Worker vf += uint32_as_float(vbias); 140*4bdc9457SAndroid Build Coastguard Worker 141*4bdc9457SAndroid Build Coastguard Worker const uint32_t vbits = float_as_uint32(vf); 142*4bdc9457SAndroid Build Coastguard Worker 143*4bdc9457SAndroid Build Coastguard Worker const uint16_t vexph = (uint16_t) (vbits >> 13) & vexph_mask; 144*4bdc9457SAndroid Build Coastguard Worker const uint16_t vmanth = (uint16_t) vbits & vmanth_mask; 145*4bdc9457SAndroid Build Coastguard Worker const uint16_t vsignh = (uint16_t) (vsignw >> 16); 146*4bdc9457SAndroid Build Coastguard Worker 147*4bdc9457SAndroid Build Coastguard Worker uint16_t vh = vexph + vmanth; 148*4bdc9457SAndroid Build Coastguard Worker if XNN_UNPREDICTABLE(vnonsignw > vexpw_max) { 149*4bdc9457SAndroid Build Coastguard Worker vh = vnanh; 150*4bdc9457SAndroid Build Coastguard Worker } 151*4bdc9457SAndroid Build Coastguard Worker vh |= vsignh; 152*4bdc9457SAndroid Build Coastguard Worker 153*4bdc9457SAndroid Build Coastguard Worker *o = vh; 154*4bdc9457SAndroid Build Coastguard Worker } 155*4bdc9457SAndroid Build Coastguard Worker $else: 156*4bdc9457SAndroid Build Coastguard Worker if XNN_UNLIKELY(n != 0) { 157*4bdc9457SAndroid Build Coastguard Worker do { 158*4bdc9457SAndroid Build Coastguard Worker const uint32_t vw = *i++; 159*4bdc9457SAndroid Build Coastguard Worker 160*4bdc9457SAndroid Build Coastguard Worker const uint32_t vnonsignw = vw & vnonsign_mask; 161*4bdc9457SAndroid Build Coastguard Worker 162*4bdc9457SAndroid Build Coastguard Worker float vf = uint32_as_float(vnonsignw); 163*4bdc9457SAndroid Build Coastguard Worker const uint32_t vsignw = vw ^ vnonsignw; 164*4bdc9457SAndroid Build Coastguard Worker uint32_t vbias = vnonsignw + vexp_bias; 165*4bdc9457SAndroid Build Coastguard Worker 166*4bdc9457SAndroid Build Coastguard Worker vf *= vscale_to_inf; 167*4bdc9457SAndroid Build Coastguard Worker vbias &= vexpw_max; 168*4bdc9457SAndroid Build Coastguard Worker 169*4bdc9457SAndroid Build Coastguard Worker vf *= vscale_to_zero; 170*4bdc9457SAndroid Build Coastguard Worker vbias = math_max_u32(vbias, vbias_min); 171*4bdc9457SAndroid Build Coastguard Worker 172*4bdc9457SAndroid Build Coastguard Worker vf += uint32_as_float(vbias); 173*4bdc9457SAndroid Build Coastguard Worker 174*4bdc9457SAndroid Build Coastguard Worker const uint32_t vbits = float_as_uint32(vf); 175*4bdc9457SAndroid Build Coastguard Worker 176*4bdc9457SAndroid Build Coastguard Worker const uint16_t vexph = (uint16_t) (vbits >> 13) & vexph_mask; 177*4bdc9457SAndroid Build Coastguard Worker const uint16_t vmanth = (uint16_t) vbits & vmanth_mask; 178*4bdc9457SAndroid Build Coastguard Worker const uint16_t vsignh = (uint16_t) (vsignw >> 16); 179*4bdc9457SAndroid Build Coastguard Worker 180*4bdc9457SAndroid Build Coastguard Worker uint16_t vh = vexph + vmanth; 181*4bdc9457SAndroid Build Coastguard Worker if XNN_UNPREDICTABLE(vnonsignw > vexpw_max) { 182*4bdc9457SAndroid Build Coastguard Worker vh = vnanh; 183*4bdc9457SAndroid Build Coastguard Worker } 184*4bdc9457SAndroid Build Coastguard Worker vh |= vsignh; 185*4bdc9457SAndroid Build Coastguard Worker 186*4bdc9457SAndroid Build Coastguard Worker *o++ = vh; 187*4bdc9457SAndroid Build Coastguard Worker 188*4bdc9457SAndroid Build Coastguard Worker n -= sizeof(float); 189*4bdc9457SAndroid Build Coastguard Worker } while (n != 0); 190*4bdc9457SAndroid Build Coastguard Worker } 191*4bdc9457SAndroid Build Coastguard Worker} 192