1*4bdc9457SAndroid Build Coastguard Worker// Copyright 2021 Google LLC 2*4bdc9457SAndroid Build Coastguard Worker// 3*4bdc9457SAndroid Build Coastguard Worker// This source code is licensed under the BSD-style license found in the 4*4bdc9457SAndroid Build Coastguard Worker// LICENSE file in the root directory of this source tree. 5*4bdc9457SAndroid Build Coastguard Worker 6*4bdc9457SAndroid Build Coastguard Worker$assert BATCH_TILE >= 1 7*4bdc9457SAndroid Build Coastguard Worker$ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" 8*4bdc9457SAndroid Build Coastguard Worker#include <assert.h> 9*4bdc9457SAndroid Build Coastguard Worker 10*4bdc9457SAndroid Build Coastguard Worker#include <xnnpack/common.h> 11*4bdc9457SAndroid Build Coastguard Worker#include <xnnpack/math.h> 12*4bdc9457SAndroid Build Coastguard Worker#include <xnnpack/vcvt.h> 13*4bdc9457SAndroid Build Coastguard Worker 14*4bdc9457SAndroid Build Coastguard Worker 15*4bdc9457SAndroid Build Coastguard Workervoid xnn_f16_f32_vcvt_ukernel__scalar_x${BATCH_TILE}( 16*4bdc9457SAndroid Build Coastguard Worker size_t n, 17*4bdc9457SAndroid Build Coastguard Worker const void* input, 18*4bdc9457SAndroid Build Coastguard Worker float* output, 19*4bdc9457SAndroid Build Coastguard Worker const union xnn_f16_f32_cvt_params params[restrict XNN_MIN_ELEMENTS(1)]) 20*4bdc9457SAndroid Build Coastguard Worker{ 21*4bdc9457SAndroid Build Coastguard Worker assert(n != 0); 22*4bdc9457SAndroid Build Coastguard Worker assert(n % sizeof(uint16_t) == 0); 23*4bdc9457SAndroid Build Coastguard Worker assert(input != NULL); 24*4bdc9457SAndroid Build Coastguard Worker assert(output != NULL); 25*4bdc9457SAndroid Build Coastguard Worker 26*4bdc9457SAndroid Build Coastguard Worker const uint32_t vsign_mask = params->scalar.sign_mask; 27*4bdc9457SAndroid Build Coastguard Worker const uint32_t vexp_offset = params->scalar.exp_offset; 28*4bdc9457SAndroid Build Coastguard Worker const float vexp_scale = params->scalar.exp_scale; 29*4bdc9457SAndroid Build Coastguard Worker const uint32_t vmagic_mask = params->scalar.magic_mask; 30*4bdc9457SAndroid Build Coastguard Worker const float vmagic_bias = params->scalar.magic_bias; 31*4bdc9457SAndroid Build Coastguard Worker const uint32_t vdenorm_cutoff = params->scalar.denorm_cutoff; 32*4bdc9457SAndroid Build Coastguard Worker 33*4bdc9457SAndroid Build Coastguard Worker const uint16_t* i = (const uint16_t*) input; 34*4bdc9457SAndroid Build Coastguard Worker uint32_t* o = (uint32_t*) output; 35*4bdc9457SAndroid Build Coastguard Worker $if BATCH_TILE > 1: 36*4bdc9457SAndroid Build Coastguard Worker for (; n >= ${BATCH_TILE} * sizeof(uint16_t); n -= ${BATCH_TILE} * sizeof(uint16_t)) { 37*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 38*4bdc9457SAndroid Build Coastguard Worker const uint16_t vh${N} = i[${N}]; 39*4bdc9457SAndroid Build Coastguard Worker i += ${BATCH_TILE}; 40*4bdc9457SAndroid Build Coastguard Worker 41*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 42*4bdc9457SAndroid Build Coastguard Worker const uint32_t vw${N} = (uint32_t) vh${N} << 16; 43*4bdc9457SAndroid Build Coastguard Worker 44*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 45*4bdc9457SAndroid Build Coastguard Worker const uint32_t vsign${N} = vw${N} & vsign_mask; 46*4bdc9457SAndroid Build Coastguard Worker 47*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 48*4bdc9457SAndroid Build Coastguard Worker const uint32_t v2w${N} = vw${N} + vw${N}; 49*4bdc9457SAndroid Build Coastguard Worker 50*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 51*4bdc9457SAndroid Build Coastguard Worker const uint32_t vnorm${N} = float_as_uint32(uint32_as_float((v2w${N} >> 4) + vexp_offset) * vexp_scale); 52*4bdc9457SAndroid Build Coastguard Worker 53*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 54*4bdc9457SAndroid Build Coastguard Worker const uint32_t vdenorm${N} = float_as_uint32(uint32_as_float((v2w${N} >> 17) | vmagic_mask) - vmagic_bias); 55*4bdc9457SAndroid Build Coastguard Worker 56*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 57*4bdc9457SAndroid Build Coastguard Worker const uint32_t vf${N} = vsign${N} | (XNN_UNPREDICTABLE(v2w${N} < vdenorm_cutoff) ? vdenorm${N} : vnorm${N}); 58*4bdc9457SAndroid Build Coastguard Worker 59*4bdc9457SAndroid Build Coastguard Worker $for N in range(BATCH_TILE): 60*4bdc9457SAndroid Build Coastguard Worker o[${N}] = vf${N}; 61*4bdc9457SAndroid Build Coastguard Worker o += ${BATCH_TILE}; 62*4bdc9457SAndroid Build Coastguard Worker } 63*4bdc9457SAndroid Build Coastguard Worker $if BATCH_TILE == 1: 64*4bdc9457SAndroid Build Coastguard Worker do { 65*4bdc9457SAndroid Build Coastguard Worker const uint16_t vh = *i++; 66*4bdc9457SAndroid Build Coastguard Worker 67*4bdc9457SAndroid Build Coastguard Worker const uint32_t vw = (uint32_t) vh << 16; 68*4bdc9457SAndroid Build Coastguard Worker const uint32_t vsign = vw & vsign_mask; 69*4bdc9457SAndroid Build Coastguard Worker const uint32_t v2w = vw + vw; 70*4bdc9457SAndroid Build Coastguard Worker const uint32_t vnorm = float_as_uint32(uint32_as_float((v2w >> 4) + vexp_offset) * vexp_scale); 71*4bdc9457SAndroid Build Coastguard Worker const uint32_t vdenorm = float_as_uint32(uint32_as_float((v2w >> 17) | vmagic_mask) - vmagic_bias); 72*4bdc9457SAndroid Build Coastguard Worker const uint32_t vf = vsign | (XNN_UNPREDICTABLE(v2w < vdenorm_cutoff) ? vdenorm : vnorm); 73*4bdc9457SAndroid Build Coastguard Worker 74*4bdc9457SAndroid Build Coastguard Worker *o++ = vf; 75*4bdc9457SAndroid Build Coastguard Worker 76*4bdc9457SAndroid Build Coastguard Worker n -= sizeof(uint16_t); 77*4bdc9457SAndroid Build Coastguard Worker } while (n != 0); 78*4bdc9457SAndroid Build Coastguard Worker $elif BATCH_TILE == 2: 79*4bdc9457SAndroid Build Coastguard Worker if XNN_UNLIKELY(n != 0) { 80*4bdc9457SAndroid Build Coastguard Worker const uint16_t vh = *i; 81*4bdc9457SAndroid Build Coastguard Worker 82*4bdc9457SAndroid Build Coastguard Worker const uint32_t vw = (uint32_t) vh << 16; 83*4bdc9457SAndroid Build Coastguard Worker const uint32_t vsign = vw & vsign_mask; 84*4bdc9457SAndroid Build Coastguard Worker const uint32_t v2w = vw + vw; 85*4bdc9457SAndroid Build Coastguard Worker const uint32_t vnorm = float_as_uint32(uint32_as_float((v2w >> 4) + vexp_offset) * vexp_scale); 86*4bdc9457SAndroid Build Coastguard Worker const uint32_t vdenorm = float_as_uint32(uint32_as_float((v2w >> 17) | vmagic_mask) - vmagic_bias); 87*4bdc9457SAndroid Build Coastguard Worker const uint32_t vf = vsign | (XNN_UNPREDICTABLE(v2w < vdenorm_cutoff) ? vdenorm : vnorm); 88*4bdc9457SAndroid Build Coastguard Worker 89*4bdc9457SAndroid Build Coastguard Worker *o = vf; 90*4bdc9457SAndroid Build Coastguard Worker } 91*4bdc9457SAndroid Build Coastguard Worker $else: 92*4bdc9457SAndroid Build Coastguard Worker if XNN_UNLIKELY(n != 0) { 93*4bdc9457SAndroid Build Coastguard Worker do { 94*4bdc9457SAndroid Build Coastguard Worker const uint16_t vh = *i++; 95*4bdc9457SAndroid Build Coastguard Worker 96*4bdc9457SAndroid Build Coastguard Worker const uint32_t vw = (uint32_t) vh << 16; 97*4bdc9457SAndroid Build Coastguard Worker const uint32_t vsign = vw & vsign_mask; 98*4bdc9457SAndroid Build Coastguard Worker const uint32_t v2w = vw + vw; 99*4bdc9457SAndroid Build Coastguard Worker const uint32_t vnorm = float_as_uint32(uint32_as_float((v2w >> 4) + vexp_offset) * vexp_scale); 100*4bdc9457SAndroid Build Coastguard Worker const uint32_t vdenorm = float_as_uint32(uint32_as_float((v2w >> 17) | vmagic_mask) - vmagic_bias); 101*4bdc9457SAndroid Build Coastguard Worker const uint32_t vf = vsign | (XNN_UNPREDICTABLE(v2w < vdenorm_cutoff) ? vdenorm : vnorm); 102*4bdc9457SAndroid Build Coastguard Worker 103*4bdc9457SAndroid Build Coastguard Worker *o++ = vf; 104*4bdc9457SAndroid Build Coastguard Worker 105*4bdc9457SAndroid Build Coastguard Worker n -= sizeof(uint16_t); 106*4bdc9457SAndroid Build Coastguard Worker } while (n != 0); 107*4bdc9457SAndroid Build Coastguard Worker } 108*4bdc9457SAndroid Build Coastguard Worker} 109