// Copyright 2021 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

$assert BATCH_TILE >= 1
#include <assert.h>
#include <math.h>

#include <xnnpack/common.h>
#include <xnnpack/math.h>
#include <xnnpack/vcvt.h>


// Converts a buffer of IEEE binary32 (float) values to IEEE binary16
// (half-precision) bit patterns using only scalar integer ops plus fabsf().
// This is a code-generation template: BATCH_TILE elements are converted per
// main-loop iteration, with a template-selected remainder path.
//
// Per-element scheme (identical in the main loop and all remainder paths):
//   1. Split the input into sign (vsignw) and magnitude (vabsx/vnonsignw).
//   2. Rescale the magnitude by scale_to_inf then scale_to_zero
//      (NOTE(review): per the parameter names, this presumably saturates
//      out-of-range magnitudes to infinity and lets the FP unit round
//      subnormal results — confirm against the params initializer).
//   3. Compute the output exponent field (vbias) on the integer side, then
//      add it back as a float so the FP addition rounds the mantissa.
//   4. Reassemble exponent + mantissa (added, so carries propagate) and the
//      sign bit; NaN inputs are replaced with the canonical half NaN (nanh).
//
// n      - number of input BYTES; must be a non-zero multiple of sizeof(float).
// input  - n / sizeof(float) float elements.
// output - n / sizeof(float) uint16_t (binary16) elements.
// params - precomputed scale factors and bit masks for this variant.
void xnn_f32_f16_vcvt_ukernel__scalar_fabsf_x${BATCH_TILE}(
    size_t n,
    const float* input,
    void* output,
    const union xnn_f32_f16_cvt_params params[restrict XNN_MIN_ELEMENTS(1)])
{
  assert(n != 0);
  assert(n % sizeof(float) == 0);
  assert(input != NULL);
  assert(output != NULL);

  // Scaling constants and bit masks precomputed by the params initializer.
  const float vscale_to_inf = params->scalar_fabsf.scale_to_inf;
  const uint32_t vexp_bias = params->scalar_fabsf.exp_bias;
  const float vscale_to_zero = params->scalar_fabsf.scale_to_zero;
  const uint32_t vexpw_max = params->scalar_fabsf.expw_max;
  const uint32_t vbias_min = params->scalar_fabsf.bias_min;
  const uint16_t vexph_mask = params->scalar_fabsf.exph_mask;
  const uint16_t vmanth_mask = params->scalar_fabsf.manth_mask;
  const uint16_t vnanh = params->scalar_fabsf.nanh;

  uint16_t* o = (uint16_t*) output;
  $if BATCH_TILE > 1:
    // Main loop: BATCH_TILE elements per iteration, fully unrolled.
    for (; n >= ${BATCH_TILE} * sizeof(float); n -= ${BATCH_TILE} * sizeof(float)) {
      $for N in range(BATCH_TILE):
        const float vx${N} = input[${N}];
      input += ${BATCH_TILE};

      // Magnitude and raw bit pattern; vsignw starts as the full bit pattern.
      $for N in range(BATCH_TILE):
        const float vabsx${N} = fabsf(vx${N});
      $for N in range(BATCH_TILE):
        uint32_t vsignw${N} = float_as_uint32(vx${N});

      $for N in range(BATCH_TILE):
        const uint32_t vnonsignw${N} = float_as_uint32(vabsx${N});
      // First rescale step (see step 2 in the header comment).
      $for N in range(BATCH_TILE):
        float vf${N} = vabsx${N} * vscale_to_inf;

      // Integer path: bias the magnitude bits to derive the half exponent.
      $for N in range(BATCH_TILE):
        uint32_t vbias${N} = vnonsignw${N} + vexp_bias;
      // x ^ |x| leaves only the sign bit (bit 31) set in vsignw.
      $for N in range(BATCH_TILE):
        vsignw${N} ^= vnonsignw${N};

      // Second rescale step.
      $for N in range(BATCH_TILE):
        vf${N} *= vscale_to_zero;
      // Keep only the exponent field of the biased value.
      $for N in range(BATCH_TILE):
        vbias${N} &= vexpw_max;

      // Clamp so the bias never drops below the subnormal-output minimum.
      $for N in range(BATCH_TILE):
        vbias${N} = math_max_u32(vbias${N}, vbias_min);

      // Adding the bias as a float makes the FP adder perform the mantissa
      // rounding in hardware; the result's low bits hold the half fields.
      $for N in range(BATCH_TILE):
        vf${N} += uint32_as_float(vbias${N});

      $for N in range(BATCH_TILE):
        const uint32_t vbits${N} = float_as_uint32(vf${N});

      // Extract the half exponent and mantissa fields from the rounded bits.
      $for N in range(BATCH_TILE):
        const uint16_t vexph${N} = (uint16_t) (vbits${N} >> 13) & vexph_mask;
      $for N in range(BATCH_TILE):
        const uint16_t vmanth${N} = (uint16_t) vbits${N} & vmanth_mask;
      // Move the fp32 sign bit (bit 31) into the fp16 sign position (bit 15).
      $for N in range(BATCH_TILE):
        const uint16_t vsignh${N} = (uint16_t) (vsignw${N} >> 16);

      // ADD (not OR): a mantissa carry-out must propagate into the exponent.
      $for N in range(BATCH_TILE):
        uint16_t vh${N} = vexph${N} + vmanth${N};
      // Magnitude bits above the all-ones exponent field => input was NaN.
      $for N in range(BATCH_TILE):
        if XNN_UNPREDICTABLE(vnonsignw${N} > vexpw_max) {
          vh${N} = vnanh;
        }
      $for N in range(BATCH_TILE):
        vh${N} |= vsignh${N};

      $for N in range(BATCH_TILE):
        o[${N}] = vh${N};
      o += ${BATCH_TILE};
    }
  $if BATCH_TILE == 1:
    // BATCH_TILE == 1: no unrolled main loop above — convert one element per
    // iteration using the same per-element algorithm.
    do {
      const float vx = *input++;

      const float vabsx = fabsf(vx);
      uint32_t vsignw = float_as_uint32(vx);

      const uint32_t vnonsignw = float_as_uint32(vabsx);
      float vf = vabsx * vscale_to_inf;

      uint32_t vbias = vnonsignw + vexp_bias;
      vsignw ^= vnonsignw;

      vf *= vscale_to_zero;
      vbias &= vexpw_max;

      vbias = math_max_u32(vbias, vbias_min);

      vf += uint32_as_float(vbias);

      const uint32_t vbits = float_as_uint32(vf);

      const uint16_t vexph = (uint16_t) (vbits >> 13) & vexph_mask;
      const uint16_t vmanth = (uint16_t) vbits & vmanth_mask;
      const uint16_t vsignh = (uint16_t) (vsignw >> 16);

      uint16_t vh = vexph + vmanth;
      if XNN_UNPREDICTABLE(vnonsignw > vexpw_max) {
        vh = vnanh;
      }
      vh |= vsignh;

      *o++ = vh;

      n -= sizeof(float);
    } while (n != 0);
  $elif BATCH_TILE == 2:
    // BATCH_TILE == 2: at most one element remains after the main loop.
    if XNN_UNLIKELY(n != 0) {
      const float vx = *input;

      const float vabsx = fabsf(vx);
      uint32_t vsignw = float_as_uint32(vx);

      const uint32_t vnonsignw = float_as_uint32(vabsx);
      float vf = vabsx * vscale_to_inf;

      uint32_t vbias = vnonsignw + vexp_bias;
      vsignw ^= vnonsignw;

      vf *= vscale_to_zero;
      vbias &= vexpw_max;

      vbias = math_max_u32(vbias, vbias_min);

      vf += uint32_as_float(vbias);

      const uint32_t vbits = float_as_uint32(vf);

      const uint16_t vexph = (uint16_t) (vbits >> 13) & vexph_mask;
      const uint16_t vmanth = (uint16_t) vbits & vmanth_mask;
      const uint16_t vsignh = (uint16_t) (vsignw >> 16);

      uint16_t vh = vexph + vmanth;
      if XNN_UNPREDICTABLE(vnonsignw > vexpw_max) {
        vh = vnanh;
      }
      vh |= vsignh;

      *o = vh;
    }
  $else:
    // BATCH_TILE > 2: up to BATCH_TILE-1 elements remain — scalar tail loop.
    if XNN_UNLIKELY(n != 0) {
      do {
        const float vx = *input++;

        const float vabsx = fabsf(vx);
        uint32_t vsignw = float_as_uint32(vx);

        const uint32_t vnonsignw = float_as_uint32(vabsx);
        float vf = vabsx * vscale_to_inf;

        uint32_t vbias = vnonsignw + vexp_bias;
        vsignw ^= vnonsignw;

        vf *= vscale_to_zero;
        vbias &= vexpw_max;

        vbias = math_max_u32(vbias, vbias_min);

        vf += uint32_as_float(vbias);

        const uint32_t vbits = float_as_uint32(vf);

        const uint16_t vexph = (uint16_t) (vbits >> 13) & vexph_mask;
        const uint16_t vmanth = (uint16_t) vbits & vmanth_mask;
        const uint16_t vsignh = (uint16_t) (vsignw >> 16);

        uint16_t vh = vexph + vmanth;
        if XNN_UNPREDICTABLE(vnonsignw > vexpw_max) {
          vh = vnanh;
        }
        vh |= vsignh;

        *o++ = vh;

        n -= sizeof(float);
      } while (n != 0);
    }
}