1 // Auto-generated file. Do not edit! 2 // Template: src/f32-f16-vcvt/scalar-fabsf.c.in 3 // Generator: tools/xngen 4 // 5 // Copyright 2021 Google LLC 6 // 7 // This source code is licensed under the BSD-style license found in the 8 // LICENSE file in the root directory of this source tree. 9 10 #include <assert.h> 11 #include <math.h> 12 13 #include <xnnpack/common.h> 14 #include <xnnpack/math.h> 15 #include <xnnpack/vcvt.h> 16 17 xnn_f32_f16_vcvt_ukernel__scalar_fabsf_x2(size_t n,const float * input,void * output,const union xnn_f32_f16_cvt_params params[restrict XNN_MIN_ELEMENTS (1)])18void xnn_f32_f16_vcvt_ukernel__scalar_fabsf_x2( 19 size_t n, 20 const float* input, 21 void* output, 22 const union xnn_f32_f16_cvt_params params[restrict XNN_MIN_ELEMENTS(1)]) 23 { 24 assert(n != 0); 25 assert(n % sizeof(float) == 0); 26 assert(input != NULL); 27 assert(output != NULL); 28 29 const float vscale_to_inf = params->scalar_fabsf.scale_to_inf; 30 const uint32_t vexp_bias = params->scalar_fabsf.exp_bias; 31 const float vscale_to_zero = params->scalar_fabsf.scale_to_zero; 32 const uint32_t vexpw_max = params->scalar_fabsf.expw_max; 33 const uint32_t vbias_min = params->scalar_fabsf.bias_min; 34 const uint16_t vexph_mask = params->scalar_fabsf.exph_mask; 35 const uint16_t vmanth_mask = params->scalar_fabsf.manth_mask; 36 const uint16_t vnanh = params->scalar_fabsf.nanh; 37 38 uint16_t* o = (uint16_t*) output; 39 for (; n >= 2 * sizeof(float); n -= 2 * sizeof(float)) { 40 const float vx0 = input[0]; 41 const float vx1 = input[1]; 42 input += 2; 43 44 const float vabsx0 = fabsf(vx0); 45 const float vabsx1 = fabsf(vx1); 46 uint32_t vsignw0 = float_as_uint32(vx0); 47 uint32_t vsignw1 = float_as_uint32(vx1); 48 49 const uint32_t vnonsignw0 = float_as_uint32(vabsx0); 50 const uint32_t vnonsignw1 = float_as_uint32(vabsx1); 51 float vf0 = vabsx0 * vscale_to_inf; 52 float vf1 = vabsx1 * vscale_to_inf; 53 54 uint32_t vbias0 = vnonsignw0 + vexp_bias; 55 uint32_t vbias1 = vnonsignw1 + vexp_bias; 56 vsignw0 ^= vnonsignw0; 57 vsignw1 ^= vnonsignw1; 58 59 vf0 *= vscale_to_zero; 60 vf1 *= vscale_to_zero; 61 vbias0 &= vexpw_max; 62 vbias1 &= vexpw_max; 63 64 vbias0 = math_max_u32(vbias0, vbias_min); 65 vbias1 = math_max_u32(vbias1, vbias_min); 66 67 vf0 += uint32_as_float(vbias0); 68 vf1 += uint32_as_float(vbias1); 69 70 const uint32_t vbits0 = float_as_uint32(vf0); 71 const uint32_t vbits1 = float_as_uint32(vf1); 72 73 const uint16_t vexph0 = (uint16_t) (vbits0 >> 13) & vexph_mask; 74 const uint16_t vexph1 = (uint16_t) (vbits1 >> 13) & vexph_mask; 75 const uint16_t vmanth0 = (uint16_t) vbits0 & vmanth_mask; 76 const uint16_t vmanth1 = (uint16_t) vbits1 & vmanth_mask; 77 const uint16_t vsignh0 = (uint16_t) (vsignw0 >> 16); 78 const uint16_t vsignh1 = (uint16_t) (vsignw1 >> 16); 79 80 uint16_t vh0 = vexph0 + vmanth0; 81 uint16_t vh1 = vexph1 + vmanth1; 82 if XNN_UNPREDICTABLE(vnonsignw0 > vexpw_max) { 83 vh0 = vnanh; 84 } 85 if XNN_UNPREDICTABLE(vnonsignw1 > vexpw_max) { 86 vh1 = vnanh; 87 } 88 vh0 |= vsignh0; 89 vh1 |= vsignh1; 90 91 o[0] = vh0; 92 o[1] = vh1; 93 o += 2; 94 } 95 if XNN_UNLIKELY(n != 0) { 96 const float vx = *input; 97 98 const float vabsx = fabsf(vx); 99 uint32_t vsignw = float_as_uint32(vx); 100 101 const uint32_t vnonsignw = float_as_uint32(vabsx); 102 float vf = vabsx * vscale_to_inf; 103 104 uint32_t vbias = vnonsignw + vexp_bias; 105 vsignw ^= vnonsignw; 106 107 vf *= vscale_to_zero; 108 vbias &= vexpw_max; 109 110 vbias = math_max_u32(vbias, vbias_min); 111 112 vf += uint32_as_float(vbias); 113 114 const uint32_t vbits = float_as_uint32(vf); 115 116 const uint16_t vexph = (uint16_t) (vbits >> 13) & vexph_mask; 117 const uint16_t vmanth = (uint16_t) vbits & vmanth_mask; 118 const uint16_t vsignh = (uint16_t) (vsignw >> 16); 119 120 uint16_t vh = vexph + vmanth; 121 if XNN_UNPREDICTABLE(vnonsignw > vexpw_max) { 122 vh = vnanh; 123 } 124 vh |= vsignh; 125 126 *o = vh; 127 } 128 } 129