1 // Auto-generated file. Do not edit! 2 // Template: src/f32-f16-vcvt/scalar-bitcast.c.in 3 // Generator: tools/xngen 4 // 5 // Copyright 2021 Google LLC 6 // 7 // This source code is licensed under the BSD-style license found in the 8 // LICENSE file in the root directory of this source tree. 9 10 #include <assert.h> 11 12 #include <xnnpack/common.h> 13 #include <xnnpack/math.h> 14 #include <xnnpack/vcvt.h> 15 16 xnn_f32_f16_vcvt_ukernel__scalar_bitcast_x2(size_t n,const float * input,void * output,const union xnn_f32_f16_cvt_params params[restrict XNN_MIN_ELEMENTS (1)])17void xnn_f32_f16_vcvt_ukernel__scalar_bitcast_x2( 18 size_t n, 19 const float* input, 20 void* output, 21 const union xnn_f32_f16_cvt_params params[restrict XNN_MIN_ELEMENTS(1)]) 22 { 23 assert(n != 0); 24 assert(n % sizeof(float) == 0); 25 assert(input != NULL); 26 assert(output != NULL); 27 28 const uint32_t vnonsign_mask = params->scalar_bitcast.nonsign_mask; 29 const uint32_t vexp_bias = params->scalar_bitcast.exp_bias; 30 const float vscale_to_inf = params->scalar_bitcast.scale_to_inf; 31 const uint32_t vexpw_max = params->scalar_bitcast.expw_max; 32 const float vscale_to_zero = params->scalar_bitcast.scale_to_zero; 33 const uint32_t vbias_min = params->scalar_bitcast.bias_min; 34 const uint16_t vexph_mask = params->scalar_bitcast.exph_mask; 35 const uint16_t vmanth_mask = params->scalar_bitcast.manth_mask; 36 const uint16_t vnanh = params->scalar_bitcast.nanh; 37 38 const uint32_t* i = (const uint32_t*) input; 39 uint16_t* o = (uint16_t*) output; 40 for (; n >= 2 * sizeof(float); n -= 2 * sizeof(float)) { 41 const uint32_t vw0 = i[0]; 42 const uint32_t vw1 = i[1]; 43 i += 2; 44 45 const uint32_t vnonsignw0 = vw0 & vnonsign_mask; 46 const uint32_t vnonsignw1 = vw1 & vnonsign_mask; 47 48 float vf0 = uint32_as_float(vnonsignw0); 49 float vf1 = uint32_as_float(vnonsignw1); 50 const uint32_t vsignw0 = vw0 ^ vnonsignw0; 51 const uint32_t vsignw1 = vw1 ^ vnonsignw1; 52 uint32_t vbias0 = vnonsignw0 + vexp_bias; 53 uint32_t vbias1 = vnonsignw1 + vexp_bias; 54 55 vf0 *= vscale_to_inf; 56 vf1 *= vscale_to_inf; 57 vbias0 &= vexpw_max; 58 vbias1 &= vexpw_max; 59 60 vf0 *= vscale_to_zero; 61 vf1 *= vscale_to_zero; 62 vbias0 = math_max_u32(vbias0, vbias_min); 63 vbias1 = math_max_u32(vbias1, vbias_min); 64 65 vf0 += uint32_as_float(vbias0); 66 vf1 += uint32_as_float(vbias1); 67 68 const uint32_t vbits0 = float_as_uint32(vf0); 69 const uint32_t vbits1 = float_as_uint32(vf1); 70 71 const uint16_t vexph0 = (uint16_t) (vbits0 >> 13) & vexph_mask; 72 const uint16_t vexph1 = (uint16_t) (vbits1 >> 13) & vexph_mask; 73 const uint16_t vmanth0 = (uint16_t) vbits0 & vmanth_mask; 74 const uint16_t vmanth1 = (uint16_t) vbits1 & vmanth_mask; 75 const uint16_t vsignh0 = (uint16_t) (vsignw0 >> 16); 76 const uint16_t vsignh1 = (uint16_t) (vsignw1 >> 16); 77 78 uint16_t vh0 = vexph0 + vmanth0; 79 uint16_t vh1 = vexph1 + vmanth1; 80 if XNN_UNPREDICTABLE(vnonsignw0 > vexpw_max) { 81 vh0 = vnanh; 82 } 83 if XNN_UNPREDICTABLE(vnonsignw1 > vexpw_max) { 84 vh1 = vnanh; 85 } 86 vh0 |= vsignh0; 87 vh1 |= vsignh1; 88 89 o[0] = vh0; 90 o[1] = vh1; 91 o += 2; 92 } 93 if XNN_UNLIKELY(n != 0) { 94 const uint32_t vw = *i; 95 96 const uint32_t vnonsignw = vw & vnonsign_mask; 97 98 float vf = uint32_as_float(vnonsignw); 99 const uint32_t vsignw = vw ^ vnonsignw; 100 uint32_t vbias = vnonsignw + vexp_bias; 101 102 vf *= vscale_to_inf; 103 vbias &= vexpw_max; 104 105 vf *= vscale_to_zero; 106 vbias = math_max_u32(vbias, vbias_min); 107 108 vf += uint32_as_float(vbias); 109 110 const uint32_t vbits = float_as_uint32(vf); 111 112 const uint16_t vexph = (uint16_t) (vbits >> 13) & vexph_mask; 113 const uint16_t vmanth = (uint16_t) vbits & vmanth_mask; 114 const uint16_t vsignh = (uint16_t) (vsignw >> 16); 115 116 uint16_t vh = vexph + vmanth; 117 if XNN_UNPREDICTABLE(vnonsignw > vexpw_max) { 118 vh = vnanh; 119 } 120 vh |= vsignh; 121 122 *o = vh; 123 } 124 } 125