1 // Auto-generated file. Do not edit!
2 // Template: src/f32-f16-vcvt/scalar-fabsf.c.in
3 // Generator: tools/xngen
4 //
5 // Copyright 2021 Google LLC
6 //
7 // This source code is licensed under the BSD-style license found in the
8 // LICENSE file in the root directory of this source tree.
9
10 #include <assert.h>
11 #include <math.h>
12
13 #include <xnnpack/common.h>
14 #include <xnnpack/math.h>
15 #include <xnnpack/vcvt.h>
16
17
xnn_f32_f16_vcvt_ukernel__scalar_fabsf_x3(size_t n,const float * input,void * output,const union xnn_f32_f16_cvt_params params[restrict XNN_MIN_ELEMENTS (1)])18 void xnn_f32_f16_vcvt_ukernel__scalar_fabsf_x3(
19 size_t n,
20 const float* input,
21 void* output,
22 const union xnn_f32_f16_cvt_params params[restrict XNN_MIN_ELEMENTS(1)])
23 {
24 assert(n != 0);
25 assert(n % sizeof(float) == 0);
26 assert(input != NULL);
27 assert(output != NULL);
28
29 const float vscale_to_inf = params->scalar_fabsf.scale_to_inf;
30 const uint32_t vexp_bias = params->scalar_fabsf.exp_bias;
31 const float vscale_to_zero = params->scalar_fabsf.scale_to_zero;
32 const uint32_t vexpw_max = params->scalar_fabsf.expw_max;
33 const uint32_t vbias_min = params->scalar_fabsf.bias_min;
34 const uint16_t vexph_mask = params->scalar_fabsf.exph_mask;
35 const uint16_t vmanth_mask = params->scalar_fabsf.manth_mask;
36 const uint16_t vnanh = params->scalar_fabsf.nanh;
37
38 uint16_t* o = (uint16_t*) output;
39 for (; n >= 3 * sizeof(float); n -= 3 * sizeof(float)) {
40 const float vx0 = input[0];
41 const float vx1 = input[1];
42 const float vx2 = input[2];
43 input += 3;
44
45 const float vabsx0 = fabsf(vx0);
46 const float vabsx1 = fabsf(vx1);
47 const float vabsx2 = fabsf(vx2);
48 uint32_t vsignw0 = float_as_uint32(vx0);
49 uint32_t vsignw1 = float_as_uint32(vx1);
50 uint32_t vsignw2 = float_as_uint32(vx2);
51
52 const uint32_t vnonsignw0 = float_as_uint32(vabsx0);
53 const uint32_t vnonsignw1 = float_as_uint32(vabsx1);
54 const uint32_t vnonsignw2 = float_as_uint32(vabsx2);
55 float vf0 = vabsx0 * vscale_to_inf;
56 float vf1 = vabsx1 * vscale_to_inf;
57 float vf2 = vabsx2 * vscale_to_inf;
58
59 uint32_t vbias0 = vnonsignw0 + vexp_bias;
60 uint32_t vbias1 = vnonsignw1 + vexp_bias;
61 uint32_t vbias2 = vnonsignw2 + vexp_bias;
62 vsignw0 ^= vnonsignw0;
63 vsignw1 ^= vnonsignw1;
64 vsignw2 ^= vnonsignw2;
65
66 vf0 *= vscale_to_zero;
67 vf1 *= vscale_to_zero;
68 vf2 *= vscale_to_zero;
69 vbias0 &= vexpw_max;
70 vbias1 &= vexpw_max;
71 vbias2 &= vexpw_max;
72
73 vbias0 = math_max_u32(vbias0, vbias_min);
74 vbias1 = math_max_u32(vbias1, vbias_min);
75 vbias2 = math_max_u32(vbias2, vbias_min);
76
77 vf0 += uint32_as_float(vbias0);
78 vf1 += uint32_as_float(vbias1);
79 vf2 += uint32_as_float(vbias2);
80
81 const uint32_t vbits0 = float_as_uint32(vf0);
82 const uint32_t vbits1 = float_as_uint32(vf1);
83 const uint32_t vbits2 = float_as_uint32(vf2);
84
85 const uint16_t vexph0 = (uint16_t) (vbits0 >> 13) & vexph_mask;
86 const uint16_t vexph1 = (uint16_t) (vbits1 >> 13) & vexph_mask;
87 const uint16_t vexph2 = (uint16_t) (vbits2 >> 13) & vexph_mask;
88 const uint16_t vmanth0 = (uint16_t) vbits0 & vmanth_mask;
89 const uint16_t vmanth1 = (uint16_t) vbits1 & vmanth_mask;
90 const uint16_t vmanth2 = (uint16_t) vbits2 & vmanth_mask;
91 const uint16_t vsignh0 = (uint16_t) (vsignw0 >> 16);
92 const uint16_t vsignh1 = (uint16_t) (vsignw1 >> 16);
93 const uint16_t vsignh2 = (uint16_t) (vsignw2 >> 16);
94
95 uint16_t vh0 = vexph0 + vmanth0;
96 uint16_t vh1 = vexph1 + vmanth1;
97 uint16_t vh2 = vexph2 + vmanth2;
98 if XNN_UNPREDICTABLE(vnonsignw0 > vexpw_max) {
99 vh0 = vnanh;
100 }
101 if XNN_UNPREDICTABLE(vnonsignw1 > vexpw_max) {
102 vh1 = vnanh;
103 }
104 if XNN_UNPREDICTABLE(vnonsignw2 > vexpw_max) {
105 vh2 = vnanh;
106 }
107 vh0 |= vsignh0;
108 vh1 |= vsignh1;
109 vh2 |= vsignh2;
110
111 o[0] = vh0;
112 o[1] = vh1;
113 o[2] = vh2;
114 o += 3;
115 }
116 if XNN_UNLIKELY(n != 0) {
117 do {
118 const float vx = *input++;
119
120 const float vabsx = fabsf(vx);
121 uint32_t vsignw = float_as_uint32(vx);
122
123 const uint32_t vnonsignw = float_as_uint32(vabsx);
124 float vf = vabsx * vscale_to_inf;
125
126 uint32_t vbias = vnonsignw + vexp_bias;
127 vsignw ^= vnonsignw;
128
129 vf *= vscale_to_zero;
130 vbias &= vexpw_max;
131
132 vbias = math_max_u32(vbias, vbias_min);
133
134 vf += uint32_as_float(vbias);
135
136 const uint32_t vbits = float_as_uint32(vf);
137
138 const uint16_t vexph = (uint16_t) (vbits >> 13) & vexph_mask;
139 const uint16_t vmanth = (uint16_t) vbits & vmanth_mask;
140 const uint16_t vsignh = (uint16_t) (vsignw >> 16);
141
142 uint16_t vh = vexph + vmanth;
143 if XNN_UNPREDICTABLE(vnonsignw > vexpw_max) {
144 vh = vnanh;
145 }
146 vh |= vsignh;
147
148 *o++ = vh;
149
150 n -= sizeof(float);
151 } while (n != 0);
152 }
153 }
154