// Copyright 2021 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
5*4bdc9457SAndroid Build Coastguard Worker
6*4bdc9457SAndroid Build Coastguard Worker #include <stdint.h>
7*4bdc9457SAndroid Build Coastguard Worker #include <stddef.h>
8*4bdc9457SAndroid Build Coastguard Worker #include <assert.h>
9*4bdc9457SAndroid Build Coastguard Worker #include <math.h>
10*4bdc9457SAndroid Build Coastguard Worker
11*4bdc9457SAndroid Build Coastguard Worker #include <fp16.h>
12*4bdc9457SAndroid Build Coastguard Worker
13*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/math.h>
14*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/microparams-init.h>
15*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/unaligned.h>
16*4bdc9457SAndroid Build Coastguard Worker
17*4bdc9457SAndroid Build Coastguard Worker
// Fills the fp32_scalar_fmagic member of the QC8 conv min/max parameters.
//
// The clamping bounds are stored as floats relative to the output zero point.
// 12582912.0f equals 1.5 * 2^23, and 0x4B400000 is the IEEE-754
// single-precision bit pattern of that same value.
//
// Returns the size in bytes of the member that was initialized.
size_t xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params(
  union xnn_qc8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int8_t output_zero_point,
  int8_t output_min,
  int8_t output_max)
{
  const int32_t zero_point = (int32_t) output_zero_point;
  const int32_t min_offset = (int32_t) output_min - zero_point;
  const int32_t max_offset = (int32_t) output_max - zero_point;

  params->fp32_scalar_fmagic.magic_bias = 12582912.0f;
  params->fp32_scalar_fmagic.magic_bias_less_output_zero_point = INT32_C(0x4B400000) - zero_point;
  params->fp32_scalar_fmagic.output_min_less_zero_point = (float) min_offset;
  params->fp32_scalar_fmagic.output_max_less_zero_point = (float) max_offset;

  return sizeof(params->fp32_scalar_fmagic);
}
30*4bdc9457SAndroid Build Coastguard Worker
// Fills the fp32_scalar_imagic member of the QC8 conv min/max parameters.
//
// The clamping bounds are stored as the integer bit patterns of
// (magic bias + bound relative to the zero point), where the magic bias is
// 12582912.0f (1.5 * 2^23, bit pattern 0x4B400000).
//
// Returns the size in bytes of the member that was initialized.
size_t xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params(
  union xnn_qc8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int8_t output_zero_point,
  int8_t output_min,
  int8_t output_max)
{
  const int32_t zero_point = (int32_t) output_zero_point;
  const float min_offset = (float) ((int32_t) output_min - zero_point);
  const float max_offset = (float) ((int32_t) output_max - zero_point);

  const uint32_t biased_min_bits = float_as_uint32(12582912.0f + min_offset);
  const uint32_t biased_max_bits = float_as_uint32(12582912.0f + max_offset);

  params->fp32_scalar_imagic.magic_bias = 12582912.0f;
  params->fp32_scalar_imagic.magic_min = (int32_t) biased_min_bits;
  params->fp32_scalar_imagic.magic_max = (int32_t) biased_max_bits;
  params->fp32_scalar_imagic.magic_bias_less_zero_point = INT32_C(0x4B400000) - zero_point;

  return sizeof(params->fp32_scalar_imagic);
}
45*4bdc9457SAndroid Build Coastguard Worker
// Fills the fp32_scalar_lrintf member of the QC8 conv min/max parameters.
//
// Stores the clamping bounds as floats relative to the output zero point,
// plus the zero point itself widened to 32 bits.
//
// Returns the size in bytes of the member that was initialized.
size_t xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params(
  union xnn_qc8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int8_t output_zero_point,
  int8_t output_min,
  int8_t output_max)
{
  const int32_t zero_point = (int32_t) output_zero_point;
  const int32_t min_offset = (int32_t) output_min - zero_point;
  const int32_t max_offset = (int32_t) output_max - zero_point;

  params->fp32_scalar_lrintf.output_zero_point = zero_point;
  params->fp32_scalar_lrintf.output_min_less_zero_point = (float) min_offset;
  params->fp32_scalar_lrintf.output_max_less_zero_point = (float) max_offset;

  return sizeof(params->fp32_scalar_lrintf);
}
57*4bdc9457SAndroid Build Coastguard Worker
58*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Fills the fp32_sse2 member of the QC8 conv min/max parameters.
//
// Each value is replicated across its array: 4 floats for the upper bound
// relative to the zero point, and 8 16-bit entries each for the zero point
// and the lower bound.
//
// Returns the size in bytes of the member that was initialized.
size_t xnn_init_qc8_conv_minmax_fp32_sse2_params(
  union xnn_qc8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int8_t output_zero_point,
  int8_t output_min,
  int8_t output_max)
{
  const int16_t zero_point16 = (int16_t) output_zero_point;
  const int16_t min16 = (int16_t) output_min;
  const float max_offset = (float) ((int32_t) output_max - (int32_t) output_zero_point);

  for (size_t lane = 0; lane < 4; lane++) {
    params->fp32_sse2.output_max_less_zero_point[lane] = max_offset;
  }
  for (size_t lane = 0; lane < 8; lane++) {
    params->fp32_sse2.output_zero_point[lane] = zero_point16;
  }
  for (size_t lane = 0; lane < 8; lane++) {
    params->fp32_sse2.output_min[lane] = min16;
  }

  return sizeof(params->fp32_sse2);
}
75*4bdc9457SAndroid Build Coastguard Worker
// Fills the fp32_sse4 member of the QC8 conv min/max parameters.
//
// Each value is replicated across its array: 4 floats for the upper bound
// relative to the zero point, 8 16-bit zero-point entries, and 16 entries of
// the lower bound.
//
// Returns the size in bytes of the member that was initialized.
size_t xnn_init_qc8_conv_minmax_fp32_sse4_params(
  union xnn_qc8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int8_t output_zero_point,
  int8_t output_min,
  int8_t output_max)
{
  const int16_t zero_point16 = (int16_t) output_zero_point;
  const float max_offset = (float) ((int32_t) output_max - (int32_t) output_zero_point);

  for (size_t lane = 0; lane < 16; lane++) {
    params->fp32_sse4.output_min[lane] = output_min;
  }
  for (size_t lane = 0; lane < 8; lane++) {
    params->fp32_sse4.output_zero_point[lane] = zero_point16;
  }
  for (size_t lane = 0; lane < 4; lane++) {
    params->fp32_sse4.output_max_less_zero_point[lane] = max_offset;
  }

  return sizeof(params->fp32_sse4);
}
94*4bdc9457SAndroid Build Coastguard Worker
// Fills the fp32_avx2 member of the QC8 conv min/max parameters.
//
// Each value is replicated across its array: 8 floats for the upper bound
// relative to the zero point, 16 16-bit zero-point entries, and 32 entries of
// the lower bound.
//
// Returns the size in bytes of the member that was initialized.
size_t xnn_init_qc8_conv_minmax_fp32_avx2_params(
  union xnn_qc8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int8_t output_zero_point,
  int8_t output_min,
  int8_t output_max)
{
  const int16_t zero_point16 = (int16_t) output_zero_point;
  const float max_offset = (float) ((int32_t) output_max - (int32_t) output_zero_point);

  for (size_t lane = 0; lane < 32; lane++) {
    params->fp32_avx2.output_min[lane] = output_min;
  }
  for (size_t lane = 0; lane < 16; lane++) {
    params->fp32_avx2.output_zero_point[lane] = zero_point16;
  }
  for (size_t lane = 0; lane < 8; lane++) {
    params->fp32_avx2.output_max_less_zero_point[lane] = max_offset;
  }

  return sizeof(params->fp32_avx2);
}
113*4bdc9457SAndroid Build Coastguard Worker
// Fills the fp32_avx512 member of the QC8 conv min/max parameters.
//
// Each value is replicated across its array: 16 floats for the upper bound
// relative to the zero point, 32 16-bit zero-point entries, and 64 entries of
// the lower bound.
//
// Returns the size in bytes of the member that was initialized.
size_t xnn_init_qc8_conv_minmax_fp32_avx512_params(
  union xnn_qc8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int8_t output_zero_point,
  int8_t output_min,
  int8_t output_max)
{
  const int16_t zero_point16 = (int16_t) output_zero_point;
  const float max_offset = (float) ((int32_t) output_max - (int32_t) output_zero_point);

  for (size_t lane = 0; lane < 64; lane++) {
    params->fp32_avx512.output_min[lane] = output_min;
  }
  for (size_t lane = 0; lane < 32; lane++) {
    params->fp32_avx512.output_zero_point[lane] = zero_point16;
  }
  for (size_t lane = 0; lane < 16; lane++) {
    params->fp32_avx512.output_max_less_zero_point[lane] = max_offset;
  }

  return sizeof(params->fp32_avx512);
}
132*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
133*4bdc9457SAndroid Build Coastguard Worker
134*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM
// Fills the fp32_armsimd32 member of the QC8 conv min/max parameters.
//
// The bounds are reinterpreted as unsigned bytes and multiplied by
// 0x01010101, which copies the byte into all four bytes of a 32-bit word.
// 12582912.0f equals 1.5 * 2^23; 0x4B400000 is its IEEE-754 bit pattern.
//
// Returns the size in bytes of the member that was initialized.
size_t xnn_init_qc8_conv_minmax_fp32_armsimd32_params(
  union xnn_qc8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int8_t output_zero_point,
  int8_t output_min,
  int8_t output_max)
{
  const uint32_t min_byte = (uint32_t) (uint8_t) output_min;
  const uint32_t max_byte = (uint32_t) (uint8_t) output_max;

  params->fp32_armsimd32.output_min = min_byte * UINT32_C(0x01010101);
  params->fp32_armsimd32.output_max = max_byte * UINT32_C(0x01010101);
  params->fp32_armsimd32.magic_bias = 12582912.0f;
  params->fp32_armsimd32.magic_bias_less_zero_point = INT32_C(0x4B400000) - (int32_t) output_zero_point;

  return sizeof(params->fp32_armsimd32);
}
147*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_ARM
148*4bdc9457SAndroid Build Coastguard Worker
149*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Fills the fp32_neon member of the QC8 conv min/max parameters.
//
// Stores the bounds unchanged, together with the magic bias 12582912.0f
// (1.5 * 2^23) and its bit pattern 0x4B400000 minus the output zero point.
//
// Returns the size in bytes of the member that was initialized.
size_t xnn_init_qc8_conv_minmax_fp32_neon_params(
  union xnn_qc8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int8_t output_zero_point,
  int8_t output_min,
  int8_t output_max)
{
  const int32_t zero_point = (int32_t) output_zero_point;

  params->fp32_neon.output_min = output_min;
  params->fp32_neon.output_max = output_max;
  params->fp32_neon.magic_bias = 12582912.0f;
  params->fp32_neon.magic_bias_less_output_zero_point = INT32_C(0x4B400000) - zero_point;

  return sizeof(params->fp32_neon);
}
162*4bdc9457SAndroid Build Coastguard Worker
// Fills the fp32_neonv8 member of the QC8 conv min/max parameters.
//
// Stores the bounds unchanged and the output zero point widened to 16 bits.
//
// Returns the size in bytes of the member that was initialized.
size_t xnn_init_qc8_conv_minmax_fp32_neonv8_params(
  union xnn_qc8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int8_t output_zero_point,
  int8_t output_min,
  int8_t output_max)
{
  const int16_t zero_point16 = (int16_t) output_zero_point;

  params->fp32_neonv8.output_min = output_min;
  params->fp32_neonv8.output_max = output_max;
  params->fp32_neonv8.output_zero_point = zero_point16;

  return sizeof(params->fp32_neonv8);
}
174*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
175*4bdc9457SAndroid Build Coastguard Worker
176*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Fills the fp32_wasmsimd member of the QC8 conv min/max parameters.
//
// The magic bias (12582912.0f, i.e. 1.5 * 2^23), the bit pattern of
// (magic bias + lower bound relative to the zero point), and the bias bit
// pattern 0x4B400000 minus the zero point are each written to 2 entries.
// The upper bound is written to 8 entries.
//
// Returns the size in bytes of the member that was initialized.
size_t xnn_init_qc8_conv_minmax_fp32_wasmsimd_params(
  union xnn_qc8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int8_t output_zero_point,
  int8_t output_min,
  int8_t output_max)
{
  const int32_t zero_point = (int32_t) output_zero_point;
  const float min_offset = (float) ((int32_t) output_min - zero_point);
  const int32_t biased_min_bits = (int32_t) float_as_uint32(12582912.0f + min_offset);
  const int32_t bias_bits_less_zero_point = INT32_C(0x4B400000) - zero_point;

  for (size_t lane = 0; lane < 8; lane++) {
    params->fp32_wasmsimd.output_max[lane] = output_max;
  }
  for (size_t lane = 0; lane < 2; lane++) {
    params->fp32_wasmsimd.magic_bias[lane] = 12582912.0f;
  }
  for (size_t lane = 0; lane < 2; lane++) {
    params->fp32_wasmsimd.magic_min[lane] = biased_min_bits;
  }
  for (size_t lane = 0; lane < 2; lane++) {
    params->fp32_wasmsimd.magic_bias_less_output_zero_point[lane] = bias_bits_less_zero_point;
  }

  return sizeof(params->fp32_wasmsimd);
}
196*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
197*4bdc9457SAndroid Build Coastguard Worker
// Fills the fp32_scalar_fmagic member of the QS8 conv min/max parameters.
//
// scale is asserted to lie in [2^-32, 256). The clamping bounds are stored
// as floats relative to the output zero point. 12582912.0f equals 1.5 * 2^23,
// and 0x4B400000 is the IEEE-754 single-precision bit pattern of that value.
//
// Returns the size in bytes of the member that was initialized.
size_t xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params(
  union xnn_qs8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  int8_t output_zero_point,
  int8_t output_min,
  int8_t output_max)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  const int32_t zero_point = (int32_t) output_zero_point;
  const int32_t min_offset = (int32_t) output_min - zero_point;
  const int32_t max_offset = (int32_t) output_max - zero_point;

  params->fp32_scalar_fmagic.magic_bias = 12582912.0f;
  params->fp32_scalar_fmagic.magic_bias_less_output_zero_point = INT32_C(0x4B400000) - zero_point;
  params->fp32_scalar_fmagic.output_min_less_zero_point = (float) min_offset;
  params->fp32_scalar_fmagic.output_max_less_zero_point = (float) max_offset;
  params->fp32_scalar_fmagic.scale = scale;

  return sizeof(params->fp32_scalar_fmagic);
}
215*4bdc9457SAndroid Build Coastguard Worker
// Fills the fp32_scalar_imagic member of the QS8 conv min/max parameters.
//
// scale is asserted to lie in [2^-32, 256). The clamping bounds are stored
// as the integer bit patterns of (magic bias + bound relative to the zero
// point), where the magic bias is 12582912.0f (1.5 * 2^23, bit pattern
// 0x4B400000).
//
// Returns the size in bytes of the member that was initialized.
size_t xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params(
  union xnn_qs8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  int8_t output_zero_point,
  int8_t output_min,
  int8_t output_max)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  const int32_t zero_point = (int32_t) output_zero_point;
  const float min_offset = (float) ((int32_t) output_min - zero_point);
  const float max_offset = (float) ((int32_t) output_max - zero_point);

  const uint32_t biased_min_bits = float_as_uint32(12582912.0f + min_offset);
  const uint32_t biased_max_bits = float_as_uint32(12582912.0f + max_offset);

  params->fp32_scalar_imagic.magic_bias = 12582912.0f;
  params->fp32_scalar_imagic.magic_min = (int32_t) biased_min_bits;
  params->fp32_scalar_imagic.magic_max = (int32_t) biased_max_bits;
  params->fp32_scalar_imagic.magic_bias_less_zero_point = INT32_C(0x4B400000) - zero_point;
  params->fp32_scalar_imagic.scale = scale;

  return sizeof(params->fp32_scalar_imagic);
}
235*4bdc9457SAndroid Build Coastguard Worker
// Fills the fp32_scalar_lrintf member of the QS8 conv min/max parameters.
//
// scale is asserted to lie in [2^-32, 256). Stores the scale, the clamping
// bounds as floats relative to the output zero point, and the zero point
// itself widened to 32 bits.
//
// Returns the size in bytes of the member that was initialized.
size_t xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params(
  union xnn_qs8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  int8_t output_zero_point,
  int8_t output_min,
  int8_t output_max)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  const int32_t zero_point = (int32_t) output_zero_point;
  const int32_t min_offset = (int32_t) output_min - zero_point;
  const int32_t max_offset = (int32_t) output_max - zero_point;

  params->fp32_scalar_lrintf.output_zero_point = zero_point;
  params->fp32_scalar_lrintf.output_min_less_zero_point = (float) min_offset;
  params->fp32_scalar_lrintf.output_max_less_zero_point = (float) max_offset;
  params->fp32_scalar_lrintf.scale = scale;

  return sizeof(params->fp32_scalar_lrintf);
}
252*4bdc9457SAndroid Build Coastguard Worker
253*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Fills the fp32_sse2 member of the QS8 conv min/max parameters.
//
// scale is asserted to lie in [2^-32, 256). Each value is replicated across
// its array: 4 floats each for the scale and the upper bound relative to the
// zero point, and 8 16-bit entries each for the zero point and the lower
// bound.
//
// Returns the size in bytes of the member that was initialized.
size_t xnn_init_qs8_conv_minmax_fp32_sse2_params(
  union xnn_qs8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  int8_t output_zero_point,
  int8_t output_min,
  int8_t output_max)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  const int16_t zero_point16 = (int16_t) output_zero_point;
  const int16_t min16 = (int16_t) output_min;
  const float max_offset = (float) ((int32_t) output_max - (int32_t) output_zero_point);

  for (size_t lane = 0; lane < 4; lane++) {
    params->fp32_sse2.scale[lane] = scale;
  }
  for (size_t lane = 0; lane < 4; lane++) {
    params->fp32_sse2.output_max_less_zero_point[lane] = max_offset;
  }
  for (size_t lane = 0; lane < 8; lane++) {
    params->fp32_sse2.output_zero_point[lane] = zero_point16;
  }
  for (size_t lane = 0; lane < 8; lane++) {
    params->fp32_sse2.output_min[lane] = min16;
  }

  return sizeof(params->fp32_sse2);
}
275*4bdc9457SAndroid Build Coastguard Worker
// Fills the fp32_sse4 member of the QS8 conv min/max parameters.
//
// scale is asserted to lie in [2^-32, 256). Each value is replicated across
// its array: 4 floats each for the scale and the upper bound relative to the
// zero point, 8 16-bit zero-point entries, and 16 entries of the lower bound.
//
// Returns the size in bytes of the member that was initialized.
size_t xnn_init_qs8_conv_minmax_fp32_sse4_params(
  union xnn_qs8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  int8_t output_zero_point,
  int8_t output_min,
  int8_t output_max)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  const int16_t zero_point16 = (int16_t) output_zero_point;
  const float max_offset = (float) ((int32_t) output_max - (int32_t) output_zero_point);

  for (size_t lane = 0; lane < 16; lane++) {
    params->fp32_sse4.output_min[lane] = output_min;
  }
  for (size_t lane = 0; lane < 8; lane++) {
    params->fp32_sse4.output_zero_point[lane] = zero_point16;
  }
  for (size_t lane = 0; lane < 4; lane++) {
    params->fp32_sse4.output_max_less_zero_point[lane] = max_offset;
  }
  for (size_t lane = 0; lane < 4; lane++) {
    params->fp32_sse4.scale[lane] = scale;
  }

  return sizeof(params->fp32_sse4);
}
299*4bdc9457SAndroid Build Coastguard Worker
// Fills the fp32_avx2 member of the QS8 conv min/max parameters.
//
// scale is asserted to lie in [2^-32, 256). Each value is replicated across
// its array: 8 floats each for the scale and the upper bound relative to the
// zero point, 16 16-bit zero-point entries, and 32 entries of the lower bound.
//
// Returns the size in bytes of the member that was initialized.
size_t xnn_init_qs8_conv_minmax_fp32_avx2_params(
  union xnn_qs8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  int8_t output_zero_point,
  int8_t output_min,
  int8_t output_max)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  const int16_t zero_point16 = (int16_t) output_zero_point;
  const float max_offset = (float) ((int32_t) output_max - (int32_t) output_zero_point);

  for (size_t lane = 0; lane < 32; lane++) {
    params->fp32_avx2.output_min[lane] = output_min;
  }
  for (size_t lane = 0; lane < 16; lane++) {
    params->fp32_avx2.output_zero_point[lane] = zero_point16;
  }
  for (size_t lane = 0; lane < 8; lane++) {
    params->fp32_avx2.output_max_less_zero_point[lane] = max_offset;
  }
  for (size_t lane = 0; lane < 8; lane++) {
    params->fp32_avx2.scale[lane] = scale;
  }

  return sizeof(params->fp32_avx2);
}
323*4bdc9457SAndroid Build Coastguard Worker
// Fills the fp32_avx512 member of the QS8 conv min/max parameters.
//
// scale is asserted to lie in [2^-32, 256). Each value is replicated across
// its array: 16 floats each for the scale and the upper bound relative to the
// zero point, 32 16-bit zero-point entries, and 64 entries of the lower bound.
//
// Returns the size in bytes of the member that was initialized.
size_t xnn_init_qs8_conv_minmax_fp32_avx512_params(
  union xnn_qs8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  int8_t output_zero_point,
  int8_t output_min,
  int8_t output_max)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  const int16_t zero_point16 = (int16_t) output_zero_point;
  const float max_offset = (float) ((int32_t) output_max - (int32_t) output_zero_point);

  for (size_t lane = 0; lane < 64; lane++) {
    params->fp32_avx512.output_min[lane] = output_min;
  }
  for (size_t lane = 0; lane < 32; lane++) {
    params->fp32_avx512.output_zero_point[lane] = zero_point16;
  }
  for (size_t lane = 0; lane < 16; lane++) {
    params->fp32_avx512.output_max_less_zero_point[lane] = max_offset;
  }
  for (size_t lane = 0; lane < 16; lane++) {
    params->fp32_avx512.scale[lane] = scale;
  }

  return sizeof(params->fp32_avx512);
}
347*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
348*4bdc9457SAndroid Build Coastguard Worker
349*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM
// Fills the fp32_armsimd32 member of the QS8 conv min/max parameters.
//
// scale is asserted to lie in [2^-32, 256). The bounds are reinterpreted as
// unsigned bytes and multiplied by 0x01010101, which copies the byte into all
// four bytes of a 32-bit word. 12582912.0f equals 1.5 * 2^23; 0x4B400000 is
// its IEEE-754 bit pattern.
//
// Returns the size in bytes of the member that was initialized.
size_t xnn_init_qs8_conv_minmax_fp32_armsimd32_params(
  union xnn_qs8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  int8_t output_zero_point,
  int8_t output_min,
  int8_t output_max)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  const uint32_t min_byte = (uint32_t) (uint8_t) output_min;
  const uint32_t max_byte = (uint32_t) (uint8_t) output_max;

  params->fp32_armsimd32.output_min = min_byte * UINT32_C(0x01010101);
  params->fp32_armsimd32.output_max = max_byte * UINT32_C(0x01010101);
  params->fp32_armsimd32.magic_bias = 12582912.0f;
  params->fp32_armsimd32.magic_bias_less_zero_point = INT32_C(0x4B400000) - (int32_t) output_zero_point;
  params->fp32_armsimd32.scale = scale;

  return sizeof(params->fp32_armsimd32);
}
367*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_ARM
368*4bdc9457SAndroid Build Coastguard Worker
369*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Populates the fp32_neon member of the QS8 convolution min/max parameters.
//
// Writes the scale, the magic bias 12582912.0f, the integer
// 0x4B400000 - output_zero_point (0x4B400000 being the IEEE 754
// single-precision bit pattern of the magic bias), and the output bounds.
//
// scale is asserted to lie in [2**-32, 256).
// Returns sizeof(params->fp32_neon).
size_t xnn_init_qs8_conv_minmax_fp32_neon_params(
  union xnn_qs8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  int8_t output_zero_point,
  int8_t output_min,
  int8_t output_max)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  const float magic_bias = 12582912.0f;
  const int32_t magic_bias_bits = INT32_C(0x4B400000);

  // The stores are independent of each other; bounds first, scaling last.
  params->fp32_neon.output_max = output_max;
  params->fp32_neon.output_min = output_min;
  params->fp32_neon.magic_bias_less_output_zero_point = magic_bias_bits - (int32_t) output_zero_point;
  params->fp32_neon.magic_bias = magic_bias;
  params->fp32_neon.scale = scale;
  return sizeof(params->fp32_neon);
}
387*4bdc9457SAndroid Build Coastguard Worker
// Populates the fp32_neonv8 member of the QS8 convolution min/max parameters.
//
// Copies the scale and the output bounds as given, and stores the output
// zero point widened to a 16-bit signed integer.
//
// scale is asserted to lie in [2**-32, 256).
// Returns sizeof(params->fp32_neonv8).
size_t xnn_init_qs8_conv_minmax_fp32_neonv8_params(
  union xnn_qs8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  int8_t output_zero_point,
  int8_t output_min,
  int8_t output_max)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  const int16_t zero_point_s16 = (int16_t) output_zero_point;

  params[0].fp32_neonv8.scale = scale;
  params[0].fp32_neonv8.output_zero_point = zero_point_s16;
  params[0].fp32_neonv8.output_min = output_min;
  params[0].fp32_neonv8.output_max = output_max;
  return sizeof(params[0].fp32_neonv8);
}
404*4bdc9457SAndroid Build Coastguard Worker
// Initializes the rndnu_neon member of the QS8 convolution min/max parameters.
//
// The value returned by float_as_uint32(scale) is treated as the IEEE 754
// single-precision encoding of scale and is split into:
//   - a 31-bit fixed-point multiplier built from the significand, and
//   - a right shift derived from the biased exponent, which is further
//     divided into a pre-shift and a post-shift.
// Both shifts are stored negated, alongside the multiplier, the output zero
// point (widened to 16 bits) and the output bounds.
//
// scale is asserted to lie in [2**-32, 256).
// Returns sizeof(params->rndnu_neon).
size_t xnn_init_qs8_conv_minmax_rndnu_neon_params(
  union xnn_qs8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  int8_t output_zero_point,
  int8_t output_min,
  int8_t output_max)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  // Compute requantization parameters.
  const uint32_t scale_bits = float_as_uint32(scale);

  // Multiplier is in [0x40000000, 0x7FFFFF80] range: the 23 stored significand
  // bits with the implicit leading one set, moved up by 7 bit positions.
  const int32_t multiplier = (int32_t) (((scale_bits & UINT32_C(0x007FFFFF)) | UINT32_C(0x00800000)) << 7);
  assert(multiplier >= INT32_C(0x40000000));
  assert(multiplier <= INT32_C(0x7FFFFF80));

  // Shift is in [-8, 31] range.
  // The exponent field is converted to int32_t before the subtraction so the
  // expression is evaluated in signed arithmetic. Without the cast, a negative
  // shift (scale >= 1) would only be obtained through unsigned wrap-around
  // followed by an implementation-defined unsigned-to-signed conversion.
  const int32_t shift = 127 + 31 - 32 - (int32_t) (scale_bits >> 23);
  assert(shift >= -8);
  assert(shift < 32);

  // Split shift into pre_shift + post_shift, post_shift in [1, 31] range.
  const int32_t post_shift = math_max_s32(shift, 1);
  const int32_t pre_shift = shift - post_shift;

  params->rndnu_neon.right_pre_shift = -pre_shift;
  params->rndnu_neon.multiplier = multiplier;
  params->rndnu_neon.right_post_shift = -post_shift;
  params->rndnu_neon.output_zero_point = (int16_t) output_zero_point;
  params->rndnu_neon.output_min = output_min;
  params->rndnu_neon.output_max = output_max;
  return sizeof(params->rndnu_neon);
}
440*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
441*4bdc9457SAndroid Build Coastguard Worker
442*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Populates the fp32_wasmsimd member of the QS8 convolution min/max parameters.
//
// Two elements each of scale, magic_bias, magic_min and
// magic_bias_less_output_zero_point are written, followed by eight elements
// of output_max. magic_min is the bit pattern of
// 12582912.0f + (output_min - output_zero_point) as returned by
// float_as_uint32; 0x4B400000 is the IEEE 754 single-precision bit pattern
// of 12582912.0f.
//
// scale is asserted to lie in [2**-32, 256).
// Returns sizeof(params->fp32_wasmsimd).
size_t xnn_init_qs8_conv_minmax_fp32_wasmsimd_params(
  union xnn_qs8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  int8_t output_zero_point,
  int8_t output_min,
  int8_t output_max)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  const float min_less_zero_point = (float) ((int32_t) output_min - (int32_t) output_zero_point);
  const int32_t magic_min_bits = (int32_t) float_as_uint32(12582912.0f + min_less_zero_point);
  const int32_t bias_less_zero_point = INT32_C(0x4B400000) - (int32_t) output_zero_point;

  // The two-element arrays are written out element by element.
  params->fp32_wasmsimd.scale[0] = scale;
  params->fp32_wasmsimd.scale[1] = scale;
  params->fp32_wasmsimd.magic_bias[0] = 12582912.0f;
  params->fp32_wasmsimd.magic_bias[1] = 12582912.0f;
  params->fp32_wasmsimd.magic_min[0] = magic_min_bits;
  params->fp32_wasmsimd.magic_min[1] = magic_min_bits;
  params->fp32_wasmsimd.magic_bias_less_output_zero_point[0] = bias_less_zero_point;
  params->fp32_wasmsimd.magic_bias_less_output_zero_point[1] = bias_less_zero_point;

  for (size_t k = 0; k != 8; k++) {
    params->fp32_wasmsimd.output_max[k] = output_max;
  }
  return sizeof(params->fp32_wasmsimd);
}
467*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
468*4bdc9457SAndroid Build Coastguard Worker
// Populates the fp32_scalar_fmagic member of the QU8 convolution min/max
// parameters.
//
// Stores the kernel zero point as a 32-bit integer, the scale, the output
// bounds expressed relative to the output zero point as floats, the magic
// bias 12582912.0f, and 0x4B400000 - output_zero_point (0x4B400000 being the
// IEEE 754 single-precision bit pattern of the magic bias).
//
// scale is asserted to lie in [2**-32, 256).
// Returns sizeof(params->fp32_scalar_fmagic).
size_t xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params(
  union xnn_qu8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
  uint8_t kernel_zero_point,
  float scale,
  uint8_t output_zero_point,
  uint8_t output_min,
  uint8_t output_max)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  // Widen once so the differences below are taken in 32-bit signed arithmetic.
  const int32_t zero_point = (int32_t) output_zero_point;
  const float min_less_zero_point = (float) ((int32_t) output_min - zero_point);
  const float max_less_zero_point = (float) ((int32_t) output_max - zero_point);

  params->fp32_scalar_fmagic.kernel_zero_point = (int32_t) kernel_zero_point;
  params->fp32_scalar_fmagic.scale = scale;
  params->fp32_scalar_fmagic.output_min_less_zero_point = min_less_zero_point;
  params->fp32_scalar_fmagic.output_max_less_zero_point = max_less_zero_point;
  params->fp32_scalar_fmagic.magic_bias = 12582912.0f;
  params->fp32_scalar_fmagic.magic_bias_less_output_zero_point = INT32_C(0x4B400000) - zero_point;
  return sizeof(params->fp32_scalar_fmagic);
}
488*4bdc9457SAndroid Build Coastguard Worker
// Populates the fp32_scalar_imagic member of the QU8 convolution min/max
// parameters.
//
// magic_min and magic_max are the values float_as_uint32 returns for
// 12582912.0f + (bound - output_zero_point), reinterpreted as int32_t.
// magic_bias_less_zero_point is 0x4B400000 - output_zero_point, where
// 0x4B400000 is the IEEE 754 single-precision bit pattern of 12582912.0f.
//
// scale is asserted to lie in [2**-32, 256).
// Returns sizeof(params->fp32_scalar_imagic).
size_t xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params(
  union xnn_qu8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
  uint8_t kernel_zero_point,
  float scale,
  uint8_t output_zero_point,
  uint8_t output_min,
  uint8_t output_max)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  const float magic_bias = 12582912.0f;
  const int32_t zero_point = (int32_t) output_zero_point;

  const float lower = (float) ((int32_t) output_min - zero_point);
  const float upper = (float) ((int32_t) output_max - zero_point);
  const int32_t magic_lower = (int32_t) float_as_uint32(magic_bias + lower);
  const int32_t magic_upper = (int32_t) float_as_uint32(magic_bias + upper);

  params->fp32_scalar_imagic.kernel_zero_point = (int32_t) kernel_zero_point;
  params->fp32_scalar_imagic.scale = scale;
  params->fp32_scalar_imagic.magic_bias = magic_bias;
  params->fp32_scalar_imagic.magic_min = magic_lower;
  params->fp32_scalar_imagic.magic_max = magic_upper;
  params->fp32_scalar_imagic.magic_bias_less_zero_point = INT32_C(0x4B400000) - zero_point;
  return sizeof(params->fp32_scalar_imagic);
}
510*4bdc9457SAndroid Build Coastguard Worker
// Populates the fp32_scalar_lrintf member of the QU8 convolution min/max
// parameters.
//
// Stores the kernel and output zero points as 32-bit integers, the scale,
// and the output bounds expressed relative to the output zero point as
// floats.
//
// scale is asserted to lie in [2**-32, 256).
// Returns sizeof(params->fp32_scalar_lrintf).
size_t xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params(
  union xnn_qu8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
  uint8_t kernel_zero_point,
  float scale,
  uint8_t output_zero_point,
  uint8_t output_min,
  uint8_t output_max)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  const int32_t zero_point = (int32_t) output_zero_point;
  const int32_t min_offset = (int32_t) output_min - zero_point;
  const int32_t max_offset = (int32_t) output_max - zero_point;

  params[0].fp32_scalar_lrintf.kernel_zero_point = (int32_t) kernel_zero_point;
  params[0].fp32_scalar_lrintf.scale = scale;
  params[0].fp32_scalar_lrintf.output_min_less_zero_point = (float) min_offset;
  params[0].fp32_scalar_lrintf.output_max_less_zero_point = (float) max_offset;
  params[0].fp32_scalar_lrintf.output_zero_point = zero_point;
  return sizeof(params[0].fp32_scalar_lrintf);
}
529*4bdc9457SAndroid Build Coastguard Worker
530*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Populates the fp32_sse2 member of the QU8 convolution min/max parameters.
//
// Each value is replicated across its array:
//   - scale and output_max_less_zero_point: 4 elements,
//   - kernel_zero_point and output_zero_point (as int16_t): 8 elements,
//   - output_min: 16 elements.
//
// scale is asserted to lie in [2**-32, 256).
// Returns sizeof(params->fp32_sse2).
size_t xnn_init_qu8_conv_minmax_fp32_sse2_params(
  union xnn_qu8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
  uint8_t kernel_zero_point,
  float scale,
  uint8_t output_zero_point,
  uint8_t output_min,
  uint8_t output_max)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  const float max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
  const int16_t kernel_zp = (int16_t) kernel_zero_point;
  const int16_t output_zp = (int16_t) output_zero_point;

  for (size_t k = 0; k != 4; k++) {
    params->fp32_sse2.scale[k] = scale;
    params->fp32_sse2.output_max_less_zero_point[k] = max_less_zero_point;
  }
  for (size_t k = 0; k != 8; k++) {
    params->fp32_sse2.kernel_zero_point[k] = kernel_zp;
    params->fp32_sse2.output_zero_point[k] = output_zp;
  }
  for (size_t k = 0; k != 16; k++) {
    params->fp32_sse2.output_min[k] = output_min;
  }
  return sizeof(params->fp32_sse2);
}
556*4bdc9457SAndroid Build Coastguard Worker
// Populates the fp32_avx2 member of the QU8 convolution min/max parameters.
//
// Each value is replicated across its array:
//   - scale and output_max_less_zero_point: 8 elements,
//   - kernel_zero_point and output_zero_point (as int16_t): 16 elements,
//   - output_min: 32 elements.
//
// scale is asserted to lie in [2**-32, 256).
// Returns sizeof(params->fp32_avx2).
size_t xnn_init_qu8_conv_minmax_fp32_avx2_params(
  union xnn_qu8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
  uint8_t kernel_zero_point,
  float scale,
  uint8_t output_zero_point,
  uint8_t output_min,
  uint8_t output_max)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  const float max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);

  // One pass over the longest array; the shorter arrays are filled only while
  // the index is still within their length.
  for (uint32_t k = 0; k < 32; k++) {
    if (k < 8) {
      params->fp32_avx2.scale[k] = scale;
      params->fp32_avx2.output_max_less_zero_point[k] = max_less_zero_point;
    }
    if (k < 16) {
      params->fp32_avx2.kernel_zero_point[k] = (int16_t) kernel_zero_point;
      params->fp32_avx2.output_zero_point[k] = (int16_t) output_zero_point;
    }
    params->fp32_avx2.output_min[k] = output_min;
  }
  return sizeof(params->fp32_avx2);
}
582*4bdc9457SAndroid Build Coastguard Worker
// Populates the fp32_avx512 member of the QU8 convolution min/max parameters.
//
// Each value is replicated across its array:
//   - scale and output_max_less_zero_point: 16 elements,
//   - kernel_zero_point and output_zero_point (as int16_t): 32 elements,
//   - output_min: 64 elements.
//
// scale is asserted to lie in [2**-32, 256).
// Returns sizeof(params->fp32_avx512).
size_t xnn_init_qu8_conv_minmax_fp32_avx512_params(
  union xnn_qu8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
  uint8_t kernel_zero_point,
  float scale,
  uint8_t output_zero_point,
  uint8_t output_min,
  uint8_t output_max)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  const float max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
  const int16_t kernel_zp = (int16_t) (uint16_t) kernel_zero_point;
  const int16_t output_zp = (int16_t) (uint16_t) output_zero_point;

  // Each array is filled from its last element down to index 0.
  for (uint32_t k = 16; k-- > 0; ) {
    params->fp32_avx512.scale[k] = scale;
    params->fp32_avx512.output_max_less_zero_point[k] = max_less_zero_point;
  }
  for (uint32_t k = 32; k-- > 0; ) {
    params->fp32_avx512.kernel_zero_point[k] = kernel_zp;
    params->fp32_avx512.output_zero_point[k] = output_zp;
  }
  for (uint32_t k = 64; k-- > 0; ) {
    params->fp32_avx512.output_min[k] = output_min;
  }
  return sizeof(params->fp32_avx512);
}
608*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
609*4bdc9457SAndroid Build Coastguard Worker
610*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM
// Populates the fp32_armsimd32 member of the QU8 convolution min/max
// parameters.
//
// Stores the scale, the magic bias 12582912.0f, the negated kernel zero point
// replicated into both 16-bit halves of a 32-bit word, the integer
// 0x4B400000 - output_zero_point (0x4B400000 being the IEEE 754
// single-precision bit pattern of the magic bias), and the output bounds,
// each replicated into all four bytes of a 32-bit word.
//
// scale is asserted to lie in [2**-32, 256).
// Returns sizeof(params->fp32_armsimd32).
size_t xnn_init_qu8_conv_minmax_fp32_armsimd32_params(
  union xnn_qu8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
  uint8_t kernel_zero_point,
  float scale,
  uint8_t output_zero_point,
  uint8_t output_min,
  uint8_t output_max)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  // Low 16 bits of -kernel_zero_point, then copied into both halfwords.
  const uint16_t neg_kernel_zp_half = (uint16_t) -(int32_t) kernel_zero_point;
  const uint32_t neg_kernel_zp_pair = (uint32_t) neg_kernel_zp_half * UINT32_C(0x00010001);

  // Multiplying a byte by 0x01010101 copies it into every byte of the word.
  const uint32_t byte_splat = UINT32_C(0x01010101);
  const uint32_t min_word = (uint32_t) output_min * byte_splat;
  const uint32_t max_word = (uint32_t) output_max * byte_splat;

  params->fp32_armsimd32.scale = scale;
  params->fp32_armsimd32.magic_bias = 12582912.0f;
  params->fp32_armsimd32.minus_kernel_zero_point = neg_kernel_zp_pair;
  params->fp32_armsimd32.magic_bias_less_zero_point = INT32_C(0x4B400000) - (int32_t) output_zero_point;
  params->fp32_armsimd32.output_min = min_word;
  params->fp32_armsimd32.output_max = max_word;
  return sizeof(params->fp32_armsimd32);
}
631*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_ARM
632*4bdc9457SAndroid Build Coastguard Worker
633*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Populates the fp32_neon member of the QU8 convolution min/max parameters.
//
// The kernel zero point is written to the first four elements of
// kernel_zero_point. The remaining fields receive the scale, the magic bias
// 12582912.0f, the integer 0x4B400000 - output_zero_point (0x4B400000 being
// the IEEE 754 single-precision bit pattern of the magic bias), and the
// output bounds.
//
// scale is asserted to lie in [2**-32, 256).
// Returns sizeof(params->fp32_neon).
size_t xnn_init_qu8_conv_minmax_fp32_neon_params(
  union xnn_qu8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
  uint8_t kernel_zero_point,
  float scale,
  uint8_t output_zero_point,
  uint8_t output_min,
  uint8_t output_max)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  for (size_t k = 0; k != 4; k++) {
    params->fp32_neon.kernel_zero_point[k] = kernel_zero_point;
  }

  const float magic_bias = 12582912.0f;
  const int32_t magic_bias_bits = INT32_C(0x4B400000);

  params->fp32_neon.scale = scale;
  params->fp32_neon.magic_bias = magic_bias;
  params->fp32_neon.magic_bias_less_output_zero_point = magic_bias_bits - (int32_t) output_zero_point;
  params->fp32_neon.output_min = output_min;
  params->fp32_neon.output_max = output_max;
  return sizeof(params->fp32_neon);
}
656*4bdc9457SAndroid Build Coastguard Worker
// Populates the fp32_neonv8 member of the QU8 convolution min/max parameters.
//
// The kernel zero point is written to the first four elements of
// kernel_zero_point. The scale and output bounds are copied as given, and
// the output zero point is stored as a 16-bit signed integer.
//
// scale is asserted to lie in [2**-32, 256).
// Returns sizeof(params->fp32_neonv8).
size_t xnn_init_qu8_conv_minmax_fp32_neonv8_params(
  union xnn_qu8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
  uint8_t kernel_zero_point,
  float scale,
  uint8_t output_zero_point,
  uint8_t output_min,
  uint8_t output_max)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  const int16_t zero_point_s16 = (int16_t) (uint16_t) output_zero_point;

  uint32_t k = 0;
  while (k < 4) {
    params->fp32_neonv8.kernel_zero_point[k] = kernel_zero_point;
    k += 1;
  }
  params->fp32_neonv8.scale = scale;
  params->fp32_neonv8.output_zero_point = zero_point_s16;
  params->fp32_neonv8.output_min = output_min;
  params->fp32_neonv8.output_max = output_max;
  return sizeof(params->fp32_neonv8);
}
678*4bdc9457SAndroid Build Coastguard Worker
// Initializes the rndnu_neon member of the QU8 convolution min/max parameters.
//
// The value returned by float_as_uint32(scale) is treated as the IEEE 754
// single-precision encoding of scale and is split into:
//   - a 31-bit fixed-point multiplier built from the significand, and
//   - a right shift derived from the biased exponent, which is further
//     divided into a pre-shift and a post-shift.
// Both shifts are stored negated, alongside the multiplier, the kernel zero
// point (written to the first four elements of kernel_zero_point), the output
// zero point (as a 16-bit signed integer) and the output bounds.
//
// scale is asserted to lie in [2**-32, 256).
// Returns sizeof(params->rndnu_neon).
size_t xnn_init_qu8_conv_minmax_rndnu_neon_params(
  union xnn_qu8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
  uint8_t kernel_zero_point,
  float scale,
  uint8_t output_zero_point,
  uint8_t output_min,
  uint8_t output_max)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  // Compute requantization parameters.
  const uint32_t scale_bits = float_as_uint32(scale);

  // Multiplier is in [0x40000000, 0x7FFFFF80] range: the 23 stored significand
  // bits with the implicit leading one set, moved up by 7 bit positions.
  const int32_t multiplier = (int32_t) (((scale_bits & UINT32_C(0x007FFFFF)) | UINT32_C(0x00800000)) << 7);
  assert(multiplier >= INT32_C(0x40000000));
  assert(multiplier <= INT32_C(0x7FFFFF80));

  // Shift is in [-8, 31] range.
  // The exponent field is converted to int32_t before the subtraction so the
  // expression is evaluated in signed arithmetic. Without the cast, a negative
  // shift (scale >= 1) would only be obtained through unsigned wrap-around
  // followed by an implementation-defined unsigned-to-signed conversion.
  const int32_t shift = 127 + 31 - 32 - (int32_t) (scale_bits >> 23);
  assert(shift >= -8);
  assert(shift < 32);

  // Split shift into pre_shift + post_shift, post_shift in [1, 31] range.
  const int32_t post_shift = math_max_s32(shift, 1);
  const int32_t pre_shift = shift - post_shift;

  params->rndnu_neon.kernel_zero_point[0] = kernel_zero_point;
  params->rndnu_neon.kernel_zero_point[1] = kernel_zero_point;
  params->rndnu_neon.kernel_zero_point[2] = kernel_zero_point;
  params->rndnu_neon.kernel_zero_point[3] = kernel_zero_point;
  params->rndnu_neon.right_pre_shift = -pre_shift;
  params->rndnu_neon.multiplier = multiplier;
  params->rndnu_neon.right_post_shift = -post_shift;
  params->rndnu_neon.output_zero_point = (int16_t) (uint16_t) output_zero_point;
  params->rndnu_neon.output_min = output_min;
  params->rndnu_neon.output_max = output_max;
  return sizeof(params->rndnu_neon);
}
719*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
720*4bdc9457SAndroid Build Coastguard Worker
721*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Fills the fp32_wasmsimd variant of the QU8 convolution min/max parameters.
//
// Every value is replicated across a small array (4 x kernel zero point,
// 2 x scale / magic constants, 8 x output maximum).
//
// params            - destination union; only params->fp32_wasmsimd is written.
// kernel_zero_point - kernel zero point, widened to 16 bits before storing.
// scale             - requantization scale; must be in [2**-32, 256).
// output_zero_point - zero point of the quantized output.
// output_min/max    - clamping bounds of the quantized output.
//
// Returns the size in bytes of the fp32_wasmsimd variant.
size_t xnn_init_qu8_conv_minmax_fp32_wasmsimd_params(
  union xnn_qu8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
  uint8_t kernel_zero_point,
  float scale,
  uint8_t output_zero_point,
  uint8_t output_min,
  uint8_t output_max)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  // 12582912.0f == 0x1.8p+23f; its IEEE-754 bit pattern is 0x4B400000.
  const float magic_bias = 12582912.0f;
  const int32_t zero_point = (int32_t) output_zero_point;
  const int32_t min_delta = (int32_t) output_min - zero_point;

  // Bit pattern of (magic bias + lower bound relative to the zero point).
  const int32_t magic_min_bits = (int32_t) float_as_uint32(magic_bias + (float) min_delta);
  // Magic bias bit pattern with the output zero point already subtracted.
  const int32_t magic_bias_bits_less_zero_point = INT32_C(0x4B400000) - zero_point;
  const int16_t kernel_zero_point_s16 = (int16_t) (uint16_t) kernel_zero_point;

  for (size_t lane = 0; lane < 4; lane++) {
    params->fp32_wasmsimd.kernel_zero_point[lane] = kernel_zero_point_s16;
  }
  for (size_t lane = 0; lane < 2; lane++) {
    params->fp32_wasmsimd.scale[lane] = scale;
    params->fp32_wasmsimd.magic_bias[lane] = magic_bias;
    params->fp32_wasmsimd.magic_min[lane] = magic_min_bits;
    params->fp32_wasmsimd.magic_bias_less_output_zero_point[lane] = magic_bias_bits_less_zero_point;
  }
  for (size_t lane = 0; lane < 8; lane++) {
    params->fp32_wasmsimd.output_max[lane] = output_max;
  }
  return sizeof(params->fp32_wasmsimd);
}
750*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
751*4bdc9457SAndroid Build Coastguard Worker
// Copies per-channel scales into a packed buffer, one tile at a time.
//
// channels      - total number of entries read from `scale`.
// channels_tile - number of channels handled per tile (the last tile may be
//                 shorter).
// stride        - byte distance between the start of consecutive tiles in
//                 `packed_w`.
// scale         - source array of `channels` floats.
// packed_w      - destination buffer; written through unaligned stores.
void xnn_init_qc8_scale_fp32_params(
  size_t channels,
  size_t channels_tile,
  size_t stride,
  const float scale[XNN_MIN_ELEMENTS(1)],
  void* packed_w)
{
  size_t first_channel = 0;
  while (first_channel < channels) {
    // The final tile may hold fewer than channels_tile channels.
    const size_t count = min(channels - first_channel, channels_tile);
    const float* tile_scale = scale + first_channel;
    for (size_t k = 0; k < count; k++) {
      unaligned_indexed_store_f32(packed_w, k, tile_scale[k]);
    }
    // Advance to the next tile by the byte stride, not by the element count.
    packed_w = (void*) ((uintptr_t) packed_w + stride);
    first_channel += channels_tile;
  }
}
767*4bdc9457SAndroid Build Coastguard Worker
// Fills the fp32_scalar_fmagic variant of the QS8 average-pooling parameters.
//
// params            - destination union; only params->fp32_scalar_fmagic is
//                     written.
// init_bias         - stored unchanged as the initial bias.
// scale             - requantization scale; must be in [2**-32, 256).
// output_zero_point - zero point of the quantized output.
// output_min/max    - clamping bounds; stored as floats relative to the zero
//                     point.
//
// Returns the size in bytes of the fp32_scalar_fmagic variant.
size_t xnn_init_qs8_avgpool_minmax_fp32_scalar_fmagic_params(
  union xnn_qs8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int32_t init_bias,
  float scale,
  int8_t output_zero_point,
  int8_t output_min,
  int8_t output_max)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  const int32_t zero_point = (int32_t) output_zero_point;
  const int32_t min_delta = (int32_t) output_min - zero_point;
  const int32_t max_delta = (int32_t) output_max - zero_point;

  params->fp32_scalar_fmagic.init_bias = init_bias;
  params->fp32_scalar_fmagic.scale = scale;
  params->fp32_scalar_fmagic.output_min_less_zero_point = (float) min_delta;
  params->fp32_scalar_fmagic.output_max_less_zero_point = (float) max_delta;
  // 12582912.0f == 0x1.8p+23f; 0x4B400000 is the bit pattern of that float.
  params->fp32_scalar_fmagic.magic_bias = 12582912.0f;
  params->fp32_scalar_fmagic.magic_bias_less_output_zero_point = INT32_C(0x4B400000) - zero_point;
  return sizeof(params->fp32_scalar_fmagic);
}
787*4bdc9457SAndroid Build Coastguard Worker
// Overwrites only the bias and scale of an fp32_scalar_fmagic parameter block;
// no other field is written.
//
// scale must be in [2**-32, 256).
void xnn_update_qs8_avgpool_minmax_fp32_scalar_fmagic_params(
  union xnn_qs8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int32_t init_bias,
  float scale)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  params->fp32_scalar_fmagic.scale = scale;
  params->fp32_scalar_fmagic.init_bias = init_bias;
}
799*4bdc9457SAndroid Build Coastguard Worker
// Fills the fp32_scalar_imagic variant of the QS8 average-pooling parameters.
//
// Unlike the fmagic variant, the clamping bounds are stored as the integer
// bit patterns of (magic bias + bound relative to the zero point).
//
// params            - destination union; only params->fp32_scalar_imagic is
//                     written.
// init_bias         - stored unchanged as the initial bias.
// scale             - requantization scale; must be in [2**-32, 256).
// output_zero_point - zero point of the quantized output.
// output_min/max    - clamping bounds of the quantized output.
//
// Returns the size in bytes of the fp32_scalar_imagic variant.
size_t xnn_init_qs8_avgpool_minmax_fp32_scalar_imagic_params(
  union xnn_qs8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int32_t init_bias,
  float scale,
  int8_t output_zero_point,
  int8_t output_min,
  int8_t output_max)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  // 12582912.0f == 0x1.8p+23f; its IEEE-754 bit pattern is 0x4B400000.
  const float magic_bias = 12582912.0f;
  const int32_t zero_point = (int32_t) output_zero_point;
  const float min_delta = (float) ((int32_t) output_min - zero_point);
  const float max_delta = (float) ((int32_t) output_max - zero_point);

  params->fp32_scalar_imagic.init_bias = init_bias;
  params->fp32_scalar_imagic.scale = scale;
  params->fp32_scalar_imagic.magic_bias = magic_bias;
  params->fp32_scalar_imagic.magic_min = (int32_t) float_as_uint32(magic_bias + min_delta);
  params->fp32_scalar_imagic.magic_max = (int32_t) float_as_uint32(magic_bias + max_delta);
  params->fp32_scalar_imagic.magic_bias_less_zero_point = INT32_C(0x4B400000) - zero_point;
  return sizeof(params->fp32_scalar_imagic);
}
821*4bdc9457SAndroid Build Coastguard Worker
// Overwrites only the bias and scale of an fp32_scalar_imagic parameter block;
// no other field is written.
//
// scale must be in [2**-32, 256).
void xnn_update_qs8_avgpool_minmax_fp32_scalar_imagic_params(
  union xnn_qs8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int32_t init_bias,
  float scale)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  params->fp32_scalar_imagic.scale = scale;
  params->fp32_scalar_imagic.init_bias = init_bias;
}
833*4bdc9457SAndroid Build Coastguard Worker
// Fills the fp32_scalar_lrintf variant of the QS8 average-pooling parameters.
//
// params            - destination union; only params->fp32_scalar_lrintf is
//                     written.
// init_bias         - stored unchanged as the initial bias.
// scale             - requantization scale; must be in [2**-32, 256).
// output_zero_point - zero point of the quantized output, stored as int32_t.
// output_min/max    - clamping bounds; stored as floats relative to the zero
//                     point.
//
// Returns the size in bytes of the fp32_scalar_lrintf variant.
size_t xnn_init_qs8_avgpool_minmax_fp32_scalar_lrintf_params(
  union xnn_qs8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int32_t init_bias,
  float scale,
  int8_t output_zero_point,
  int8_t output_min,
  int8_t output_max)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  const int32_t zero_point = (int32_t) output_zero_point;
  const int32_t min_delta = (int32_t) output_min - zero_point;
  const int32_t max_delta = (int32_t) output_max - zero_point;

  params->fp32_scalar_lrintf.init_bias = init_bias;
  params->fp32_scalar_lrintf.scale = scale;
  params->fp32_scalar_lrintf.output_min_less_zero_point = (float) min_delta;
  params->fp32_scalar_lrintf.output_max_less_zero_point = (float) max_delta;
  params->fp32_scalar_lrintf.output_zero_point = zero_point;
  return sizeof(params->fp32_scalar_lrintf);
}
852*4bdc9457SAndroid Build Coastguard Worker
// Overwrites only the bias and scale of an fp32_scalar_lrintf parameter block;
// no other field is written.
//
// scale must be in [2**-32, 256).
void xnn_update_qs8_avgpool_minmax_fp32_scalar_lrintf_params(
  union xnn_qs8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int32_t init_bias,
  float scale)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  params->fp32_scalar_lrintf.scale = scale;
  params->fp32_scalar_lrintf.init_bias = init_bias;
}
864*4bdc9457SAndroid Build Coastguard Worker
865*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Fills the fp32_sse2 variant of the QS8 average-pooling parameters.
//
// Bias, scale and the upper bound (as a float relative to the zero point) are
// replicated 4 times; the zero point and the lower bound are widened to
// 16 bits and replicated 8 times.
//
// params            - destination union; only params->fp32_sse2 is written.
// init_bias         - stored unchanged as the initial bias.
// scale             - requantization scale; must be in [2**-32, 256).
// output_zero_point - zero point of the quantized output.
// output_min/max    - clamping bounds of the quantized output.
//
// Returns the size in bytes of the fp32_sse2 variant.
size_t xnn_init_qs8_avgpool_minmax_fp32_sse2_params(
  union xnn_qs8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int32_t init_bias,
  float scale,
  int8_t output_zero_point,
  int8_t output_min,
  int8_t output_max)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  const int32_t max_delta = (int32_t) output_max - (int32_t) output_zero_point;
  const float max_delta_f32 = (float) max_delta;
  const int16_t zero_point_s16 = (int16_t) output_zero_point;
  const int16_t min_s16 = (int16_t) output_min;

  for (size_t lane = 0; lane < 4; lane++) {
    params->fp32_sse2.init_bias[lane] = init_bias;
    params->fp32_sse2.scale[lane] = scale;
    params->fp32_sse2.output_max_less_zero_point[lane] = max_delta_f32;
  }
  for (size_t lane = 0; lane < 8; lane++) {
    params->fp32_sse2.output_zero_point[lane] = zero_point_s16;
    params->fp32_sse2.output_min[lane] = min_s16;
  }
  return sizeof(params->fp32_sse2);
}
889*4bdc9457SAndroid Build Coastguard Worker
// Overwrites only the 4 replicated bias and scale entries of an fp32_sse2
// parameter block; no other field is written.
//
// scale must be in [2**-32, 256).
void xnn_update_qs8_avgpool_minmax_fp32_sse2_params(
  union xnn_qs8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int32_t init_bias,
  float scale)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  for (size_t lane = 0; lane < 4; lane++) {
    params->fp32_sse2.scale[lane] = scale;
    params->fp32_sse2.init_bias[lane] = init_bias;
  }
}
903*4bdc9457SAndroid Build Coastguard Worker
// Fills the fp32_sse4 variant of the QS8 average-pooling parameters.
//
// Bias, scale and the upper bound (as a float relative to the zero point) are
// replicated 4 times, the zero point is widened to 16 bits and replicated
// 8 times, and the lower bound is replicated 16 times without widening.
//
// params            - destination union; only params->fp32_sse4 is written.
// init_bias         - stored unchanged as the initial bias.
// scale             - requantization scale; must be in [2**-32, 256).
// output_zero_point - zero point of the quantized output.
// output_min/max    - clamping bounds of the quantized output.
//
// Returns the size in bytes of the fp32_sse4 variant.
size_t xnn_init_qs8_avgpool_minmax_fp32_sse4_params(
  union xnn_qs8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int32_t init_bias,
  float scale,
  int8_t output_zero_point,
  int8_t output_min,
  int8_t output_max)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  const int32_t max_delta = (int32_t) output_max - (int32_t) output_zero_point;
  const float max_delta_f32 = (float) max_delta;
  const int16_t zero_point_s16 = (int16_t) output_zero_point;

  for (size_t lane = 0; lane < 4; lane++) {
    params->fp32_sse4.init_bias[lane] = init_bias;
    params->fp32_sse4.scale[lane] = scale;
    params->fp32_sse4.output_max_less_zero_point[lane] = max_delta_f32;
  }
  for (size_t lane = 0; lane < 8; lane++) {
    params->fp32_sse4.output_zero_point[lane] = zero_point_s16;
  }
  for (size_t lane = 0; lane < 16; lane++) {
    params->fp32_sse4.output_min[lane] = output_min;
  }
  return sizeof(params->fp32_sse4);
}
929*4bdc9457SAndroid Build Coastguard Worker
// Overwrites only the 4 replicated bias and scale entries of an fp32_sse4
// parameter block; no other field is written.
//
// scale must be in [2**-32, 256).
void xnn_update_qs8_avgpool_minmax_fp32_sse4_params(
  union xnn_qs8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int32_t init_bias,
  float scale)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  for (size_t lane = 0; lane < 4; lane++) {
    params->fp32_sse4.scale[lane] = scale;
    params->fp32_sse4.init_bias[lane] = init_bias;
  }
}
943*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
944*4bdc9457SAndroid Build Coastguard Worker
945*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Fills the fp32_neon variant of the QS8 average-pooling parameters.
//
// params            - destination union; only params->fp32_neon is written.
// init_bias         - stored unchanged as the initial bias.
// scale             - requantization scale; must be in [2**-32, 256).
// output_zero_point - zero point of the quantized output; folded into the
//                     integer magic-bias constant.
// output_min/max    - clamping bounds, stored unchanged.
//
// Returns the size in bytes of the fp32_neon variant.
size_t xnn_init_qs8_avgpool_minmax_fp32_neon_params(
  union xnn_qs8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int32_t init_bias,
  float scale,
  int8_t output_zero_point,
  int8_t output_min,
  int8_t output_max)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  // 0x4B400000 is the IEEE-754 bit pattern of 12582912.0f (0x1.8p+23f).
  const int32_t magic_bias_bits_less_zero_point = INT32_C(0x4B400000) - (int32_t) output_zero_point;

  params->fp32_neon.init_bias = init_bias;
  params->fp32_neon.scale = scale;
  params->fp32_neon.magic_bias = 12582912.0f;
  params->fp32_neon.magic_bias_less_output_zero_point = magic_bias_bits_less_zero_point;
  params->fp32_neon.output_min = output_min;
  params->fp32_neon.output_max = output_max;
  return sizeof(params->fp32_neon);
}
965*4bdc9457SAndroid Build Coastguard Worker
// Overwrites only the bias and scale of an fp32_neon parameter block; no other
// field is written.
//
// scale must be in [2**-32, 256).
void xnn_update_qs8_avgpool_minmax_fp32_neon_params(
  union xnn_qs8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int32_t init_bias,
  float scale)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  params->fp32_neon.scale = scale;
  params->fp32_neon.init_bias = init_bias;
}
977*4bdc9457SAndroid Build Coastguard Worker
// Fills the fp32_neonv8 variant of the QS8 average-pooling parameters.
//
// params            - destination union; only params->fp32_neonv8 is written.
// init_bias         - stored unchanged as the initial bias.
// scale             - requantization scale; must be in [2**-32, 256).
// output_zero_point - zero point of the quantized output, widened to 16 bits.
// output_min/max    - clamping bounds, stored unchanged.
//
// Returns the size in bytes of the fp32_neonv8 variant.
size_t xnn_init_qs8_avgpool_minmax_fp32_neonv8_params(
  union xnn_qs8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int32_t init_bias,
  float scale,
  int8_t output_zero_point,
  int8_t output_min,
  int8_t output_max)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  const int16_t zero_point_s16 = (int16_t) output_zero_point;

  params->fp32_neonv8.init_bias = init_bias;
  params->fp32_neonv8.scale = scale;
  params->fp32_neonv8.output_zero_point = zero_point_s16;
  params->fp32_neonv8.output_min = output_min;
  params->fp32_neonv8.output_max = output_max;
  return sizeof(params->fp32_neonv8);
}
996*4bdc9457SAndroid Build Coastguard Worker
// Overwrites only the bias and scale of an fp32_neonv8 parameter block; no
// other field is written.
//
// scale must be in [2**-32, 256).
void xnn_update_qs8_avgpool_minmax_fp32_neonv8_params(
  union xnn_qs8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int32_t init_bias,
  float scale)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  params->fp32_neonv8.scale = scale;
  params->fp32_neonv8.init_bias = init_bias;
}
1008*4bdc9457SAndroid Build Coastguard Worker
// Fills the rndnu_neon variant of the QS8 average-pooling parameters.
//
// The floating-point scale is decomposed into a fixed-point multiplier and a
// shift, and the shift is further split into a pre-shift and a post-shift.
//
// params            - destination union; only params->rndnu_neon is written.
// init_bias         - stored unchanged as the initial bias.
// scale             - requantization scale; must be in [2**-32, 256).
// output_zero_point - zero point of the quantized output, widened to 16 bits.
// output_min/max    - clamping bounds, stored unchanged.
//
// Returns the size in bytes of the rndnu_neon variant.
size_t xnn_init_qs8_avgpool_minmax_rndnu_neon_params(
  union xnn_qs8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int32_t init_bias,
  float scale,
  int8_t output_zero_point,
  int8_t output_min,
  int8_t output_max)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  // Compute requantization parameters.
  const uint32_t scale_bits = float_as_uint32(scale);

  // Take the 23 mantissa bits, restore the implicit leading one, and move the
  // resulting 24-bit value up by 7 bits.
  // Multiplier is in [0x40000000, 0x7FFFFF80] range.
  const int32_t multiplier = (int32_t) (((scale_bits & UINT32_C(0x007FFFFF)) | UINT32_C(0x00800000)) << 7);
  assert(multiplier >= INT32_C(0x40000000));
  assert(multiplier <= INT32_C(0x7FFFFF80));

  // Convert the biased exponent to a signed type BEFORE subtracting. Mixing it
  // as uint32_t would perform the subtraction in unsigned arithmetic, which
  // wraps for scale >= 1.0 and then depends on an implementation-defined
  // unsigned-to-signed conversion to recover the negative shift.
  const int32_t biased_exponent = (int32_t) (scale_bits >> 23);

  // Shift is in [-8, 31] range (exponent is in [95, 134] for the asserted
  // scale range).
  const int32_t shift = 127 + 31 - 32 - biased_exponent;
  assert(shift >= -8);
  assert(shift < 32);

  // Split shift into pre_shift + post_shift, post_shift in [1, 31] range.
  const int32_t post_shift = math_max_s32(shift, 1);
  const int32_t pre_shift = shift - post_shift;

  params->rndnu_neon.init_bias = init_bias;
  // Both shifts are stored negated, under the "left_*" field names.
  params->rndnu_neon.left_pre_shift = -pre_shift;
  params->rndnu_neon.multiplier = multiplier;
  params->rndnu_neon.left_post_shift = -post_shift;
  params->rndnu_neon.output_zero_point = (int16_t) output_zero_point;
  params->rndnu_neon.output_min = output_min;
  params->rndnu_neon.output_max = output_max;
  return sizeof(params->rndnu_neon);
}
1046*4bdc9457SAndroid Build Coastguard Worker
// Recomputes the bias and the scale-derived fields (multiplier, pre-shift,
// post-shift) of a rndnu_neon parameter block; no other field is written.
//
// scale must be in [2**-32, 256).
void xnn_update_qs8_avgpool_minmax_rndnu_neon_params(
  union xnn_qs8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int32_t init_bias,
  float scale)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  // Compute requantization parameters.
  const uint32_t scale_bits = float_as_uint32(scale);

  // Take the 23 mantissa bits, restore the implicit leading one, and move the
  // resulting 24-bit value up by 7 bits.
  // Multiplier is in [0x40000000, 0x7FFFFF80] range.
  const int32_t multiplier = (int32_t) (((scale_bits & UINT32_C(0x007FFFFF)) | UINT32_C(0x00800000)) << 7);
  assert(multiplier >= INT32_C(0x40000000));
  assert(multiplier <= INT32_C(0x7FFFFF80));

  // Convert the biased exponent to a signed type BEFORE subtracting. Mixing it
  // as uint32_t would perform the subtraction in unsigned arithmetic, which
  // wraps for scale >= 1.0 and then depends on an implementation-defined
  // unsigned-to-signed conversion to recover the negative shift.
  const int32_t biased_exponent = (int32_t) (scale_bits >> 23);

  // Shift is in [-8, 31] range (exponent is in [95, 134] for the asserted
  // scale range).
  const int32_t shift = 127 + 31 - 32 - biased_exponent;
  assert(shift >= -8);
  assert(shift < 32);

  // Split shift into pre_shift + post_shift, post_shift in [1, 31] range.
  const int32_t post_shift = math_max_s32(shift, 1);
  const int32_t pre_shift = shift - post_shift;

  params->rndnu_neon.init_bias = init_bias;
  // Both shifts are stored negated, under the "left_*" field names.
  params->rndnu_neon.left_pre_shift = -pre_shift;
  params->rndnu_neon.multiplier = multiplier;
  params->rndnu_neon.left_post_shift = -post_shift;
}
1077*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1078*4bdc9457SAndroid Build Coastguard Worker
1079*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Fills the fp32_wasmsimd variant of the QS8 average-pooling parameters.
//
// Bias, scale and the magic constants are replicated twice; the output
// maximum is replicated 8 times.
//
// params            - destination union; only params->fp32_wasmsimd is written.
// init_bias         - stored unchanged as the initial bias.
// scale             - requantization scale; must be in [2**-32, 256).
// output_zero_point - zero point of the quantized output.
// output_min/max    - clamping bounds of the quantized output.
//
// Returns the size in bytes of the fp32_wasmsimd variant.
size_t xnn_init_qs8_avgpool_minmax_fp32_wasmsimd_params(
  union xnn_qs8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int32_t init_bias,
  float scale,
  int8_t output_zero_point,
  int8_t output_min,
  int8_t output_max)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  // 12582912.0f == 0x1.8p+23f; its IEEE-754 bit pattern is 0x4B400000.
  const float magic_bias = 12582912.0f;
  const int32_t zero_point = (int32_t) output_zero_point;
  const int32_t min_delta = (int32_t) output_min - zero_point;

  // Bit pattern of (magic bias + lower bound relative to the zero point).
  const int32_t magic_min_bits = (int32_t) float_as_uint32(magic_bias + (float) min_delta);
  // Magic bias bit pattern with the output zero point already subtracted.
  const int32_t magic_bias_bits_less_zero_point = INT32_C(0x4B400000) - zero_point;

  for (size_t lane = 0; lane < 2; lane++) {
    params->fp32_wasmsimd.init_bias[lane] = init_bias;
    params->fp32_wasmsimd.scale[lane] = scale;
    params->fp32_wasmsimd.magic_bias[lane] = magic_bias;
    params->fp32_wasmsimd.magic_min[lane] = magic_min_bits;
    params->fp32_wasmsimd.magic_bias_less_output_zero_point[lane] = magic_bias_bits_less_zero_point;
  }
  for (size_t lane = 0; lane < 8; lane++) {
    params->fp32_wasmsimd.output_max[lane] = output_max;
  }
  return sizeof(params->fp32_wasmsimd);
}
1106*4bdc9457SAndroid Build Coastguard Worker
// Rewrites only the init_bias and scale entries (2 elements each) of the fp32_wasmsimd
// QS8 average-pooling parameters; every other field is left as it was.
// scale is asserted to lie in [2^-32, 256).
void xnn_update_qs8_avgpool_minmax_fp32_wasmsimd_params(
  union xnn_qs8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int32_t init_bias,
  float scale)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  for (size_t lane = 0; lane < 2; lane++) {
    params->fp32_wasmsimd.scale[lane] = scale;
    params->fp32_wasmsimd.init_bias[lane] = init_bias;
  }
}
1120*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1121*4bdc9457SAndroid Build Coastguard Worker
// Populates the fp32_scalar_fmagic variant of the QU8 average-pooling min/max parameters.
//
// The clamps are stored as floats relative to the output zero point, alongside the magic
// bias 12582912.0f (0x1.8p+23f) and its bit pattern 0x4B400000 reduced by the zero point.
// scale is asserted to lie in [2^-32, 256).
//
// Returns sizeof(params->fp32_scalar_fmagic).
size_t xnn_init_qu8_avgpool_minmax_fp32_scalar_fmagic_params(
  union xnn_qu8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int32_t init_bias,
  float scale,
  uint8_t output_zero_point,
  uint8_t output_min,
  uint8_t output_max)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  const int32_t zero_point = (int32_t) output_zero_point;
  const int32_t min_less_zero_point = (int32_t) output_min - zero_point;
  const int32_t max_less_zero_point = (int32_t) output_max - zero_point;

  params->fp32_scalar_fmagic.init_bias = init_bias;
  params->fp32_scalar_fmagic.scale = scale;
  params->fp32_scalar_fmagic.output_min_less_zero_point = (float) min_less_zero_point;
  params->fp32_scalar_fmagic.output_max_less_zero_point = (float) max_less_zero_point;
  params->fp32_scalar_fmagic.magic_bias = 12582912.0f;
  // 0x4B400000 is the bit pattern of the magic bias above.
  params->fp32_scalar_fmagic.magic_bias_less_output_zero_point = INT32_C(0x4B400000) - zero_point;
  return sizeof(params->fp32_scalar_fmagic);
}
1141*4bdc9457SAndroid Build Coastguard Worker
// Rewrites only init_bias and scale of the fp32_scalar_fmagic QU8 average-pooling
// parameters; the clamp and magic-bias fields are not touched.
// scale is asserted to lie in [2^-32, 256).
void xnn_update_qu8_avgpool_minmax_fp32_scalar_fmagic_params(
  union xnn_qu8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int32_t init_bias,
  float scale)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  params->fp32_scalar_fmagic.scale = scale;
  params->fp32_scalar_fmagic.init_bias = init_bias;
}
1153*4bdc9457SAndroid Build Coastguard Worker
// Populates the fp32_scalar_imagic variant of the QU8 average-pooling min/max parameters.
//
// Both clamps are stored as the integer bit patterns of (magic bias + clamp relative to
// the zero point), where the magic bias is 12582912.0f (0x1.8p+23f, bits 0x4B400000).
// scale is asserted to lie in [2^-32, 256).
//
// Returns sizeof(params->fp32_scalar_imagic).
size_t xnn_init_qu8_avgpool_minmax_fp32_scalar_imagic_params(
  union xnn_qu8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int32_t init_bias,
  float scale,
  uint8_t output_zero_point,
  uint8_t output_min,
  uint8_t output_max)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  const int32_t zero_point = (int32_t) output_zero_point;
  const float min_less_zero_point = (float) ((int32_t) output_min - zero_point);
  const float max_less_zero_point = (float) ((int32_t) output_max - zero_point);
  const int32_t magic_min = (int32_t) float_as_uint32(12582912.0f + min_less_zero_point);
  const int32_t magic_max = (int32_t) float_as_uint32(12582912.0f + max_less_zero_point);

  params->fp32_scalar_imagic.init_bias = init_bias;
  params->fp32_scalar_imagic.scale = scale;
  params->fp32_scalar_imagic.magic_bias = 12582912.0f;
  params->fp32_scalar_imagic.magic_min = magic_min;
  params->fp32_scalar_imagic.magic_max = magic_max;
  // 0x4B400000 is the bit pattern of the magic bias.
  params->fp32_scalar_imagic.magic_bias_less_zero_point = INT32_C(0x4B400000) - zero_point;
  return sizeof(params->fp32_scalar_imagic);
}
1175*4bdc9457SAndroid Build Coastguard Worker
// Rewrites only init_bias and scale of the fp32_scalar_imagic QU8 average-pooling
// parameters; the magic_* fields are not touched.
// scale is asserted to lie in [2^-32, 256).
void xnn_update_qu8_avgpool_minmax_fp32_scalar_imagic_params(
  union xnn_qu8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int32_t init_bias,
  float scale)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  params->fp32_scalar_imagic.scale = scale;
  params->fp32_scalar_imagic.init_bias = init_bias;
}
1187*4bdc9457SAndroid Build Coastguard Worker
// Populates the fp32_scalar_lrintf variant of the QU8 average-pooling min/max parameters.
//
// The clamps are stored as floats relative to the output zero point; the zero point
// itself is stored widened to int32_t.
// scale is asserted to lie in [2^-32, 256).
//
// Returns sizeof(params->fp32_scalar_lrintf).
size_t xnn_init_qu8_avgpool_minmax_fp32_scalar_lrintf_params(
  union xnn_qu8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int32_t init_bias,
  float scale,
  uint8_t output_zero_point,
  uint8_t output_min,
  uint8_t output_max)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  const int32_t zero_point = (int32_t) output_zero_point;
  const int32_t min_less_zero_point = (int32_t) output_min - zero_point;
  const int32_t max_less_zero_point = (int32_t) output_max - zero_point;

  params->fp32_scalar_lrintf.init_bias = init_bias;
  params->fp32_scalar_lrintf.scale = scale;
  params->fp32_scalar_lrintf.output_min_less_zero_point = (float) min_less_zero_point;
  params->fp32_scalar_lrintf.output_max_less_zero_point = (float) max_less_zero_point;
  params->fp32_scalar_lrintf.output_zero_point = zero_point;
  return sizeof(params->fp32_scalar_lrintf);
}
1206*4bdc9457SAndroid Build Coastguard Worker
// Rewrites only init_bias and scale of the fp32_scalar_lrintf QU8 average-pooling
// parameters; the clamp and zero-point fields are not touched.
// scale is asserted to lie in [2^-32, 256).
void xnn_update_qu8_avgpool_minmax_fp32_scalar_lrintf_params(
  union xnn_qu8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int32_t init_bias,
  float scale)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  params->fp32_scalar_lrintf.scale = scale;
  params->fp32_scalar_lrintf.init_bias = init_bias;
}
1218*4bdc9457SAndroid Build Coastguard Worker
1219*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Populates the fp32_sse2 variant of the QU8 average-pooling min/max parameters.
//
// init_bias, scale and the upper clamp (as a float relative to the zero point) are
// replicated across 4 elements, the zero point (as int16_t) across 8 elements, and the
// raw lower clamp across 16 elements.
// scale is asserted to lie in [2^-32, 256).
//
// Returns sizeof(params->fp32_sse2).
size_t xnn_init_qu8_avgpool_minmax_fp32_sse2_params(
  union xnn_qu8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int32_t init_bias,
  float scale,
  uint8_t output_zero_point,
  uint8_t output_min,
  uint8_t output_max)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  const float max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
  const int16_t zero_point = (int16_t) output_zero_point;

  for (size_t lane = 0; lane < 4; lane++) {
    params->fp32_sse2.init_bias[lane] = init_bias;
    params->fp32_sse2.scale[lane] = scale;
    params->fp32_sse2.output_max_less_zero_point[lane] = max_less_zero_point;
  }
  for (size_t lane = 0; lane < 8; lane++) {
    params->fp32_sse2.output_zero_point[lane] = zero_point;
  }
  for (size_t lane = 0; lane < 16; lane++) {
    params->fp32_sse2.output_min[lane] = output_min;
  }
  return sizeof(params->fp32_sse2);
}
1245*4bdc9457SAndroid Build Coastguard Worker
// Rewrites only the init_bias and scale entries (4 elements each) of the fp32_sse2
// QU8 average-pooling parameters; every other field is left as it was.
// scale is asserted to lie in [2^-32, 256).
void xnn_update_qu8_avgpool_minmax_fp32_sse2_params(
  union xnn_qu8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int32_t init_bias,
  float scale)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  for (size_t lane = 0; lane < 4; lane++) {
    params->fp32_sse2.scale[lane] = scale;
    params->fp32_sse2.init_bias[lane] = init_bias;
  }
}
1259*4bdc9457SAndroid Build Coastguard Worker
// Populates the fp32_sse4 variant of the QU8 average-pooling min/max parameters.
//
// init_bias, scale and the upper clamp (as a float relative to the zero point) are
// replicated across 4 elements, the zero point (as int16_t) across 8 elements, and the
// raw lower clamp across 16 elements.
// scale is asserted to lie in [2^-32, 256).
//
// Returns sizeof(params->fp32_sse4).
size_t xnn_init_qu8_avgpool_minmax_fp32_sse4_params(
  union xnn_qu8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int32_t init_bias,
  float scale,
  uint8_t output_zero_point,
  uint8_t output_min,
  uint8_t output_max)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  const float max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
  const int16_t zero_point = (int16_t) output_zero_point;

  for (size_t lane = 0; lane < 4; lane++) {
    params->fp32_sse4.init_bias[lane] = init_bias;
    params->fp32_sse4.scale[lane] = scale;
    params->fp32_sse4.output_max_less_zero_point[lane] = max_less_zero_point;
  }
  for (size_t lane = 0; lane < 8; lane++) {
    params->fp32_sse4.output_zero_point[lane] = zero_point;
  }
  for (size_t lane = 0; lane < 16; lane++) {
    params->fp32_sse4.output_min[lane] = output_min;
  }
  return sizeof(params->fp32_sse4);
}
1285*4bdc9457SAndroid Build Coastguard Worker
// Rewrites only the init_bias and scale entries (4 elements each) of the fp32_sse4
// QU8 average-pooling parameters; every other field is left as it was.
// scale is asserted to lie in [2^-32, 256).
void xnn_update_qu8_avgpool_minmax_fp32_sse4_params(
  union xnn_qu8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int32_t init_bias,
  float scale)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  for (size_t lane = 0; lane < 4; lane++) {
    params->fp32_sse4.scale[lane] = scale;
    params->fp32_sse4.init_bias[lane] = init_bias;
  }
}
1299*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1300*4bdc9457SAndroid Build Coastguard Worker
1301*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Populates the fp32_neon variant of the QU8 average-pooling min/max parameters.
//
// Stores init_bias, scale, the magic bias 12582912.0f (0x1.8p+23f), the magic bias bit
// pattern 0x4B400000 reduced by the output zero point, and the raw output clamps.
// scale is asserted to lie in [2^-32, 256).
//
// Returns sizeof(params->fp32_neon).
size_t xnn_init_qu8_avgpool_minmax_fp32_neon_params(
  union xnn_qu8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int32_t init_bias,
  float scale,
  uint8_t output_zero_point,
  uint8_t output_min,
  uint8_t output_max)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  // 0x4B400000 is the bit pattern of 12582912.0f.
  const int32_t magic_bias_less_zero_point = INT32_C(0x4B400000) - (int32_t) output_zero_point;

  params->fp32_neon.init_bias = init_bias;
  params->fp32_neon.scale = scale;
  params->fp32_neon.magic_bias = 12582912.0f;
  params->fp32_neon.magic_bias_less_output_zero_point = magic_bias_less_zero_point;
  params->fp32_neon.output_min = output_min;
  params->fp32_neon.output_max = output_max;
  return sizeof(params->fp32_neon);
}
1321*4bdc9457SAndroid Build Coastguard Worker
// Rewrites only init_bias and scale of the fp32_neon QU8 average-pooling parameters;
// the magic-bias and clamp fields are not touched.
// scale is asserted to lie in [2^-32, 256).
void xnn_update_qu8_avgpool_minmax_fp32_neon_params(
  union xnn_qu8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int32_t init_bias,
  float scale)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  params->fp32_neon.scale = scale;
  params->fp32_neon.init_bias = init_bias;
}
1333*4bdc9457SAndroid Build Coastguard Worker
// Populates the fp32_neonv8 variant of the QU8 average-pooling min/max parameters.
//
// Stores init_bias and scale unchanged, the output zero point converted to int16_t,
// and the raw output clamps.
// scale is asserted to lie in [2^-32, 256).
//
// Returns sizeof(params->fp32_neonv8).
size_t xnn_init_qu8_avgpool_minmax_fp32_neonv8_params(
  union xnn_qu8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int32_t init_bias,
  float scale,
  uint8_t output_zero_point,
  uint8_t output_min,
  uint8_t output_max)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  const int16_t zero_point = (int16_t) output_zero_point;

  params->fp32_neonv8.init_bias = init_bias;
  params->fp32_neonv8.scale = scale;
  params->fp32_neonv8.output_zero_point = zero_point;
  params->fp32_neonv8.output_min = output_min;
  params->fp32_neonv8.output_max = output_max;
  return sizeof(params->fp32_neonv8);
}
1352*4bdc9457SAndroid Build Coastguard Worker
// Rewrites only init_bias and scale of the fp32_neonv8 QU8 average-pooling parameters;
// the zero-point and clamp fields are not touched.
// scale is asserted to lie in [2^-32, 256).
void xnn_update_qu8_avgpool_minmax_fp32_neonv8_params(
  union xnn_qu8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int32_t init_bias,
  float scale)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  params->fp32_neonv8.scale = scale;
  params->fp32_neonv8.init_bias = init_bias;
}
1364*4bdc9457SAndroid Build Coastguard Worker
// Populates the rndnu_neon variant of the QU8 average-pooling min/max parameters.
//
// The float scale is decomposed into a fixed-point multiplier (its 24-bit significand,
// implicit leading one included, moved up to bit 30) and a right shift derived from its
// biased exponent. The shift is split into a pre-shift and a post-shift, and both are
// stored negated in the left_pre_shift / left_post_shift fields.
// scale is asserted to lie in [2^-32, 256).
//
// Returns sizeof(params->rndnu_neon).
size_t xnn_init_qu8_avgpool_minmax_rndnu_neon_params(
  union xnn_qu8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int32_t init_bias,
  float scale,
  uint8_t output_zero_point,
  uint8_t output_min,
  uint8_t output_max)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  // Compute requantization parameters.
  const uint32_t scale_bits = float_as_uint32(scale);

  // Multiplier is in [0x40000000, 0x7FFFFF80] range.
  const int32_t multiplier = (int32_t) (((scale_bits & UINT32_C(0x007FFFFF)) | UINT32_C(0x00800000)) << 7);
  assert(multiplier >= INT32_C(0x40000000));
  assert(multiplier <= INT32_C(0x7FFFFF80));

  // Shift is in [-8, 31] range.
  // The biased exponent is converted to a signed value before subtracting: a negative
  // shift (scale >= 1.0) is then produced by signed arithmetic rather than by wrapping
  // an unsigned subtraction and relying on implementation-defined narrowing.
  const int32_t shift = 127 + 31 - 32 - (int32_t) (scale_bits >> 23);
  assert(shift >= -8);
  assert(shift < 32);

  // Split shift into pre_shift + post_shift, post_shift in [1, 31] range.
  const int32_t post_shift = math_max_s32(shift, 1);
  const int32_t pre_shift = shift - post_shift;

  params->rndnu_neon.init_bias = init_bias;
  params->rndnu_neon.left_pre_shift = -pre_shift;
  params->rndnu_neon.multiplier = multiplier;
  params->rndnu_neon.left_post_shift = -post_shift;
  params->rndnu_neon.output_zero_point = (int16_t) output_zero_point;
  params->rndnu_neon.output_min = output_min;
  params->rndnu_neon.output_max = output_max;
  return sizeof(params->rndnu_neon);
}
1402*4bdc9457SAndroid Build Coastguard Worker
// Recomputes the scale-dependent fields of the rndnu_neon QU8 average-pooling parameters
// (init_bias, left_pre_shift, multiplier, left_post_shift); the zero-point and clamp
// fields are not touched.
//
// The float scale is decomposed into a fixed-point multiplier (its 24-bit significand,
// implicit leading one included, moved up to bit 30) and a right shift derived from its
// biased exponent; the shift is split into a pre-shift and a post-shift, both stored
// negated.
// scale is asserted to lie in [2^-32, 256).
void xnn_update_qu8_avgpool_minmax_rndnu_neon_params(
  union xnn_qu8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int32_t init_bias,
  float scale)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  // Compute requantization parameters.
  const uint32_t scale_bits = float_as_uint32(scale);

  // Multiplier is in [0x40000000, 0x7FFFFF80] range.
  const int32_t multiplier = (int32_t) (((scale_bits & UINT32_C(0x007FFFFF)) | UINT32_C(0x00800000)) << 7);
  assert(multiplier >= INT32_C(0x40000000));
  assert(multiplier <= INT32_C(0x7FFFFF80));

  // Shift is in [-8, 31] range.
  // The biased exponent is converted to a signed value before subtracting: a negative
  // shift (scale >= 1.0) is then produced by signed arithmetic rather than by wrapping
  // an unsigned subtraction and relying on implementation-defined narrowing.
  const int32_t shift = 127 + 31 - 32 - (int32_t) (scale_bits >> 23);
  assert(shift >= -8);
  assert(shift < 32);

  // Split shift into pre_shift + post_shift, post_shift in [1, 31] range.
  const int32_t post_shift = math_max_s32(shift, 1);
  const int32_t pre_shift = shift - post_shift;

  params->rndnu_neon.init_bias = init_bias;
  params->rndnu_neon.left_pre_shift = -pre_shift;
  params->rndnu_neon.multiplier = multiplier;
  params->rndnu_neon.left_post_shift = -post_shift;
}
1433*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1434*4bdc9457SAndroid Build Coastguard Worker
1435*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Populates the fp32_wasmsimd variant of the QU8 average-pooling min/max parameters.
//
// init_bias and scale are stored unchanged. The lower clamp and the zero point are
// pre-combined with the magic bias 12582912.0f (0x1.8p+23f, whose IEEE-754 bit pattern
// is 0x4B400000); the upper clamp is stored as the raw output_max value.
// scale is asserted to lie in [2^-32, 256).
//
// Returns sizeof(params->fp32_wasmsimd).
size_t xnn_init_qu8_avgpool_minmax_fp32_wasmsimd_params(
  union xnn_qu8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int32_t init_bias,
  float scale,
  uint8_t output_zero_point,
  uint8_t output_min,
  uint8_t output_max)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  const int32_t zero_point = (int32_t) output_zero_point;
  const float min_less_zero_point = (float) ((int32_t) output_min - zero_point);
  // Integer view of (magic bias + lower clamp relative to the zero point).
  const int32_t magic_min = (int32_t) float_as_uint32(12582912.0f + min_less_zero_point);
  // 0x4B400000 is the bit pattern of the magic bias 12582912.0f.
  const int32_t magic_bias_less_zero_point = INT32_C(0x4B400000) - zero_point;

  // Fields replicated across 2 elements.
  for (size_t lane = 0; lane < 2; lane++) {
    params->fp32_wasmsimd.init_bias[lane] = init_bias;
    params->fp32_wasmsimd.scale[lane] = scale;
  }
  for (size_t lane = 0; lane < 2; lane++) {
    params->fp32_wasmsimd.magic_bias[lane] = 12582912.0f;
    params->fp32_wasmsimd.magic_min[lane] = magic_min;
    params->fp32_wasmsimd.magic_bias_less_output_zero_point[lane] = magic_bias_less_zero_point;
  }
  // Upper clamp replicated across 8 elements.
  for (size_t lane = 0; lane < 8; lane++) {
    params->fp32_wasmsimd.output_max[lane] = output_max;
  }
  return sizeof(params->fp32_wasmsimd);
}
1462*4bdc9457SAndroid Build Coastguard Worker
// Overwrites only the init_bias and scale lanes (2 of each) of the
// fp32_wasmsimd QU8 average-pooling parameters; no other field is written.
// scale must lie in [2**-32, 256) (checked by assert only).
void xnn_update_qu8_avgpool_minmax_fp32_wasmsimd_params(
  union xnn_qu8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int32_t init_bias,
  float scale)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  params->fp32_wasmsimd.init_bias[0] = init_bias;
  params->fp32_wasmsimd.scale[0] = scale;
  params->fp32_wasmsimd.init_bias[1] = init_bias;
  params->fp32_wasmsimd.scale[1] = scale;
}
1476*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1477*4bdc9457SAndroid Build Coastguard Worker
// Fills the scalar variant of the QU8 average-pooling parameters.
//
// The floating-point scale is decomposed into a fixed-point pair such that
// scale == multiplier * 2**(-right_shift): the multiplier is the 24-bit
// significand (implicit leading one made explicit) and the shift is derived
// from the biased exponent. scale must lie in [2**-32, 256) (assert only).
//
// Returns the size in bytes of the scalar parameter structure.
size_t xnn_init_qu8_avgpool_minmax_scalar_params(
  union xnn_qu8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int32_t bias,
  float scale,
  uint8_t output_zero_point,
  uint8_t output_min,
  uint8_t output_max)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  const uint32_t bits = float_as_uint32(scale);
  const uint32_t biased_exponent = bits >> 23;

  // Significand with the implicit bit set: always within [0x00800000, 0x00FFFFFF].
  const int32_t multiplier = ((int32_t) bits & INT32_C(0x007FFFFF)) | INT32_C(0x00800000);
  assert(multiplier >= INT32_C(0x00800000));
  assert(multiplier <= INT32_C(0x00FFFFFF));

  // For the accepted scale range the shift lands in [16, 55].
  const int32_t shift = 127 + 23 - biased_exponent;
  assert(shift >= 16);
  assert(shift < 64);

  const uint32_t right_shift = (uint32_t) shift;
  // Half of the divisor 2**right_shift, i.e. the round-to-nearest addend.
  const int64_t rounding = INT64_C(1) << (right_shift - 1);

  const int32_t zero_point = (int32_t) (uint32_t) output_zero_point;

  params->scalar.bias = bias;
  params->scalar.rounding = rounding;
  params->scalar.multiplier = multiplier;
  params->scalar.right_shift = right_shift;
  params->scalar.output_min_less_zero_point = (int32_t) (uint32_t) output_min - zero_point;
  params->scalar.output_max_less_zero_point = (int32_t) (uint32_t) output_max - zero_point;
  params->scalar.output_zero_point = zero_point;
  return sizeof(params->scalar);
}
1514*4bdc9457SAndroid Build Coastguard Worker
1515*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Fills the neon variant of the QU8 average-pooling parameters.
//
// scale is decomposed so that scale == multiplier * 2**(-shift); the shift is
// stored negated in left_shift. scale must lie in [2**-32, 256) (assert only).
//
// Returns the size in bytes of the neon parameter structure.
size_t xnn_init_qu8_avgpool_minmax_neon_params(
  union xnn_qu8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int32_t bias,
  float scale,
  uint8_t output_zero_point,
  uint8_t output_min,
  uint8_t output_max)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  const uint32_t bits = float_as_uint32(scale);
  const uint32_t biased_exponent = bits >> 23;

  // Significand with the implicit bit set: always within [0x00800000, 0x00FFFFFF].
  const int32_t multiplier = ((int32_t) bits & INT32_C(0x007FFFFF)) | INT32_C(0x00800000);
  assert(multiplier >= INT32_C(0x00800000));
  assert(multiplier <= INT32_C(0x00FFFFFF));

  // For the accepted scale range the shift lands in [16, 55].
  const int32_t shift = 127 + 23 - biased_exponent;
  assert(shift >= 16);
  assert(shift < 64);

  params->neon.bias = bias;
  params->neon.multiplier = multiplier;
  // A right shift is stored as a negative left-shift amount.
  params->neon.left_shift = (int64_t) -shift;
  params->neon.output_zero_point = (int16_t) (uint16_t) output_zero_point;
  params->neon.output_min = output_min;
  params->neon.output_max = output_max;
  return sizeof(params->neon);
}
1547*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1548*4bdc9457SAndroid Build Coastguard Worker
1549*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Fills the sse2 variant of the QU8 average-pooling parameters.
//
// scale is decomposed so that scale == multiplier * 2**(-right_shift).
// Replication counts: bias and multiplier 4 lanes, rounding and right_shift
// 2 lanes, output_zero_point 8 lanes, output_min and output_max 16 lanes.
// scale must lie in [2**-32, 256) (assert only).
//
// Returns the size in bytes of the sse2 parameter structure.
size_t xnn_init_qu8_avgpool_minmax_sse2_params(
  union xnn_qu8_avgpool_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int32_t bias,
  float scale,
  uint8_t output_zero_point,
  uint8_t output_min,
  uint8_t output_max)
{
  assert(scale >= 0x1.0p-32f);
  assert(scale < 256.0f);

  const uint32_t bits = float_as_uint32(scale);
  const uint32_t biased_exponent = bits >> 23;

  // Significand with the implicit bit set: always within [0x00800000, 0x00FFFFFF].
  const int32_t multiplier = ((int32_t) bits & INT32_C(0x007FFFFF)) | INT32_C(0x00800000);
  assert(multiplier >= INT32_C(0x00800000));
  assert(multiplier <= INT32_C(0x00FFFFFF));

  // For the accepted scale range the shift lands in [16, 55].
  const int32_t shift = 127 + 23 - biased_exponent;
  assert(shift >= 16);
  assert(shift < 64);

  const uint32_t right_shift = (uint32_t) shift;
  // Half of the divisor 2**right_shift, i.e. the round-to-nearest addend.
  const uint64_t rounding = UINT64_C(1) << (right_shift - 1);
  const int16_t zero_point = (int16_t) (uint16_t) output_zero_point;

  for (uint32_t lane = 0; lane < 4; lane++) {
    params->sse2.bias[lane] = bias;
    params->sse2.multiplier[lane] = (uint32_t) multiplier;
  }
  for (uint32_t lane = 0; lane < 2; lane++) {
    params->sse2.rounding[lane] = rounding;
    params->sse2.right_shift[lane] = (uint64_t) right_shift;
  }
  for (uint32_t lane = 0; lane < 8; lane++) {
    params->sse2.output_zero_point[lane] = zero_point;
  }
  for (uint32_t lane = 0; lane < 16; lane++) {
    params->sse2.output_min[lane] = output_min;
    params->sse2.output_max[lane] = output_max;
  }
  return sizeof(params->sse2);
}
1596*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1597*4bdc9457SAndroid Build Coastguard Worker
xnn_update_qu8_avgpool_minmax_scalar_params(union xnn_qu8_avgpool_minmax_params * params,int32_t bias,float scale)1598*4bdc9457SAndroid Build Coastguard Worker void xnn_update_qu8_avgpool_minmax_scalar_params(
1599*4bdc9457SAndroid Build Coastguard Worker union xnn_qu8_avgpool_minmax_params* params,
1600*4bdc9457SAndroid Build Coastguard Worker int32_t bias,
1601*4bdc9457SAndroid Build Coastguard Worker float scale)
1602*4bdc9457SAndroid Build Coastguard Worker {
1603*4bdc9457SAndroid Build Coastguard Worker // Compute requantization parameters.
1604*4bdc9457SAndroid Build Coastguard Worker assert(scale >= 0x1.0p-32f);
1605*4bdc9457SAndroid Build Coastguard Worker assert(scale < 256.0f);
1606*4bdc9457SAndroid Build Coastguard Worker const uint32_t scale_bits = float_as_uint32(scale);
1607*4bdc9457SAndroid Build Coastguard Worker
1608*4bdc9457SAndroid Build Coastguard Worker // Multiplier is in [0x00800000, 0x00FFFFFF] range.
1609*4bdc9457SAndroid Build Coastguard Worker const int32_t multiplier = ((int32_t) scale_bits & INT32_C(0x007FFFFF)) | INT32_C(0x00800000);
1610*4bdc9457SAndroid Build Coastguard Worker assert(multiplier >= INT32_C(0x00800000));
1611*4bdc9457SAndroid Build Coastguard Worker assert(multiplier <= INT32_C(0x00FFFFFF));
1612*4bdc9457SAndroid Build Coastguard Worker
1613*4bdc9457SAndroid Build Coastguard Worker // Shift is in [16, 55] range.
1614*4bdc9457SAndroid Build Coastguard Worker const int32_t shift = 127 + 23 - (scale_bits >> 23);
1615*4bdc9457SAndroid Build Coastguard Worker assert(shift >= 16);
1616*4bdc9457SAndroid Build Coastguard Worker assert(shift < 64);
1617*4bdc9457SAndroid Build Coastguard Worker
1618*4bdc9457SAndroid Build Coastguard Worker const int64_t rounding = INT64_C(1) << ((uint32_t) shift - 1);
1619*4bdc9457SAndroid Build Coastguard Worker params->scalar.bias = bias;
1620*4bdc9457SAndroid Build Coastguard Worker params->scalar.multiplier = multiplier;
1621*4bdc9457SAndroid Build Coastguard Worker params->scalar.rounding = rounding;
1622*4bdc9457SAndroid Build Coastguard Worker params->scalar.right_shift = (uint32_t) shift;
1623*4bdc9457SAndroid Build Coastguard Worker }
1624*4bdc9457SAndroid Build Coastguard Worker
1625*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM || XNN_ARCH_ARM64
xnn_update_qu8_avgpool_minmax_neon_params(union xnn_qu8_avgpool_minmax_params * params,int32_t bias,float scale)1626*4bdc9457SAndroid Build Coastguard Worker void xnn_update_qu8_avgpool_minmax_neon_params(
1627*4bdc9457SAndroid Build Coastguard Worker union xnn_qu8_avgpool_minmax_params* params,
1628*4bdc9457SAndroid Build Coastguard Worker int32_t bias,
1629*4bdc9457SAndroid Build Coastguard Worker float scale)
1630*4bdc9457SAndroid Build Coastguard Worker {
1631*4bdc9457SAndroid Build Coastguard Worker // Compute requantization parameters.
1632*4bdc9457SAndroid Build Coastguard Worker assert(scale >= 0x1.0p-32f);
1633*4bdc9457SAndroid Build Coastguard Worker assert(scale < 256.0f);
1634*4bdc9457SAndroid Build Coastguard Worker const uint32_t scale_bits = float_as_uint32(scale);
1635*4bdc9457SAndroid Build Coastguard Worker
1636*4bdc9457SAndroid Build Coastguard Worker // Multiplier is in [0x00800000, 0x00FFFFFF] range.
1637*4bdc9457SAndroid Build Coastguard Worker const int32_t multiplier = ((int32_t) scale_bits & INT32_C(0x007FFFFF)) | INT32_C(0x00800000);
1638*4bdc9457SAndroid Build Coastguard Worker assert(multiplier >= INT32_C(0x00800000));
1639*4bdc9457SAndroid Build Coastguard Worker assert(multiplier <= INT32_C(0x00FFFFFF));
1640*4bdc9457SAndroid Build Coastguard Worker
1641*4bdc9457SAndroid Build Coastguard Worker // Shift is in [16, 55] range.
1642*4bdc9457SAndroid Build Coastguard Worker const int32_t shift = 127 + 23 - (scale_bits >> 23);
1643*4bdc9457SAndroid Build Coastguard Worker assert(shift >= 16);
1644*4bdc9457SAndroid Build Coastguard Worker assert(shift < 64);
1645*4bdc9457SAndroid Build Coastguard Worker
1646*4bdc9457SAndroid Build Coastguard Worker params->neon.bias = bias;
1647*4bdc9457SAndroid Build Coastguard Worker params->neon.multiplier = multiplier;
1648*4bdc9457SAndroid Build Coastguard Worker params->neon.left_shift = (int64_t) -shift;
1649*4bdc9457SAndroid Build Coastguard Worker }
1650*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1651*4bdc9457SAndroid Build Coastguard Worker
1652*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
xnn_update_qu8_avgpool_minmax_sse2_params(union xnn_qu8_avgpool_minmax_params * params,int32_t bias,float scale)1653*4bdc9457SAndroid Build Coastguard Worker void xnn_update_qu8_avgpool_minmax_sse2_params(
1654*4bdc9457SAndroid Build Coastguard Worker union xnn_qu8_avgpool_minmax_params* params,
1655*4bdc9457SAndroid Build Coastguard Worker int32_t bias,
1656*4bdc9457SAndroid Build Coastguard Worker float scale)
1657*4bdc9457SAndroid Build Coastguard Worker {
1658*4bdc9457SAndroid Build Coastguard Worker // Compute requantization parameters.
1659*4bdc9457SAndroid Build Coastguard Worker assert(scale >= 0x1.0p-32f);
1660*4bdc9457SAndroid Build Coastguard Worker assert(scale < 256.0f);
1661*4bdc9457SAndroid Build Coastguard Worker const uint32_t scale_bits = float_as_uint32(scale);
1662*4bdc9457SAndroid Build Coastguard Worker
1663*4bdc9457SAndroid Build Coastguard Worker // Multiplier is in [0x00800000, 0x00FFFFFF] range.
1664*4bdc9457SAndroid Build Coastguard Worker const int32_t multiplier = ((int32_t) scale_bits & INT32_C(0x007FFFFF)) | INT32_C(0x00800000);
1665*4bdc9457SAndroid Build Coastguard Worker assert(multiplier >= INT32_C(0x00800000));
1666*4bdc9457SAndroid Build Coastguard Worker assert(multiplier <= INT32_C(0x00FFFFFF));
1667*4bdc9457SAndroid Build Coastguard Worker
1668*4bdc9457SAndroid Build Coastguard Worker // Shift is in [16, 55] range.
1669*4bdc9457SAndroid Build Coastguard Worker const int32_t shift = 127 + 23 - (scale_bits >> 23);
1670*4bdc9457SAndroid Build Coastguard Worker assert(shift >= 16);
1671*4bdc9457SAndroid Build Coastguard Worker assert(shift < 64);
1672*4bdc9457SAndroid Build Coastguard Worker
1673*4bdc9457SAndroid Build Coastguard Worker const uint64_t rounding = UINT64_C(1) << ((uint32_t) shift - 1);
1674*4bdc9457SAndroid Build Coastguard Worker params->sse2.bias[0] = bias;
1675*4bdc9457SAndroid Build Coastguard Worker params->sse2.bias[1] = bias;
1676*4bdc9457SAndroid Build Coastguard Worker params->sse2.bias[2] = bias;
1677*4bdc9457SAndroid Build Coastguard Worker params->sse2.bias[3] = bias;
1678*4bdc9457SAndroid Build Coastguard Worker params->sse2.multiplier[0] = (uint32_t) multiplier;
1679*4bdc9457SAndroid Build Coastguard Worker params->sse2.multiplier[1] = (uint32_t) multiplier;
1680*4bdc9457SAndroid Build Coastguard Worker params->sse2.multiplier[2] = (uint32_t) multiplier;
1681*4bdc9457SAndroid Build Coastguard Worker params->sse2.multiplier[3] = (uint32_t) multiplier;
1682*4bdc9457SAndroid Build Coastguard Worker params->sse2.rounding[0] = rounding;
1683*4bdc9457SAndroid Build Coastguard Worker params->sse2.rounding[1] = rounding;
1684*4bdc9457SAndroid Build Coastguard Worker params->sse2.right_shift[0] = (uint64_t) (uint32_t) shift;
1685*4bdc9457SAndroid Build Coastguard Worker params->sse2.right_shift[1] = (uint64_t) (uint32_t) shift;
1686*4bdc9457SAndroid Build Coastguard Worker }
1687*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1688*4bdc9457SAndroid Build Coastguard Worker
xnn_update_f32_scaleminmax_scalar_params(union xnn_f32_scaleminmax_params * params,float scale)1689*4bdc9457SAndroid Build Coastguard Worker void xnn_update_f32_scaleminmax_scalar_params(
1690*4bdc9457SAndroid Build Coastguard Worker union xnn_f32_scaleminmax_params* params,
1691*4bdc9457SAndroid Build Coastguard Worker float scale)
1692*4bdc9457SAndroid Build Coastguard Worker {
1693*4bdc9457SAndroid Build Coastguard Worker params->scalar.scale = scale;
1694*4bdc9457SAndroid Build Coastguard Worker }
1695*4bdc9457SAndroid Build Coastguard Worker
1696*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
xnn_update_f32_scaleminmax_sse_params(union xnn_f32_scaleminmax_params * params,float scale)1697*4bdc9457SAndroid Build Coastguard Worker void xnn_update_f32_scaleminmax_sse_params(
1698*4bdc9457SAndroid Build Coastguard Worker union xnn_f32_scaleminmax_params* params,
1699*4bdc9457SAndroid Build Coastguard Worker float scale)
1700*4bdc9457SAndroid Build Coastguard Worker {
1701*4bdc9457SAndroid Build Coastguard Worker for (uint32_t i = 0; i < 4; i++) {
1702*4bdc9457SAndroid Build Coastguard Worker params->sse.scale[i] = scale;
1703*4bdc9457SAndroid Build Coastguard Worker }
1704*4bdc9457SAndroid Build Coastguard Worker }
1705*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1706*4bdc9457SAndroid Build Coastguard Worker
1707*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Stores scale, min and max unchanged (as 16-bit values) into the neon F16
// scale-min-max parameters.
//
// Returns the size in bytes of the neon parameter structure.
size_t xnn_init_f16_scaleminmax_neon_params(
  union xnn_f16_scaleminmax_params params[XNN_MIN_ELEMENTS(1)],
  uint16_t scale,
  uint16_t min,
  uint16_t max)
{
  params[0].neon.scale = scale;
  params[0].neon.min = min;
  params[0].neon.max = max;
  return sizeof(params[0].neon);
}
1719*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1720*4bdc9457SAndroid Build Coastguard Worker
1721*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Fills the avx F16 scale-min-max parameters. The three 16-bit inputs are
// converted with fp16_ieee_to_fp32_value() and each result is replicated into
// 8 lanes.
//
// Returns the size in bytes of the avx parameter structure.
size_t xnn_init_f16_scaleminmax_avx_params(
  union xnn_f16_scaleminmax_params params[XNN_MIN_ELEMENTS(1)],
  uint16_t scale,
  uint16_t min,
  uint16_t max)
{
  const float scale_as_float = fp16_ieee_to_fp32_value(scale);
  const float min_as_float = fp16_ieee_to_fp32_value(min);
  const float max_as_float = fp16_ieee_to_fp32_value(max);

  for (uint32_t lane = 0; lane < 8; lane++) {
    params->avx.scale[lane] = scale_as_float;
    params->avx.min[lane] = min_as_float;
    params->avx.max[lane] = max_as_float;
  }
  return sizeof(params->avx);
}
1738*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1739*4bdc9457SAndroid Build Coastguard Worker
1740*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Replaces the 16-bit scale stored in the neon F16 scale-min-max parameters;
// min and max are not written.
void xnn_update_f16_scaleminmax_neon_params(
  union xnn_f16_scaleminmax_params params[XNN_MIN_ELEMENTS(1)],
  uint16_t scale)
{
  params[0].neon.scale = scale;
}
1747*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1748*4bdc9457SAndroid Build Coastguard Worker
1749*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Replaces all 8 scale lanes of the avx F16 scale-min-max parameters with the
// fp16_ieee_to_fp32_value() conversion of scale; min and max are not written.
void xnn_update_f16_scaleminmax_avx_params(
  union xnn_f16_scaleminmax_params params[XNN_MIN_ELEMENTS(1)],
  uint16_t scale)
{
  const float scale_as_float = fp16_ieee_to_fp32_value(scale);

  for (uint32_t lane = 0; lane < 8; lane++) {
    params->avx.scale[lane] = scale_as_float;
  }
}
1759*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1760*4bdc9457SAndroid Build Coastguard Worker
// Stores scale, min and max into the scalar F32 scale-min-max parameters.
//
// Returns the size in bytes of the scalar parameter structure.
size_t xnn_init_f32_scaleminmax_scalar_params(
  union xnn_f32_scaleminmax_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  float min,
  float max)
{
  params[0].scalar.scale = scale;
  params[0].scalar.min = min;
  params[0].scalar.max = max;
  return sizeof(params[0].scalar);
}
1772*4bdc9457SAndroid Build Coastguard Worker
1773*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Fills the sse F32 scale-min-max parameters, replicating scale, min and max
// into 4 lanes each.
//
// Returns the size in bytes of the sse parameter structure.
size_t xnn_init_f32_scaleminmax_sse_params(
  union xnn_f32_scaleminmax_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  float min,
  float max)
{
  for (uint32_t lane = 0; lane < 4; lane++) {
    params->sse.scale[lane] = scale;
  }
  for (uint32_t lane = 0; lane < 4; lane++) {
    params->sse.min[lane] = min;
  }
  for (uint32_t lane = 0; lane < 4; lane++) {
    params->sse.max[lane] = max;
  }
  return sizeof(params->sse);
}
1787*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1788*4bdc9457SAndroid Build Coastguard Worker
// Fills the F32 global-average-pooling parameters for the variant selected at
// compile time: sse on x86/x86-64, neon on ARM/ARM64, scalar otherwise.
//
// Besides multiplier / output_min / output_max (replicated into 4 lanes for
// sse), a 4-entry lane mask is written: entry 0 is always all ones and entry
// i (1..3) is all ones exactly when ((width - 1) & 3) >= i, zero otherwise.
//
// Returns the size in bytes of the variant structure that was filled.
size_t xnn_init_f32_gavgpool_params(
  union xnn_f32_gavgpool_params params[XNN_MIN_ELEMENTS(1)],
  float multiplier,
  float output_min,
  float output_max,
  uint32_t width)
{
  // Highest lane index whose mask entry is enabled.
  const uint32_t last_lane = (width - 1) & 3;

  #if XNN_ARCH_X86 || XNN_ARCH_X86_64
    for (uint32_t lane = 0; lane < 4; lane++) {
      params->sse.multiplier[lane] = multiplier;
      params->sse.output_min[lane] = output_min;
      params->sse.output_max[lane] = output_max;
    }

    params->sse.mask[0] = UINT32_C(0xFFFFFFFF);
    for (uint32_t lane = 1; lane < 4; lane++) {
      // Negating an unsigned 0/1 yields 0 or all ones.
      params->sse.mask[lane] = -(uint32_t) (last_lane >= lane);
    }
    return sizeof(params->sse);
  #elif XNN_ARCH_ARM || XNN_ARCH_ARM64
    params->neon.multiplier = multiplier;
    params->neon.output_min = output_min;
    params->neon.output_max = output_max;

    params->neon.mask[0] = UINT32_C(0xFFFFFFFF);
    for (uint32_t lane = 1; lane < 4; lane++) {
      // Negating an unsigned 0/1 yields 0 or all ones.
      params->neon.mask[lane] = -(uint32_t) (last_lane >= lane);
    }
    return sizeof(params->neon);
  #else
    params->scalar.multiplier = multiplier;
    params->scalar.output_min = output_min;
    params->scalar.output_max = output_max;

    params->scalar.mask[0] = UINT32_C(0xFFFFFFFF);
    for (uint32_t lane = 1; lane < 4; lane++) {
      // This variant negates a signed 0/1, giving 0 or -1.
      params->scalar.mask[lane] = -(int32_t) (last_lane >= lane);
    }
    return sizeof(params->scalar);
  #endif
}
1833*4bdc9457SAndroid Build Coastguard Worker
// Fills the neonfp16arith F16 global-average-pooling parameters (4-lane mask
// flavour). Only ARM/ARM64 builds write anything; on every other architecture
// params is left untouched and 0 is returned.
//
// On ARM/ARM64, multiplier / output_min / output_max are stored as given and a
// 4-entry 16-bit lane mask is written: entry 0 is always 0xFFFF and entry i
// (1..3) is the negation of the 0/1 result of ((width - 1) & 3) >= i.
//
// Returns the size in bytes of the neonfp16arith structure, or 0 when not
// built for ARM/ARM64.
size_t xnn_init_f16_gavgpool_neonfp16arith_x4_params(
  union xnn_f16_gavgpool_params params[XNN_MIN_ELEMENTS(1)],
  uint16_t multiplier,
  uint16_t output_min,
  uint16_t output_max,
  uint32_t width)
{
  #if XNN_ARCH_ARM || XNN_ARCH_ARM64
    params->neonfp16arith.multiplier = multiplier;
    params->neonfp16arith.output_min = output_min;
    params->neonfp16arith.output_max = output_max;

    // Highest lane index whose mask entry is enabled.
    const uint32_t last_lane = (width - 1) & 3;
    params->neonfp16arith.mask[0] = UINT16_C(0xFFFF);
    for (uint32_t lane = 1; lane < 4; lane++) {
      params->neonfp16arith.mask[lane] = -(uint16_t) (last_lane >= lane);
    }
    return sizeof(params->neonfp16arith);
  #else
    return 0;
  #endif
}
1856*4bdc9457SAndroid Build Coastguard Worker
// Populates the `neonfp16arith` member of the F16 global-average-pooling
// parameters, including an 8-entry lane mask derived from `width`.
//
// On ARM/ARM64 builds:
//   - multiplier, output_min and output_max are stored verbatim as 16-bit
//     values (presumably half-precision bit patterns - confirm with callers).
//   - mask[0] is always 0xFFFF; mask[k] for k = 1..7 is the negation of
//     (((width - 1) & 7) >= k), i.e. -1 when the comparison holds, else 0.
//   - returns sizeof(params->neonfp16arith).
// On every other architecture nothing is written and 0 is returned.
size_t xnn_init_f16_gavgpool_neonfp16arith_x8_params(
  union xnn_f16_gavgpool_params params[XNN_MIN_ELEMENTS(1)],
  uint16_t multiplier,
  uint16_t output_min,
  uint16_t output_max,
  uint32_t width)
{
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Position of the last lane within a group of eight; width == 0 wraps
  // around to 7 because the subtraction is unsigned.
  const uint32_t last_lane = (width - 1) & 7;

  params->neonfp16arith.mask[0] = UINT16_C(0xFFFF);
  for (uint32_t lane = 1; lane < 8; lane++) {
    params->neonfp16arith.mask[lane] = -(uint16_t) (last_lane >= lane);
  }

  params->neonfp16arith.output_max = output_max;
  params->neonfp16arith.output_min = output_min;
  params->neonfp16arith.multiplier = multiplier;
  return sizeof(params->neonfp16arith);
#else
  return 0;
#endif
}
1883*4bdc9457SAndroid Build Coastguard Worker
xnn_update_f32_gavgpool_params(union xnn_f32_gavgpool_params * params,float multiplier,uint32_t width)1884*4bdc9457SAndroid Build Coastguard Worker void xnn_update_f32_gavgpool_params(
1885*4bdc9457SAndroid Build Coastguard Worker union xnn_f32_gavgpool_params* params,
1886*4bdc9457SAndroid Build Coastguard Worker float multiplier,
1887*4bdc9457SAndroid Build Coastguard Worker uint32_t width)
1888*4bdc9457SAndroid Build Coastguard Worker {
1889*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
1890*4bdc9457SAndroid Build Coastguard Worker for (uint32_t i = 0; i < 4; i++) {
1891*4bdc9457SAndroid Build Coastguard Worker params->sse.multiplier[i] = multiplier;
1892*4bdc9457SAndroid Build Coastguard Worker }
1893*4bdc9457SAndroid Build Coastguard Worker
1894*4bdc9457SAndroid Build Coastguard Worker const uint32_t w = (width - 1) & 3;
1895*4bdc9457SAndroid Build Coastguard Worker params->sse.mask[0] = UINT32_C(0xFFFFFFFF);
1896*4bdc9457SAndroid Build Coastguard Worker params->sse.mask[1] = -(uint32_t) (w >= 1);
1897*4bdc9457SAndroid Build Coastguard Worker params->sse.mask[2] = -(uint32_t) (w >= 2);
1898*4bdc9457SAndroid Build Coastguard Worker params->sse.mask[3] = -(uint32_t) (w >= 3);
1899*4bdc9457SAndroid Build Coastguard Worker #elif XNN_ARCH_ARM || XNN_ARCH_ARM64
1900*4bdc9457SAndroid Build Coastguard Worker params->neon.multiplier = multiplier;
1901*4bdc9457SAndroid Build Coastguard Worker
1902*4bdc9457SAndroid Build Coastguard Worker const uint32_t w = (width - 1) & 3;
1903*4bdc9457SAndroid Build Coastguard Worker params->neon.mask[0] = UINT32_C(0xFFFFFFFF);
1904*4bdc9457SAndroid Build Coastguard Worker params->neon.mask[1] = -(uint32_t) (w >= 1);
1905*4bdc9457SAndroid Build Coastguard Worker params->neon.mask[2] = -(uint32_t) (w >= 2);
1906*4bdc9457SAndroid Build Coastguard Worker params->neon.mask[3] = -(uint32_t) (w >= 3);
1907*4bdc9457SAndroid Build Coastguard Worker #else
1908*4bdc9457SAndroid Build Coastguard Worker params->scalar.multiplier = multiplier;
1909*4bdc9457SAndroid Build Coastguard Worker
1910*4bdc9457SAndroid Build Coastguard Worker const uint32_t w = (width - 1) & 3;
1911*4bdc9457SAndroid Build Coastguard Worker params->scalar.mask[0] = UINT32_C(0xFFFFFFFF);
1912*4bdc9457SAndroid Build Coastguard Worker params->scalar.mask[1] = -(int32_t) (w >= 1);
1913*4bdc9457SAndroid Build Coastguard Worker params->scalar.mask[2] = -(int32_t) (w >= 2);
1914*4bdc9457SAndroid Build Coastguard Worker params->scalar.mask[3] = -(int32_t) (w >= 3);
1915*4bdc9457SAndroid Build Coastguard Worker #endif
1916*4bdc9457SAndroid Build Coastguard Worker }
1917*4bdc9457SAndroid Build Coastguard Worker
// Initialises the `scalar` member of the F32 global-average-pooling
// parameters regardless of the build architecture.
//
// Stores multiplier, output_min and output_max verbatim and builds a 4-entry
// mask: mask[0] is 0xFFFFFFFF and mask[k] for k = 1..3 is the negation of
// (((width - 1) & 3) >= k), i.e. -1 when true and 0 when false.
//
// Returns sizeof(params->scalar).
size_t xnn_init_scalar_f32_gavgpool_params(
  union xnn_f32_gavgpool_params params[XNN_MIN_ELEMENTS(1)],
  float multiplier,
  float output_min,
  float output_max,
  uint32_t width)
{
  // Position of the last lane within a group of four; width == 0 wraps
  // around to 3 because the subtraction is unsigned.
  const uint32_t last_lane = (width - 1) & 3;

  params->scalar.mask[0] = UINT32_C(0xFFFFFFFF);
  for (uint32_t lane = 1; lane < 4; lane++) {
    params->scalar.mask[lane] = -(int32_t) (last_lane >= lane);
  }

  params->scalar.output_max = output_max;
  params->scalar.output_min = output_min;
  params->scalar.multiplier = multiplier;
  return sizeof(params->scalar);
}
1936*4bdc9457SAndroid Build Coastguard Worker
// Initialises the `scalar` member of the BF16 min/max parameters.
//
// Each 16-bit bound is widened to 32 bits, moved into the upper half of the
// word (<< 16) and handed to uint32_as_float(); the results are stored in
// scalar.min / scalar.max. uint32_as_float() is declared elsewhere -
// presumably a bit-for-bit reinterpretation, confirm in xnnpack/math.h.
//
// Returns sizeof(params->scalar).
size_t xnn_init_bf16_minmax_scalar_params(
  union xnn_bf16_minmax_params params[XNN_MIN_ELEMENTS(1)],
  uint16_t output_min,
  uint16_t output_max)
{
  const uint32_t min_bits = (uint32_t) output_min << 16;
  const uint32_t max_bits = (uint32_t) output_max << 16;

  params->scalar.min = uint32_as_float(min_bits);
  params->scalar.max = uint32_as_float(max_bits);
  return sizeof(params->scalar);
}
1946*4bdc9457SAndroid Build Coastguard Worker
1947*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Initialises the `neon` member of the F16 min/max parameters by copying the
// two 16-bit bounds unchanged (no conversion is performed here).
//
// Returns sizeof(params->neon).
size_t xnn_init_f16_minmax_neon_params(
  union xnn_f16_minmax_params params[XNN_MIN_ELEMENTS(1)],
  uint16_t min,
  uint16_t max)
{
  params->neon.max = max;
  params->neon.min = min;
  return sizeof(params->neon);
}
1957*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1958*4bdc9457SAndroid Build Coastguard Worker
1959*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Initialises the `avx` member of the F16 min/max parameters.
//
// Both 16-bit bounds are converted to float with fp16_ieee_to_fp32_value()
// and each converted value is replicated into all 8 entries of avx.min /
// avx.max respectively.
//
// Returns sizeof(params->avx).
size_t xnn_init_f16_minmax_avx_params(
  union xnn_f16_minmax_params params[XNN_MIN_ELEMENTS(1)],
  uint16_t min,
  uint16_t max)
{
  const float lower_bound = fp16_ieee_to_fp32_value(min);
  const float upper_bound = fp16_ieee_to_fp32_value(max);

  for (uint32_t lane = 0; lane < 8; lane++) {
    params->avx.min[lane] = lower_bound;
  }
  for (uint32_t lane = 0; lane < 8; lane++) {
    params->avx.max[lane] = upper_bound;
  }
  return sizeof(params->avx);
}
1973*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1974*4bdc9457SAndroid Build Coastguard Worker
1975*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Initialises the `avx` member of the default F32 parameters, which only
// carries a 14-entry mask table: entries 0..6 are set to -1 and entries
// 7..13 are set to 0.
// NOTE(review): presumably kernels load a window from this table to mask a
// partial vector - confirm against the AVX micro-kernels.
//
// Returns sizeof(params->avx).
size_t xnn_init_f32_default_avx_params(
  union xnn_f32_default_params params[XNN_MIN_ELEMENTS(1)])
{
  for (uint32_t entry = 0; entry < 14; entry++) {
    params->avx.mask_table[entry] = (entry < 7) ? -1 : 0;
  }
  return sizeof(params->avx);
}
1987*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1988*4bdc9457SAndroid Build Coastguard Worker
// Initialises F32 min/max parameters for whichever layout matches the build
// target, and returns the size of the member that was written:
//   x86 / x86-64            : `sse`      - bounds replicated into 4 entries
//   WAsm SIMD / relaxed SIMD: `wasmsimd` - bounds replicated into 2 entries
//   otherwise               : `scalar`   - one min and one max
size_t xnn_init_f32_minmax_params(
  union xnn_f32_minmax_params params[XNN_MIN_ELEMENTS(1)],
  float output_min,
  float output_max)
{
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  for (uint32_t lane = 0; lane < 4; lane++) {
    params->sse.max[lane] = output_max;
    params->sse.min[lane] = output_min;
  }
  return sizeof(params->sse);
#elif XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  for (uint32_t lane = 0; lane < 2; lane++) {
    params->wasmsimd.min[lane] = output_min;
    params->wasmsimd.max[lane] = output_max;
  }
  return sizeof(params->wasmsimd);
#else
  params->scalar.max = output_max;
  params->scalar.min = output_min;
  return sizeof(params->scalar);
#endif
}
2012*4bdc9457SAndroid Build Coastguard Worker
2013*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Initialises the `sse` member of the F32 min/max parameters by replicating
// each bound into all 4 entries of sse.min / sse.max.
//
// Returns sizeof(params->sse).
size_t xnn_init_f32_minmax_sse_params(
  union xnn_f32_minmax_params params[XNN_MIN_ELEMENTS(1)],
  float output_min,
  float output_max)
{
  for (uint32_t lane = 0; lane < 4; lane++) {
    params->sse.min[lane] = output_min;
  }
  for (uint32_t lane = 0; lane < 4; lane++) {
    params->sse.max[lane] = output_max;
  }
  return sizeof(params->sse);
}
2025*4bdc9457SAndroid Build Coastguard Worker
// Initialises the `avx` member of the F32 min/max parameters.
//
// Each bound is replicated into all 8 entries of avx.min / avx.max, and the
// 14-entry mask table is filled with -1 in entries 0..6 and 0 in entries
// 7..13.
//
// Returns sizeof(params->avx).
size_t xnn_init_f32_minmax_avx_params(
  union xnn_f32_minmax_params params[XNN_MIN_ELEMENTS(1)],
  float output_min,
  float output_max)
{
  for (uint32_t entry = 0; entry < 14; entry++) {
    params->avx.mask_table[entry] = (entry < 7) ? -1 : 0;
  }
  for (uint32_t lane = 0; lane < 8; lane++) {
    params->avx.max[lane] = output_max;
    params->avx.min[lane] = output_min;
  }
  return sizeof(params->avx);
}
2043*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2044*4bdc9457SAndroid Build Coastguard Worker
2045*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Initialises the `wasmsimd` member of the F32 min/max parameters by
// replicating each bound into both entries of wasmsimd.min / wasmsimd.max.
//
// Returns sizeof(params->wasmsimd).
size_t xnn_init_f32_minmax_wasmsimd_params(
  union xnn_f32_minmax_params params[XNN_MIN_ELEMENTS(1)],
  float output_min,
  float output_max)
{
  for (uint32_t lane = 0; lane < 2; lane++) {
    params->wasmsimd.min[lane] = output_min;
    params->wasmsimd.max[lane] = output_max;
  }
  return sizeof(params->wasmsimd);
}
2057*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2058*4bdc9457SAndroid Build Coastguard Worker
// Initialises the `scalar` member of the F32 min/max parameters with the
// given bounds, independent of the build architecture.
//
// Returns sizeof(params->scalar).
size_t xnn_init_f32_minmax_scalar_params(
  union xnn_f32_minmax_params params[XNN_MIN_ELEMENTS(1)],
  float output_min,
  float output_max)
{
  params->scalar.max = output_max;
  params->scalar.min = output_min;
  return sizeof(params->scalar);
}
2068*4bdc9457SAndroid Build Coastguard Worker
2069*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Initialises the `neon` member of the F16 HSwish parameters with three
// fixed 16-bit constants. Read as IEEE half-precision bit patterns they are
// approximately 1/6 (0x3155), exactly 3.0 (0x4200) and exactly 6.0 (0x4600).
//
// Returns sizeof(params->neon).
size_t xnn_init_f16_hswish_neon_params(
  union xnn_f16_hswish_params params[XNN_MIN_ELEMENTS(1)])
{
  params->neon.six = UINT16_C(0x4600);
  params->neon.three = UINT16_C(0x4200);
  params->neon.sixth = UINT16_C(0x3155);
  return sizeof(params->neon);
}
2078*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2079*4bdc9457SAndroid Build Coastguard Worker
2080*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Initialises the `avx` member of the F16 HSwish parameters, replicating
// each constant into 8 entries.
//
// `sixth` (0x1.554000p-3f, i.e. 1/6 rounded to a half-precision
// representable value) and `three` (3.0f) are written as float literals,
// whereas `six` is written as the 16-bit pattern 0x4600 (6.0 when read as
// IEEE half precision).
// NOTE(review): the mixed representation looks intentional (float math,
// half-precision clamp) - confirm against the struct declaration and the
// AVX kernel.
//
// Returns sizeof(params->avx).
size_t xnn_init_f16_hswish_avx_params(
  union xnn_f16_hswish_params params[XNN_MIN_ELEMENTS(1)])
{
  for (uint32_t lane = 0; lane < 8; lane++) {
    params->avx.six[lane] = UINT16_C(0x4600);
    params->avx.three[lane] = 3.0f;
    params->avx.sixth[lane] = 0x1.554000p-3f;
  }
  return sizeof(params->avx);
}
2091*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2092*4bdc9457SAndroid Build Coastguard Worker
// Initialises the `scalar` member of the F32 HSwish parameters with the
// constants 1/6 (single-precision approximation 0x1.555556p-3f), 3.0 and
// 6.0.
//
// Returns sizeof(params->scalar).
size_t xnn_init_f32_hswish_scalar_params(
  union xnn_f32_hswish_params params[XNN_MIN_ELEMENTS(1)])
{
  params->scalar.six = 6.0f;
  params->scalar.three = 3.0f;
  params->scalar.sixth = 0x1.555556p-3f;
  return sizeof(params->scalar);
}
2101*4bdc9457SAndroid Build Coastguard Worker
2102*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Initialises the `sse` member of the F32 HSwish parameters, replicating the
// constants 1/6 (0x1.555556p-3f), 0.5 and 1.0 into 4 entries each.
// Note that this layout uses half/one rather than the three/six pair used
// by the scalar layout.
//
// Returns sizeof(params->sse).
size_t xnn_init_f32_hswish_sse_params(
  union xnn_f32_hswish_params params[XNN_MIN_ELEMENTS(1)])
{
  for (uint32_t lane = 0; lane < 4; lane++) {
    params->sse.one[lane] = 1.0f;
    params->sse.half[lane] = 0.5f;
    params->sse.sixth[lane] = 0x1.555556p-3f;
  }
  return sizeof(params->sse);
}
2113*4bdc9457SAndroid Build Coastguard Worker
// Initialises the `avx` member of the F32 HSwish parameters.
//
// The constants 1/6 (0x1.555556p-3f), 0.5 and 1.0 are replicated into 8
// entries each, and the 14-entry mask table is filled with -1 in entries
// 0..6 and 0 in entries 7..13.
//
// Returns sizeof(params->avx).
size_t xnn_init_f32_hswish_avx_params(
  union xnn_f32_hswish_params params[XNN_MIN_ELEMENTS(1)])
{
  for (uint32_t entry = 0; entry < 14; entry++) {
    params->avx.mask_table[entry] = (entry < 7) ? -1 : 0;
  }
  for (uint32_t lane = 0; lane < 8; lane++) {
    params->avx.one[lane] = 1.0f;
    params->avx.half[lane] = 0.5f;
    params->avx.sixth[lane] = 0x1.555556p-3f;
  }
  return sizeof(params->avx);
}
2130*4bdc9457SAndroid Build Coastguard Worker
// Initialises the `avx512` member of the F32 HSwish parameters. Unlike the
// sse/avx layouts, each constant is stored once rather than replicated:
// 1/6 (0x1.555556p-3f), 0.5 and 1.0.
//
// Returns sizeof(params->avx512).
size_t xnn_init_f32_hswish_avx512_params(
  union xnn_f32_hswish_params params[XNN_MIN_ELEMENTS(1)])
{
  params->avx512.one = 1.0f;
  params->avx512.half = 0.5f;
  params->avx512.sixth = 0x1.555556p-3f;
  return sizeof(params->avx512);
}
2139*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2140*4bdc9457SAndroid Build Coastguard Worker
2141*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Initialises the `wasmsimd` member of the F32 HSwish parameters, writing
// the constants 1/6 (0x1.555556p-3f), 3.0 and 6.0 into both entries of
// their respective two-element arrays.
//
// Returns sizeof(params->wasmsimd).
size_t xnn_init_f32_hswish_wasmsimd_params(
  union xnn_f32_hswish_params params[XNN_MIN_ELEMENTS(1)])
{
  params->wasmsimd.sixth[0] = 0x1.555556p-3f;
  params->wasmsimd.sixth[1] = 0x1.555556p-3f;

  params->wasmsimd.three[0] = 3.0f;
  params->wasmsimd.three[1] = 3.0f;

  params->wasmsimd.six[0] = 6.0f;
  params->wasmsimd.six[1] = 6.0f;
  return sizeof(params->wasmsimd);
}
2152*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2153*4bdc9457SAndroid Build Coastguard Worker
2154*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Initialises the `neonfp16arith_rr2_p2` member of the F16 sigmoid
// parameters with fixed 16-bit constants. The trailing comment on each line
// gives the half-precision value that the bit pattern encodes.
//
// Returns sizeof(params->neonfp16arith_rr2_p2).
size_t xnn_init_f16_sigmoid_neonfp16arith_rr2_p2_params(
  union xnn_f16_sigmoid_params params[XNN_MIN_ELEMENTS(1)])
{
  // Range-reduction constants.
  params->neonfp16arith_rr2_p2.minus_log2e = UINT16_C(0xBDC5);  // -0x1.714p+0h
  params->neonfp16arith_rr2_p2.magic_bias = UINT16_C(0x660F);  // 0x1.83Cp+10h
  params->neonfp16arith_rr2_p2.ln2_lo = UINT16_C(0x8AF4);  // -0x1.BD0p-13h
  params->neonfp16arith_rr2_p2.ln2_hi = UINT16_C(0x398C);  // 0x1.630p-1h

  // Polynomial coefficients.
  params->neonfp16arith_rr2_p2.c1 = UINT16_C(0xBC0E);  // -0x1.038p+0h
  params->neonfp16arith_rr2_p2.c2 = UINT16_C(0x37F9);  // 0x1.FE4p-2h

  // Input threshold.
  params->neonfp16arith_rr2_p2.denorm_cutoff = UINT16_C(0xC8DA);  // -0x1.368p+3h
  return sizeof(params->neonfp16arith_rr2_p2);
}
2167*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2168*4bdc9457SAndroid Build Coastguard Worker
2169*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Initialises the `avx2_rr1_p2` member of the F16 sigmoid parameters. All
// constants are written as single-precision floats and replicated into 8
// entries each.
//
// Returns sizeof(params->avx2_rr1_p2).
size_t xnn_init_f16_sigmoid_avx2_rr1_p2_params(
  union xnn_f16_sigmoid_params params[XNN_MIN_ELEMENTS(1)])
{
  for (uint32_t lane = 0; lane < 8; lane++) {
    // Range-reduction constants.
    params->avx2_rr1_p2.log2e[lane] = 0x1.715476p0f;
    params->avx2_rr1_p2.minus_ln2[lane] = -0x1.62E43p-1f;
    params->avx2_rr1_p2.magic_bias[lane] = 0x1.8000FEp23f;

    // Polynomial coefficients.
    params->avx2_rr1_p2.c1[lane] = 0x1.039E10p+0f;
    params->avx2_rr1_p2.c2[lane] = 0x1.FF3A32p-2f;

    // Remaining constants: negative zero as sign mask, unity, and the
    // input threshold.
    params->avx2_rr1_p2.sign_mask[lane] = -0.0f;
    params->avx2_rr1_p2.one[lane] = 1.0f;
    params->avx2_rr1_p2.denorm_cutoff[lane] = -0x1.368000p+3f;
  }
  return sizeof(params->avx2_rr1_p2);
}
2185*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2186*4bdc9457SAndroid Build Coastguard Worker
// Initialises the `scalar_rr2_lut64_p2` member of the F32 sigmoid parameters
// with fixed single-precision constants.
//
// Returns sizeof(params->scalar_rr2_lut64_p2).
size_t xnn_init_f32_sigmoid_scalar_rr2_lut64_p2_params(
  union xnn_f32_sigmoid_params params[XNN_MIN_ELEMENTS(1)])
{
  // Range-reduction constants.
  params->scalar_rr2_lut64_p2.minus_log2e = -0x1.715476p0f;
  params->scalar_rr2_lut64_p2.magic_bias = 0x1.800000p17f;
  params->scalar_rr2_lut64_p2.ln2_lo = -0x1.BD0106p-13f;
  params->scalar_rr2_lut64_p2.ln2_hi = 0x1.630000p-1f;

  // Polynomial coefficient.
  params->scalar_rr2_lut64_p2.c2 = 0x1.FFFF0Ap-2f;

  // Unity and input threshold.
  params->scalar_rr2_lut64_p2.denorm_cutoff = 0x1.5D589Ep+6f;
  params->scalar_rr2_lut64_p2.one = 1.0f;
  return sizeof(params->scalar_rr2_lut64_p2);
}
2199*4bdc9457SAndroid Build Coastguard Worker
// Initialises the `scalar_rr2_lut2048_p1` member of the F32 sigmoid
// parameters with fixed single-precision constants.
//
// Returns sizeof(params->scalar_rr2_lut2048_p1).
size_t xnn_init_f32_sigmoid_scalar_rr2_lut2048_p1_params(
  union xnn_f32_sigmoid_params params[XNN_MIN_ELEMENTS(1)])
{
  // Range-reduction constants.
  params->scalar_rr2_lut2048_p1.minus_log2e = -0x1.715476p0f;
  params->scalar_rr2_lut2048_p1.magic_bias = 0x1.800000p12f;
  params->scalar_rr2_lut2048_p1.ln2_lo = 0x1.7217F8p-8f;
  params->scalar_rr2_lut2048_p1.ln2_hi = 0x1.600000p-1f;

  // Polynomial coefficient.
  params->scalar_rr2_lut2048_p1.c1 = -0x1.FFFFFEp-1f;

  // Unity and input threshold.
  params->scalar_rr2_lut2048_p1.denorm_cutoff = 0x1.5D589Ep+6f;
  params->scalar_rr2_lut2048_p1.one = 1.0f;
  return sizeof(params->scalar_rr2_lut2048_p1);
}
2212*4bdc9457SAndroid Build Coastguard Worker
// Initialises the `scalar_rr2_p5` member of the F32 sigmoid parameters with
// fixed single-precision constants, including five polynomial coefficients
// c1..c5.
//
// Returns sizeof(params->scalar_rr2_p5).
size_t xnn_init_f32_sigmoid_scalar_rr2_p5_params(
  union xnn_f32_sigmoid_params params[XNN_MIN_ELEMENTS(1)])
{
  // Range-reduction constants.
  params->scalar_rr2_p5.minus_log2e = -0x1.715476p0f;
  params->scalar_rr2_p5.magic_bias = 0x1.8000FEp23f;
  params->scalar_rr2_p5.ln2_lo = 0x1.7F7D1Cp-20f;
  params->scalar_rr2_p5.ln2_hi = 0x1.62E400p-1f;

  // Polynomial coefficients, lowest degree first.
  params->scalar_rr2_p5.c1 = -0x1.FFFFF6p-1f;
  params->scalar_rr2_p5.c2 = 0x1.FFFDC6p-2f;
  params->scalar_rr2_p5.c3 = -0x1.555A80p-3f;
  params->scalar_rr2_p5.c4 = 0x1.573A1Ap-5f;
  params->scalar_rr2_p5.c5 = -0x1.0F9F9Cp-7f;

  // Unity and input threshold.
  params->scalar_rr2_p5.denorm_cutoff = 0x1.5D589Ep+6f;
  params->scalar_rr2_p5.one = 1.0f;
  return sizeof(params->scalar_rr2_p5);
}
2229*4bdc9457SAndroid Build Coastguard Worker
2230*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Stores the constants of the `neon_rr2_lut64_p2` variant into `params` and
// returns the size in bytes of that variant.
size_t xnn_init_f32_sigmoid_neon_rr2_lut64_p2_params(
  union xnn_f32_sigmoid_params params[XNN_MIN_ELEMENTS(1)])
{
  // Single polynomial coefficient used by this variant.
  params->neon_rr2_lut64_p2.c2 = 0x1.FFFF0Ap-2f;

  // -log2(e), and ln(2) stored as a high part plus a (negative) low
  // correction (ln2_hi + ln2_lo ~= 0.6931472).
  params->neon_rr2_lut64_p2.minus_log2e = -0x1.715476p0f;
  params->neon_rr2_lut64_p2.ln2_hi = 0x1.630000p-1f;
  params->neon_rr2_lut64_p2.ln2_lo = -0x1.BD0106p-13f;

  params->neon_rr2_lut64_p2.magic_bias = 0x1.800000p17f;
  params->neon_rr2_lut64_p2.denorm_cutoff = 0x1.5D589Ep+6f;

  return sizeof(params->neon_rr2_lut64_p2);
}
2242*4bdc9457SAndroid Build Coastguard Worker
// Stores the constants of the `neon_rr2_lut2048_p1` variant into `params` and
// returns the size in bytes of that variant.
size_t xnn_init_f32_sigmoid_neon_rr2_lut2048_p1_params(
  union xnn_f32_sigmoid_params params[XNN_MIN_ELEMENTS(1)])
{
  // Single polynomial coefficient used by this variant.
  params->neon_rr2_lut2048_p1.c1 = -0x1.FFFFFEp-1f;

  // -log2(e), and ln(2) stored as a high part plus a low correction
  // (ln2_hi + ln2_lo ~= 0.6931472).
  params->neon_rr2_lut2048_p1.minus_log2e = -0x1.715476p0f;
  params->neon_rr2_lut2048_p1.ln2_hi = 0x1.600000p-1f;
  params->neon_rr2_lut2048_p1.ln2_lo = 0x1.7217F8p-8f;

  params->neon_rr2_lut2048_p1.magic_bias = 0x1.800000p12f;
  params->neon_rr2_lut2048_p1.denorm_cutoff = 0x1.5D589Ep+6f;

  return sizeof(params->neon_rr2_lut2048_p1);
}
2254*4bdc9457SAndroid Build Coastguard Worker
// Stores the constants of the `neon_rr2_p5` variant into `params` and returns
// the size in bytes of that variant.
size_t xnn_init_f32_sigmoid_neon_rr2_p5_params(
  union xnn_f32_sigmoid_params params[XNN_MIN_ELEMENTS(1)])
{
  // Polynomial coefficients, listed from c1 up to c5.
  params->neon_rr2_p5.c1 = -0x1.FFFFF6p-1f;
  params->neon_rr2_p5.c2 = 0x1.FFFDC6p-2f;
  params->neon_rr2_p5.c3 = -0x1.555A80p-3f;
  params->neon_rr2_p5.c4 = 0x1.573A1Ap-5f;
  params->neon_rr2_p5.c5 = -0x1.0F9F9Cp-7f;

  // -log2(e), and ln(2) stored as a high part plus a low correction
  // (ln2_hi + ln2_lo ~= 0.6931472).
  params->neon_rr2_p5.minus_log2e = -0x1.715476p0f;
  params->neon_rr2_p5.ln2_hi = 0x1.62E400p-1f;
  params->neon_rr2_p5.ln2_lo = 0x1.7F7D1Cp-20f;

  params->neon_rr2_p5.magic_bias = 0x1.8000FEp23f;
  params->neon_rr2_p5.denorm_cutoff = 0x1.5D589Ep+6f;

  return sizeof(params->neon_rr2_p5);
}
2270*4bdc9457SAndroid Build Coastguard Worker
// Stores the constants of the `neonfma_rr1_lut2048_p1` variant into `params`
// and returns the size in bytes of that variant.
size_t xnn_init_f32_sigmoid_neonfma_rr1_lut2048_p1_params(
  union xnn_f32_sigmoid_params params[XNN_MIN_ELEMENTS(1)])
{
  // Single polynomial coefficient used by this variant.
  params->neonfma_rr1_lut2048_p1.c1 = -0x1.FFFFFEp-1f;

  // -log2(e) and ln(2), the latter as a single float here.
  params->neonfma_rr1_lut2048_p1.minus_log2e = -0x1.715476p0f;
  params->neonfma_rr1_lut2048_p1.ln2 = 0x1.62E430p-1f;

  params->neonfma_rr1_lut2048_p1.magic_bias = 0x1.800000p12f;
  params->neonfma_rr1_lut2048_p1.denorm_cutoff = 0x1.5D589Ep+6f;

  return sizeof(params->neonfma_rr1_lut2048_p1);
}
2281*4bdc9457SAndroid Build Coastguard Worker
// Stores the constants of the `neonfma_rr1_lut64_p2` variant into `params`
// and returns the size in bytes of that variant.
size_t xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params(
  union xnn_f32_sigmoid_params params[XNN_MIN_ELEMENTS(1)])
{
  // Single polynomial coefficient used by this variant.
  params->neonfma_rr1_lut64_p2.c2 = 0x1.FFFF0Ap-2f;

  // -log2(e) and ln(2), the latter as a single float here.
  params->neonfma_rr1_lut64_p2.minus_log2e = -0x1.715476p0f;
  params->neonfma_rr1_lut64_p2.ln2 = 0x1.62E430p-1f;

  params->neonfma_rr1_lut64_p2.magic_bias = 0x1.800000p17f;
  params->neonfma_rr1_lut64_p2.denorm_cutoff = 0x1.5D589Ep+6f;

  return sizeof(params->neonfma_rr1_lut64_p2);
}
2292*4bdc9457SAndroid Build Coastguard Worker
// Stores the constants of the `neonfma_rr1_p5` variant into `params` and
// returns the size in bytes of that variant.
size_t xnn_init_f32_sigmoid_neonfma_rr1_p5_params(
  union xnn_f32_sigmoid_params params[XNN_MIN_ELEMENTS(1)])
{
  // Polynomial coefficients, listed from c1 up to c5.
  params->neonfma_rr1_p5.c1 = -0x1.FFFFF6p-1f;
  params->neonfma_rr1_p5.c2 = 0x1.FFFDC6p-2f;
  params->neonfma_rr1_p5.c3 = -0x1.555A80p-3f;
  params->neonfma_rr1_p5.c4 = 0x1.573A1Ap-5f;
  params->neonfma_rr1_p5.c5 = -0x1.0F9F9Cp-7f;

  // -log2(e) and ln(2), the latter as a single float here.
  params->neonfma_rr1_p5.minus_log2e = -0x1.715476p0f;
  params->neonfma_rr1_p5.ln2 = 0x1.62E430p-1f;

  params->neonfma_rr1_p5.magic_bias = 0x1.8000FEp23f;
  params->neonfma_rr1_p5.denorm_cutoff = 0x1.5D589Ep+6f;

  return sizeof(params->neonfma_rr1_p5);
}
2307*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2308*4bdc9457SAndroid Build Coastguard Worker
2309*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Stores the constants of the `sse2_rr2_lut64_p2` variant into `params`,
// replicating each value across the 4 elements of its array, and returns the
// size in bytes of that variant.
size_t xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params(
  union xnn_f32_sigmoid_params params[XNN_MIN_ELEMENTS(1)])
{
  for (size_t lane = 0; lane < 4; lane++) {
    // Bit masks: -0.0f has only the sign bit set; 0x3F keeps the low 6 bits.
    params->sse2_rr2_lut64_p2.sign_mask[lane] = -0.0f;
    params->sse2_rr2_lut64_p2.index_mask[lane] = UINT32_C(0x3F);

    // log2(e), and -ln(2) stored as a high part plus a low correction.
    params->sse2_rr2_lut64_p2.log2e[lane] = 0x1.715476p0f;
    params->sse2_rr2_lut64_p2.minus_ln2_hi[lane] = -0x1.630000p-1f;
    params->sse2_rr2_lut64_p2.minus_ln2_lo[lane] = 0x1.BD0106p-13f;

    params->sse2_rr2_lut64_p2.magic_bias[lane] = 0x1.800000p17f;
    params->sse2_rr2_lut64_p2.c2[lane] = 0x1.FFFF0Ap-2f;
    params->sse2_rr2_lut64_p2.one[lane] = 1.0f;
    params->sse2_rr2_lut64_p2.denorm_cutoff[lane] = -0x1.5D589Ep+6f;
  }
  return sizeof(params->sse2_rr2_lut64_p2);
}
2326*4bdc9457SAndroid Build Coastguard Worker
// Stores the constants of the `sse2_rr2_p5` variant into `params`,
// replicating each value across the 4 elements of its array, and returns the
// size in bytes of that variant.
size_t xnn_init_f32_sigmoid_sse2_rr2_p5_params(
  union xnn_f32_sigmoid_params params[XNN_MIN_ELEMENTS(1)])
{
  for (size_t lane = 0; lane < 4; lane++) {
    // Polynomial coefficients, listed from c1 up to c5.
    params->sse2_rr2_p5.c1[lane] = 0x1.FFFFF6p-1f;
    params->sse2_rr2_p5.c2[lane] = 0x1.FFFDC6p-2f;
    params->sse2_rr2_p5.c3[lane] = 0x1.555A80p-3f;
    params->sse2_rr2_p5.c4[lane] = 0x1.573A1Ap-5f;
    params->sse2_rr2_p5.c5[lane] = 0x1.0F9F9Cp-7f;

    // log2(e), and -ln(2) stored as a high part plus a low correction.
    params->sse2_rr2_p5.log2e[lane] = 0x1.715476p0f;
    params->sse2_rr2_p5.minus_ln2_hi[lane] = -0x1.62E400p-1f;
    params->sse2_rr2_p5.minus_ln2_lo[lane] = -0x1.7F7D1Cp-20f;

    // -0.0f has only the sign bit set.
    params->sse2_rr2_p5.sign_mask[lane] = -0.0f;
    params->sse2_rr2_p5.magic_bias[lane] = 0x1.8000FEp23f;
    params->sse2_rr2_p5.one[lane] = 1.0f;
    params->sse2_rr2_p5.denorm_cutoff[lane] = -0x1.5D589Ep+6f;
  }
  return sizeof(params->sse2_rr2_p5);
}
2346*4bdc9457SAndroid Build Coastguard Worker
// Stores the constants of the `avx_rr2_p5` variant into `params`, replicating
// each value across the 8 elements of its array, fills the 14-entry
// mask_table, and returns the size in bytes of that variant.
size_t xnn_init_f32_sigmoid_avx_rr2_p5_params(
  union xnn_f32_sigmoid_params params[XNN_MIN_ELEMENTS(1)])
{
  for (size_t lane = 0; lane < 8; lane++) {
    // Polynomial coefficients, listed from c1 up to c5.
    params->avx_rr2_p5.c1[lane] = 0x1.FFFFF6p-1f;
    params->avx_rr2_p5.c2[lane] = 0x1.FFFDC6p-2f;
    params->avx_rr2_p5.c3[lane] = 0x1.555A80p-3f;
    params->avx_rr2_p5.c4[lane] = 0x1.573A1Ap-5f;
    params->avx_rr2_p5.c5[lane] = 0x1.0F9F9Cp-7f;

    // log2(e), and -ln(2) stored as a high part plus a low correction.
    params->avx_rr2_p5.log2e[lane] = 0x1.715476p0f;
    params->avx_rr2_p5.minus_ln2_hi[lane] = -0x1.62E400p-1f;
    params->avx_rr2_p5.minus_ln2_lo[lane] = -0x1.7F7D1Cp-20f;

    // -0.0f has only the sign bit set.
    params->avx_rr2_p5.sign_mask[lane] = -0.0f;
    params->avx_rr2_p5.magic_bias[lane] = 0x1.8000FEp23f;
    params->avx_rr2_p5.one[lane] = 1.0f;
    params->avx_rr2_p5.two[lane] = 2.0f;
    params->avx_rr2_p5.denorm_cutoff[lane] = -0x1.5D589Ep+6f;
  }

  // mask_table: entries 0..6 are -1, entries 7..13 are 0.
  for (size_t k = 0; k < 14; k++) {
    params->avx_rr2_p5.mask_table[k] = (k < 7) ? -1 : 0;
  }
  return sizeof(params->avx_rr2_p5);
}
2373*4bdc9457SAndroid Build Coastguard Worker
// Stores the constants of the `avx2_rr1_p5` variant into `params`,
// replicating each value across the 8 elements of its array, fills the
// 14-entry mask_table, and returns the size in bytes of that variant.
size_t xnn_init_f32_sigmoid_avx2_rr1_p5_params(
  union xnn_f32_sigmoid_params params[XNN_MIN_ELEMENTS(1)])
{
  for (size_t lane = 0; lane < 8; lane++) {
    // Polynomial coefficients, listed from c1 up to c5.
    params->avx2_rr1_p5.c1[lane] = 0x1.FFFFF6p-1f;
    params->avx2_rr1_p5.c2[lane] = 0x1.FFFDC6p-2f;
    params->avx2_rr1_p5.c3[lane] = 0x1.555A80p-3f;
    params->avx2_rr1_p5.c4[lane] = 0x1.573A1Ap-5f;
    params->avx2_rr1_p5.c5[lane] = 0x1.0F9F9Cp-7f;

    // log2(e) and -ln(2), the latter as a single float here.
    params->avx2_rr1_p5.log2e[lane] = 0x1.715476p0f;
    params->avx2_rr1_p5.minus_ln2[lane] = -0x1.62E430p-1f;

    // -0.0f has only the sign bit set.
    params->avx2_rr1_p5.sign_mask[lane] = -0.0f;
    params->avx2_rr1_p5.magic_bias[lane] = 0x1.8000FEp23f;
    params->avx2_rr1_p5.one[lane] = 1.0f;
    params->avx2_rr1_p5.denorm_cutoff[lane] = -0x1.5D589Ep+6f;
  }

  // mask_table: entries 0..6 are -1, entries 7..13 are 0.
  for (size_t k = 0; k < 14; k++) {
    params->avx2_rr1_p5.mask_table[k] = (k < 7) ? -1 : 0;
  }
  return sizeof(params->avx2_rr1_p5);
}
2398*4bdc9457SAndroid Build Coastguard Worker
// Stores the constants and the 16-entry lookup table of the
// `avx512_rr1_lut16_p3` variant into `params` and returns the size in bytes
// of that variant.
size_t xnn_init_f32_sigmoid_avx512_rr1_lut16_p3_params(
  union xnn_f32_sigmoid_params params[XNN_MIN_ELEMENTS(1)])
{
  // Lookup table contents; entry k matches 2**(k/16) rounded to float.
  static const float table_values[16] = {
    0x1.000000p+0f, 0x1.0B5586p+0f, 0x1.172B84p+0f, 0x1.2387A6p+0f,
    0x1.306FE0p+0f, 0x1.3DEA64p+0f, 0x1.4BFDAEp+0f, 0x1.5AB07Ep+0f,
    0x1.6A09E6p+0f, 0x1.7A1148p+0f, 0x1.8ACE54p+0f, 0x1.9C4918p+0f,
    0x1.AE89FAp+0f, 0x1.C199BEp+0f, 0x1.D5818Ep+0f, 0x1.EA4AFAp+0f,
  };

  // Unlike the SSE/AVX variants, fields here are single values, not arrays.
  params->avx512_rr1_lut16_p3.sign_mask = UINT32_C(0x80000000);
  params->avx512_rr1_lut16_p3.magic_bias = 0x1.800000p19f;

  // log2(e) and -ln(2), the latter as a single float here.
  params->avx512_rr1_lut16_p3.log2e = 0x1.715476p0f;
  params->avx512_rr1_lut16_p3.minus_ln2 = -0x1.62E430p-1f;

  params->avx512_rr1_lut16_p3.c2 = 0x1.00021Ep-1f;
  params->avx512_rr1_lut16_p3.c3 = 0x1.55559Ap-3f;
  params->avx512_rr1_lut16_p3.one = 1.0f;

  for (size_t k = 0; k < 16; k++) {
    params->avx512_rr1_lut16_p3.table[k] = table_values[k];
  }
  return sizeof(params->avx512_rr1_lut16_p3);
}
2427*4bdc9457SAndroid Build Coastguard Worker
// Stores the constants and the two 16-entry lookup-table halves of the
// `avx512_rr2_lut32_p2` variant into `params` and returns the size in bytes
// of that variant.
size_t xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params(
  union xnn_f32_sigmoid_params params[XNN_MIN_ELEMENTS(1)])
{
  // Lookup table contents; entry k matches 2**(k/32) rounded to float.
  // Entries 0..15 go to table_lo, entries 16..31 go to table_hi.
  static const float table_values[32] = {
    0x1.000000p+0f, 0x1.059B0Ep+0f, 0x1.0B5586p+0f, 0x1.11301Ep+0f,
    0x1.172B84p+0f, 0x1.1D4874p+0f, 0x1.2387A6p+0f, 0x1.29E9E0p+0f,
    0x1.306FE0p+0f, 0x1.371A74p+0f, 0x1.3DEA64p+0f, 0x1.44E086p+0f,
    0x1.4BFDAEp+0f, 0x1.5342B6p+0f, 0x1.5AB07Ep+0f, 0x1.6247ECp+0f,
    0x1.6A09E6p+0f, 0x1.71F75Ep+0f, 0x1.7A1148p+0f, 0x1.82589Ap+0f,
    0x1.8ACE54p+0f, 0x1.93737Cp+0f, 0x1.9C4918p+0f, 0x1.A5503Cp+0f,
    0x1.AE89FAp+0f, 0x1.B7F770p+0f, 0x1.C199BEp+0f, 0x1.CB720Ep+0f,
    0x1.D5818Ep+0f, 0x1.DFC974p+0f, 0x1.EA4AFAp+0f, 0x1.F50766p+0f,
  };

  params->avx512_rr2_lut32_p2.sign_mask = UINT32_C(0x80000000);
  params->avx512_rr2_lut32_p2.magic_bias = 0x1.800000p18f;

  // log2(e), and -ln(2) stored as a high part plus a low correction.
  params->avx512_rr2_lut32_p2.log2e = 0x1.715476p0f;
  params->avx512_rr2_lut32_p2.minus_ln2_hi = -0x1.62E430p-1f;
  params->avx512_rr2_lut32_p2.minus_ln2_lo = 0x1.05C61p-29f;

  params->avx512_rr2_lut32_p2.c1 = 0x1.0000F6p-0f;
  params->avx512_rr2_lut32_p2.c2 = 0x1.000000p-1f;
  params->avx512_rr2_lut32_p2.one = 1.0f;

  for (size_t k = 0; k < 16; k++) {
    params->avx512_rr2_lut32_p2.table_lo[k] = table_values[k];
    params->avx512_rr2_lut32_p2.table_hi[k] = table_values[k + 16];
  }
  return sizeof(params->avx512_rr2_lut32_p2);
}
2475*4bdc9457SAndroid Build Coastguard Worker
// Stores the constants of the `avx512_rr1_p5` variant into `params` and
// returns the size in bytes of that variant.
size_t xnn_init_f32_sigmoid_avx512_rr1_p5_params(
  union xnn_f32_sigmoid_params params[XNN_MIN_ELEMENTS(1)])
{
  // Polynomial coefficients, listed from c1 up to c5.
  params->avx512_rr1_p5.c1 = 0x1.FFFFF6p-1f;
  params->avx512_rr1_p5.c2 = 0x1.FFFDC6p-2f;
  params->avx512_rr1_p5.c3 = 0x1.555A80p-3f;
  params->avx512_rr1_p5.c4 = 0x1.573A1Ap-5f;
  params->avx512_rr1_p5.c5 = 0x1.0F9F9Cp-7f;

  // log2(e) and -ln(2), the latter as a single float here.
  params->avx512_rr1_p5.log2e = 0x1.715476p0f;
  params->avx512_rr1_p5.minus_ln2 = -0x1.62E430p-1f;

  // Only the top bit (bit 31) is set.
  params->avx512_rr1_p5.sign_mask = UINT32_C(0x80000000);
  params->avx512_rr1_p5.one = 1.0f;

  return sizeof(params->avx512_rr1_p5);
}
2490*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2491*4bdc9457SAndroid Build Coastguard Worker
2492*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Stores the constants of the `wasmsimd_rr2_lut64_p2` variant into `params`,
// writing each value to both elements of its 2-entry array, and returns the
// size in bytes of that variant.
size_t xnn_init_f32_sigmoid_wasmsimd_rr2_lut64_p2_params(
  union xnn_f32_sigmoid_params params[XNN_MIN_ELEMENTS(1)])
{
  for (size_t slot = 0; slot < 2; slot++) {
    // -log2(e), and ln(2) stored as a high part plus a (negative) low
    // correction.
    params->wasmsimd_rr2_lut64_p2.minus_log2e[slot] = -0x1.715476p0f;
    params->wasmsimd_rr2_lut64_p2.ln2_hi[slot] = 0x1.630000p-1f;
    params->wasmsimd_rr2_lut64_p2.ln2_lo[slot] = -0x1.BD0106p-13f;

    // 0x3F keeps the low 6 bits.
    params->wasmsimd_rr2_lut64_p2.index_mask[slot] = UINT32_C(0x3F);
    params->wasmsimd_rr2_lut64_p2.magic_bias[slot] = 0x1.800000p17f;
    params->wasmsimd_rr2_lut64_p2.c2[slot] = 0x1.FFFF0Ap-2f;
    params->wasmsimd_rr2_lut64_p2.one[slot] = 1.0f;
    params->wasmsimd_rr2_lut64_p2.denorm_cutoff[slot] = 0x1.5D589Ep+6f;
  }
  return sizeof(params->wasmsimd_rr2_lut64_p2);
}
2508*4bdc9457SAndroid Build Coastguard Worker
// Stores the constants of the `wasmsimd_rr2_p5` variant into `params`,
// writing each value to both elements of its 2-entry array, and returns the
// size in bytes of that variant.
size_t xnn_init_f32_sigmoid_wasmsimd_rr2_p5_params(
  union xnn_f32_sigmoid_params params[XNN_MIN_ELEMENTS(1)])
{
  for (size_t slot = 0; slot < 2; slot++) {
    // Polynomial coefficients, listed from c1 up to c5.
    params->wasmsimd_rr2_p5.c1[slot] = -0x1.FFFFF6p-1f;
    params->wasmsimd_rr2_p5.c2[slot] = 0x1.FFFDC6p-2f;
    params->wasmsimd_rr2_p5.c3[slot] = -0x1.555A80p-3f;
    params->wasmsimd_rr2_p5.c4[slot] = 0x1.573A1Ap-5f;
    params->wasmsimd_rr2_p5.c5[slot] = -0x1.0F9F9Cp-7f;

    // -log2(e), and ln(2) stored as a high part plus a low correction.
    params->wasmsimd_rr2_p5.minus_log2e[slot] = -0x1.715476p+0f;
    params->wasmsimd_rr2_p5.ln2_hi[slot] = 0x1.62E400p-1f;
    params->wasmsimd_rr2_p5.ln2_lo[slot] = 0x1.7F7D1Cp-20f;

    params->wasmsimd_rr2_p5.magic_bias[slot] = 0x1.8000FEp23f;
    params->wasmsimd_rr2_p5.one[slot] = 1.0f;
    params->wasmsimd_rr2_p5.denorm_cutoff[slot] = 0x1.5D589Ep+6f;
  }
  return sizeof(params->wasmsimd_rr2_p5);
}
2527*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2528*4bdc9457SAndroid Build Coastguard Worker
2529*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Sets all 8 entries of `sse.nonsign_mask` to 0x7FFF (every bit of a 16-bit
// value except the top one) and returns the size in bytes of the `sse`
// variant.
size_t xnn_init_f16_abs_sse_params(
  union xnn_f16_abs_params params[XNN_MIN_ELEMENTS(1)])
{
  size_t lane = 0;
  while (lane < 8) {
    params->sse.nonsign_mask[lane] = UINT16_C(0x7FFF);
    lane += 1;
  }
  return sizeof(params->sse);
}
2538*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2539*4bdc9457SAndroid Build Coastguard Worker
2540*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Sets all 4 entries of `sse.nonsign_mask` to the value returned by
// math_nonsign_mask_f32() and returns the size in bytes of the `sse` variant.
size_t xnn_init_f32_abs_sse_params(
  union xnn_f32_abs_params params[XNN_MIN_ELEMENTS(1)])
{
  size_t lane = 0;
  while (lane < 4) {
    // The helper is invoked once per element, as the mask is not cached.
    params->sse.nonsign_mask[lane] = math_nonsign_mask_f32();
    lane += 1;
  }
  return sizeof(params->sse);
}
2549*4bdc9457SAndroid Build Coastguard Worker
// Sets all 8 entries of `avx.nonsign_mask` to the value returned by
// math_nonsign_mask_f32(), fills the 14-entry `avx.mask_table`, and returns
// the size in bytes of the `avx` variant.
size_t xnn_init_f32_abs_avx_params(
  union xnn_f32_abs_params params[XNN_MIN_ELEMENTS(1)])
{
  size_t lane = 0;
  while (lane < 8) {
    // The helper is invoked once per element, as the mask is not cached.
    params->avx.nonsign_mask[lane] = math_nonsign_mask_f32();
    lane += 1;
  }

  // mask_table: entries 0..6 are -1, entries 7..13 are 0.
  for (size_t k = 0; k < 14; k++) {
    params->avx.mask_table[k] = (k < 7) ? -1 : 0;
  }
  return sizeof(params->avx);
}
2564*4bdc9457SAndroid Build Coastguard Worker
// Sets `avx512.nonsign_mask` to 0x7FFFFFFF (every bit of a 32-bit value
// except the top one) and returns the size in bytes of the `avx512` variant.
size_t xnn_init_f32_abs_avx512_params(
  union xnn_f32_abs_params params[XNN_MIN_ELEMENTS(1)])
{
  // A single value is stored here rather than one per element.
  params->avx512.nonsign_mask = UINT32_C(0x7FFFFFFF);

  return sizeof(params->avx512);
}
2571*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2572*4bdc9457SAndroid Build Coastguard Worker
2573*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Sets both entries of `wasmsimd.nonsign_mask` to the value returned by
// math_nonsign_mask_f32() and returns the size in bytes of the `wasmsimd`
// variant.
size_t xnn_init_f32_abs_wasmsimd_params(
  union xnn_f32_abs_params params[XNN_MIN_ELEMENTS(1)])
{
  // One helper call per entry, in index order.
  for (size_t slot = 0; slot < 2; slot++) {
    params->wasmsimd.nonsign_mask[slot] = math_nonsign_mask_f32();
  }
  return sizeof(params->wasmsimd);
}
2581*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2582*4bdc9457SAndroid Build Coastguard Worker
2583*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Fills all 8 entries of the `sse` sign mask with 0x8000 (only bit 15 set).
// Returns the size, in bytes, of the `sse` member.
size_t xnn_init_f16_neg_sse_params(
  union xnn_f16_neg_params params[XNN_MIN_ELEMENTS(1)])
{
  const uint16_t top_bit = UINT16_C(0x8000);
  for (uint32_t lane = 0; lane != 8; ++lane) {
    params->sse.sign_mask[lane] = top_bit;
  }
  return sizeof(params->sse);
}
2592*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2593*4bdc9457SAndroid Build Coastguard Worker
2594*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Fills all 4 entries of the `sse` sign mask with negative zero (-0.0f).
// Returns the size, in bytes, of the `sse` member.
size_t xnn_init_f32_neg_sse_params(
  union xnn_f32_neg_params params[XNN_MIN_ELEMENTS(1)])
{
  const float negative_zero = -0.0f;
  for (uint32_t lane = 0; lane != 4; ++lane) {
    params->sse.sign_mask[lane] = negative_zero;
  }
  return sizeof(params->sse);
}
2603*4bdc9457SAndroid Build Coastguard Worker
// Fills the `avx` member of the f32 neg parameters:
//  - all 8 entries of sign_mask are set to negative zero (-0.0f);
//  - mask_table gets -1 in entries [0, 7) and 0 in entries [7, 14).
// NOTE(review): mask_table is presumably consumed by the kernels to build
// masks for remainder elements -- confirm against the kernel sources.
// Returns the size, in bytes, of the `avx` member.
size_t xnn_init_f32_neg_avx_params(
  union xnn_f32_neg_params params[XNN_MIN_ELEMENTS(1)])
{
  const float negative_zero = -0.0f;
  for (uint32_t lane = 0; lane != 8; ++lane) {
    params->avx.sign_mask[lane] = negative_zero;
  }
  for (uint32_t slot = 0; slot != 14; ++slot) {
    params->avx.mask_table[slot] = (slot < 7) ? -1 : 0;
  }
  return sizeof(params->avx);
}
2618*4bdc9457SAndroid Build Coastguard Worker
// Fills the `avx512` member of the f32 neg parameters with a 32-bit mask that
// has only bit 31 set (0x80000000).
// Returns the size, in bytes, of the `avx512` member.
size_t xnn_init_f32_neg_avx512_params(
  union xnn_f32_neg_params params[XNN_MIN_ELEMENTS(1)])
{
  const uint32_t top_bit_only = UINT32_C(0x80000000);
  params->avx512.sign_mask = top_bit_only;
  return sizeof(params->avx512);
}
2625*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2626*4bdc9457SAndroid Build Coastguard Worker
2627*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Fills both entries of the `wasmsimd` sign mask with negative zero (-0.0f).
// Returns the size, in bytes, of the `wasmsimd` member.
size_t xnn_init_f32_neg_wasmsimd_params(
  union xnn_f32_neg_params params[XNN_MIN_ELEMENTS(1)])
{
  for (uint32_t lane = 0; lane != 2; ++lane) {
    params->wasmsimd.sign_mask[lane] = -0.0f;
  }
  return sizeof(params->wasmsimd);
}
2635*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2636*4bdc9457SAndroid Build Coastguard Worker
2637*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Fills the `sse2` member of the f32 rounding parameters: each of the 4
// entries of sign_mask is set to -0.0f and each of the 4 entries of `one`
// is set to 1.0f.
// Returns the size, in bytes, of the `sse2` member.
size_t xnn_init_f32_rnd_sse2_params(
  union xnn_f32_rnd_params params[XNN_MIN_ELEMENTS(1)])
{
  for (uint32_t lane = 0; lane != 4; ++lane) {
    params->sse2.sign_mask[lane] = -0.0f;
  }
  for (uint32_t lane = 0; lane != 4; ++lane) {
    params->sse2.one[lane] = 1.0f;
  }
  return sizeof(params->sse2);
}
2647*4bdc9457SAndroid Build Coastguard Worker
// Fills the `avx` member of the f32 rounding parameters: mask_table gets -1
// in entries [0, 7) and 0 in entries [7, 14).
// Returns the size, in bytes, of the `avx` member.
size_t xnn_init_f32_rnd_avx_params(
  union xnn_f32_rnd_params params[XNN_MIN_ELEMENTS(1)])
{
  for (uint32_t slot = 0; slot != 14; ++slot) {
    params->avx.mask_table[slot] = (slot < 7) ? -1 : 0;
  }
  return sizeof(params->avx);
}
2659*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2660*4bdc9457SAndroid Build Coastguard Worker
2661*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Fills the `neonfp16arith_rr1_p3` member of the f16 ELU parameters.
//
// prescale and beta are stored unchanged; alpha is stored with bit 15 toggled
// (the sign bit of an IEEE half-precision encoding) in `minus_alpha`. The
// remaining fields are fixed 16-bit constants; the trailing comments give the
// half-precision value each bit pattern encodes.
// Returns the size, in bytes, of the `neonfp16arith_rr1_p3` member.
size_t xnn_init_f16_elu_neonfp16arith_rr1_p3_params(
  union xnn_f16_elu_params params[XNN_MIN_ELEMENTS(1)],
  uint16_t prescale,
  uint16_t alpha,
  uint16_t beta)
{
  const uint16_t sign_bit = UINT16_C(0x8000);

  // Caller-supplied values.
  params->neonfp16arith_rr1_p3.prescale = prescale;
  params->neonfp16arith_rr1_p3.minus_alpha = alpha ^ sign_bit;
  params->neonfp16arith_rr1_p3.beta = beta;

  // Fixed constants.
  params->neonfp16arith_rr1_p3.sat_cutoff = UINT16_C(0xC829);  // -0x1.0A4p+3h
  params->neonfp16arith_rr1_p3.magic_bias = UINT16_C(0x660F);  // 0x1.83Cp+10h
  params->neonfp16arith_rr1_p3.log2e = UINT16_C(0x3DC5);  // 0x1.714p+0h
  params->neonfp16arith_rr1_p3.minus_ln2 = UINT16_C(0xB98C);  // -0x1.630p-1h
  params->neonfp16arith_rr1_p3.c3 = UINT16_C(0x315B);  // 0x1.56Cp-3h
  params->neonfp16arith_rr1_p3.c2 = UINT16_C(0x3808);  // 0x1.020p-1h
  return sizeof(params->neonfp16arith_rr1_p3);
}
2679*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2680*4bdc9457SAndroid Build Coastguard Worker
2681*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Fills the `avx2_rr1_p3` member of the f16 ELU parameters.
//
// prescale, alpha and beta are converted with fp16_ieee_to_fp32_value() and
// replicated into all 8 entries of their arrays; the remaining arrays are
// filled with fixed single-precision constants.
// Returns the size, in bytes, of the `avx2_rr1_p3` member.
size_t xnn_init_f16_elu_avx2_rr1_p3_params(
  union xnn_f16_elu_params params[XNN_MIN_ELEMENTS(1)],
  uint16_t prescale,
  uint16_t alpha,
  uint16_t beta)
{
  // The conversions do not depend on the lane, so perform them once.
  const float prescale_f32 = fp16_ieee_to_fp32_value(prescale);
  const float alpha_f32 = fp16_ieee_to_fp32_value(alpha);
  const float beta_f32 = fp16_ieee_to_fp32_value(beta);

  for (uint32_t lane = 0; lane != 8; ++lane) {
    // Caller-supplied values.
    params->avx2_rr1_p3.prescale[lane] = prescale_f32;
    params->avx2_rr1_p3.alpha[lane] = alpha_f32;
    params->avx2_rr1_p3.beta[lane] = beta_f32;
    // Fixed constants.
    params->avx2_rr1_p3.sat_cutoff[lane] = -0x1.0A4000p+3f;
    params->avx2_rr1_p3.magic_bias[lane] = 0x1.8000FEp23f;
    params->avx2_rr1_p3.log2e[lane] = 0x1.715476p+0f;
    params->avx2_rr1_p3.minus_ln2[lane] = -0x1.62E430p-1f;
    params->avx2_rr1_p3.c1[lane] = 0x1.0002F2p+0f;
    params->avx2_rr1_p3.c2[lane] = 0x1.01EBB2p-1f;
    params->avx2_rr1_p3.c3[lane] = 0x1.5554DCp-3f;
  }
  return sizeof(params->avx2_rr1_p3);
}
2702*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2703*4bdc9457SAndroid Build Coastguard Worker
// Fills the `scalar_rr2_lut16_p3` member of the f32 ELU parameters with the
// caller's prescale/alpha/beta and a set of fixed numeric constants.
// Returns the size, in bytes, of the `scalar_rr2_lut16_p3` member.
size_t xnn_init_f32_elu_scalar_rr2_lut16_p3_params(
  union xnn_f32_elu_params params[XNN_MIN_ELEMENTS(1)],
  float prescale,
  float alpha,
  float beta)
{
  // Fixed constants.
  params->scalar_rr2_lut16_p3.one = 1.0f;
  params->scalar_rr2_lut16_p3.c2 = 0x1.0001ECp-1f;
  params->scalar_rr2_lut16_p3.c3 = 0x1.55561Cp-3f;
  params->scalar_rr2_lut16_p3.minus_ln2_lo = -0x1.7F7D1Cp-20f;
  params->scalar_rr2_lut16_p3.minus_ln2_hi = -0x1.62E400p-1f;
  params->scalar_rr2_lut16_p3.log2e = 0x1.715476p+0f;
  params->scalar_rr2_lut16_p3.magic_bias = 0x1.800000p19f;
  params->scalar_rr2_lut16_p3.sat_cutoff = -0x1.154246p+4f;

  // Caller-supplied values.
  params->scalar_rr2_lut16_p3.beta = beta;
  params->scalar_rr2_lut16_p3.alpha = alpha;
  params->scalar_rr2_lut16_p3.prescale = prescale;

  return sizeof(params->scalar_rr2_lut16_p3);
}
2723*4bdc9457SAndroid Build Coastguard Worker
// Fills the `scalar_rr2_p6` member of the f32 ELU parameters with the caller's
// prescale/alpha/beta and a set of fixed numeric constants (including the
// coefficients c2..c6).
// Returns the size, in bytes, of the `scalar_rr2_p6` member.
size_t xnn_init_f32_elu_scalar_rr2_p6_params(
  union xnn_f32_elu_params params[XNN_MIN_ELEMENTS(1)],
  float prescale,
  float alpha,
  float beta)
{
  // Coefficients, lowest index first.
  params->scalar_rr2_p6.c2 = 0x1.FFFFFEp-2f;
  params->scalar_rr2_p6.c3 = 0x1.5554B0p-3f;
  params->scalar_rr2_p6.c4 = 0x1.555716p-5f;
  params->scalar_rr2_p6.c5 = 0x1.12278Ep-7f;
  params->scalar_rr2_p6.c6 = 0x1.6b7338p-10f;

  // Remaining fixed constants.
  params->scalar_rr2_p6.one = 1.0f;
  params->scalar_rr2_p6.minus_ln2_lo = 0x1.0105C6p-21f;
  params->scalar_rr2_p6.minus_ln2_hi = -0x1.62E440p-1f;
  params->scalar_rr2_p6.log2e = 0x1.715476p+0f;
  params->scalar_rr2_p6.magic_bias = 0x1.8000FEp23f;
  params->scalar_rr2_p6.sat_cutoff = -0x1.154246p+4f;

  // Caller-supplied values.
  params->scalar_rr2_p6.beta = beta;
  params->scalar_rr2_p6.alpha = alpha;
  params->scalar_rr2_p6.prescale = prescale;

  return sizeof(params->scalar_rr2_p6);
}
2746*4bdc9457SAndroid Build Coastguard Worker
2747*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Fills the `neon_rr2_lut16_p3` member of the f32 ELU parameters with the
// caller's prescale/alpha/beta and a set of fixed numeric constants.
// Returns the size, in bytes, of the `neon_rr2_lut16_p3` member.
size_t xnn_init_f32_elu_neon_rr2_lut16_p3_params(
  union xnn_f32_elu_params params[XNN_MIN_ELEMENTS(1)],
  float prescale,
  float alpha,
  float beta)
{
  // Fixed constants.
  params->neon_rr2_lut16_p3.c2 = 0x1.0001ECp-1f;
  params->neon_rr2_lut16_p3.c3 = 0x1.55561Cp-3f;
  params->neon_rr2_lut16_p3.minus_ln2_lo = -0x1.7F7D1Cp-20f;
  params->neon_rr2_lut16_p3.minus_ln2_hi = -0x1.62E400p-1f;
  params->neon_rr2_lut16_p3.log2e = 0x1.715476p+0f;
  params->neon_rr2_lut16_p3.magic_bias = 0x1.800000p19f;
  params->neon_rr2_lut16_p3.sat_cutoff = -0x1.154246p+4f;

  // Caller-supplied values.
  params->neon_rr2_lut16_p3.beta = beta;
  params->neon_rr2_lut16_p3.alpha = alpha;
  params->neon_rr2_lut16_p3.prescale = prescale;

  return sizeof(params->neon_rr2_lut16_p3);
}
2766*4bdc9457SAndroid Build Coastguard Worker
// Fills the `neon_rr2_p6` member of the f32 ELU parameters with the caller's
// prescale/alpha/beta and a set of fixed numeric constants (including the
// coefficients c2..c6).
// Returns the size, in bytes, of the `neon_rr2_p6` member.
size_t xnn_init_f32_elu_neon_rr2_p6_params(
  union xnn_f32_elu_params params[XNN_MIN_ELEMENTS(1)],
  float prescale,
  float alpha,
  float beta)
{
  // Coefficients, lowest index first.
  params->neon_rr2_p6.c2 = 0x1.FFFFFEp-2f;
  params->neon_rr2_p6.c3 = 0x1.5554B0p-3f;
  params->neon_rr2_p6.c4 = 0x1.555716p-5f;
  params->neon_rr2_p6.c5 = 0x1.12278Ep-7f;
  params->neon_rr2_p6.c6 = 0x1.6b7338p-10f;

  // Remaining fixed constants.
  params->neon_rr2_p6.minus_ln2_lo = 0x1.0105C6p-21f;
  params->neon_rr2_p6.minus_ln2_hi = -0x1.62E440p-1f;
  params->neon_rr2_p6.log2e = 0x1.715476p+0f;
  params->neon_rr2_p6.magic_bias = 0x1.8000FEp23f;
  params->neon_rr2_p6.sat_cutoff = -0x1.154246p+4f;

  // Caller-supplied values.
  params->neon_rr2_p6.beta = beta;
  params->neon_rr2_p6.alpha = alpha;
  params->neon_rr2_p6.prescale = prescale;

  return sizeof(params->neon_rr2_p6);
}
2788*4bdc9457SAndroid Build Coastguard Worker
// Fills the `neonfma_rr1_lut16_p3` member of the f32 ELU parameters with the
// caller's prescale/alpha/beta and a set of fixed numeric constants.
// Returns the size, in bytes, of the `neonfma_rr1_lut16_p3` member.
size_t xnn_init_f32_elu_neonfma_rr1_lut16_p3_params(
  union xnn_f32_elu_params params[XNN_MIN_ELEMENTS(1)],
  float prescale,
  float alpha,
  float beta)
{
  // Fixed constants.
  params->neonfma_rr1_lut16_p3.c2 = 0x1.0001ECp-1f;
  params->neonfma_rr1_lut16_p3.c3 = 0x1.55561Cp-3f;
  params->neonfma_rr1_lut16_p3.minus_ln2 = -0x1.62E430p-1f;
  params->neonfma_rr1_lut16_p3.log2e = 0x1.715476p+0f;
  params->neonfma_rr1_lut16_p3.magic_bias = 0x1.800000p19f;
  params->neonfma_rr1_lut16_p3.sat_cutoff = -0x1.154246p+4f;

  // Caller-supplied values.
  params->neonfma_rr1_lut16_p3.beta = beta;
  params->neonfma_rr1_lut16_p3.alpha = alpha;
  params->neonfma_rr1_lut16_p3.prescale = prescale;

  return sizeof(params->neonfma_rr1_lut16_p3);
}
2806*4bdc9457SAndroid Build Coastguard Worker
// Fills the `neonfma_rr1_p6` member of the f32 ELU parameters with the
// caller's prescale/alpha/beta and a set of fixed numeric constants
// (including the coefficients c2..c6).
// Returns the size, in bytes, of the `neonfma_rr1_p6` member.
size_t xnn_init_f32_elu_neonfma_rr1_p6_params(
  union xnn_f32_elu_params params[XNN_MIN_ELEMENTS(1)],
  float prescale,
  float alpha,
  float beta)
{
  // Coefficients, lowest index first.
  params->neonfma_rr1_p6.c2 = 0x1.FFFFFEp-2f;
  params->neonfma_rr1_p6.c3 = 0x1.5554B0p-3f;
  params->neonfma_rr1_p6.c4 = 0x1.555716p-5f;
  params->neonfma_rr1_p6.c5 = 0x1.12278Ep-7f;
  params->neonfma_rr1_p6.c6 = 0x1.6b7338p-10f;

  // Remaining fixed constants.
  params->neonfma_rr1_p6.minus_ln2 = -0x1.62E430p-1f;
  params->neonfma_rr1_p6.log2e = 0x1.715476p+0f;
  params->neonfma_rr1_p6.magic_bias = 0x1.8000FEp23f;
  params->neonfma_rr1_p6.sat_cutoff = -0x1.154246p+4f;

  // Caller-supplied values.
  params->neonfma_rr1_p6.beta = beta;
  params->neonfma_rr1_p6.alpha = alpha;
  params->neonfma_rr1_p6.prescale = prescale;

  return sizeof(params->neonfma_rr1_p6);
}
2827*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2828*4bdc9457SAndroid Build Coastguard Worker
2829*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Fills the `sse2_rr2_lut16_p3` member of the f32 ELU parameters. Every field
// is a 4-entry array; each entry receives the same value: the caller's
// prescale/alpha/beta, or a fixed constant.
// Returns the size, in bytes, of the `sse2_rr2_lut16_p3` member.
size_t xnn_init_f32_elu_sse2_rr2_lut16_p3_params(
  union xnn_f32_elu_params params[XNN_MIN_ELEMENTS(1)],
  float prescale,
  float alpha,
  float beta)
{
  const float sat_cutoff = -0x1.154246p+4f;
  const float magic_bias = 0x1.800000p19f;
  const float log2e = 0x1.715476p+0f;
  const float minus_ln2_hi = -0x1.62E400p-1f;
  const float minus_ln2_lo = -0x1.7F7D1Cp-20f;
  const float c3 = 0x1.55561Cp-3f;
  const float c2 = 0x1.0001ECp-1f;

  for (uint32_t lane = 0; lane != 4; ++lane) {
    // Caller-supplied values.
    params->sse2_rr2_lut16_p3.prescale[lane] = prescale;
    params->sse2_rr2_lut16_p3.alpha[lane] = alpha;
    params->sse2_rr2_lut16_p3.beta[lane] = beta;
    // Fixed constants.
    params->sse2_rr2_lut16_p3.sat_cutoff[lane] = sat_cutoff;
    params->sse2_rr2_lut16_p3.magic_bias[lane] = magic_bias;
    params->sse2_rr2_lut16_p3.log2e[lane] = log2e;
    params->sse2_rr2_lut16_p3.index_mask[lane] = UINT32_C(0xF);
    params->sse2_rr2_lut16_p3.minus_ln2_hi[lane] = minus_ln2_hi;
    params->sse2_rr2_lut16_p3.minus_ln2_lo[lane] = minus_ln2_lo;
    params->sse2_rr2_lut16_p3.c3[lane] = c3;
    params->sse2_rr2_lut16_p3.c2[lane] = c2;
    params->sse2_rr2_lut16_p3.one[lane] = 1.0f;
  }
  return sizeof(params->sse2_rr2_lut16_p3);
}
2852*4bdc9457SAndroid Build Coastguard Worker
// Fills the `sse2_rr2_p6` member of the f32 ELU parameters. Every field is a
// 4-entry array; each entry receives the same value: the caller's
// prescale/alpha/beta, or a fixed constant (including coefficients c2..c6).
// Returns the size, in bytes, of the `sse2_rr2_p6` member.
size_t xnn_init_f32_elu_sse2_rr2_p6_params(
  union xnn_f32_elu_params params[XNN_MIN_ELEMENTS(1)],
  float prescale,
  float alpha,
  float beta)
{
  const float sat_cutoff = -0x1.154246p+4f;
  const float magic_bias = 0x1.8000FEp23f;
  const float log2e = 0x1.715476p+0f;
  const float minus_ln2_hi = -0x1.62E440p-1f;
  const float minus_ln2_lo = 0x1.0105C6p-21f;
  const float c6 = 0x1.6b7338p-10f;
  const float c5 = 0x1.12278Ep-7f;
  const float c4 = 0x1.555716p-5f;
  const float c3 = 0x1.5554B0p-3f;
  const float c2 = 0x1.FFFFFEp-2f;

  for (uint32_t lane = 0; lane != 4; ++lane) {
    // Caller-supplied values.
    params->sse2_rr2_p6.prescale[lane] = prescale;
    params->sse2_rr2_p6.alpha[lane] = alpha;
    params->sse2_rr2_p6.beta[lane] = beta;
    // Fixed constants.
    params->sse2_rr2_p6.sat_cutoff[lane] = sat_cutoff;
    params->sse2_rr2_p6.magic_bias[lane] = magic_bias;
    params->sse2_rr2_p6.log2e[lane] = log2e;
    params->sse2_rr2_p6.minus_ln2_hi[lane] = minus_ln2_hi;
    params->sse2_rr2_p6.minus_ln2_lo[lane] = minus_ln2_lo;
    params->sse2_rr2_p6.c6[lane] = c6;
    params->sse2_rr2_p6.c5[lane] = c5;
    params->sse2_rr2_p6.c4[lane] = c4;
    params->sse2_rr2_p6.c3[lane] = c3;
    params->sse2_rr2_p6.c2[lane] = c2;
    params->sse2_rr2_p6.one[lane] = 1.0f;
  }
  return sizeof(params->sse2_rr2_p6);
}
2877*4bdc9457SAndroid Build Coastguard Worker
// Fills the `avx_rr2_lut16_p3` member of the f32 ELU parameters.
//
// The 8-entry arrays each receive one replicated value: the caller's
// prescale/alpha/beta, or a fixed constant. mask_table gets -1 in entries
// [0, 7) and 0 in entries [7, 14).
// Returns the size, in bytes, of the `avx_rr2_lut16_p3` member.
size_t xnn_init_f32_elu_avx_rr2_lut16_p3_params(
  union xnn_f32_elu_params params[XNN_MIN_ELEMENTS(1)],
  float prescale,
  float alpha,
  float beta)
{
  const float sat_cutoff = -0x1.154246p+4f;
  const float magic_bias = 0x1.800000p19f;
  const float log2e = 0x1.715476p+0f;
  const float minus_ln2_hi = -0x1.62E400p-1f;
  const float minus_ln2_lo = -0x1.7F7D1Cp-20f;
  const float c3 = 0x1.55561Cp-3f;
  const float c2 = 0x1.0001ECp-1f;

  for (uint32_t lane = 0; lane != 8; ++lane) {
    // Caller-supplied values.
    params->avx_rr2_lut16_p3.prescale[lane] = prescale;
    params->avx_rr2_lut16_p3.alpha[lane] = alpha;
    params->avx_rr2_lut16_p3.beta[lane] = beta;
    // Fixed constants.
    params->avx_rr2_lut16_p3.sat_cutoff[lane] = sat_cutoff;
    params->avx_rr2_lut16_p3.magic_bias[lane] = magic_bias;
    params->avx_rr2_lut16_p3.log2e[lane] = log2e;
    params->avx_rr2_lut16_p3.index_mask[lane] = UINT32_C(0xF);
    params->avx_rr2_lut16_p3.minus_ln2_hi[lane] = minus_ln2_hi;
    params->avx_rr2_lut16_p3.minus_ln2_lo[lane] = minus_ln2_lo;
    params->avx_rr2_lut16_p3.c3[lane] = c3;
    params->avx_rr2_lut16_p3.c2[lane] = c2;
    params->avx_rr2_lut16_p3.one[lane] = 1.0f;
  }
  for (uint32_t slot = 0; slot != 14; ++slot) {
    params->avx_rr2_lut16_p3.mask_table[slot] = (slot < 7) ? -1 : 0;
  }
  return sizeof(params->avx_rr2_lut16_p3);
}
2906*4bdc9457SAndroid Build Coastguard Worker
// Fills the `avx_rr2_lut4_p4` member of the f32 ELU parameters.
//
// The 8-entry arrays each receive one replicated value (the caller's
// prescale/alpha/beta, or a fixed constant), except `table`, which holds the
// same 4 values twice: entries [4, 8) repeat entries [0, 4). The 4 table
// values approximate 2^(k/4) for k = 0..3. mask_table gets -1 in entries
// [0, 7) and 0 in entries [7, 14).
// Returns the size, in bytes, of the `avx_rr2_lut4_p4` member.
size_t xnn_init_f32_elu_avx_rr2_lut4_p4_params(
  union xnn_f32_elu_params params[XNN_MIN_ELEMENTS(1)],
  float prescale,
  float alpha,
  float beta)
{
  static const float table_entries[4] = {
    0x1.000000p+0f,
    0x1.306FE0p+0f,
    0x1.6A09E6p+0f,
    0x1.AE89FAp+0f,
  };

  for (uint32_t lane = 0; lane != 8; ++lane) {
    // Caller-supplied values.
    params->avx_rr2_lut4_p4.prescale[lane] = prescale;
    params->avx_rr2_lut4_p4.alpha[lane] = alpha;
    params->avx_rr2_lut4_p4.beta[lane] = beta;
    // Fixed constants.
    params->avx_rr2_lut4_p4.sat_cutoff[lane] = -0x1.154246p+4f;
    params->avx_rr2_lut4_p4.magic_bias[lane] = 0x1.8003F8p21f;
    params->avx_rr2_lut4_p4.log2e[lane] = 0x1.715476p+0f;
    params->avx_rr2_lut4_p4.index_mask[lane] = UINT32_C(0x3);
    // The 4-entry table is stored twice, back to back.
    params->avx_rr2_lut4_p4.table[lane] = table_entries[lane % 4];
    params->avx_rr2_lut4_p4.minus_ln2_hi[lane] = -0x1.62E400p-1f;
    params->avx_rr2_lut4_p4.minus_ln2_lo[lane] = -0x1.7F7D1Cp-20f;
    params->avx_rr2_lut4_p4.c4[lane] = 0x1.554F9Ap-5f;
    params->avx_rr2_lut4_p4.c3[lane] = 0x1.557082p-3f;
    params->avx_rr2_lut4_p4.c2[lane] = 0x1.000002p-1f;
    params->avx_rr2_lut4_p4.one[lane] = 1.0f;
  }
  for (uint32_t slot = 0; slot != 14; ++slot) {
    params->avx_rr2_lut4_p4.mask_table[slot] = (slot < 7) ? -1 : 0;
  }
  return sizeof(params->avx_rr2_lut4_p4);
}
2946*4bdc9457SAndroid Build Coastguard Worker
// Fills the `avx_rr2_p6` member of the f32 ELU parameters.
//
// The 8-entry arrays each receive one replicated value: the caller's
// prescale/alpha/beta, or a fixed constant (including coefficients c2..c6).
// mask_table gets -1 in entries [0, 7) and 0 in entries [7, 14).
// Returns the size, in bytes, of the `avx_rr2_p6` member.
size_t xnn_init_f32_elu_avx_rr2_p6_params(
  union xnn_f32_elu_params params[XNN_MIN_ELEMENTS(1)],
  float prescale,
  float alpha,
  float beta)
{
  const float sat_cutoff = -0x1.154246p+4f;
  const float magic_bias = 0x1.8000FEp23f;
  const float log2e = 0x1.715476p+0f;
  const float minus_ln2_hi = -0x1.62E440p-1f;
  const float minus_ln2_lo = 0x1.0105C6p-21f;
  const float c6 = 0x1.6b7338p-10f;
  const float c5 = 0x1.12278Ep-7f;
  const float c4 = 0x1.555716p-5f;
  const float c3 = 0x1.5554B0p-3f;
  const float c2 = 0x1.FFFFFEp-2f;

  for (uint32_t lane = 0; lane != 8; ++lane) {
    // Caller-supplied values.
    params->avx_rr2_p6.prescale[lane] = prescale;
    params->avx_rr2_p6.alpha[lane] = alpha;
    params->avx_rr2_p6.beta[lane] = beta;
    // Fixed constants.
    params->avx_rr2_p6.sat_cutoff[lane] = sat_cutoff;
    params->avx_rr2_p6.magic_bias[lane] = magic_bias;
    params->avx_rr2_p6.log2e[lane] = log2e;
    params->avx_rr2_p6.minus_ln2_hi[lane] = minus_ln2_hi;
    params->avx_rr2_p6.minus_ln2_lo[lane] = minus_ln2_lo;
    params->avx_rr2_p6.c6[lane] = c6;
    params->avx_rr2_p6.c5[lane] = c5;
    params->avx_rr2_p6.c4[lane] = c4;
    params->avx_rr2_p6.c3[lane] = c3;
    params->avx_rr2_p6.c2[lane] = c2;
    params->avx_rr2_p6.one[lane] = 1.0f;
  }
  for (uint32_t slot = 0; slot != 14; ++slot) {
    params->avx_rr2_p6.mask_table[slot] = (slot < 7) ? -1 : 0;
  }
  return sizeof(params->avx_rr2_p6);
}
2977*4bdc9457SAndroid Build Coastguard Worker
// Populates params->avx2_rr1_lut16_p3.
//
// prescale, alpha and beta are copied from the arguments; every other field
// receives a fixed constant. Each per-element field is written at indices
// 0..7. mask_table gets -1 at indices 0..6 and 0 at indices 7..13.
//
// Returns sizeof(params->avx2_rr1_lut16_p3).
size_t xnn_init_f32_elu_avx2_rr1_lut16_p3_params(
  union xnn_f32_elu_params params[XNN_MIN_ELEMENTS(1)],
  float prescale,
  float alpha,
  float beta)
{
  for (uint32_t lane = 0; lane != 8; ++lane) {
    // Caller-supplied values.
    params->avx2_rr1_lut16_p3.beta[lane] = beta;
    params->avx2_rr1_lut16_p3.alpha[lane] = alpha;
    params->avx2_rr1_lut16_p3.prescale[lane] = prescale;

    // Fixed constants.
    params->avx2_rr1_lut16_p3.index_mask[lane] = UINT32_C(0xF);
    params->avx2_rr1_lut16_p3.log2e[lane] = 0x1.715476p+0f;
    params->avx2_rr1_lut16_p3.minus_ln2[lane] = -0x1.62E430p-1f;
    params->avx2_rr1_lut16_p3.magic_bias[lane] = 0x1.800000p19f;
    params->avx2_rr1_lut16_p3.sat_cutoff[lane] = -0x1.154246p+4f;
    params->avx2_rr1_lut16_p3.c2[lane] = 0x1.0001ECp-1f;
    params->avx2_rr1_lut16_p3.c3[lane] = 0x1.55561Cp-3f;
  }

  // Seven -1 entries followed by seven 0 entries.
  for (uint32_t k = 0; k != 14; ++k) {
    params->avx2_rr1_lut16_p3.mask_table[k] = (k < 7) ? -1 : 0;
  }

  return sizeof(params->avx2_rr1_lut16_p3);
}
3004*4bdc9457SAndroid Build Coastguard Worker
// Populates params->avx2_rr1_lut8_p4.
//
// prescale, alpha and beta are copied from the arguments; every other field
// receives a fixed constant. Each per-element field is written at indices
// 0..7, and table receives the eight 32-bit patterns listed below.
// mask_table gets -1 at indices 0..6 and 0 at indices 7..13.
//
// Returns sizeof(params->avx2_rr1_lut8_p4).
size_t xnn_init_f32_elu_avx2_rr1_lut8_p4_params(
  union xnn_f32_elu_params params[XNN_MIN_ELEMENTS(1)],
  float prescale,
  float alpha,
  float beta)
{
  // The 8 table entries, stored as raw 32-bit values.
  static const uint32_t table_bits[8] = {
    UINT32_C(0x3F800000), UINT32_C(0x3F7B95C2),
    UINT32_C(0x3F7837F0), UINT32_C(0x3F75FED7),
    UINT32_C(0x3F7504F3), UINT32_C(0x3F75672A),
    UINT32_C(0x3F7744FD), UINT32_C(0x3F7AC0C7),
  };

  for (uint32_t lane = 0; lane != 8; ++lane) {
    // Caller-supplied values.
    params->avx2_rr1_lut8_p4.beta[lane] = beta;
    params->avx2_rr1_lut8_p4.alpha[lane] = alpha;
    params->avx2_rr1_lut8_p4.prescale[lane] = prescale;

    // Fixed constants.
    params->avx2_rr1_lut8_p4.log2e[lane] = 0x1.715476p+0f;
    params->avx2_rr1_lut8_p4.minus_ln2[lane] = -0x1.62E430p-1f;
    params->avx2_rr1_lut8_p4.magic_bias[lane] = 0x1.800000p20f;
    params->avx2_rr1_lut8_p4.sat_cutoff[lane] = -0x1.154246p+4f;
    params->avx2_rr1_lut8_p4.c2[lane] = 0x1.000000p-1f;
    params->avx2_rr1_lut8_p4.c3[lane] = 0x1.555C20p-3f;
    params->avx2_rr1_lut8_p4.c4[lane] = 0x1.5558ECp-5f;

    // One table entry per index.
    params->avx2_rr1_lut8_p4.table[lane] = table_bits[lane];
  }

  // Seven -1 entries followed by seven 0 entries.
  for (uint32_t k = 0; k != 14; ++k) {
    params->avx2_rr1_lut8_p4.mask_table[k] = (k < 7) ? -1 : 0;
  }

  return sizeof(params->avx2_rr1_lut8_p4);
}
3041*4bdc9457SAndroid Build Coastguard Worker
// Populates params->avx2_rr1_lut4_p4.
//
// prescale, alpha and beta are copied from the arguments; every other field
// receives a fixed constant. Each per-element field is written at indices
// 0..7. table holds a 4-value pattern stored twice (entries 4..7 repeat
// entries 0..3). mask_table gets -1 at indices 0..6 and 0 at indices 7..13.
//
// Returns sizeof(params->avx2_rr1_lut4_p4).
size_t xnn_init_f32_elu_avx2_rr1_lut4_p4_params(
  union xnn_f32_elu_params params[XNN_MIN_ELEMENTS(1)],
  float prescale,
  float alpha,
  float beta)
{
  // The four distinct table values.
  static const float table_quad[4] = {
    0x1.000000p+0f,
    0x1.F06FE0p-1f,
    0x1.EA09E6p-1f,
    0x1.EE89FAp-1f,
  };

  for (uint32_t lane = 0; lane != 8; ++lane) {
    // Caller-supplied values.
    params->avx2_rr1_lut4_p4.beta[lane] = beta;
    params->avx2_rr1_lut4_p4.alpha[lane] = alpha;
    params->avx2_rr1_lut4_p4.prescale[lane] = prescale;

    // Fixed constants.
    params->avx2_rr1_lut4_p4.log2e[lane] = 0x1.715476p+0f;
    params->avx2_rr1_lut4_p4.minus_ln2[lane] = -0x1.62E430p-1f;
    params->avx2_rr1_lut4_p4.magic_bias[lane] = 0x1.800000p21f;
    params->avx2_rr1_lut4_p4.sat_cutoff[lane] = -0x1.154246p+4f;
    params->avx2_rr1_lut4_p4.c2[lane] = 0x1.000002p-1f;
    params->avx2_rr1_lut4_p4.c3[lane] = 0x1.557082p-3f;
    params->avx2_rr1_lut4_p4.c4[lane] = 0x1.554F9Ap-5f;

    // Indices 4..7 wrap back onto the same four values as indices 0..3.
    params->avx2_rr1_lut4_p4.table[lane] = table_quad[lane & 3];
  }

  // Seven -1 entries followed by seven 0 entries.
  for (uint32_t k = 0; k != 14; ++k) {
    params->avx2_rr1_lut4_p4.mask_table[k] = (k < 7) ? -1 : 0;
  }

  return sizeof(params->avx2_rr1_lut4_p4);
}
3078*4bdc9457SAndroid Build Coastguard Worker
// Populates params->avx2_rr1_p6.
//
// prescale, alpha and beta are copied from the arguments; every other field
// receives a fixed constant. Each per-element field is written at indices
// 0..7. mask_table gets -1 at indices 0..6 and 0 at indices 7..13.
//
// Returns sizeof(params->avx2_rr1_p6).
size_t xnn_init_f32_elu_avx2_rr1_p6_params(
  union xnn_f32_elu_params params[XNN_MIN_ELEMENTS(1)],
  float prescale,
  float alpha,
  float beta)
{
  for (uint32_t lane = 0; lane != 8; ++lane) {
    // Caller-supplied values.
    params->avx2_rr1_p6.beta[lane] = beta;
    params->avx2_rr1_p6.alpha[lane] = alpha;
    params->avx2_rr1_p6.prescale[lane] = prescale;

    // Fixed constants.
    params->avx2_rr1_p6.log2e[lane] = 0x1.715476p+0f;
    params->avx2_rr1_p6.minus_ln2[lane] = -0x1.62E430p-1f;
    params->avx2_rr1_p6.magic_bias[lane] = 0x1.8000FEp23f;
    params->avx2_rr1_p6.sat_cutoff[lane] = -0x1.154246p+4f;

    // c2 .. c6, lowest index first.
    params->avx2_rr1_p6.c2[lane] = 0x1.FFFFFEp-2f;
    params->avx2_rr1_p6.c3[lane] = 0x1.5554B0p-3f;
    params->avx2_rr1_p6.c4[lane] = 0x1.555716p-5f;
    params->avx2_rr1_p6.c5[lane] = 0x1.12278Ep-7f;
    params->avx2_rr1_p6.c6[lane] = 0x1.6B7338p-10f;
  }

  // Seven -1 entries followed by seven 0 entries.
  for (uint32_t k = 0; k != 14; ++k) {
    params->avx2_rr1_p6.mask_table[k] = (k < 7) ? -1 : 0;
  }

  return sizeof(params->avx2_rr1_p6);
}
3107*4bdc9457SAndroid Build Coastguard Worker
// Populates params->avx512_rr1_lut16_p3.
//
// prescale, alpha and beta are copied from the arguments. The remaining
// single-value fields receive fixed constants, and the 16-entry table is
// filled with the raw 32-bit patterns listed below.
//
// Returns sizeof(params->avx512_rr1_lut16_p3).
size_t xnn_init_f32_elu_avx512_rr1_lut16_p3_params(
  union xnn_f32_elu_params params[XNN_MIN_ELEMENTS(1)],
  float prescale,
  float alpha,
  float beta)
{
  // The 16 table entries, in index order.
  static const uint32_t table_bits[16] = {
    UINT32_C(0x3F800000), UINT32_C(0x3F7DAAC3),
    UINT32_C(0x3F7B95C2), UINT32_C(0x3F79C3D3),
    UINT32_C(0x3F7837F0), UINT32_C(0x3F76F532),
    UINT32_C(0x3F75FED7), UINT32_C(0x3F75583F),
    UINT32_C(0x3F7504F3), UINT32_C(0x3F7508A4),
    UINT32_C(0x3F75672A), UINT32_C(0x3F76248C),
    UINT32_C(0x3F7744FD), UINT32_C(0x3F78CCDF),
    UINT32_C(0x3F7AC0C7), UINT32_C(0x3F7D257D),
  };

  // Caller-supplied values.
  params->avx512_rr1_lut16_p3.beta = beta;
  params->avx512_rr1_lut16_p3.alpha = alpha;
  params->avx512_rr1_lut16_p3.prescale = prescale;

  // Fixed constants.
  params->avx512_rr1_lut16_p3.log2e = 0x1.715476p+0f;
  params->avx512_rr1_lut16_p3.minus_ln2 = -0x1.62E430p-1f;
  params->avx512_rr1_lut16_p3.magic_bias = 0x1.800000p19f;
  params->avx512_rr1_lut16_p3.sat_cutoff = -0x1.154246p+4f;
  params->avx512_rr1_lut16_p3.c2 = 0x1.0001ECp-1f;
  params->avx512_rr1_lut16_p3.c3 = 0x1.55561Cp-3f;

  for (uint32_t k = 0; k != 16; ++k) {
    params->avx512_rr1_lut16_p3.table[k] = table_bits[k];
  }

  return sizeof(params->avx512_rr1_lut16_p3);
}
3141*4bdc9457SAndroid Build Coastguard Worker
// Populates params->avx512_rr1_p6.
//
// prescale, alpha and beta are copied from the arguments; every other field
// receives a fixed constant. All fields here are single values.
//
// Returns sizeof(params->avx512_rr1_p6).
size_t xnn_init_f32_elu_avx512_rr1_p6_params(
  union xnn_f32_elu_params params[XNN_MIN_ELEMENTS(1)],
  float prescale,
  float alpha,
  float beta)
{
  // Caller-supplied values.
  params->avx512_rr1_p6.beta = beta;
  params->avx512_rr1_p6.alpha = alpha;
  params->avx512_rr1_p6.prescale = prescale;

  // Fixed constants.
  params->avx512_rr1_p6.log2e = 0x1.715476p+0f;
  params->avx512_rr1_p6.minus_ln2 = -0x1.62E430p-1f;
  params->avx512_rr1_p6.magic_bias = 0x1.8000FEp23f;
  params->avx512_rr1_p6.sat_cutoff = -0x1.154246p+4f;

  // c2 .. c6, lowest index first.
  params->avx512_rr1_p6.c2 = 0x1.FFFFFEp-2f;
  params->avx512_rr1_p6.c3 = 0x1.5554B0p-3f;
  params->avx512_rr1_p6.c4 = 0x1.555716p-5f;
  params->avx512_rr1_p6.c5 = 0x1.12278Ep-7f;
  params->avx512_rr1_p6.c6 = 0x1.6B7338p-10f;

  return sizeof(params->avx512_rr1_p6);
}
3162*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3163*4bdc9457SAndroid Build Coastguard Worker
3164*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Populates params->wasmsimd_rr2_lut16_p3.
//
// prescale, alpha and beta are copied from the arguments; every other field
// receives a fixed constant. Each field is written at indices 0 and 1.
//
// Returns sizeof(params->wasmsimd_rr2_lut16_p3).
size_t xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params(
  union xnn_f32_elu_params params[XNN_MIN_ELEMENTS(1)],
  float prescale,
  float alpha,
  float beta)
{
  for (uint32_t slot = 0; slot != 2; ++slot) {
    // Caller-supplied values.
    params->wasmsimd_rr2_lut16_p3.beta[slot] = beta;
    params->wasmsimd_rr2_lut16_p3.alpha[slot] = alpha;
    params->wasmsimd_rr2_lut16_p3.prescale[slot] = prescale;

    // Fixed constants.
    params->wasmsimd_rr2_lut16_p3.one[slot] = 1.0f;
    params->wasmsimd_rr2_lut16_p3.index_mask[slot] = UINT32_C(0xF);
    params->wasmsimd_rr2_lut16_p3.log2e[slot] = 0x1.715476p+0f;
    params->wasmsimd_rr2_lut16_p3.magic_bias[slot] = 0x1.800000p19f;
    params->wasmsimd_rr2_lut16_p3.sat_cutoff[slot] = -0x1.154246p+4f;
    // The hi and lo parts together add up to approximately -ln(2).
    params->wasmsimd_rr2_lut16_p3.minus_ln2_hi[slot] = -0x1.62E400p-1f;
    params->wasmsimd_rr2_lut16_p3.minus_ln2_lo[slot] = -0x1.7F7D1Cp-20f;
    params->wasmsimd_rr2_lut16_p3.c2[slot] = 0x1.0001ECp-1f;
    params->wasmsimd_rr2_lut16_p3.c3[slot] = 0x1.55561Cp-3f;
  }

  return sizeof(params->wasmsimd_rr2_lut16_p3);
}
3187*4bdc9457SAndroid Build Coastguard Worker
// Populates params->wasmsimd_rr2_p6.
//
// prescale, alpha and beta are copied from the arguments; every other field
// receives a fixed constant. Each field is written at indices 0 and 1.
//
// Returns sizeof(params->wasmsimd_rr2_p6).
size_t xnn_init_f32_elu_wasmsimd_rr2_p6_params(
  union xnn_f32_elu_params params[XNN_MIN_ELEMENTS(1)],
  float prescale,
  float alpha,
  float beta)
{
  for (uint32_t slot = 0; slot != 2; ++slot) {
    // Caller-supplied values.
    params->wasmsimd_rr2_p6.beta[slot] = beta;
    params->wasmsimd_rr2_p6.alpha[slot] = alpha;
    params->wasmsimd_rr2_p6.prescale[slot] = prescale;

    // Fixed constants.
    params->wasmsimd_rr2_p6.one[slot] = 1.0f;
    params->wasmsimd_rr2_p6.log2e[slot] = 0x1.715476p+0f;
    params->wasmsimd_rr2_p6.magic_bias[slot] = 0x1.8000FEp23f;
    params->wasmsimd_rr2_p6.sat_cutoff[slot] = -0x1.154246p+4f;
    // The hi and lo parts together add up to approximately -ln(2).
    params->wasmsimd_rr2_p6.minus_ln2_hi[slot] = -0x1.62E440p-1f;
    params->wasmsimd_rr2_p6.minus_ln2_lo[slot] = 0x1.0105C6p-21f;

    // c2 .. c6, lowest index first.
    params->wasmsimd_rr2_p6.c2[slot] = 0x1.FFFFFEp-2f;
    params->wasmsimd_rr2_p6.c3[slot] = 0x1.5554B0p-3f;
    params->wasmsimd_rr2_p6.c4[slot] = 0x1.555716p-5f;
    params->wasmsimd_rr2_p6.c5[slot] = 0x1.12278Ep-7f;
    params->wasmsimd_rr2_p6.c6[slot] = 0x1.6B7338p-10f;
  }

  return sizeof(params->wasmsimd_rr2_p6);
}
3212*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
3213*4bdc9457SAndroid Build Coastguard Worker
3214*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Populates params->neonfp16arith_rr2_p2 with fixed constants.
//
// Every field is stored as a 16-bit integer pattern; the trailing comment on
// each line gives the half-precision value that pattern encodes.
//
// Returns sizeof(params->neonfp16arith_rr2_p2).
size_t xnn_init_f16_expminus_neonfp16arith_rr2_p2_params(
  union xnn_f16_expminus_params params[XNN_MIN_ELEMENTS(1)])
{
  params->neonfp16arith_rr2_p2.log2e = UINT16_C(0x3DC5);  // 0x1.714p+0h
  params->neonfp16arith_rr2_p2.magic_bias = UINT16_C(0x660F);  // 0x1.83Cp+10h

  params->neonfp16arith_rr2_p2.minus_ln2_hi = UINT16_C(0xB98C);  // -0x1.630p-1h
  params->neonfp16arith_rr2_p2.minus_ln2_lo = UINT16_C(0x0AF4);  // 0x1.BD0p-13h

  params->neonfp16arith_rr2_p2.c1 = UINT16_C(0x3C0E);  // 0x1.038p+0h
  params->neonfp16arith_rr2_p2.c2 = UINT16_C(0x37F9);  // 0x1.FE4p-2h

  params->neonfp16arith_rr2_p2.denorm_cutoff = UINT16_C(0xC8DA);  // -0x1.368p+3h

  return sizeof(params->neonfp16arith_rr2_p2);
}
3227*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3228*4bdc9457SAndroid Build Coastguard Worker
3229*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Populates params->avx2_rr1_p2 with fixed constants.
//
// Although this initializes xnn_f16_expminus_params, the constants are
// written as single-precision float literals. Each field is written at
// indices 0..7.
//
// Returns sizeof(params->avx2_rr1_p2).
size_t xnn_init_f16_expminus_avx2_rr1_p2_params(
  union xnn_f16_expminus_params params[XNN_MIN_ELEMENTS(1)])
{
  for (uint32_t lane = 0; lane != 8; ++lane) {
    params->avx2_rr1_p2.log2e[lane] = 0x1.715476p0f;
    params->avx2_rr1_p2.minus_ln2[lane] = -0x1.62E43p-1f;
    params->avx2_rr1_p2.magic_bias[lane] = 0x1.8000FEp23f;
    params->avx2_rr1_p2.denorm_cutoff[lane] = -0x1.368000p+3f;
    params->avx2_rr1_p2.c1[lane] = 0x1.039E10p+0f;
    params->avx2_rr1_p2.c2[lane] = 0x1.FF3A32p-2f;
  }

  return sizeof(params->avx2_rr1_p2);
}
3243*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3244*4bdc9457SAndroid Build Coastguard Worker
// Populates params->scalar_rr2_p5 with fixed single-precision constants.
//
// Returns sizeof(params->scalar_rr2_p5).
size_t xnn_init_f32_expminus_scalar_rr2_p5_params(
  union xnn_f32_expminus_params params[XNN_MIN_ELEMENTS(1)])
{
  params->scalar_rr2_p5.magic_bias = 0x1.8000FEp23f;
  params->scalar_rr2_p5.log2e = 0x1.715476p+0f;
  params->scalar_rr2_p5.denorm_cutoff = -0x1.5D589Ep6f;

  // The hi and lo parts together add up to approximately -ln(2).
  params->scalar_rr2_p5.minus_ln2_hi = -0x1.62E400p-1f;
  params->scalar_rr2_p5.minus_ln2_lo = -0x1.7F7D1Cp-20f;

  // c1 .. c5, lowest index first.
  params->scalar_rr2_p5.c1 = 0x1.FFFFF6p-1f;
  params->scalar_rr2_p5.c2 = 0x1.FFFDC6p-2f;
  params->scalar_rr2_p5.c3 = 0x1.555A80p-3f;
  params->scalar_rr2_p5.c4 = 0x1.573A1Ap-5f;
  params->scalar_rr2_p5.c5 = 0x1.0F9F9Cp-7f;

  return sizeof(params->scalar_rr2_p5);
}
3260*4bdc9457SAndroid Build Coastguard Worker
// Populates params->scalar_rr2_lut64_p2 with fixed single-precision constants.
//
// Returns sizeof(params->scalar_rr2_lut64_p2).
size_t xnn_init_f32_expminus_scalar_rr2_lut64_p2_params(
  union xnn_f32_expminus_params params[XNN_MIN_ELEMENTS(1)])
{
  params->scalar_rr2_lut64_p2.magic_bias = 0x1.800000p17f;
  params->scalar_rr2_lut64_p2.log2e = 0x1.715476p0f;
  params->scalar_rr2_lut64_p2.denorm_cutoff = -0x1.5D589Ep6f;

  // The hi and lo parts together add up to approximately -ln(2).
  params->scalar_rr2_lut64_p2.minus_ln2_hi = -0x1.630000p-1f;
  params->scalar_rr2_lut64_p2.minus_ln2_lo = 0x1.BD0106p-13f;

  params->scalar_rr2_lut64_p2.c2 = 0x1.FFFF0Ap-2f;

  return sizeof(params->scalar_rr2_lut64_p2);
}
3272*4bdc9457SAndroid Build Coastguard Worker
3273*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Populates params->neon_rr2_p5 with fixed single-precision constants.
//
// Returns sizeof(params->neon_rr2_p5).
size_t xnn_init_f32_expminus_neon_rr2_p5_params(
  union xnn_f32_expminus_params params[XNN_MIN_ELEMENTS(1)])
{
  params->neon_rr2_p5.magic_bias = 0x1.8000FEp23f;
  params->neon_rr2_p5.log2e = 0x1.715476p+0f;
  params->neon_rr2_p5.denorm_cutoff = -0x1.5D589Ep6f;

  // The hi and lo parts together add up to approximately -ln(2).
  params->neon_rr2_p5.minus_ln2_hi = -0x1.62E400p-1f;
  params->neon_rr2_p5.minus_ln2_lo = -0x1.7F7D1Cp-20f;

  // c1 .. c5, lowest index first.
  params->neon_rr2_p5.c1 = 0x1.FFFFF6p-1f;
  params->neon_rr2_p5.c2 = 0x1.FFFDC6p-2f;
  params->neon_rr2_p5.c3 = 0x1.555A80p-3f;
  params->neon_rr2_p5.c4 = 0x1.573A1Ap-5f;
  params->neon_rr2_p5.c5 = 0x1.0F9F9Cp-7f;

  return sizeof(params->neon_rr2_p5);
}
3289*4bdc9457SAndroid Build Coastguard Worker
// Populates params->neon_rr2_lut64_p2 with fixed single-precision constants.
//
// Returns sizeof(params->neon_rr2_lut64_p2).
size_t xnn_init_f32_expminus_neon_rr2_lut64_p2_params(
  union xnn_f32_expminus_params params[XNN_MIN_ELEMENTS(1)])
{
  params->neon_rr2_lut64_p2.magic_bias = 0x1.800000p17f;
  params->neon_rr2_lut64_p2.log2e = 0x1.715476p+0f;
  params->neon_rr2_lut64_p2.denorm_cutoff = -0x1.5D589Ep6f;

  // The hi and lo parts together add up to approximately -ln(2).
  params->neon_rr2_lut64_p2.minus_ln2_hi = -0x1.62E400p-1f;
  params->neon_rr2_lut64_p2.minus_ln2_lo = -0x1.7F7D1Cp-20f;

  params->neon_rr2_lut64_p2.c2 = 0x1.FFFF0Ap-2f;

  return sizeof(params->neon_rr2_lut64_p2);
}
3301*4bdc9457SAndroid Build Coastguard Worker
// Populates params->neonfma_rr1_p5 with fixed single-precision constants.
//
// Returns sizeof(params->neonfma_rr1_p5).
size_t xnn_init_f32_expminus_neonfma_rr1_p5_params(
  union xnn_f32_expminus_params params[XNN_MIN_ELEMENTS(1)])
{
  params->neonfma_rr1_p5.magic_bias = 0x1.8000FEp23f;
  params->neonfma_rr1_p5.log2e = 0x1.715476p+0f;
  params->neonfma_rr1_p5.minus_ln2 = -0x1.62E430p-1f;
  params->neonfma_rr1_p5.denorm_cutoff = -0x1.5D589Ep6f;

  // c1 .. c5, lowest index first.
  params->neonfma_rr1_p5.c1 = 0x1.FFFFF6p-1f;
  params->neonfma_rr1_p5.c2 = 0x1.FFFDC6p-2f;
  params->neonfma_rr1_p5.c3 = 0x1.555A80p-3f;
  params->neonfma_rr1_p5.c4 = 0x1.573A1Ap-5f;
  params->neonfma_rr1_p5.c5 = 0x1.0F9F9Cp-7f;

  return sizeof(params->neonfma_rr1_p5);
}
3316*4bdc9457SAndroid Build Coastguard Worker
// Initializes the `neonfma_rr1_lut64_p2` variant of the f32 exp-minus
// parameters with its fixed single-precision constants.
//
// params - destination union (at least one element).
// Returns the size in bytes of the `neonfma_rr1_lut64_p2` variant.
size_t xnn_init_f32_expminus_neonfma_rr1_lut64_p2_params(
  union xnn_f32_expminus_params params[XNN_MIN_ELEMENTS(1)])
{
  // The only coefficient stored for this variant.
  params->neonfma_rr1_lut64_p2.c2 = 0x1.FFFF0Ap-2f;

  // Numerically ~1.442695 (log2 of e) and ~-0.693147 (minus ln 2).
  params->neonfma_rr1_lut64_p2.log2e = 0x1.715476p+0f;
  params->neonfma_rr1_lut64_p2.minus_ln2 = -0x1.62E430p-1f;

  params->neonfma_rr1_lut64_p2.magic_bias = 0x1.800000p17f;
  // Numerically ~-87.34.
  params->neonfma_rr1_lut64_p2.denorm_cutoff = -0x1.5D589Ep6f;

  return sizeof(params->neonfma_rr1_lut64_p2);
}
3327*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3328*4bdc9457SAndroid Build Coastguard Worker
3329*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Initializes the `sse2_rr2_p5` variant of the f32 exp-minus parameters.
//
// Each field is an array; the first 4 elements of every array are filled
// with the same fixed single-precision constant.
//
// params - destination union (at least one element).
// Returns the size in bytes of the `sse2_rr2_p5` variant.
size_t xnn_init_f32_expminus_sse2_rr2_p5_params(
  union xnn_f32_expminus_params params[XNN_MIN_ELEMENTS(1)])
{
  const float log2e = 0x1.715476p+0f;
  const float magic_bias = 0x1.8000FEp23f;
  // -ln(2) is stored as a high part plus a low correction term.
  const float minus_ln2_hi = -0x1.62E400p-1f;
  const float minus_ln2_lo = -0x1.7F7D1Cp-20f;
  const float c5 = 0x1.0F9F9Cp-7f;
  const float c4 = 0x1.573A1Ap-5f;
  const float c3 = 0x1.555A80p-3f;
  const float c2 = 0x1.FFFDC6p-2f;
  const float c1 = 0x1.FFFFF6p-1f;
  const float denorm_cutoff = -0x1.5D589Ep6f;

  for (size_t lane = 0; lane < 4; lane++) {
    params->sse2_rr2_p5.log2e[lane] = log2e;
    params->sse2_rr2_p5.magic_bias[lane] = magic_bias;
    params->sse2_rr2_p5.minus_ln2_hi[lane] = minus_ln2_hi;
    params->sse2_rr2_p5.minus_ln2_lo[lane] = minus_ln2_lo;
    params->sse2_rr2_p5.c5[lane] = c5;
    params->sse2_rr2_p5.c4[lane] = c4;
    params->sse2_rr2_p5.c3[lane] = c3;
    params->sse2_rr2_p5.c2[lane] = c2;
    params->sse2_rr2_p5.c1[lane] = c1;
    params->sse2_rr2_p5.denorm_cutoff[lane] = denorm_cutoff;
  }
  return sizeof(params->sse2_rr2_p5);
}
3347*4bdc9457SAndroid Build Coastguard Worker
// Initializes the `avx2_rr1_p5` variant of the f32 exp-minus parameters.
//
// The first 8 elements of every constant array are filled with the same
// fixed value. `mask_table` receives 7 entries of -1 followed by 7 entries
// of 0 (14 entries total).
//
// params - destination union (at least one element).
// Returns the size in bytes of the `avx2_rr1_p5` variant.
size_t xnn_init_f32_expminus_avx2_rr1_p5_params(
  union xnn_f32_expminus_params params[XNN_MIN_ELEMENTS(1)])
{
  const float log2e = 0x1.715476p+0f;
  const float magic_bias = 0x1.8000FEp23f;
  const float minus_ln2 = -0x1.62E430p-1f;
  const float c5 = 0x1.0F9F9Cp-7f;
  const float c4 = 0x1.573A1Ap-5f;
  const float c3 = 0x1.555A80p-3f;
  const float c2 = 0x1.FFFDC6p-2f;
  const float c1 = 0x1.FFFFF6p-1f;
  const float denorm_cutoff = -0x1.5D589Ep6f;

  for (size_t lane = 0; lane < 8; lane++) {
    params->avx2_rr1_p5.log2e[lane] = log2e;
    params->avx2_rr1_p5.magic_bias[lane] = magic_bias;
    params->avx2_rr1_p5.minus_ln2[lane] = minus_ln2;
    params->avx2_rr1_p5.c5[lane] = c5;
    params->avx2_rr1_p5.c4[lane] = c4;
    params->avx2_rr1_p5.c3[lane] = c3;
    params->avx2_rr1_p5.c2[lane] = c2;
    params->avx2_rr1_p5.c1[lane] = c1;
    params->avx2_rr1_p5.denorm_cutoff[lane] = denorm_cutoff;
  }

  // Entries 0..6 are all-ones (-1), entries 7..13 are zero.
  for (size_t k = 0; k < 14; k++) {
    params->avx2_rr1_p5.mask_table[k] = (k < 7) ? -1 : 0;
  }
  return sizeof(params->avx2_rr1_p5);
}
3370*4bdc9457SAndroid Build Coastguard Worker
// Initializes the `avx512_rr1_p5` variant of the f32 exp-minus parameters.
//
// Unlike the sibling variants in this file, this one stores a `c0`
// coefficient and sets no magic bias or denormal cutoff.
//
// params - destination union (at least one element).
// Returns the size in bytes of the `avx512_rr1_p5` variant.
size_t xnn_init_f32_expminus_avx512_rr1_p5_params(
  union xnn_f32_expminus_params params[XNN_MIN_ELEMENTS(1)])
{
  // Coefficients c0..c5, lowest index first.
  params->avx512_rr1_p5.c0 = 1.0f;
  params->avx512_rr1_p5.c1 = 0x1.FFFFF6p-1f;
  params->avx512_rr1_p5.c2 = 0x1.FFFDC6p-2f;
  params->avx512_rr1_p5.c3 = 0x1.555A80p-3f;
  params->avx512_rr1_p5.c4 = 0x1.573A1Ap-5f;
  params->avx512_rr1_p5.c5 = 0x1.0F9F9Cp-7f;

  // Numerically ~1.442695 (log2 of e) and ~-0.693147 (minus ln 2).
  params->avx512_rr1_p5.log2e = 0x1.715476p+0f;
  params->avx512_rr1_p5.minus_ln2 = -0x1.62E430p-1f;

  return sizeof(params->avx512_rr1_p5);
}
3384*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3385*4bdc9457SAndroid Build Coastguard Worker
3386*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Initializes the `wasmsimd_rr2_p5` variant of the f32 exp-minus parameters.
//
// The first 2 elements of every field array are filled with the same fixed
// single-precision constant.
//
// params - destination union (at least one element).
// Returns the size in bytes of the `wasmsimd_rr2_p5` variant.
size_t xnn_init_f32_expminus_wasmsimd_rr2_p5_params(
  union xnn_f32_expminus_params params[XNN_MIN_ELEMENTS(1)])
{
  const float log2e = 0x1.715476p+0f;
  const float magic_bias = 0x1.8000FEp23f;
  // -ln(2) is stored as a high part plus a low correction term.
  const float minus_ln2_hi = -0x1.62E400p-1f;
  const float minus_ln2_lo = -0x1.7F7D1Cp-20f;
  const float c5 = 0x1.0F9F9Cp-7f;
  const float c4 = 0x1.573A1Ap-5f;
  const float c3 = 0x1.555A80p-3f;
  const float c2 = 0x1.FFFDC6p-2f;
  const float c1 = 0x1.FFFFF6p-1f;
  const float denorm_cutoff = -0x1.5D589Ep6f;

  for (size_t lane = 0; lane < 2; lane++) {
    params->wasmsimd_rr2_p5.log2e[lane] = log2e;
    params->wasmsimd_rr2_p5.magic_bias[lane] = magic_bias;
    params->wasmsimd_rr2_p5.minus_ln2_hi[lane] = minus_ln2_hi;
    params->wasmsimd_rr2_p5.minus_ln2_lo[lane] = minus_ln2_lo;
    params->wasmsimd_rr2_p5.c5[lane] = c5;
    params->wasmsimd_rr2_p5.c4[lane] = c4;
    params->wasmsimd_rr2_p5.c3[lane] = c3;
    params->wasmsimd_rr2_p5.c2[lane] = c2;
    params->wasmsimd_rr2_p5.c1[lane] = c1;
    params->wasmsimd_rr2_p5.denorm_cutoff[lane] = denorm_cutoff;
  }
  return sizeof(params->wasmsimd_rr2_p5);
}
3404*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
3405*4bdc9457SAndroid Build Coastguard Worker
3406*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Initializes the `neon` variant of the f16 leaky-ReLU parameters.
//
// params - destination union (at least one element).
// slope  - slope as a raw 16-bit value; it is stored unchanged.
// Returns the size in bytes of the `neon` variant.
size_t xnn_init_f16_lrelu_neon_params(
  union xnn_f16_lrelu_params params[XNN_MIN_ELEMENTS(1)],
  uint16_t slope)
{
  // No conversion: the 16-bit pattern is copied as-is.
  params->neon.slope = slope;

  return sizeof(params->neon);
}
3414*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3415*4bdc9457SAndroid Build Coastguard Worker
3416*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Initializes the `avx` variant of the f16 leaky-ReLU parameters.
//
// params - destination union (at least one element).
// slope  - slope as a raw IEEE half-precision bit pattern.
// Returns the size in bytes of the `avx` variant.
size_t xnn_init_f16_lrelu_avx_params(
  union xnn_f16_lrelu_params params[XNN_MIN_ELEMENTS(1)],
  uint16_t slope)
{
  // Widen the half-precision slope once, then replicate it across 8 entries.
  const float slope_f32 = fp16_ieee_to_fp32_value(slope);
  for (size_t lane = 0; lane < 8; lane++) {
    params->avx.slope[lane] = slope_f32;
  }
  return sizeof(params->avx);
}
3426*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3427*4bdc9457SAndroid Build Coastguard Worker
// Initializes the `scalar` variant of the f32 leaky-ReLU parameters.
//
// params - destination union (at least one element).
// slope  - slope value; it is stored unchanged.
// Returns the size in bytes of the `scalar` variant.
size_t xnn_init_f32_lrelu_scalar_params(
  union xnn_f32_lrelu_params params[XNN_MIN_ELEMENTS(1)],
  float slope)
{
  // Single scalar field, no replication.
  params->scalar.slope = slope;

  return sizeof(params->scalar);
}
3435*4bdc9457SAndroid Build Coastguard Worker
3436*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Initializes the `sse` variant of the f32 leaky-ReLU parameters.
//
// params - destination union (at least one element).
// slope  - slope value, replicated into the first 4 entries of `sse.slope`.
// Returns the size in bytes of the `sse` variant.
size_t xnn_init_f32_lrelu_sse_params(
  union xnn_f32_lrelu_params params[XNN_MIN_ELEMENTS(1)],
  float slope)
{
  for (size_t lane = 0; lane < 4; lane++) {
    params->sse.slope[lane] = slope;
  }
  return sizeof(params->sse);
}
3446*4bdc9457SAndroid Build Coastguard Worker
// Initializes the `avx` variant of the f32 leaky-ReLU parameters.
//
// params - destination union (at least one element).
// slope  - slope value, replicated into the first 8 entries of `avx.slope`.
//
// `mask_table` receives 7 entries of -1 followed by 7 entries of 0
// (14 entries total).
// Returns the size in bytes of the `avx` variant.
size_t xnn_init_f32_lrelu_avx_params(
  union xnn_f32_lrelu_params params[XNN_MIN_ELEMENTS(1)],
  float slope)
{
  for (size_t lane = 0; lane < 8; lane++) {
    params->avx.slope[lane] = slope;
  }

  // Entries 0..6 are all-ones (-1), entries 7..13 are zero.
  for (size_t k = 0; k < 14; k++) {
    params->avx.mask_table[k] = (k < 7) ? -1 : 0;
  }
  return sizeof(params->avx);
}
3462*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3463*4bdc9457SAndroid Build Coastguard Worker
3464*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Initializes the `wasmsimd` variant of the f32 leaky-ReLU parameters.
//
// params - destination union (at least one element).
// slope  - slope value, written to entries 0 and 1 of `wasmsimd.slope`.
// Returns the size in bytes of the `wasmsimd` variant.
size_t xnn_init_f32_lrelu_wasmsimd_params(
  union xnn_f32_lrelu_params params[XNN_MIN_ELEMENTS(1)],
  float slope)
{
  for (size_t lane = 0; lane < 2; lane++) {
    params->wasmsimd.slope[lane] = slope;
  }
  return sizeof(params->wasmsimd);
}
3473*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
3474*4bdc9457SAndroid Build Coastguard Worker
// Initializes the `scalar_select` variant of the QS8 leaky-ReLU parameters.
//
// Both scales are converted to fixed point with 8 fractional bits via
// lrintf(256 * scale) and stored as 32-bit multipliers.
//
// params            - destination union (at least one element).
// positive_scale    - must satisfy 2^-8 <= positive_scale <= 2^7.
// negative_scale    - must satisfy -0x1.FFFC00p+6 <= negative_scale <= 2^7
//                     and |negative_scale| >= 2^-8.
// input_zero_point  - stored widened to 32 bits.
// output_zero_point - folded into `bias` as output_zero_point * 256 + 0x80.
//
// The preconditions are checked with assert(), so they are only enforced
// when NDEBUG is not defined.
// Returns the size in bytes of the `scalar_select` variant.
size_t xnn_init_qs8_lrelu_scalar_select_params(
  union xnn_qs8_lrelu_params params[XNN_MIN_ELEMENTS(1)],
  float positive_scale,
  float negative_scale,
  int8_t input_zero_point,
  int8_t output_zero_point)
{
  assert(positive_scale >= 0x1.0p-8f);
  assert(positive_scale <= 0x1.0p+7f);
  assert(negative_scale <= 0x1.0p+7f);
  assert(negative_scale >= -0x1.FFFC00p+6f);
  assert(fabsf(negative_scale) >= 0x1.0p-8f);

  const long positive_multiplier = lrintf(256.0f * positive_scale);
  assert(positive_multiplier >= 1L);
  assert(positive_multiplier <= 32768L);
  const long negative_multiplier = lrintf(256.0f * negative_scale);
  assert(negative_multiplier <= 32768L);
  assert(negative_multiplier >= -32767L);
  assert(negative_multiplier != 0L);
  params->scalar_select.input_zero_point = (int32_t) input_zero_point;
  params->scalar_select.positive_multiplier = (int32_t) positive_multiplier;
  params->scalar_select.negative_multiplier = (int32_t) negative_multiplier;
  // output_zero_point may be negative, and left-shifting a negative signed
  // value is undefined behavior in C; multiply by 256 instead of `<< 8`.
  params->scalar_select.bias = (int32_t) output_zero_point * INT32_C(256) + INT32_C(0x80);
  return sizeof(params->scalar_select);
}
3501*4bdc9457SAndroid Build Coastguard Worker
// Initializes the `scalar_andxor` variant of the QS8 leaky-ReLU parameters.
//
// Both scales are converted to fixed point with 8 fractional bits via
// lrintf(256 * scale). The positive multiplier is stored as
// `multiplier_base`; `multiplier_diff` holds the XOR of the negative and
// positive multipliers, so base ^ diff yields the negative multiplier.
//
// params            - destination union (at least one element).
// positive_scale    - must satisfy 2^-8 <= positive_scale <= 2^7.
// negative_scale    - must satisfy -0x1.FFFC00p+6 <= negative_scale <= 2^7
//                     and |negative_scale| >= 2^-8.
// input_zero_point  - stored widened to 32 bits.
// output_zero_point - folded into `bias` as output_zero_point * 256 + 0x80.
//
// The preconditions are checked with assert(), so they are only enforced
// when NDEBUG is not defined.
// Returns the size in bytes of the `scalar_andxor` variant.
size_t xnn_init_qs8_lrelu_scalar_andxor_params(
  union xnn_qs8_lrelu_params params[XNN_MIN_ELEMENTS(1)],
  float positive_scale,
  float negative_scale,
  int8_t input_zero_point,
  int8_t output_zero_point)
{
  assert(positive_scale >= 0x1.0p-8f);
  assert(positive_scale <= 0x1.0p+7f);
  assert(negative_scale <= 0x1.0p+7f);
  assert(negative_scale >= -0x1.FFFC00p+6f);
  assert(fabsf(negative_scale) >= 0x1.0p-8f);

  const long positive_multiplier = lrintf(256.0f * positive_scale);
  assert(positive_multiplier >= 1L);
  assert(positive_multiplier <= 32768L);
  const long negative_multiplier = lrintf(256.0f * negative_scale);
  assert(negative_multiplier <= 32768L);
  assert(negative_multiplier >= -32767L);
  assert(negative_multiplier != 0L);
  params->scalar_andxor.input_zero_point = (int32_t) input_zero_point;
  params->scalar_andxor.multiplier_base = (int32_t) positive_multiplier;
  params->scalar_andxor.multiplier_diff = (int32_t) negative_multiplier ^ (int32_t) positive_multiplier;
  // output_zero_point may be negative, and left-shifting a negative signed
  // value is undefined behavior in C; multiply by 256 instead of `<< 8`.
  params->scalar_andxor.bias = (int32_t) output_zero_point * INT32_C(256) + INT32_C(0x80);
  return sizeof(params->scalar_andxor);
}
3528*4bdc9457SAndroid Build Coastguard Worker
3529*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM
// Initializes the `armsimd32` variant of the QS8 leaky-ReLU parameters.
//
// Both scales are converted to negated fixed point with 8 fractional bits
// via lrintf(-256 * scale). The input zero point and both multipliers are
// narrowed to 16 bits and replicated into both halves of a 32-bit word
// (multiplication by 0x00010001).
//
// params            - destination union (at least one element).
// positive_scale    - must satisfy 2^-8 <= positive_scale <= 2^7.
// negative_scale    - must satisfy -0x1.FFFC00p+6 <= negative_scale <= 2^7
//                     and |negative_scale| >= 2^-8.
// input_zero_point  - replicated as described above.
// output_zero_point - folded into `bias` as output_zero_point * 256 + 0x80.
//
// The preconditions are checked with assert(), so they are only enforced
// when NDEBUG is not defined.
// Returns the size in bytes of the `armsimd32` variant.
size_t xnn_init_qs8_lrelu_armsimd32_params(
  union xnn_qs8_lrelu_params params[XNN_MIN_ELEMENTS(1)],
  float positive_scale,
  float negative_scale,
  int8_t input_zero_point,
  int8_t output_zero_point)
{
  assert(positive_scale >= 0x1.0p-8f);
  assert(positive_scale <= 0x1.0p+7f);
  assert(negative_scale <= 0x1.0p+7f);
  assert(negative_scale >= -0x1.FFFC00p+6f);
  assert(fabsf(negative_scale) >= 0x1.0p-8f);

  const long positive_multiplier = lrintf(-256.0f * positive_scale);
  assert(positive_multiplier <= -1L);
  assert(positive_multiplier >= -32768L);
  const long negative_multiplier = lrintf(-256.0f * negative_scale);
  assert(negative_multiplier >= -32768L);
  assert(negative_multiplier <= 32767L);
  assert(negative_multiplier != 0L);
  params->armsimd32.input_zero_point = (uint32_t) (uint16_t) (int16_t) input_zero_point * UINT32_C(0x00010001);
  params->armsimd32.positive_multiplier = (uint32_t) (uint16_t) (int16_t) positive_multiplier * UINT32_C(0x00010001);
  params->armsimd32.negative_multiplier = (uint32_t) (uint16_t) (int16_t) negative_multiplier * UINT32_C(0x00010001);
  // output_zero_point may be negative, and left-shifting a negative signed
  // value is undefined behavior in C; multiply by 256 instead of `<< 8`.
  params->armsimd32.bias = (int32_t) output_zero_point * INT32_C(256) + INT32_C(0x80);
  return sizeof(params->armsimd32);
}
3556*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_ARM
3557*4bdc9457SAndroid Build Coastguard Worker
3558*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Initializes the `neon` variant of the QS8 leaky-ReLU parameters.
//
// Both scales are converted to negated fixed point with 8 fractional bits
// via lrintf(-256 * scale) and stored as 16-bit multipliers. Both zero
// points are stored widened to 16 bits.
//
// params            - destination union (at least one element).
// positive_scale    - must satisfy 2^-8 <= positive_scale <= 2^7.
// negative_scale    - must satisfy -0x1.FFFC00p+6 <= negative_scale <= 2^7
//                     and |negative_scale| >= 2^-8.
//
// The preconditions are checked with assert(), so they are only enforced
// when NDEBUG is not defined.
// Returns the size in bytes of the `neon` variant.
size_t xnn_init_qs8_lrelu_neon_params(
  union xnn_qs8_lrelu_params params[XNN_MIN_ELEMENTS(1)],
  float positive_scale,
  float negative_scale,
  int8_t input_zero_point,
  int8_t output_zero_point)
{
  // Validate the scale ranges.
  assert(positive_scale >= 0x1.0p-8f);
  assert(positive_scale <= 0x1.0p+7f);
  assert(negative_scale <= 0x1.0p+7f);
  assert(negative_scale >= -0x1.FFFC00p+6f);
  assert(fabsf(negative_scale) >= 0x1.0p-8f);

  // Negated 8.8 fixed-point multipliers; each must fit in int16_t.
  const long positive_multiplier = lrintf(-256.0f * positive_scale);
  assert(positive_multiplier <= -1L);
  assert(positive_multiplier >= -32768L);
  const long negative_multiplier = lrintf(-256.0f * negative_scale);
  assert(negative_multiplier >= -32768L);
  assert(negative_multiplier <= 32767L);
  assert(negative_multiplier != 0L);

  // Zero points first, then multipliers.
  params->neon.input_zero_point = (int16_t) input_zero_point;
  params->neon.output_zero_point = (int16_t) output_zero_point;
  params->neon.positive_multiplier = (int16_t) positive_multiplier;
  params->neon.negative_multiplier = (int16_t) negative_multiplier;
  return sizeof(params->neon);
}
3585*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3586*4bdc9457SAndroid Build Coastguard Worker
3587*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Initializes the `sse2` variant of the QS8 leaky-ReLU parameters.
//
// Both scales are converted to negated fixed point with 8 fractional bits
// via lrintf(-256 * scale). The negative multiplier is stored as
// `multiplier_base`; `multiplier_diff` holds the XOR of the positive and
// negative multipliers, so base ^ diff yields the positive multiplier.
// Every value is replicated into the first 8 entries of its array.
//
// params            - destination union (at least one element).
// positive_scale    - must satisfy 2^-8 <= positive_scale <= 2^7.
// negative_scale    - must satisfy -0x1.FFFC00p+6 <= negative_scale <= 2^7
//                     and |negative_scale| >= 2^-8.
//
// The preconditions are checked with assert(), so they are only enforced
// when NDEBUG is not defined.
// Returns the size in bytes of the `sse2` variant.
size_t xnn_init_qs8_lrelu_sse2_params(
  union xnn_qs8_lrelu_params params[XNN_MIN_ELEMENTS(1)],
  float positive_scale,
  float negative_scale,
  int8_t input_zero_point,
  int8_t output_zero_point)
{
  // Validate the scale ranges.
  assert(positive_scale >= 0x1.0p-8f);
  assert(positive_scale <= 0x1.0p+7f);
  assert(negative_scale <= 0x1.0p+7f);
  assert(negative_scale >= -0x1.FFFC00p+6f);
  assert(fabsf(negative_scale) >= 0x1.0p-8f);

  // Negated 8.8 fixed-point multipliers; each must fit in int16_t.
  const long positive_multiplier = lrintf(-256.0f * positive_scale);
  assert(positive_multiplier <= -1L);
  assert(positive_multiplier >= -32768L);
  const long negative_multiplier = lrintf(-256.0f * negative_scale);
  assert(negative_multiplier >= -32768L);
  assert(negative_multiplier <= 32767L);
  assert(negative_multiplier != 0L);

  // Compute each replicated value once.
  const int16_t multiplier_base = (int16_t) negative_multiplier;
  const int16_t multiplier_diff = (int16_t) positive_multiplier ^ (int16_t) negative_multiplier;
  const int16_t input_zp = (int16_t) input_zero_point;
  const int16_t output_zp = (int16_t) output_zero_point;
  for (size_t lane = 0; lane < 8; lane++) {
    params->sse2.input_zero_point[lane] = input_zp;
    params->sse2.multiplier_diff[lane] = multiplier_diff;
    params->sse2.multiplier_base[lane] = multiplier_base;
    params->sse2.output_zero_point[lane] = output_zp;
  }
  return sizeof(params->sse2);
}
3618*4bdc9457SAndroid Build Coastguard Worker
// Initializes the `avx` variant of the QS8 leaky-ReLU parameters.
//
// Both scales are converted to negated fixed point with 8 fractional bits
// via lrintf(-256 * scale). The multipliers and both zero points are stored
// as 16-bit values, each replicated into the first 8 entries of its array.
//
// params            - destination union (at least one element).
// positive_scale    - must satisfy 2^-8 <= positive_scale <= 2^7.
// negative_scale    - must satisfy -0x1.FFFC00p+6 <= negative_scale <= 2^7
//                     and |negative_scale| >= 2^-8.
//
// The preconditions are checked with assert(), so they are only enforced
// when NDEBUG is not defined.
// Returns the size in bytes of the `avx` variant.
size_t xnn_init_qs8_lrelu_avx_params(
  union xnn_qs8_lrelu_params params[XNN_MIN_ELEMENTS(1)],
  float positive_scale,
  float negative_scale,
  int8_t input_zero_point,
  int8_t output_zero_point)
{
  // Validate the scale ranges.
  assert(positive_scale >= 0x1.0p-8f);
  assert(positive_scale <= 0x1.0p+7f);
  assert(negative_scale <= 0x1.0p+7f);
  assert(negative_scale >= -0x1.FFFC00p+6f);
  assert(fabsf(negative_scale) >= 0x1.0p-8f);

  // Negated 8.8 fixed-point multipliers; each must fit in int16_t.
  const long positive_multiplier = lrintf(-256.0f * positive_scale);
  assert(positive_multiplier <= -1L);
  assert(positive_multiplier >= -32768L);
  const long negative_multiplier = lrintf(-256.0f * negative_scale);
  assert(negative_multiplier >= -32768L);
  assert(negative_multiplier <= 32767L);
  assert(negative_multiplier != 0L);

  // Narrow once, then replicate.
  const int16_t input_zp = (int16_t) input_zero_point;
  const int16_t output_zp = (int16_t) output_zero_point;
  const int16_t positive_mult16 = (int16_t) positive_multiplier;
  const int16_t negative_mult16 = (int16_t) negative_multiplier;
  for (size_t lane = 0; lane < 8; lane++) {
    params->avx.input_zero_point[lane] = input_zp;
    params->avx.positive_multiplier[lane] = positive_mult16;
    params->avx.negative_multiplier[lane] = negative_mult16;
    params->avx.output_zero_point[lane] = output_zp;
  }
  return sizeof(params->avx);
}
3647*4bdc9457SAndroid Build Coastguard Worker
// Initializes the `avx2` variant of the QS8 leaky-ReLU parameters.
//
// Both scales are converted to negated fixed point with 8 fractional bits
// via lrintf(-256 * scale). The multipliers and both zero points are stored
// as 16-bit values, each replicated into the first 16 entries of its array.
//
// params            - destination union (at least one element).
// positive_scale    - must satisfy 2^-8 <= positive_scale <= 2^7.
// negative_scale    - must satisfy -0x1.FFFC00p+6 <= negative_scale <= 2^7
//                     and |negative_scale| >= 2^-8.
//
// The preconditions are checked with assert(), so they are only enforced
// when NDEBUG is not defined.
// Returns the size in bytes of the `avx2` variant.
size_t xnn_init_qs8_lrelu_avx2_params(
  union xnn_qs8_lrelu_params params[XNN_MIN_ELEMENTS(1)],
  float positive_scale,
  float negative_scale,
  int8_t input_zero_point,
  int8_t output_zero_point)
{
  // Validate the scale ranges.
  assert(positive_scale >= 0x1.0p-8f);
  assert(positive_scale <= 0x1.0p+7f);
  assert(negative_scale <= 0x1.0p+7f);
  assert(negative_scale >= -0x1.FFFC00p+6f);
  assert(fabsf(negative_scale) >= 0x1.0p-8f);

  // Negated 8.8 fixed-point multipliers; each must fit in int16_t.
  const long positive_multiplier = lrintf(-256.0f * positive_scale);
  assert(positive_multiplier <= -1L);
  assert(positive_multiplier >= -32768L);
  const long negative_multiplier = lrintf(-256.0f * negative_scale);
  assert(negative_multiplier >= -32768L);
  assert(negative_multiplier <= 32767L);
  assert(negative_multiplier != 0L);

  // Narrow once, then replicate.
  const int16_t input_zp = (int16_t) input_zero_point;
  const int16_t output_zp = (int16_t) output_zero_point;
  const int16_t positive_mult16 = (int16_t) positive_multiplier;
  const int16_t negative_mult16 = (int16_t) negative_multiplier;
  for (size_t lane = 0; lane < 16; lane++) {
    params->avx2.input_zero_point[lane] = input_zp;
    params->avx2.positive_multiplier[lane] = positive_mult16;
    params->avx2.negative_multiplier[lane] = negative_mult16;
    params->avx2.output_zero_point[lane] = output_zp;
  }
  return sizeof(params->avx2);
}
3676*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3677*4bdc9457SAndroid Build Coastguard Worker
3678*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Fills params->wasmsimd_arm for the QS8 LReLU operator.
//
// Each scale is multiplied by -256 and rounded with lrintf() to produce a
// multiplier that must fit into int16_t. The zero points and both multipliers
// are then written into elements 0..3 of the corresponding arrays.
//
// Returns sizeof(params->wasmsimd_arm).
size_t xnn_init_qs8_lrelu_wasmsimd_arm_params(
  union xnn_qs8_lrelu_params params[XNN_MIN_ELEMENTS(1)],
  float positive_scale,
  float negative_scale,
  int8_t input_zero_point,
  int8_t output_zero_point)
{
  // Supported scale range: 2**-8 <= positive_scale <= 2**7, and
  // -0x1.FFFCp+6 <= negative_scale <= 2**7 with |negative_scale| >= 2**-8.
  assert(positive_scale >= 0x1.0p-8f);
  assert(positive_scale <= 0x1.0p+7f);
  assert(negative_scale <= 0x1.0p+7f);
  assert(negative_scale >= -0x1.FFFC00p+6f);
  assert(fabsf(negative_scale) >= 0x1.0p-8f);

  // The positive multiplier is always strictly negative after the -256 factor.
  const long positive_multiplier = lrintf(-256.0f * positive_scale);
  assert(positive_multiplier <= -1L);
  assert(positive_multiplier >= -32768L);

  // The negative multiplier may have either sign, but must not round to zero.
  const long negative_multiplier = lrintf(-256.0f * negative_scale);
  assert(negative_multiplier >= -32768L);
  assert(negative_multiplier <= 32767L);
  assert(negative_multiplier != 0L);

  // Narrow everything once, then replicate across the 4 elements.
  const int16_t input_zero_point_lane = (int16_t) input_zero_point;
  const int16_t output_zero_point_lane = (int16_t) output_zero_point;
  const int16_t positive_multiplier_lane = (int16_t) positive_multiplier;
  const int16_t negative_multiplier_lane = (int16_t) negative_multiplier;
  for (size_t lane = 0; lane < 4; lane++) {
    params->wasmsimd_arm.input_zero_point[lane] = input_zero_point_lane;
    params->wasmsimd_arm.positive_multiplier[lane] = positive_multiplier_lane;
    params->wasmsimd_arm.negative_multiplier[lane] = negative_multiplier_lane;
    params->wasmsimd_arm.output_zero_point[lane] = output_zero_point_lane;
  }
  return sizeof(params->wasmsimd_arm);
}
3707*4bdc9457SAndroid Build Coastguard Worker
// Fills params->wasmsimd_x86 for the QS8 LReLU operator.
//
// Each scale is multiplied by -256 and rounded with lrintf() to produce a
// multiplier that must fit into int16_t. Instead of storing both multipliers,
// this variant stores the negative multiplier as multiplier_base and the
// bitwise XOR of the two multipliers as multiplier_diff. Elements 0..3 of
// every array are written.
//
// Returns sizeof(params->wasmsimd_x86).
size_t xnn_init_qs8_lrelu_wasmsimd_x86_params(
  union xnn_qs8_lrelu_params params[XNN_MIN_ELEMENTS(1)],
  float positive_scale,
  float negative_scale,
  int8_t input_zero_point,
  int8_t output_zero_point)
{
  // Supported scale range: 2**-8 <= positive_scale <= 2**7, and
  // -0x1.FFFCp+6 <= negative_scale <= 2**7 with |negative_scale| >= 2**-8.
  assert(positive_scale >= 0x1.0p-8f);
  assert(positive_scale <= 0x1.0p+7f);
  assert(negative_scale <= 0x1.0p+7f);
  assert(negative_scale >= -0x1.FFFC00p+6f);
  assert(fabsf(negative_scale) >= 0x1.0p-8f);

  const long positive_multiplier = lrintf(-256.0f * positive_scale);
  assert(positive_multiplier <= -1L);
  assert(positive_multiplier >= -32768L);

  const long negative_multiplier = lrintf(-256.0f * negative_scale);
  assert(negative_multiplier >= -32768L);
  assert(negative_multiplier <= 32767L);
  assert(negative_multiplier != 0L);

  // base ^ diff recovers the positive multiplier; base alone is the negative one.
  const int16_t base = (int16_t) negative_multiplier;
  const int16_t diff = (int16_t) positive_multiplier ^ base;
  const int16_t input_zero_point_lane = (int16_t) input_zero_point;
  const int16_t output_zero_point_lane = (int16_t) output_zero_point;
  for (size_t lane = 0; lane < 4; lane++) {
    params->wasmsimd_x86.input_zero_point[lane] = input_zero_point_lane;
    params->wasmsimd_x86.multiplier_diff[lane] = diff;
    params->wasmsimd_x86.multiplier_base[lane] = base;
    params->wasmsimd_x86.output_zero_point[lane] = output_zero_point_lane;
  }
  return sizeof(params->wasmsimd_x86);
}
3738*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
3739*4bdc9457SAndroid Build Coastguard Worker
// Fills params->scalar_select for the QU8 LReLU operator.
//
// Each scale is multiplied by 256 and rounded with lrintf() to produce the
// corresponding multiplier. The bias is the output zero point shifted left by
// 8 bits plus 0x80.
//
// Returns sizeof(params->scalar_select).
size_t xnn_init_qu8_lrelu_scalar_select_params(
  union xnn_qu8_lrelu_params params[XNN_MIN_ELEMENTS(1)],
  float positive_scale,
  float negative_scale,
  uint8_t input_zero_point,
  uint8_t output_zero_point)
{
  // Supported scale range: 2**-8 <= positive_scale <= 2**7, and
  // -0x1.FFFCp+6 <= negative_scale <= 2**7 with |negative_scale| >= 2**-8.
  assert(positive_scale >= 0x1.0p-8f);
  assert(positive_scale <= 0x1.0p+7f);
  assert(negative_scale <= 0x1.0p+7f);
  assert(negative_scale >= -0x1.FFFC00p+6f);
  assert(fabsf(negative_scale) >= 0x1.0p-8f);

  // Unlike the SIMD variants, the scalar multipliers keep the sign of the scale.
  const long positive_multiplier = lrintf(256.0f * positive_scale);
  assert(positive_multiplier >= 1L);
  assert(positive_multiplier <= 32768L);

  const long negative_multiplier = lrintf(256.0f * negative_scale);
  assert(negative_multiplier <= 32768L);
  assert(negative_multiplier >= -32767L);
  assert(negative_multiplier != 0L);

  const int32_t shifted_output_zero_point = (int32_t) output_zero_point << 8;
  params->scalar_select.bias = shifted_output_zero_point + INT32_C(0x80);
  params->scalar_select.negative_multiplier = (int32_t) negative_multiplier;
  params->scalar_select.positive_multiplier = (int32_t) positive_multiplier;
  params->scalar_select.input_zero_point = (int32_t) input_zero_point;
  return sizeof(params->scalar_select);
}
3766*4bdc9457SAndroid Build Coastguard Worker
// Fills params->scalar_andxor for the QU8 LReLU operator.
//
// Each scale is multiplied by 256 and rounded with lrintf() to produce a
// multiplier. The positive multiplier is stored as multiplier_base and the
// bitwise XOR of the two multipliers as multiplier_diff. The bias is the
// output zero point shifted left by 8 bits plus 0x80.
//
// Returns sizeof(params->scalar_andxor).
size_t xnn_init_qu8_lrelu_scalar_andxor_params(
  union xnn_qu8_lrelu_params params[XNN_MIN_ELEMENTS(1)],
  float positive_scale,
  float negative_scale,
  uint8_t input_zero_point,
  uint8_t output_zero_point)
{
  // Supported scale range: 2**-8 <= positive_scale <= 2**7, and
  // -0x1.FFFCp+6 <= negative_scale <= 2**7 with |negative_scale| >= 2**-8.
  assert(positive_scale >= 0x1.0p-8f);
  assert(positive_scale <= 0x1.0p+7f);
  assert(negative_scale <= 0x1.0p+7f);
  assert(negative_scale >= -0x1.FFFC00p+6f);
  assert(fabsf(negative_scale) >= 0x1.0p-8f);

  const long positive_multiplier = lrintf(256.0f * positive_scale);
  assert(positive_multiplier >= 1L);
  assert(positive_multiplier <= 32768L);

  const long negative_multiplier = lrintf(256.0f * negative_scale);
  assert(negative_multiplier <= 32768L);
  assert(negative_multiplier >= -32767L);
  assert(negative_multiplier != 0L);

  // base ^ diff recovers the negative multiplier; base alone is the positive one.
  const int32_t base = (int32_t) positive_multiplier;
  const int32_t diff = (int32_t) negative_multiplier ^ base;
  const int32_t shifted_output_zero_point = (int32_t) output_zero_point << 8;

  params->scalar_andxor.bias = shifted_output_zero_point + INT32_C(0x80);
  params->scalar_andxor.multiplier_diff = diff;
  params->scalar_andxor.multiplier_base = base;
  params->scalar_andxor.input_zero_point = (int32_t) input_zero_point;
  return sizeof(params->scalar_andxor);
}
3793*4bdc9457SAndroid Build Coastguard Worker
3794*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM
// Fills params->armsimd32 for the QU8 LReLU operator.
//
// Each scale is multiplied by -256 and rounded with lrintf() to produce a
// multiplier that must fit into int16_t. The input zero point and the 16-bit
// pattern of each multiplier are multiplied by 0x00010001, which places a copy
// of the value in both 16-bit halves of the resulting 32-bit word. The bias is
// the output zero point shifted left by 8 bits plus 0x80.
//
// Returns sizeof(params->armsimd32).
size_t xnn_init_qu8_lrelu_armsimd32_params(
  union xnn_qu8_lrelu_params params[XNN_MIN_ELEMENTS(1)],
  float positive_scale,
  float negative_scale,
  uint8_t input_zero_point,
  uint8_t output_zero_point)
{
  // Supported scale range: 2**-8 <= positive_scale <= 2**7, and
  // -0x1.FFFCp+6 <= negative_scale <= 2**7 with |negative_scale| >= 2**-8.
  assert(positive_scale >= 0x1.0p-8f);
  assert(positive_scale <= 0x1.0p+7f);
  assert(negative_scale <= 0x1.0p+7f);
  assert(negative_scale >= -0x1.FFFC00p+6f);
  assert(fabsf(negative_scale) >= 0x1.0p-8f);

  const long positive_multiplier = lrintf(-256.0f * positive_scale);
  assert(positive_multiplier <= -1L);
  assert(positive_multiplier >= -32768L);

  const long negative_multiplier = lrintf(-256.0f * negative_scale);
  assert(negative_multiplier >= -32768L);
  assert(negative_multiplier <= 32767L);
  assert(negative_multiplier != 0L);

  // Multiplying a value below 2**16 by this constant copies it into both halfwords.
  const uint32_t replicate_halfword = UINT32_C(0x00010001);
  // Go through int16_t and uint16_t so that only the low 16 bits are retained.
  const uint32_t positive_bits = (uint32_t) (uint16_t) (int16_t) positive_multiplier;
  const uint32_t negative_bits = (uint32_t) (uint16_t) (int16_t) negative_multiplier;
  const int32_t shifted_output_zero_point = (int32_t) output_zero_point << 8;

  params->armsimd32.input_zero_point = (uint32_t) input_zero_point * replicate_halfword;
  params->armsimd32.positive_multiplier = positive_bits * replicate_halfword;
  params->armsimd32.negative_multiplier = negative_bits * replicate_halfword;
  params->armsimd32.bias = shifted_output_zero_point + INT32_C(0x80);
  return sizeof(params->armsimd32);
}
3821*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_ARM
3822*4bdc9457SAndroid Build Coastguard Worker
3823*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Fills params->neon for the QU8 LReLU operator.
//
// Each scale is multiplied by -256 and rounded with lrintf() to produce a
// multiplier that must fit into int16_t. All four fields are stored as single
// values (no per-element replication happens here).
//
// Returns sizeof(params->neon).
size_t xnn_init_qu8_lrelu_neon_params(
  union xnn_qu8_lrelu_params params[XNN_MIN_ELEMENTS(1)],
  float positive_scale,
  float negative_scale,
  uint8_t input_zero_point,
  uint8_t output_zero_point)
{
  // Supported scale range: 2**-8 <= positive_scale <= 2**7, and
  // -0x1.FFFCp+6 <= negative_scale <= 2**7 with |negative_scale| >= 2**-8.
  assert(positive_scale >= 0x1.0p-8f);
  assert(positive_scale <= 0x1.0p+7f);
  assert(negative_scale <= 0x1.0p+7f);
  assert(negative_scale >= -0x1.FFFC00p+6f);
  assert(fabsf(negative_scale) >= 0x1.0p-8f);

  const long positive_multiplier = lrintf(-256.0f * positive_scale);
  assert(positive_multiplier <= -1L);
  assert(positive_multiplier >= -32768L);

  const long negative_multiplier = lrintf(-256.0f * negative_scale);
  assert(negative_multiplier >= -32768L);
  assert(negative_multiplier <= 32767L);
  assert(negative_multiplier != 0L);

  // Zero points first, then the two multipliers.
  params->neon.input_zero_point = (uint16_t) input_zero_point;
  params->neon.output_zero_point = (int16_t) output_zero_point;
  params->neon.negative_multiplier = (int16_t) negative_multiplier;
  params->neon.positive_multiplier = (int16_t) positive_multiplier;
  return sizeof(params->neon);
}
3850*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3851*4bdc9457SAndroid Build Coastguard Worker
3852*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Fills params->sse2 for the QU8 LReLU operator.
//
// Each scale is multiplied by -256 and rounded with lrintf() to produce a
// multiplier that must fit into int16_t. The negative multiplier is stored as
// multiplier_base and the bitwise XOR of the two multipliers as
// multiplier_diff. Elements 0..7 of every array are written.
//
// Returns sizeof(params->sse2).
size_t xnn_init_qu8_lrelu_sse2_params(
  union xnn_qu8_lrelu_params params[XNN_MIN_ELEMENTS(1)],
  float positive_scale,
  float negative_scale,
  uint8_t input_zero_point,
  uint8_t output_zero_point)
{
  // Supported scale range: 2**-8 <= positive_scale <= 2**7, and
  // -0x1.FFFCp+6 <= negative_scale <= 2**7 with |negative_scale| >= 2**-8.
  assert(positive_scale >= 0x1.0p-8f);
  assert(positive_scale <= 0x1.0p+7f);
  assert(negative_scale <= 0x1.0p+7f);
  assert(negative_scale >= -0x1.FFFC00p+6f);
  assert(fabsf(negative_scale) >= 0x1.0p-8f);

  const long positive_multiplier = lrintf(-256.0f * positive_scale);
  assert(positive_multiplier <= -1L);
  assert(positive_multiplier >= -32768L);

  const long negative_multiplier = lrintf(-256.0f * negative_scale);
  assert(negative_multiplier >= -32768L);
  assert(negative_multiplier <= 32767L);
  assert(negative_multiplier != 0L);

  // base ^ diff recovers the positive multiplier; base alone is the negative one.
  const int16_t base = (int16_t) negative_multiplier;
  const int16_t diff = (int16_t) positive_multiplier ^ base;
  // The unsigned zero points are widened through uint16_t before being stored
  // with an int16_t cast.
  const int16_t input_zero_point_lane = (int16_t) (uint16_t) input_zero_point;
  const int16_t output_zero_point_lane = (int16_t) (uint16_t) output_zero_point;
  for (size_t lane = 0; lane < 8; lane++) {
    params->sse2.input_zero_point[lane] = input_zero_point_lane;
    params->sse2.multiplier_diff[lane] = diff;
    params->sse2.multiplier_base[lane] = base;
    params->sse2.output_zero_point[lane] = output_zero_point_lane;
  }
  return sizeof(params->sse2);
}
3883*4bdc9457SAndroid Build Coastguard Worker
// Fills params->avx for the QU8 LReLU operator.
//
// Each scale is multiplied by -256 and rounded with lrintf() to produce a
// multiplier that must fit into int16_t. The zero points and both multipliers
// are written into elements 0..7 of the corresponding arrays.
//
// Returns sizeof(params->avx).
size_t xnn_init_qu8_lrelu_avx_params(
  union xnn_qu8_lrelu_params params[XNN_MIN_ELEMENTS(1)],
  float positive_scale,
  float negative_scale,
  uint8_t input_zero_point,
  uint8_t output_zero_point)
{
  // Supported scale range: 2**-8 <= positive_scale <= 2**7, and
  // -0x1.FFFCp+6 <= negative_scale <= 2**7 with |negative_scale| >= 2**-8.
  assert(positive_scale >= 0x1.0p-8f);
  assert(positive_scale <= 0x1.0p+7f);
  assert(negative_scale <= 0x1.0p+7f);
  assert(negative_scale >= -0x1.FFFC00p+6f);
  assert(fabsf(negative_scale) >= 0x1.0p-8f);

  const long positive_multiplier = lrintf(-256.0f * positive_scale);
  assert(positive_multiplier <= -1L);
  assert(positive_multiplier >= -32768L);

  const long negative_multiplier = lrintf(-256.0f * negative_scale);
  assert(negative_multiplier >= -32768L);
  assert(negative_multiplier <= 32767L);
  assert(negative_multiplier != 0L);

  // Narrow everything once, then replicate across the 8 elements.
  const int16_t input_zero_point_lane = (int16_t) (uint16_t) input_zero_point;
  const int16_t output_zero_point_lane = (int16_t) (uint16_t) output_zero_point;
  const int16_t positive_multiplier_lane = (int16_t) positive_multiplier;
  const int16_t negative_multiplier_lane = (int16_t) negative_multiplier;
  for (size_t lane = 0; lane < 8; lane++) {
    params->avx.input_zero_point[lane] = input_zero_point_lane;
    params->avx.positive_multiplier[lane] = positive_multiplier_lane;
    params->avx.negative_multiplier[lane] = negative_multiplier_lane;
    params->avx.output_zero_point[lane] = output_zero_point_lane;
  }
  return sizeof(params->avx);
}
3912*4bdc9457SAndroid Build Coastguard Worker
// Fills params->avx2 for the QU8 LReLU operator.
//
// Each scale is multiplied by -256 and rounded with lrintf() to produce a
// multiplier that must fit into int16_t. The zero points and both multipliers
// are written into elements 0..15 of the corresponding arrays.
//
// Returns sizeof(params->avx2).
size_t xnn_init_qu8_lrelu_avx2_params(
  union xnn_qu8_lrelu_params params[XNN_MIN_ELEMENTS(1)],
  float positive_scale,
  float negative_scale,
  uint8_t input_zero_point,
  uint8_t output_zero_point)
{
  // Supported scale range: 2**-8 <= positive_scale <= 2**7, and
  // -0x1.FFFCp+6 <= negative_scale <= 2**7 with |negative_scale| >= 2**-8.
  assert(positive_scale >= 0x1.0p-8f);
  assert(positive_scale <= 0x1.0p+7f);
  assert(negative_scale <= 0x1.0p+7f);
  assert(negative_scale >= -0x1.FFFC00p+6f);
  assert(fabsf(negative_scale) >= 0x1.0p-8f);

  const long positive_multiplier = lrintf(-256.0f * positive_scale);
  assert(positive_multiplier <= -1L);
  assert(positive_multiplier >= -32768L);

  const long negative_multiplier = lrintf(-256.0f * negative_scale);
  assert(negative_multiplier >= -32768L);
  assert(negative_multiplier <= 32767L);
  assert(negative_multiplier != 0L);

  // Narrow everything once, then replicate across the 16 elements.
  const int16_t input_zero_point_lane = (int16_t) (uint16_t) input_zero_point;
  const int16_t output_zero_point_lane = (int16_t) (uint16_t) output_zero_point;
  const int16_t positive_multiplier_lane = (int16_t) positive_multiplier;
  const int16_t negative_multiplier_lane = (int16_t) negative_multiplier;
  for (size_t lane = 0; lane < 16; lane++) {
    params->avx2.input_zero_point[lane] = input_zero_point_lane;
    params->avx2.positive_multiplier[lane] = positive_multiplier_lane;
    params->avx2.negative_multiplier[lane] = negative_multiplier_lane;
    params->avx2.output_zero_point[lane] = output_zero_point_lane;
  }
  return sizeof(params->avx2);
}
3941*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3942*4bdc9457SAndroid Build Coastguard Worker
3943*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Fills params->wasmsimd_arm for the QU8 LReLU operator.
//
// Each scale is multiplied by -256 and rounded with lrintf() to produce a
// multiplier that must fit into int16_t. The zero points and both multipliers
// are written into elements 0..3 of the corresponding arrays.
//
// Returns sizeof(params->wasmsimd_arm).
size_t xnn_init_qu8_lrelu_wasmsimd_arm_params(
  union xnn_qu8_lrelu_params params[XNN_MIN_ELEMENTS(1)],
  float positive_scale,
  float negative_scale,
  uint8_t input_zero_point,
  uint8_t output_zero_point)
{
  // Supported scale range: 2**-8 <= positive_scale <= 2**7, and
  // -0x1.FFFCp+6 <= negative_scale <= 2**7 with |negative_scale| >= 2**-8.
  assert(positive_scale >= 0x1.0p-8f);
  assert(positive_scale <= 0x1.0p+7f);
  assert(negative_scale <= 0x1.0p+7f);
  assert(negative_scale >= -0x1.FFFC00p+6f);
  assert(fabsf(negative_scale) >= 0x1.0p-8f);

  const long positive_multiplier = lrintf(-256.0f * positive_scale);
  assert(positive_multiplier <= -1L);
  assert(positive_multiplier >= -32768L);

  const long negative_multiplier = lrintf(-256.0f * negative_scale);
  assert(negative_multiplier >= -32768L);
  assert(negative_multiplier <= 32767L);
  assert(negative_multiplier != 0L);

  // Narrow everything once, then replicate across the 4 elements.
  const int16_t input_zero_point_lane = (int16_t) (uint16_t) input_zero_point;
  const int16_t output_zero_point_lane = (int16_t) (uint16_t) output_zero_point;
  const int16_t positive_multiplier_lane = (int16_t) positive_multiplier;
  const int16_t negative_multiplier_lane = (int16_t) negative_multiplier;
  for (size_t lane = 0; lane < 4; lane++) {
    params->wasmsimd_arm.input_zero_point[lane] = input_zero_point_lane;
    params->wasmsimd_arm.positive_multiplier[lane] = positive_multiplier_lane;
    params->wasmsimd_arm.negative_multiplier[lane] = negative_multiplier_lane;
    params->wasmsimd_arm.output_zero_point[lane] = output_zero_point_lane;
  }
  return sizeof(params->wasmsimd_arm);
}
3972*4bdc9457SAndroid Build Coastguard Worker
// Fills params->wasmsimd_x86 for the QU8 LReLU operator.
//
// Each scale is multiplied by -256 and rounded with lrintf() to produce a
// multiplier that must fit into int16_t. The negative multiplier is stored as
// multiplier_base and the bitwise XOR of the two multipliers as
// multiplier_diff. Elements 0..3 of every array are written.
//
// Returns sizeof(params->wasmsimd_x86).
size_t xnn_init_qu8_lrelu_wasmsimd_x86_params(
  union xnn_qu8_lrelu_params params[XNN_MIN_ELEMENTS(1)],
  float positive_scale,
  float negative_scale,
  uint8_t input_zero_point,
  uint8_t output_zero_point)
{
  // Supported scale range: 2**-8 <= positive_scale <= 2**7, and
  // -0x1.FFFCp+6 <= negative_scale <= 2**7 with |negative_scale| >= 2**-8.
  assert(positive_scale >= 0x1.0p-8f);
  assert(positive_scale <= 0x1.0p+7f);
  assert(negative_scale <= 0x1.0p+7f);
  assert(negative_scale >= -0x1.FFFC00p+6f);
  assert(fabsf(negative_scale) >= 0x1.0p-8f);

  const long positive_multiplier = lrintf(-256.0f * positive_scale);
  assert(positive_multiplier <= -1L);
  assert(positive_multiplier >= -32768L);

  const long negative_multiplier = lrintf(-256.0f * negative_scale);
  assert(negative_multiplier >= -32768L);
  assert(negative_multiplier <= 32767L);
  assert(negative_multiplier != 0L);

  // base ^ diff recovers the positive multiplier; base alone is the negative one.
  const int16_t base = (int16_t) negative_multiplier;
  const int16_t diff = (int16_t) positive_multiplier ^ base;
  const int16_t input_zero_point_lane = (int16_t) (uint16_t) input_zero_point;
  const int16_t output_zero_point_lane = (int16_t) (uint16_t) output_zero_point;
  for (size_t lane = 0; lane < 4; lane++) {
    params->wasmsimd_x86.input_zero_point[lane] = input_zero_point_lane;
    params->wasmsimd_x86.multiplier_diff[lane] = diff;
    params->wasmsimd_x86.multiplier_base[lane] = base;
    params->wasmsimd_x86.output_zero_point[lane] = output_zero_point_lane;
  }
  return sizeof(params->wasmsimd_x86);
}
4003*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
4004*4bdc9457SAndroid Build Coastguard Worker
4005*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Fills params->avx for the F32 square root operator.
//
// The 14-entry mask table is set to -1 in entries 0..6 and to 0 in
// entries 7..13.
//
// Returns sizeof(params->avx).
size_t xnn_init_f32_sqrt_avx_params(
  union xnn_f32_sqrt_params params[XNN_MIN_ELEMENTS(1)])
{
  for (size_t entry = 0; entry < 14; entry++) {
    // First half of the table is all-ones, second half is zero.
    params->avx.mask_table[entry] = (entry < 7) ? -1 : 0;
  }
  return sizeof(params->avx);
}
4017*4bdc9457SAndroid Build Coastguard Worker
// Fills params->fma for the F32 square root operator.
//
// Entries 0..7 of `half` are set to 0.5f. The 14-entry mask table is set to -1
// in entries 0..6 and to 0 in entries 7..13.
//
// Returns sizeof(params->fma).
size_t xnn_init_f32_sqrt_fma_params(
  union xnn_f32_sqrt_params params[XNN_MIN_ELEMENTS(1)])
{
  for (size_t lane = 0; lane < 8; lane++) {
    params->fma.half[lane] = 0.5f;
  }
  for (size_t entry = 0; entry < 14; entry++) {
    // First half of the table is all-ones, second half is zero.
    params->fma.mask_table[entry] = (entry < 7) ? -1 : 0;
  }
  return sizeof(params->fma);
}
4032*4bdc9457SAndroid Build Coastguard Worker
// Fills params->avx512 for the F32 square root operator.
//
// The only value written is the constant 0.5f, stored in `half`.
//
// Returns sizeof(params->avx512).
size_t xnn_init_f32_sqrt_avx512_params(
  union xnn_f32_sqrt_params params[XNN_MIN_ELEMENTS(1)])
{
  const float one_half = 0.5f;
  params->avx512.half = one_half;
  return sizeof(params->avx512);
}
4039*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4040*4bdc9457SAndroid Build Coastguard Worker
// Initializes the architecture-specific member of the f32 CHW params union
// (`sse` on x86/x86-64, `neon` on ARM/ARM64, `scalar` otherwise).
//
// Stores the output clamping bounds and three 4-entry mask arrays derived
// from `width`:
//   mask[k]      is all-ones when k       <= (width - 1) & 3, otherwise zero;
//   mask_even[k] is all-ones when 2*k     <= (width - 1) & 7, otherwise zero;
//   mask_odd[k]  is all-ones when 2*k + 1 <= (width - 1) & 7, otherwise zero.
// Entry 0 of `mask` and `mask_even` is therefore always all-ones.
//
// Returns the size of the member that was initialized.
size_t xnn_init_f32_chw_params(
  union xnn_f32_chw_params params[XNN_MIN_ELEMENTS(1)],
  uint32_t width,
  float output_min,
  float output_max)
{
  // (width - 1) reduced modulo 4 and modulo 8; shared by every branch below.
  const uint32_t rem4 = (width - 1) & 3;
  const uint32_t rem8 = (width - 1) & 7;

#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // The SSE variant keeps min/max replicated across 4 entries.
  for (uint32_t lane = 0; lane < 4; lane++) {
    params->sse.max[lane] = output_max;
    params->sse.min[lane] = output_min;
  }

  params->sse.mask[0] = UINT32_C(0xFFFFFFFF);
  params->sse.mask_even[0] = UINT32_C(0xFFFFFFFF);
  for (uint32_t lane = 1; lane < 4; lane++) {
    params->sse.mask[lane] = -(uint32_t) (rem4 >= lane);
    params->sse.mask_even[lane] = -(uint32_t) (rem8 >= 2 * lane);
  }
  for (uint32_t lane = 0; lane < 4; lane++) {
    params->sse.mask_odd[lane] = -(uint32_t) (rem8 >= 2 * lane + 1);
  }
  return sizeof(params->sse);
#elif XNN_ARCH_ARM || XNN_ARCH_ARM64
  params->neon.max = output_max;
  params->neon.min = output_min;

  params->neon.mask[0] = UINT32_C(0xFFFFFFFF);
  params->neon.mask_even[0] = UINT32_C(0xFFFFFFFF);
  for (uint32_t lane = 1; lane < 4; lane++) {
    params->neon.mask[lane] = -(uint32_t) (rem4 >= lane);
    params->neon.mask_even[lane] = -(uint32_t) (rem8 >= 2 * lane);
  }
  for (uint32_t lane = 0; lane < 4; lane++) {
    params->neon.mask_odd[lane] = -(uint32_t) (rem8 >= 2 * lane + 1);
  }
  return sizeof(params->neon);
#else
  params->scalar.max = output_max;
  params->scalar.min = output_min;

  params->scalar.mask[0] = UINT32_C(0xFFFFFFFF);
  params->scalar.mask_even[0] = UINT32_C(0xFFFFFFFF);
  for (uint32_t lane = 1; lane < 4; lane++) {
    params->scalar.mask[lane] = -(uint32_t) (rem4 >= lane);
    params->scalar.mask_even[lane] = -(uint32_t) (rem8 >= 2 * lane);
  }
  for (uint32_t lane = 0; lane < 4; lane++) {
    params->scalar.mask_odd[lane] = -(uint32_t) (rem8 >= 2 * lane + 1);
  }
  return sizeof(params->scalar);
#endif
}
4111*4bdc9457SAndroid Build Coastguard Worker
// Initializes the `neonfp16arith` member of the f16 CHW params union on
// ARM/ARM64 builds; on every other architecture nothing is written and 0 is
// returned.
//
// `output_min` / `output_max` are stored unchanged as 16-bit values. The mask
// arrays are derived from `width`:
//   mask[k]      (4 entries) is 0xFFFF when k       <= (width - 1) & 3;
//   maskx8[k]    (8 entries) is 0xFFFF when k       <= (width - 1) & 7;
//   mask_even[k] (4 entries) is 0xFFFF when 2*k     <= (width - 1) & 7;
//   mask_odd[k]  (4 entries) is 0xFFFF when 2*k + 1 <= (width - 1) & 7;
// and zero otherwise.
//
// Returns sizeof(params->neonfp16arith) on ARM/ARM64, 0 elsewhere.
size_t xnn_init_f16_chw_params(
  union xnn_f16_chw_params params[XNN_MIN_ELEMENTS(1)],
  uint32_t width,
  uint16_t output_min,
  uint16_t output_max)
{
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  params->neonfp16arith.max = output_max;
  params->neonfp16arith.min = output_min;

  // (width - 1) reduced modulo 4 and modulo 8.
  const uint32_t rem4 = (width - 1) & 3;
  const uint32_t rem8 = (width - 1) & 7;

  params->neonfp16arith.mask[0] = UINT16_C(0xFFFF);
  for (uint32_t lane = 1; lane < 4; lane++) {
    params->neonfp16arith.mask[lane] = -(uint16_t) (rem4 >= lane);
  }

  params->neonfp16arith.maskx8[0] = UINT16_C(0xFFFF);
  for (uint32_t lane = 1; lane < 8; lane++) {
    params->neonfp16arith.maskx8[lane] = -(uint16_t) (rem8 >= lane);
  }

  params->neonfp16arith.mask_even[0] = UINT16_C(0xFFFF);
  for (uint32_t lane = 1; lane < 4; lane++) {
    params->neonfp16arith.mask_even[lane] = -(uint16_t) (rem8 >= 2 * lane);
  }
  for (uint32_t lane = 0; lane < 4; lane++) {
    params->neonfp16arith.mask_odd[lane] = -(uint16_t) (rem8 >= 2 * lane + 1);
  }
  return sizeof(params->neonfp16arith);
#else
  return 0;
#endif
}
4151*4bdc9457SAndroid Build Coastguard Worker
xnn_update_f32_chw_params(union xnn_f32_chw_params * params,uint32_t width)4152*4bdc9457SAndroid Build Coastguard Worker void xnn_update_f32_chw_params(
4153*4bdc9457SAndroid Build Coastguard Worker union xnn_f32_chw_params* params,
4154*4bdc9457SAndroid Build Coastguard Worker uint32_t width)
4155*4bdc9457SAndroid Build Coastguard Worker {
4156*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
4157*4bdc9457SAndroid Build Coastguard Worker const uint32_t w4 = (width - 1) & 3;
4158*4bdc9457SAndroid Build Coastguard Worker params->sse.mask[0] = UINT32_C(0xFFFFFFFF);
4159*4bdc9457SAndroid Build Coastguard Worker params->sse.mask[1] = -(uint32_t) (w4 >= 1);
4160*4bdc9457SAndroid Build Coastguard Worker params->sse.mask[2] = -(uint32_t) (w4 >= 2);
4161*4bdc9457SAndroid Build Coastguard Worker params->sse.mask[3] = -(uint32_t) (w4 >= 3);
4162*4bdc9457SAndroid Build Coastguard Worker
4163*4bdc9457SAndroid Build Coastguard Worker const uint32_t w8 = (width - 1) & 7;
4164*4bdc9457SAndroid Build Coastguard Worker params->sse.mask_even[0] = UINT32_C(0xFFFFFFFF);
4165*4bdc9457SAndroid Build Coastguard Worker params->sse.mask_even[1] = -(uint32_t) (w8 >= 2);
4166*4bdc9457SAndroid Build Coastguard Worker params->sse.mask_even[2] = -(uint32_t) (w8 >= 4);
4167*4bdc9457SAndroid Build Coastguard Worker params->sse.mask_even[3] = -(uint32_t) (w8 >= 6);
4168*4bdc9457SAndroid Build Coastguard Worker params->sse.mask_odd[0] = -(uint32_t) (w8 >= 1);
4169*4bdc9457SAndroid Build Coastguard Worker params->sse.mask_odd[1] = -(uint32_t) (w8 >= 3);
4170*4bdc9457SAndroid Build Coastguard Worker params->sse.mask_odd[2] = -(uint32_t) (w8 >= 5);
4171*4bdc9457SAndroid Build Coastguard Worker params->sse.mask_odd[3] = -(uint32_t) (w8 >= 7);
4172*4bdc9457SAndroid Build Coastguard Worker #elif XNN_ARCH_ARM || XNN_ARCH_ARM64
4173*4bdc9457SAndroid Build Coastguard Worker const uint32_t w4 = (width - 1) & 3;
4174*4bdc9457SAndroid Build Coastguard Worker params->neon.mask[0] = UINT32_C(0xFFFFFFFF);
4175*4bdc9457SAndroid Build Coastguard Worker params->neon.mask[1] = -(uint32_t) (w4 >= 1);
4176*4bdc9457SAndroid Build Coastguard Worker params->neon.mask[2] = -(uint32_t) (w4 >= 2);
4177*4bdc9457SAndroid Build Coastguard Worker params->neon.mask[3] = -(uint32_t) (w4 >= 3);
4178*4bdc9457SAndroid Build Coastguard Worker
4179*4bdc9457SAndroid Build Coastguard Worker const uint32_t w8 = (width - 1) & 7;
4180*4bdc9457SAndroid Build Coastguard Worker params->neon.mask_even[0] = UINT32_C(0xFFFFFFFF);
4181*4bdc9457SAndroid Build Coastguard Worker params->neon.mask_even[1] = -(uint32_t) (w8 >= 2);
4182*4bdc9457SAndroid Build Coastguard Worker params->neon.mask_even[2] = -(uint32_t) (w8 >= 4);
4183*4bdc9457SAndroid Build Coastguard Worker params->neon.mask_even[3] = -(uint32_t) (w8 >= 6);
4184*4bdc9457SAndroid Build Coastguard Worker params->neon.mask_odd[0] = -(uint32_t) (w8 >= 1);
4185*4bdc9457SAndroid Build Coastguard Worker params->neon.mask_odd[1] = -(uint32_t) (w8 >= 3);
4186*4bdc9457SAndroid Build Coastguard Worker params->neon.mask_odd[2] = -(uint32_t) (w8 >= 5);
4187*4bdc9457SAndroid Build Coastguard Worker params->neon.mask_odd[3] = -(uint32_t) (w8 >= 7);
4188*4bdc9457SAndroid Build Coastguard Worker #else
4189*4bdc9457SAndroid Build Coastguard Worker const uint32_t w4 = (width - 1) & 3;
4190*4bdc9457SAndroid Build Coastguard Worker params->scalar.mask[0] = UINT32_C(0xFFFFFFFF);
4191*4bdc9457SAndroid Build Coastguard Worker params->scalar.mask[1] = -(uint32_t) (w4 >= 1);
4192*4bdc9457SAndroid Build Coastguard Worker params->scalar.mask[2] = -(uint32_t) (w4 >= 2);
4193*4bdc9457SAndroid Build Coastguard Worker params->scalar.mask[3] = -(uint32_t) (w4 >= 3);
4194*4bdc9457SAndroid Build Coastguard Worker
4195*4bdc9457SAndroid Build Coastguard Worker const uint32_t w8 = (width - 1) & 7;
4196*4bdc9457SAndroid Build Coastguard Worker params->scalar.mask_even[0] = UINT32_C(0xFFFFFFFF);
4197*4bdc9457SAndroid Build Coastguard Worker params->scalar.mask_even[1] = -(uint32_t) (w8 >= 2);
4198*4bdc9457SAndroid Build Coastguard Worker params->scalar.mask_even[2] = -(uint32_t) (w8 >= 4);
4199*4bdc9457SAndroid Build Coastguard Worker params->scalar.mask_even[3] = -(uint32_t) (w8 >= 6);
4200*4bdc9457SAndroid Build Coastguard Worker params->scalar.mask_odd[0] = -(uint32_t) (w8 >= 1);
4201*4bdc9457SAndroid Build Coastguard Worker params->scalar.mask_odd[1] = -(uint32_t) (w8 >= 3);
4202*4bdc9457SAndroid Build Coastguard Worker params->scalar.mask_odd[2] = -(uint32_t) (w8 >= 5);
4203*4bdc9457SAndroid Build Coastguard Worker params->scalar.mask_odd[3] = -(uint32_t) (w8 >= 7);
4204*4bdc9457SAndroid Build Coastguard Worker #endif
4205*4bdc9457SAndroid Build Coastguard Worker }
4206*4bdc9457SAndroid Build Coastguard Worker
// Initializes the `scalar` member of the f32 CHW params union regardless of
// the target architecture.
//
// Stores the output clamping bounds and three 4-entry mask arrays derived
// from `width`:
//   mask[k]      is all-ones when k       <= (width - 1) & 3, otherwise zero;
//   mask_even[k] is all-ones when 2*k     <= (width - 1) & 7, otherwise zero;
//   mask_odd[k]  is all-ones when 2*k + 1 <= (width - 1) & 7, otherwise zero.
//
// Returns sizeof(params->scalar).
size_t xnn_init_scalar_f32_chw_params(
  union xnn_f32_chw_params params[XNN_MIN_ELEMENTS(1)],
  uint32_t width,
  float output_min,
  float output_max)
{
  params->scalar.max = output_max;
  params->scalar.min = output_min;

  // (width - 1) reduced modulo 4 and modulo 8.
  const uint32_t rem4 = (width - 1) & 3;
  const uint32_t rem8 = (width - 1) & 7;

  params->scalar.mask[0] = UINT32_C(0xFFFFFFFF);
  params->scalar.mask_even[0] = UINT32_C(0xFFFFFFFF);
  for (uint32_t lane = 1; lane < 4; lane++) {
    params->scalar.mask[lane] = -(uint32_t) (rem4 >= lane);
    params->scalar.mask_even[lane] = -(uint32_t) (rem8 >= 2 * lane);
  }
  for (uint32_t lane = 0; lane < 4; lane++) {
    params->scalar.mask_odd[lane] = -(uint32_t) (rem8 >= 2 * lane + 1);
  }
  return sizeof(params->scalar);
}
4233*4bdc9457SAndroid Build Coastguard Worker
4234*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Initializes the `sse2` member of the s8 min/max params union.
//
// Each of the 16 entries of `bias` is set to 0x80, and `min_with_bias` /
// `max_with_bias` hold the bounds with their top bit flipped (XOR with 0x80).
// Requires output_min < output_max (checked with assert).
//
// Returns sizeof(params->sse2).
size_t xnn_init_s8_minmax_sse2_params(
  union xnn_s8_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int8_t output_min,
  int8_t output_max)
{
  assert(output_min < output_max);

  // XOR with 0x80 toggles the most significant bit of the 8-bit value.
  const uint8_t biased_max = UINT8_C(0x80) ^ (uint8_t) output_max;
  const uint8_t biased_min = UINT8_C(0x80) ^ (uint8_t) output_min;
  uint32_t lane = 0;
  while (lane < 16) {
    params->sse2.max_with_bias[lane] = biased_max;
    params->sse2.min_with_bias[lane] = biased_min;
    params->sse2.bias[lane] = UINT8_C(0x80);
    lane++;
  }
  return sizeof(params->sse2);
}
4251*4bdc9457SAndroid Build Coastguard Worker
// Initializes the `sse4` member of the s8 min/max params union by replicating
// the bounds across all 16 entries of `min` and `max`.
// Requires output_min < output_max (checked with assert).
//
// Returns sizeof(params->sse4).
size_t xnn_init_s8_minmax_sse4_params(
  union xnn_s8_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int8_t output_min,
  int8_t output_max)
{
  assert(output_min < output_max);

  uint32_t lane = 0;
  while (lane < 16) {
    params->sse4.max[lane] = output_max;
    params->sse4.min[lane] = output_min;
    lane++;
  }
  return sizeof(params->sse4);
}
4265*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4266*4bdc9457SAndroid Build Coastguard Worker
4267*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Initializes the `neon` member of the s8 min/max params union with the given
// bounds, stored unchanged.
// Requires output_min < output_max (checked with assert).
//
// Returns sizeof(params->neon).
size_t xnn_init_s8_minmax_neon_params(
  union xnn_s8_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int8_t output_min,
  int8_t output_max)
{
  assert(output_min < output_max);

  params->neon.max = output_max;
  params->neon.min = output_min;
  return sizeof(params->neon);
}
4279*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4280*4bdc9457SAndroid Build Coastguard Worker
4281*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Initializes the `wasmsimd` member of the s8 min/max params union by
// replicating the bounds across the first 8 entries of `min` and `max`.
// Requires output_min < output_max (checked with assert).
//
// Returns sizeof(params->wasmsimd).
size_t xnn_init_s8_minmax_wasmsimd_params(
  union xnn_s8_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int8_t output_min,
  int8_t output_max)
{
  assert(output_min < output_max);

  uint32_t lane = 0;
  while (lane < 8) {
    params->wasmsimd.max[lane] = output_max;
    params->wasmsimd.min[lane] = output_min;
    lane++;
  }
  return sizeof(params->wasmsimd);
}
4295*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
4296*4bdc9457SAndroid Build Coastguard Worker
// Initializes the `scalar` member of the s8 min/max params union; both bounds
// are converted to int32_t before being stored.
// Requires output_min < output_max (checked with assert).
//
// Returns sizeof(params->scalar).
size_t xnn_init_s8_minmax_scalar_params(
  union xnn_s8_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int8_t output_min,
  int8_t output_max)
{
  assert(output_min < output_max);

  const int32_t upper_bound = (int32_t) output_max;
  const int32_t lower_bound = (int32_t) output_min;
  params->scalar.max = upper_bound;
  params->scalar.min = lower_bound;
  return sizeof(params->scalar);
}
4308*4bdc9457SAndroid Build Coastguard Worker
// Initializes the architecture-specific member of the u8 min/max params union:
//   x86/x86-64: `sse2`, bounds replicated across 16 entries of `min` / `max`;
//   ARM/ARM64:  `neon`, bounds stored unchanged;
//   otherwise:  `scalar`, bounds converted to uint32_t before being stored.
// Requires output_min < output_max (checked with assert).
//
// Returns the size of the member that was initialized.
size_t xnn_init_u8_minmax_params(
  union xnn_u8_minmax_params params[XNN_MIN_ELEMENTS(1)],
  uint8_t output_min,
  uint8_t output_max)
{
  assert(output_min < output_max);

#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  uint32_t lane = 0;
  while (lane < 16) {
    params->sse2.max[lane] = output_max;
    params->sse2.min[lane] = output_min;
    lane++;
  }
  return sizeof(params->sse2);
#elif XNN_ARCH_ARM || XNN_ARCH_ARM64
  params->neon.max = output_max;
  params->neon.min = output_min;
  return sizeof(params->neon);
#else
  params->scalar.max = (uint32_t) output_max;
  params->scalar.min = (uint32_t) output_min;
  return sizeof(params->scalar);
#endif
}
4332*4bdc9457SAndroid Build Coastguard Worker
4333*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Initializes the `sse2` member of the u8 min/max params union by replicating
// the bounds across all 16 entries of `min` and `max`.
// Requires output_min < output_max (checked with assert).
//
// Returns sizeof(params->sse2).
size_t xnn_init_u8_minmax_sse2_params(
  union xnn_u8_minmax_params params[XNN_MIN_ELEMENTS(1)],
  uint8_t output_min,
  uint8_t output_max)
{
  assert(output_min < output_max);

  uint32_t lane = 0;
  while (lane < 16) {
    params->sse2.max[lane] = output_max;
    params->sse2.min[lane] = output_min;
    lane++;
  }
  return sizeof(params->sse2);
}
4347*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4348*4bdc9457SAndroid Build Coastguard Worker
4349*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Initializes the `wasmsimd` member of the u8 min/max params union by
// replicating the bounds across the first 8 entries of `min` and `max`.
// Requires output_min < output_max (checked with assert).
//
// Returns sizeof(params->wasmsimd).
size_t xnn_init_u8_minmax_wasmsimd_params(
  union xnn_u8_minmax_params params[XNN_MIN_ELEMENTS(1)],
  uint8_t output_min,
  uint8_t output_max)
{
  assert(output_min < output_max);

  uint32_t lane = 0;
  while (lane < 8) {
    params->wasmsimd.max[lane] = output_max;
    params->wasmsimd.min[lane] = output_min;
    lane++;
  }
  return sizeof(params->wasmsimd);
}
4363*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
4364*4bdc9457SAndroid Build Coastguard Worker
4365*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Initializes the `neon` member of the u8 min/max params union with the given
// bounds, stored unchanged.
// Requires output_min < output_max (checked with assert).
//
// Returns sizeof(params->neon).
size_t xnn_init_u8_minmax_neon_params(
  union xnn_u8_minmax_params params[XNN_MIN_ELEMENTS(1)],
  uint8_t output_min,
  uint8_t output_max)
{
  assert(output_min < output_max);

  params->neon.max = output_max;
  params->neon.min = output_min;
  return sizeof(params->neon);
}
4377*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4378*4bdc9457SAndroid Build Coastguard Worker
// Initializes the `scalar` member of the u8 min/max params union; both bounds
// are converted to uint32_t before being stored.
// Requires output_min < output_max (checked with assert).
//
// Returns sizeof(params->scalar).
size_t xnn_init_u8_minmax_scalar_params(
  union xnn_u8_minmax_params params[XNN_MIN_ELEMENTS(1)],
  uint8_t output_min,
  uint8_t output_max)
{
  assert(output_min < output_max);

  const uint32_t upper_bound = (uint32_t) output_max;
  const uint32_t lower_bound = (uint32_t) output_min;
  params->scalar.max = upper_bound;
  params->scalar.min = lower_bound;
  return sizeof(params->scalar);
}
4390*4bdc9457SAndroid Build Coastguard Worker
4391*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
xnn_init_qu8_add_minmax_sse2_params(union xnn_qu8_add_minmax_params params[XNN_MIN_ELEMENTS (1)],uint8_t a_zero_point,uint8_t b_zero_point,uint8_t output_zero_point,float a_output_scale,float b_output_scale,uint8_t output_min,uint8_t output_max)4392*4bdc9457SAndroid Build Coastguard Worker size_t xnn_init_qu8_add_minmax_sse2_params(
4393*4bdc9457SAndroid Build Coastguard Worker union xnn_qu8_add_minmax_params params[XNN_MIN_ELEMENTS(1)],
4394*4bdc9457SAndroid Build Coastguard Worker uint8_t a_zero_point,
4395*4bdc9457SAndroid Build Coastguard Worker uint8_t b_zero_point,
4396*4bdc9457SAndroid Build Coastguard Worker uint8_t output_zero_point,
4397*4bdc9457SAndroid Build Coastguard Worker float a_output_scale,
4398*4bdc9457SAndroid Build Coastguard Worker float b_output_scale,
4399*4bdc9457SAndroid Build Coastguard Worker uint8_t output_min,
4400*4bdc9457SAndroid Build Coastguard Worker uint8_t output_max)
4401*4bdc9457SAndroid Build Coastguard Worker {
4402*4bdc9457SAndroid Build Coastguard Worker const float abs_a_output_scale = fabsf(a_output_scale);
4403*4bdc9457SAndroid Build Coastguard Worker const float abs_b_output_scale = fabsf(b_output_scale);
4404*4bdc9457SAndroid Build Coastguard Worker assert(abs_a_output_scale >= 0x1.0p-10f);
4405*4bdc9457SAndroid Build Coastguard Worker assert(abs_b_output_scale >= 0x1.0p-10f);
4406*4bdc9457SAndroid Build Coastguard Worker assert(abs_a_output_scale < 0x1.0p+8f);
4407*4bdc9457SAndroid Build Coastguard Worker assert(abs_b_output_scale < 0x1.0p+8f);
4408*4bdc9457SAndroid Build Coastguard Worker
4409*4bdc9457SAndroid Build Coastguard Worker // Compute requantization parameters.
4410*4bdc9457SAndroid Build Coastguard Worker const float max_abs_output_scale = math_max_f32(abs_a_output_scale, abs_b_output_scale);
4411*4bdc9457SAndroid Build Coastguard Worker assert(max_abs_output_scale >= 0x1.0p-10f);
4412*4bdc9457SAndroid Build Coastguard Worker assert(max_abs_output_scale < 0x1.0p+8f);
4413*4bdc9457SAndroid Build Coastguard Worker const uint32_t max_scale_bits = float_as_uint32(max_abs_output_scale);
4414*4bdc9457SAndroid Build Coastguard Worker const int32_t max_scale_exponent = (int32_t) (max_scale_bits >> 23) - 127;
4415*4bdc9457SAndroid Build Coastguard Worker
4416*4bdc9457SAndroid Build Coastguard Worker // Shift is in [12, 30] range.
4417*4bdc9457SAndroid Build Coastguard Worker const uint32_t shift = (uint32_t) (20 /* multiplier bits */ - max_scale_exponent);
4418*4bdc9457SAndroid Build Coastguard Worker assert(shift <= 30);
4419*4bdc9457SAndroid Build Coastguard Worker assert(shift >= 12);
4420*4bdc9457SAndroid Build Coastguard Worker
4421*4bdc9457SAndroid Build Coastguard Worker // Multipliers are in [0, 2**21) range, largest multiplier is in [2**20, 2**21) range.
4422*4bdc9457SAndroid Build Coastguard Worker const int32_t abs_a_multiplier = (int32_t) lrintf(uint32_as_float(float_as_uint32(abs_a_output_scale) + (shift << 23)));
4423*4bdc9457SAndroid Build Coastguard Worker const int32_t abs_b_multiplier = (int32_t) lrintf(uint32_as_float(float_as_uint32(abs_b_output_scale) + (shift << 23)));
4424*4bdc9457SAndroid Build Coastguard Worker assert(math_max_s32(abs_a_multiplier, abs_b_multiplier) >= INT32_C(0x00100000));
4425*4bdc9457SAndroid Build Coastguard Worker assert(abs_a_multiplier <= INT32_C(0x00200000));
4426*4bdc9457SAndroid Build Coastguard Worker assert(abs_b_multiplier <= INT32_C(0x00200000));
4427*4bdc9457SAndroid Build Coastguard Worker
4428*4bdc9457SAndroid Build Coastguard Worker const int32_t a_multiplier = signbit(a_output_scale) ? -abs_a_multiplier : abs_a_multiplier;
4429*4bdc9457SAndroid Build Coastguard Worker const int32_t b_multiplier = signbit(b_output_scale) ? -abs_b_multiplier : abs_b_multiplier;
4430*4bdc9457SAndroid Build Coastguard Worker
4431*4bdc9457SAndroid Build Coastguard Worker const int32_t rounding = INT32_C(1) << (shift - 1);
4432*4bdc9457SAndroid Build Coastguard Worker const int32_t bias = rounding - a_multiplier * (int32_t) a_zero_point - b_multiplier * (int32_t) b_zero_point;
4433*4bdc9457SAndroid Build Coastguard Worker for (uint32_t i = 0; i < 4; i++) {
4434*4bdc9457SAndroid Build Coastguard Worker params->sse2.bias[i] = bias;
4435*4bdc9457SAndroid Build Coastguard Worker }
4436*4bdc9457SAndroid Build Coastguard Worker const uint16_t a_multiplier_lo = (uint16_t) a_multiplier;
4437*4bdc9457SAndroid Build Coastguard Worker const uint16_t a_multiplier_hi = (uint16_t) ((uint32_t) a_multiplier >> 16);
4438*4bdc9457SAndroid Build Coastguard Worker const uint16_t b_multiplier_lo = (uint16_t) b_multiplier;
4439*4bdc9457SAndroid Build Coastguard Worker const uint16_t b_multiplier_hi = (uint16_t) ((uint32_t) b_multiplier >> 16);
4440*4bdc9457SAndroid Build Coastguard Worker for (uint32_t i = 0; i < 8; i++) {
4441*4bdc9457SAndroid Build Coastguard Worker params->sse2.a_multiplier_lo[i] = a_multiplier_lo;
4442*4bdc9457SAndroid Build Coastguard Worker params->sse2.a_multiplier_hi[i] = a_multiplier_hi;
4443*4bdc9457SAndroid Build Coastguard Worker params->sse2.b_multiplier_lo[i] = b_multiplier_lo;
4444*4bdc9457SAndroid Build Coastguard Worker params->sse2.b_multiplier_hi[i] = b_multiplier_hi;
4445*4bdc9457SAndroid Build Coastguard Worker }
4446*4bdc9457SAndroid Build Coastguard Worker params->sse2.shift = shift;
4447*4bdc9457SAndroid Build Coastguard Worker params->sse2.b_multiplier = (uint32_t) b_multiplier;
4448*4bdc9457SAndroid Build Coastguard Worker for (uint32_t i = 0; i < 8; i++) {
4449*4bdc9457SAndroid Build Coastguard Worker params->sse2.output_zero_point[i] = (int16_t) (uint16_t) output_zero_point;
4450*4bdc9457SAndroid Build Coastguard Worker }
4451*4bdc9457SAndroid Build Coastguard Worker for (uint32_t i = 0; i < 16; i++) {
4452*4bdc9457SAndroid Build Coastguard Worker params->sse2.output_min[i] = output_min;
4453*4bdc9457SAndroid Build Coastguard Worker params->sse2.output_max[i] = output_max;
4454*4bdc9457SAndroid Build Coastguard Worker }
4455*4bdc9457SAndroid Build Coastguard Worker return sizeof(params->sse2);
4456*4bdc9457SAndroid Build Coastguard Worker }
4457*4bdc9457SAndroid Build Coastguard Worker
// Populates params->sse4 from the input/output zero points, the two output
// scales and the output clamping bounds.
//
// Both scales are converted to signed integer multipliers that share a single
// right shift, picked from the larger of the two scale magnitudes. The rounding
// term and the two zero-point products are folded into one bias value. Each
// derived value is then replicated across the lanes of the sse4 member.
//
// Returns sizeof(params->sse4).
size_t xnn_init_qu8_add_minmax_sse4_params(
    union xnn_qu8_add_minmax_params params[XNN_MIN_ELEMENTS(1)],
    uint8_t a_zero_point,
    uint8_t b_zero_point,
    uint8_t output_zero_point,
    float a_output_scale,
    float b_output_scale,
    uint8_t output_min,
    uint8_t output_max)
{
  // Work on magnitudes first; the signs are re-applied to the multipliers below.
  const float a_magnitude = fabsf(a_output_scale);
  const float b_magnitude = fabsf(b_output_scale);
  assert(a_magnitude >= 0x1.0p-10f);
  assert(b_magnitude >= 0x1.0p-10f);
  assert(a_magnitude < 0x1.0p+8f);
  assert(b_magnitude < 0x1.0p+8f);

  // The larger magnitude supplies the exponent that fixes the shared shift.
  const float larger_magnitude = math_max_f32(a_magnitude, b_magnitude);
  assert(larger_magnitude >= 0x1.0p-10f);
  assert(larger_magnitude < 0x1.0p+8f);
  const int32_t larger_exponent = (int32_t) (float_as_uint32(larger_magnitude) >> 23) - 127;

  // 20 multiplier bits minus the exponent; asserted to fall within [12, 30].
  const uint32_t shift = (uint32_t) (20 - larger_exponent);
  assert(shift <= 30);
  assert(shift >= 12);

  // Adding the shift to the exponent field scales each magnitude by 2**shift
  // before it is rounded to an integer.
  const uint32_t exponent_bump = shift << 23;
  const float a_scaled = uint32_as_float(float_as_uint32(a_magnitude) + exponent_bump);
  const float b_scaled = uint32_as_float(float_as_uint32(b_magnitude) + exponent_bump);
  const int32_t a_multiplier_magnitude = (int32_t) lrintf(a_scaled);
  const int32_t b_multiplier_magnitude = (int32_t) lrintf(b_scaled);
  // The larger multiplier is at least 2**20; neither may exceed 2**21.
  assert(math_max_s32(a_multiplier_magnitude, b_multiplier_magnitude) >= INT32_C(0x00100000));
  assert(a_multiplier_magnitude <= INT32_C(0x00200000));
  assert(b_multiplier_magnitude <= INT32_C(0x00200000));

  // Restore the sign of each original scale.
  int32_t a_signed_multiplier = a_multiplier_magnitude;
  if (signbit(a_output_scale)) {
    a_signed_multiplier = -a_multiplier_magnitude;
  }
  int32_t b_signed_multiplier = b_multiplier_magnitude;
  if (signbit(b_output_scale)) {
    b_signed_multiplier = -b_multiplier_magnitude;
  }

  // bias = half of 2**shift, minus each multiplier times its zero point.
  const int32_t rounding = INT32_C(1) << (shift - 1);
  const int32_t a_offset = a_signed_multiplier * (int32_t) a_zero_point;
  const int32_t b_offset = b_signed_multiplier * (int32_t) b_zero_point;
  const int32_t bias = rounding - a_offset - b_offset;

  const uint64_t wide_shift = (uint64_t) shift;
  const int16_t wide_output_zero_point = (int16_t) (uint16_t) output_zero_point;

  // Replicate every value across its lanes: 16 bounds, 8 zero points,
  // 4 bias/multiplier entries and 2 shift entries.
  for (size_t lane = 0; lane < 16; lane++) {
    params->sse4.output_min[lane] = output_min;
    params->sse4.output_max[lane] = output_max;
  }
  for (size_t lane = 0; lane < 8; lane++) {
    params->sse4.output_zero_point[lane] = wide_output_zero_point;
  }
  for (size_t lane = 0; lane < 4; lane++) {
    params->sse4.bias[lane] = bias;
    params->sse4.a_multiplier[lane] = a_signed_multiplier;
    params->sse4.b_multiplier[lane] = b_signed_multiplier;
  }
  for (size_t lane = 0; lane < 2; lane++) {
    params->sse4.shift[lane] = wide_shift;
  }
  return sizeof(params->sse4);
}
4516*4bdc9457SAndroid Build Coastguard Worker
// Populates params->avx2 from the input/output zero points, the two output
// scales and the output clamping bounds.
//
// Both scales are converted to signed integer multipliers that share a single
// right shift, picked from the larger of the two scale magnitudes. The rounding
// term and the two zero-point products are folded into one bias value. Each
// derived value is then replicated across the lanes of the avx2 member.
//
// Returns sizeof(params->avx2).
size_t xnn_init_qu8_add_minmax_avx2_params(
    union xnn_qu8_add_minmax_params params[XNN_MIN_ELEMENTS(1)],
    uint8_t a_zero_point,
    uint8_t b_zero_point,
    uint8_t output_zero_point,
    float a_output_scale,
    float b_output_scale,
    uint8_t output_min,
    uint8_t output_max)
{
  // Work on magnitudes first; the signs are re-applied to the multipliers below.
  const float a_magnitude = fabsf(a_output_scale);
  const float b_magnitude = fabsf(b_output_scale);
  assert(a_magnitude >= 0x1.0p-10f);
  assert(b_magnitude >= 0x1.0p-10f);
  assert(a_magnitude < 0x1.0p+8f);
  assert(b_magnitude < 0x1.0p+8f);

  // The larger magnitude supplies the exponent that fixes the shared shift.
  const float larger_magnitude = math_max_f32(a_magnitude, b_magnitude);
  assert(larger_magnitude >= 0x1.0p-10f);
  assert(larger_magnitude < 0x1.0p+8f);
  const int32_t larger_exponent = (int32_t) (float_as_uint32(larger_magnitude) >> 23) - 127;

  // 20 multiplier bits minus the exponent; asserted to fall within [12, 30].
  const uint32_t shift = (uint32_t) (20 - larger_exponent);
  assert(shift <= 30);
  assert(shift >= 12);

  // Adding the shift to the exponent field scales each magnitude by 2**shift
  // before it is rounded to an integer.
  const uint32_t exponent_bump = shift << 23;
  const float a_scaled = uint32_as_float(float_as_uint32(a_magnitude) + exponent_bump);
  const float b_scaled = uint32_as_float(float_as_uint32(b_magnitude) + exponent_bump);
  const int32_t a_multiplier_magnitude = (int32_t) lrintf(a_scaled);
  const int32_t b_multiplier_magnitude = (int32_t) lrintf(b_scaled);
  // The larger multiplier is at least 2**20; neither may exceed 2**21.
  assert(math_max_s32(a_multiplier_magnitude, b_multiplier_magnitude) >= INT32_C(0x00100000));
  assert(a_multiplier_magnitude <= INT32_C(0x00200000));
  assert(b_multiplier_magnitude <= INT32_C(0x00200000));

  // Restore the sign of each original scale.
  int32_t a_signed_multiplier = a_multiplier_magnitude;
  if (signbit(a_output_scale)) {
    a_signed_multiplier = -a_multiplier_magnitude;
  }
  int32_t b_signed_multiplier = b_multiplier_magnitude;
  if (signbit(b_output_scale)) {
    b_signed_multiplier = -b_multiplier_magnitude;
  }

  // bias = half of 2**shift, minus each multiplier times its zero point.
  const int32_t rounding = INT32_C(1) << (shift - 1);
  const int32_t a_offset = a_signed_multiplier * (int32_t) a_zero_point;
  const int32_t b_offset = b_signed_multiplier * (int32_t) b_zero_point;
  const int32_t bias = rounding - a_offset - b_offset;

  const uint64_t wide_shift = (uint64_t) shift;
  const int16_t wide_output_zero_point = (int16_t) (uint16_t) output_zero_point;

  // Replicate every value across its lanes: 16 zero-point/bound entries,
  // 8 bias/multiplier entries and 4 shift entries.
  for (size_t lane = 0; lane < 16; lane++) {
    params->avx2.output_zero_point[lane] = wide_output_zero_point;
    params->avx2.output_min[lane] = output_min;
    params->avx2.output_max[lane] = output_max;
  }
  for (size_t lane = 0; lane < 8; lane++) {
    params->avx2.bias[lane] = bias;
    params->avx2.a_multiplier[lane] = a_signed_multiplier;
    params->avx2.b_multiplier[lane] = b_signed_multiplier;
  }
  for (size_t lane = 0; lane < 4; lane++) {
    params->avx2.shift[lane] = wide_shift;
  }
  return sizeof(params->avx2);
}
4573*4bdc9457SAndroid Build Coastguard Worker
// Populates params->avx512 from the input/output zero points, the two output
// scales and the output clamping bounds.
//
// Both scales are converted to signed integer multipliers that share a single
// right shift, picked from the larger of the two scale magnitudes. The rounding
// term and the two zero-point products are folded into one bias value. Each
// derived value is then replicated across the lanes of the avx512 member.
//
// Returns sizeof(params->avx512).
size_t xnn_init_qu8_add_minmax_avx512_params(
    union xnn_qu8_add_minmax_params params[XNN_MIN_ELEMENTS(1)],
    uint8_t a_zero_point,
    uint8_t b_zero_point,
    uint8_t output_zero_point,
    float a_output_scale,
    float b_output_scale,
    uint8_t output_min,
    uint8_t output_max)
{
  // Work on magnitudes first; the signs are re-applied to the multipliers below.
  const float a_magnitude = fabsf(a_output_scale);
  const float b_magnitude = fabsf(b_output_scale);
  assert(a_magnitude >= 0x1.0p-10f);
  assert(b_magnitude >= 0x1.0p-10f);
  assert(a_magnitude < 0x1.0p+8f);
  assert(b_magnitude < 0x1.0p+8f);

  // The larger magnitude supplies the exponent that fixes the shared shift.
  const float larger_magnitude = math_max_f32(a_magnitude, b_magnitude);
  assert(larger_magnitude >= 0x1.0p-10f);
  assert(larger_magnitude < 0x1.0p+8f);
  const int32_t larger_exponent = (int32_t) (float_as_uint32(larger_magnitude) >> 23) - 127;

  // 20 multiplier bits minus the exponent; asserted to fall within [12, 30].
  const uint32_t shift = (uint32_t) (20 - larger_exponent);
  assert(shift <= 30);
  assert(shift >= 12);

  // Adding the shift to the exponent field scales each magnitude by 2**shift
  // before it is rounded to an integer.
  const uint32_t exponent_bump = shift << 23;
  const float a_scaled = uint32_as_float(float_as_uint32(a_magnitude) + exponent_bump);
  const float b_scaled = uint32_as_float(float_as_uint32(b_magnitude) + exponent_bump);
  const int32_t a_multiplier_magnitude = (int32_t) lrintf(a_scaled);
  const int32_t b_multiplier_magnitude = (int32_t) lrintf(b_scaled);
  // The larger multiplier is at least 2**20; neither may exceed 2**21.
  assert(math_max_s32(a_multiplier_magnitude, b_multiplier_magnitude) >= INT32_C(0x00100000));
  assert(a_multiplier_magnitude <= INT32_C(0x00200000));
  assert(b_multiplier_magnitude <= INT32_C(0x00200000));

  // Restore the sign of each original scale.
  int32_t a_signed_multiplier = a_multiplier_magnitude;
  if (signbit(a_output_scale)) {
    a_signed_multiplier = -a_multiplier_magnitude;
  }
  int32_t b_signed_multiplier = b_multiplier_magnitude;
  if (signbit(b_output_scale)) {
    b_signed_multiplier = -b_multiplier_magnitude;
  }

  // bias = half of 2**shift, minus each multiplier times its zero point.
  const int32_t rounding = INT32_C(1) << (shift - 1);
  const int32_t a_offset = a_signed_multiplier * (int32_t) a_zero_point;
  const int32_t b_offset = b_signed_multiplier * (int32_t) b_zero_point;
  const int32_t bias = rounding - a_offset - b_offset;

  const uint64_t wide_shift = (uint64_t) shift;
  const int16_t wide_output_zero_point = (int16_t) (uint16_t) output_zero_point;

  // Replicate every value across its lanes: 32 zero-point/bound entries,
  // 16 bias/multiplier entries and 8 shift entries.
  for (size_t lane = 0; lane < 32; lane++) {
    params->avx512.output_zero_point[lane] = wide_output_zero_point;
    params->avx512.output_min[lane] = output_min;
    params->avx512.output_max[lane] = output_max;
  }
  for (size_t lane = 0; lane < 16; lane++) {
    params->avx512.bias[lane] = bias;
    params->avx512.a_multiplier[lane] = a_signed_multiplier;
    params->avx512.b_multiplier[lane] = b_signed_multiplier;
  }
  for (size_t lane = 0; lane < 8; lane++) {
    params->avx512.shift[lane] = wide_shift;
  }
  return sizeof(params->avx512);
}
4630*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4631*4bdc9457SAndroid Build Coastguard Worker
4632*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Populates params->neon from the input/output zero points, the two output
// scales and the output clamping bounds.
//
// Both scales are converted to signed integer multipliers that share a single
// shift, picked from the larger of the two scale magnitudes. Unlike the
// variants that precompute a bias, this one stores the input zero points
// as-is and records the shift negated in right_shift.
//
// Returns sizeof(params->neon).
size_t xnn_init_qu8_add_minmax_neon_params(
    union xnn_qu8_add_minmax_params params[XNN_MIN_ELEMENTS(1)],
    uint8_t a_zero_point,
    uint8_t b_zero_point,
    uint8_t output_zero_point,
    float a_output_scale,
    float b_output_scale,
    uint8_t output_min,
    uint8_t output_max)
{
  // Work on magnitudes first; the signs are re-applied to the multipliers below.
  const float a_magnitude = fabsf(a_output_scale);
  const float b_magnitude = fabsf(b_output_scale);
  assert(a_magnitude >= 0x1.0p-10f);
  assert(b_magnitude >= 0x1.0p-10f);
  assert(a_magnitude < 0x1.0p+8f);
  assert(b_magnitude < 0x1.0p+8f);

  // The larger magnitude supplies the exponent that fixes the shared shift.
  const float larger_magnitude = math_max_f32(a_magnitude, b_magnitude);
  assert(larger_magnitude >= 0x1.0p-10f);
  assert(larger_magnitude < 0x1.0p+8f);
  const int32_t larger_exponent = (int32_t) (float_as_uint32(larger_magnitude) >> 23) - 127;

  // 20 multiplier bits minus the exponent; asserted to fall within [12, 30].
  const uint32_t shift = (uint32_t) (20 - larger_exponent);
  assert(shift <= 30);
  assert(shift >= 12);

  // Adding the shift to the exponent field scales each magnitude by 2**shift
  // before it is rounded to an integer.
  const uint32_t exponent_bump = shift << 23;
  const float a_scaled = uint32_as_float(float_as_uint32(a_magnitude) + exponent_bump);
  const float b_scaled = uint32_as_float(float_as_uint32(b_magnitude) + exponent_bump);
  const int32_t a_multiplier_magnitude = (int32_t) lrintf(a_scaled);
  const int32_t b_multiplier_magnitude = (int32_t) lrintf(b_scaled);
  // The larger multiplier is at least 2**20; neither may exceed 2**21.
  assert(math_max_s32(a_multiplier_magnitude, b_multiplier_magnitude) >= INT32_C(0x00100000));
  assert(a_multiplier_magnitude <= INT32_C(0x00200000));
  assert(b_multiplier_magnitude <= INT32_C(0x00200000));

  // Restore the sign of each original scale.
  int32_t a_signed_multiplier = a_multiplier_magnitude;
  if (signbit(a_output_scale)) {
    a_signed_multiplier = -a_multiplier_magnitude;
  }
  int32_t b_signed_multiplier = b_multiplier_magnitude;
  if (signbit(b_output_scale)) {
    b_signed_multiplier = -b_multiplier_magnitude;
  }

  // The shift is written with its sign flipped.
  const int32_t negated_shift = (int32_t) -shift;

  params->neon.a_multiplier = a_signed_multiplier;
  params->neon.b_multiplier = b_signed_multiplier;
  params->neon.right_shift = negated_shift;
  params->neon.a_zero_point = a_zero_point;
  params->neon.b_zero_point = b_zero_point;
  params->neon.output_zero_point = (int16_t) (uint16_t) output_zero_point;
  params->neon.output_min = output_min;
  params->neon.output_max = output_max;
  return sizeof(params->neon);
}
4682*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4683*4bdc9457SAndroid Build Coastguard Worker
4684*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Populates params->wasmsimd from the input/output zero points, the two output
// scales and the output clamping bounds.
//
// Both scales are converted to signed integer multipliers that share a single
// right shift, picked from the larger of the two scale magnitudes. The rounding
// term and the two zero-point products are folded into one bias value. The
// shift is stored once; the other values are replicated across the lanes of
// the wasmsimd member.
//
// Returns sizeof(params->wasmsimd).
size_t xnn_init_qu8_add_minmax_wasmsimd_params(
    union xnn_qu8_add_minmax_params params[XNN_MIN_ELEMENTS(1)],
    uint8_t a_zero_point,
    uint8_t b_zero_point,
    uint8_t output_zero_point,
    float a_output_scale,
    float b_output_scale,
    uint8_t output_min,
    uint8_t output_max)
{
  // Work on magnitudes first; the signs are re-applied to the multipliers below.
  const float a_magnitude = fabsf(a_output_scale);
  const float b_magnitude = fabsf(b_output_scale);
  assert(a_magnitude >= 0x1.0p-10f);
  assert(b_magnitude >= 0x1.0p-10f);
  assert(a_magnitude < 0x1.0p+8f);
  assert(b_magnitude < 0x1.0p+8f);

  // The larger magnitude supplies the exponent that fixes the shared shift.
  const float larger_magnitude = math_max_f32(a_magnitude, b_magnitude);
  assert(larger_magnitude >= 0x1.0p-10f);
  assert(larger_magnitude < 0x1.0p+8f);
  const int32_t larger_exponent = (int32_t) (float_as_uint32(larger_magnitude) >> 23) - 127;

  // 20 multiplier bits minus the exponent; asserted to fall within [12, 30].
  const uint32_t shift = (uint32_t) (20 - larger_exponent);
  assert(shift <= 30);
  assert(shift >= 12);

  // Adding the shift to the exponent field scales each magnitude by 2**shift
  // before it is rounded to an integer.
  const uint32_t exponent_bump = shift << 23;
  const float a_scaled = uint32_as_float(float_as_uint32(a_magnitude) + exponent_bump);
  const float b_scaled = uint32_as_float(float_as_uint32(b_magnitude) + exponent_bump);
  const int32_t a_multiplier_magnitude = (int32_t) lrintf(a_scaled);
  const int32_t b_multiplier_magnitude = (int32_t) lrintf(b_scaled);
  // The larger multiplier is at least 2**20; neither may exceed 2**21.
  assert(math_max_s32(a_multiplier_magnitude, b_multiplier_magnitude) >= INT32_C(0x00100000));
  assert(a_multiplier_magnitude <= INT32_C(0x00200000));
  assert(b_multiplier_magnitude <= INT32_C(0x00200000));

  // Restore the sign of each original scale.
  int32_t a_signed_multiplier = a_multiplier_magnitude;
  if (signbit(a_output_scale)) {
    a_signed_multiplier = -a_multiplier_magnitude;
  }
  int32_t b_signed_multiplier = b_multiplier_magnitude;
  if (signbit(b_output_scale)) {
    b_signed_multiplier = -b_multiplier_magnitude;
  }

  // bias = half of 2**shift, minus each multiplier times its zero point.
  const int32_t rounding = INT32_C(1) << (shift - 1);
  const int32_t a_offset = a_signed_multiplier * (int32_t) a_zero_point;
  const int32_t b_offset = b_signed_multiplier * (int32_t) b_zero_point;
  const int32_t bias = rounding - a_offset - b_offset;

  const int16_t wide_output_zero_point = (int16_t) (uint16_t) output_zero_point;

  // The shift is a single field; everything else is replicated:
  // 8 bound entries, 4 zero points and 2 bias/multiplier entries.
  params->wasmsimd.shift = shift;
  for (size_t lane = 0; lane < 8; lane++) {
    params->wasmsimd.output_min[lane] = output_min;
    params->wasmsimd.output_max[lane] = output_max;
  }
  for (size_t lane = 0; lane < 4; lane++) {
    params->wasmsimd.output_zero_point[lane] = wide_output_zero_point;
  }
  for (size_t lane = 0; lane < 2; lane++) {
    params->wasmsimd.bias[lane] = bias;
    params->wasmsimd.a_multiplier[lane] = a_signed_multiplier;
    params->wasmsimd.b_multiplier[lane] = b_signed_multiplier;
  }
  return sizeof(params->wasmsimd);
}
4741*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
4742*4bdc9457SAndroid Build Coastguard Worker
// Fills in the scalar variant of the QU8 addition parameters.
//
// Both scales are converted into signed fixed-point multipliers that share a
// single right shift, chosen from the binary exponent of the larger magnitude.
// The rounding term and the contribution of both input zero points are folded
// into one precomputed bias, and the clamping bounds are stored relative to
// the output zero point.
//
// The magnitudes of a_output_scale and b_output_scale are expected to lie in
// [2**-10, 2**8); this is enforced only through assert().
//
// Returns sizeof(params->scalar).
size_t xnn_init_qu8_add_minmax_scalar_params(
  union xnn_qu8_add_minmax_params params[XNN_MIN_ELEMENTS(1)],
  uint8_t a_zero_point,
  uint8_t b_zero_point,
  uint8_t output_zero_point,
  float a_output_scale,
  float b_output_scale,
  uint8_t output_min,
  uint8_t output_max)
{
  // Only magnitudes matter for choosing the fixed-point format; the signs are
  // re-applied to the multipliers further down.
  const float abs_a_output_scale = fabsf(a_output_scale);
  const float abs_b_output_scale = fabsf(b_output_scale);
  assert(abs_a_output_scale >= 0x1.0p-10f);
  assert(abs_b_output_scale >= 0x1.0p-10f);
  assert(abs_a_output_scale < 0x1.0p+8f);
  assert(abs_b_output_scale < 0x1.0p+8f);

  // The larger magnitude determines the shift shared by both multipliers.
  const float max_abs_output_scale = math_max_f32(abs_a_output_scale, abs_b_output_scale);
  assert(max_abs_output_scale >= 0x1.0p-10f);
  assert(max_abs_output_scale < 0x1.0p+8f);

  // Unbiased binary exponent of the larger magnitude, read from its bit pattern.
  const uint32_t largest_scale_bits = float_as_uint32(max_abs_output_scale);
  const int32_t largest_scale_exponent = (int32_t) (largest_scale_bits >> 23) - 127;

  // The asserts below require the shift to be within [12, 30].
  const uint32_t shift = (uint32_t) (20 /* multiplier bits */ - largest_scale_exponent);
  assert(shift <= 30);
  assert(shift >= 12);

  // Adding (shift << 23) to a scale's bit pattern raises its exponent field by
  // `shift`, i.e. multiplies the value by 2**shift, before rounding to integer.
  // Each magnitude must end up at most 2**21, the larger one at least 2**20.
  const uint32_t exponent_bump = shift << 23;
  const float a_scaled = uint32_as_float(float_as_uint32(abs_a_output_scale) + exponent_bump);
  const float b_scaled = uint32_as_float(float_as_uint32(abs_b_output_scale) + exponent_bump);
  const int32_t abs_a_multiplier = (int32_t) lrintf(a_scaled);
  const int32_t abs_b_multiplier = (int32_t) lrintf(b_scaled);
  assert(math_max_s32(abs_a_multiplier, abs_b_multiplier) >= INT32_C(0x00100000));
  assert(abs_a_multiplier <= INT32_C(0x00200000));
  assert(abs_b_multiplier <= INT32_C(0x00200000));

  // Restore the sign of each original scale.
  int32_t a_multiplier = abs_a_multiplier;
  if (signbit(a_output_scale)) {
    a_multiplier = -abs_a_multiplier;
  }
  int32_t b_multiplier = abs_b_multiplier;
  if (signbit(b_output_scale)) {
    b_multiplier = -abs_b_multiplier;
  }

  // Bias = rounding constant (half of 2**shift) minus the zero-point terms.
  int32_t bias = INT32_C(1) << (shift - 1);
  bias -= a_multiplier * (int32_t) (uint32_t) a_zero_point;
  bias -= b_multiplier * (int32_t) (uint32_t) b_zero_point;

  const int32_t output_zero_point_s32 = (int32_t) (uint32_t) output_zero_point;

  params->scalar.bias = bias;
  params->scalar.a_multiplier = a_multiplier;
  params->scalar.b_multiplier = b_multiplier;
  params->scalar.shift = shift;
  params->scalar.output_min_less_zero_point = (int32_t) (uint32_t) output_min - output_zero_point_s32;
  params->scalar.output_max_less_zero_point = (int32_t) (uint32_t) output_max - output_zero_point_s32;
  params->scalar.output_zero_point = output_zero_point_s32;
  return sizeof(params->scalar);
}
4792*4bdc9457SAndroid Build Coastguard Worker
4793*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Fills in the SSE2 variant of the QS8 addition parameters.
//
// Both scales are converted into signed fixed-point multipliers that share a
// single right shift, chosen from the binary exponent of the larger magnitude.
// The rounding term and both input zero points are folded into one bias.
// The values are then replicated into the fixed-size arrays of params->sse2:
// 4 bias entries, 8 entries for each 16-bit half of each multiplier, and 8
// entries each for the output zero point and the output bounds. The shift and
// the full 32-bit pattern of the b multiplier are stored once.
//
// The magnitudes of a_output_scale and b_output_scale are expected to lie in
// [2**-10, 2**8); this is enforced only through assert().
//
// Returns sizeof(params->sse2).
size_t xnn_init_qs8_add_minmax_sse2_params(
  union xnn_qs8_add_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int8_t a_zero_point,
  int8_t b_zero_point,
  int8_t output_zero_point,
  float a_output_scale,
  float b_output_scale,
  int8_t output_min,
  int8_t output_max)
{
  // Only magnitudes matter for choosing the fixed-point format; the signs are
  // re-applied to the multipliers further down.
  const float abs_a_output_scale = fabsf(a_output_scale);
  const float abs_b_output_scale = fabsf(b_output_scale);
  assert(abs_a_output_scale >= 0x1.0p-10f);
  assert(abs_b_output_scale >= 0x1.0p-10f);
  assert(abs_a_output_scale < 0x1.0p+8f);
  assert(abs_b_output_scale < 0x1.0p+8f);

  // The larger magnitude determines the shift shared by both multipliers.
  const float max_abs_output_scale = math_max_f32(abs_a_output_scale, abs_b_output_scale);
  assert(max_abs_output_scale >= 0x1.0p-10f);
  assert(max_abs_output_scale < 0x1.0p+8f);

  // Unbiased binary exponent of the larger magnitude, read from its bit pattern.
  const uint32_t largest_scale_bits = float_as_uint32(max_abs_output_scale);
  const int32_t largest_scale_exponent = (int32_t) (largest_scale_bits >> 23) - 127;

  // The asserts below require the shift to be within [12, 30].
  const uint32_t shift = (uint32_t) (20 /* multiplier bits */ - largest_scale_exponent);
  assert(shift <= 30);
  assert(shift >= 12);

  // Adding (shift << 23) to a scale's bit pattern raises its exponent field by
  // `shift`, i.e. multiplies the value by 2**shift, before rounding to integer.
  // Each magnitude must end up at most 2**21, the larger one at least 2**20.
  const uint32_t exponent_bump = shift << 23;
  const float a_scaled = uint32_as_float(float_as_uint32(abs_a_output_scale) + exponent_bump);
  const float b_scaled = uint32_as_float(float_as_uint32(abs_b_output_scale) + exponent_bump);
  const int32_t abs_a_multiplier = (int32_t) lrintf(a_scaled);
  const int32_t abs_b_multiplier = (int32_t) lrintf(b_scaled);
  assert(math_max_s32(abs_a_multiplier, abs_b_multiplier) >= INT32_C(0x00100000));
  assert(abs_a_multiplier <= INT32_C(0x00200000));
  assert(abs_b_multiplier <= INT32_C(0x00200000));

  // Restore the sign of each original scale.
  int32_t a_multiplier = abs_a_multiplier;
  if (signbit(a_output_scale)) {
    a_multiplier = -abs_a_multiplier;
  }
  int32_t b_multiplier = abs_b_multiplier;
  if (signbit(b_output_scale)) {
    b_multiplier = -abs_b_multiplier;
  }

  // Bias = rounding constant (half of 2**shift) minus the zero-point terms.
  int32_t bias = INT32_C(1) << (shift - 1);
  bias -= a_multiplier * (int32_t) a_zero_point;
  bias -= b_multiplier * (int32_t) b_zero_point;
  for (size_t lane = 0; lane < 4; lane++) {
    params->sse2.bias[lane] = bias;
  }

  // Split each multiplier's 32-bit two's-complement pattern into 16-bit halves.
  const uint32_t a_multiplier_bits = (uint32_t) a_multiplier;
  const uint32_t b_multiplier_bits = (uint32_t) b_multiplier;
  const uint16_t a_low_half = (uint16_t) a_multiplier_bits;
  const uint16_t a_high_half = (uint16_t) (a_multiplier_bits >> 16);
  const uint16_t b_low_half = (uint16_t) b_multiplier_bits;
  const uint16_t b_high_half = (uint16_t) (b_multiplier_bits >> 16);

  // Output zero point and bounds are stored widened to 16 bits.
  const int16_t zero_point_s16 = (int16_t) output_zero_point;
  const int16_t min_s16 = (int16_t) output_min;
  const int16_t max_s16 = (int16_t) output_max;

  for (size_t lane = 0; lane < 8; lane++) {
    params->sse2.a_multiplier_lo[lane] = a_low_half;
    params->sse2.a_multiplier_hi[lane] = a_high_half;
    params->sse2.b_multiplier_lo[lane] = b_low_half;
    params->sse2.b_multiplier_hi[lane] = b_high_half;
    params->sse2.output_zero_point[lane] = zero_point_s16;
    params->sse2.output_min[lane] = min_s16;
    params->sse2.output_max[lane] = max_s16;
  }

  params->sse2.shift = shift;
  params->sse2.b_multiplier = b_multiplier_bits;
  return sizeof(params->sse2);
}
4857*4bdc9457SAndroid Build Coastguard Worker
// Fills in the SSE4 "mul16" variant of the QS8 addition parameters.
//
// Both scales are converted into signed fixed-point multipliers that share a
// single right shift, chosen from the binary exponent of the larger magnitude.
// The rounding term and both input zero points are folded into one bias.
// The values are then replicated into the fixed-size arrays of
// params->sse4_mul16: 4 bias entries, 8 entries for each 16-bit half of each
// multiplier, 8 entries for the output zero point (widened to 16 bits), and
// 16 entries each for the 8-bit output bounds. The shift and the full 32-bit
// pattern of the b multiplier are stored once.
//
// The magnitudes of a_output_scale and b_output_scale are expected to lie in
// [2**-10, 2**8); this is enforced only through assert().
//
// Returns sizeof(params->sse4_mul16).
size_t xnn_init_qs8_add_minmax_sse4_mul16_params(
  union xnn_qs8_add_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int8_t a_zero_point,
  int8_t b_zero_point,
  int8_t output_zero_point,
  float a_output_scale,
  float b_output_scale,
  int8_t output_min,
  int8_t output_max)
{
  // Only magnitudes matter for choosing the fixed-point format; the signs are
  // re-applied to the multipliers further down.
  const float abs_a_output_scale = fabsf(a_output_scale);
  const float abs_b_output_scale = fabsf(b_output_scale);
  assert(abs_a_output_scale >= 0x1.0p-10f);
  assert(abs_b_output_scale >= 0x1.0p-10f);
  assert(abs_a_output_scale < 0x1.0p+8f);
  assert(abs_b_output_scale < 0x1.0p+8f);

  // The larger magnitude determines the shift shared by both multipliers.
  const float max_abs_output_scale = math_max_f32(abs_a_output_scale, abs_b_output_scale);
  assert(max_abs_output_scale >= 0x1.0p-10f);
  assert(max_abs_output_scale < 0x1.0p+8f);

  // Unbiased binary exponent of the larger magnitude, read from its bit pattern.
  const uint32_t largest_scale_bits = float_as_uint32(max_abs_output_scale);
  const int32_t largest_scale_exponent = (int32_t) (largest_scale_bits >> 23) - 127;

  // The asserts below require the shift to be within [12, 30].
  const uint32_t shift = (uint32_t) (20 /* multiplier bits */ - largest_scale_exponent);
  assert(shift <= 30);
  assert(shift >= 12);

  // Adding (shift << 23) to a scale's bit pattern raises its exponent field by
  // `shift`, i.e. multiplies the value by 2**shift, before rounding to integer.
  // Each magnitude must end up at most 2**21, the larger one at least 2**20.
  const uint32_t exponent_bump = shift << 23;
  const float a_scaled = uint32_as_float(float_as_uint32(abs_a_output_scale) + exponent_bump);
  const float b_scaled = uint32_as_float(float_as_uint32(abs_b_output_scale) + exponent_bump);
  const int32_t abs_a_multiplier = (int32_t) lrintf(a_scaled);
  const int32_t abs_b_multiplier = (int32_t) lrintf(b_scaled);
  assert(math_max_s32(abs_a_multiplier, abs_b_multiplier) >= INT32_C(0x00100000));
  assert(abs_a_multiplier <= INT32_C(0x00200000));
  assert(abs_b_multiplier <= INT32_C(0x00200000));

  // Restore the sign of each original scale.
  int32_t a_multiplier = abs_a_multiplier;
  if (signbit(a_output_scale)) {
    a_multiplier = -abs_a_multiplier;
  }
  int32_t b_multiplier = abs_b_multiplier;
  if (signbit(b_output_scale)) {
    b_multiplier = -abs_b_multiplier;
  }

  // Bias = rounding constant (half of 2**shift) minus the zero-point terms.
  int32_t bias = INT32_C(1) << (shift - 1);
  bias -= a_multiplier * (int32_t) a_zero_point;
  bias -= b_multiplier * (int32_t) b_zero_point;
  for (size_t lane = 0; lane < 4; lane++) {
    params->sse4_mul16.bias[lane] = bias;
  }

  // Split each multiplier's 32-bit two's-complement pattern into 16-bit halves.
  const uint32_t a_multiplier_bits = (uint32_t) a_multiplier;
  const uint32_t b_multiplier_bits = (uint32_t) b_multiplier;
  const uint16_t a_low_half = (uint16_t) a_multiplier_bits;
  const uint16_t a_high_half = (uint16_t) (a_multiplier_bits >> 16);
  const uint16_t b_low_half = (uint16_t) b_multiplier_bits;
  const uint16_t b_high_half = (uint16_t) (b_multiplier_bits >> 16);
  const int16_t zero_point_s16 = (int16_t) output_zero_point;

  for (size_t lane = 0; lane < 8; lane++) {
    params->sse4_mul16.a_multiplier_lo[lane] = a_low_half;
    params->sse4_mul16.a_multiplier_hi[lane] = a_high_half;
    params->sse4_mul16.b_multiplier_lo[lane] = b_low_half;
    params->sse4_mul16.b_multiplier_hi[lane] = b_high_half;
    params->sse4_mul16.output_zero_point[lane] = zero_point_s16;
  }

  params->sse4_mul16.shift = shift;
  params->sse4_mul16.b_multiplier = b_multiplier_bits;

  // Output bounds are kept at their original 8-bit width.
  for (size_t lane = 0; lane < 16; lane++) {
    params->sse4_mul16.output_min[lane] = output_min;
    params->sse4_mul16.output_max[lane] = output_max;
  }
  return sizeof(params->sse4_mul16);
}
4923*4bdc9457SAndroid Build Coastguard Worker
// Fills in the SSE4 "mul32" variant of the QS8 addition parameters.
//
// Both scales are converted into signed fixed-point multipliers that share a
// single right shift, chosen from the binary exponent of the larger magnitude.
// The rounding term and both input zero points are folded into one bias.
// The values are then replicated into the fixed-size arrays of
// params->sse4_mul32: 4 entries each for the bias and the two 32-bit
// multipliers, 2 entries for the shift (widened to 64 bits), 8 entries for
// the output zero point (widened to 16 bits), and 16 entries each for the
// 8-bit output bounds.
//
// The magnitudes of a_output_scale and b_output_scale are expected to lie in
// [2**-10, 2**8); this is enforced only through assert().
//
// Returns sizeof(params->sse4_mul32).
size_t xnn_init_qs8_add_minmax_sse4_mul32_params(
  union xnn_qs8_add_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int8_t a_zero_point,
  int8_t b_zero_point,
  int8_t output_zero_point,
  float a_output_scale,
  float b_output_scale,
  int8_t output_min,
  int8_t output_max)
{
  // Only magnitudes matter for choosing the fixed-point format; the signs are
  // re-applied to the multipliers further down.
  const float abs_a_output_scale = fabsf(a_output_scale);
  const float abs_b_output_scale = fabsf(b_output_scale);
  assert(abs_a_output_scale >= 0x1.0p-10f);
  assert(abs_b_output_scale >= 0x1.0p-10f);
  assert(abs_a_output_scale < 0x1.0p+8f);
  assert(abs_b_output_scale < 0x1.0p+8f);

  // The larger magnitude determines the shift shared by both multipliers.
  const float max_abs_output_scale = math_max_f32(abs_a_output_scale, abs_b_output_scale);
  assert(max_abs_output_scale >= 0x1.0p-10f);
  assert(max_abs_output_scale < 0x1.0p+8f);

  // Unbiased binary exponent of the larger magnitude, read from its bit pattern.
  const uint32_t largest_scale_bits = float_as_uint32(max_abs_output_scale);
  const int32_t largest_scale_exponent = (int32_t) (largest_scale_bits >> 23) - 127;

  // The asserts below require the shift to be within [12, 30].
  const uint32_t shift = (uint32_t) (20 /* multiplier bits */ - largest_scale_exponent);
  assert(shift <= 30);
  assert(shift >= 12);

  // Adding (shift << 23) to a scale's bit pattern raises its exponent field by
  // `shift`, i.e. multiplies the value by 2**shift, before rounding to integer.
  // Each magnitude must end up at most 2**21, the larger one at least 2**20.
  const uint32_t exponent_bump = shift << 23;
  const float a_scaled = uint32_as_float(float_as_uint32(abs_a_output_scale) + exponent_bump);
  const float b_scaled = uint32_as_float(float_as_uint32(abs_b_output_scale) + exponent_bump);
  const int32_t abs_a_multiplier = (int32_t) lrintf(a_scaled);
  const int32_t abs_b_multiplier = (int32_t) lrintf(b_scaled);
  assert(math_max_s32(abs_a_multiplier, abs_b_multiplier) >= INT32_C(0x00100000));
  assert(abs_a_multiplier <= INT32_C(0x00200000));
  assert(abs_b_multiplier <= INT32_C(0x00200000));

  // Restore the sign of each original scale.
  int32_t a_multiplier = abs_a_multiplier;
  if (signbit(a_output_scale)) {
    a_multiplier = -abs_a_multiplier;
  }
  int32_t b_multiplier = abs_b_multiplier;
  if (signbit(b_output_scale)) {
    b_multiplier = -abs_b_multiplier;
  }

  // Bias = rounding constant (half of 2**shift) minus the zero-point terms.
  int32_t bias = INT32_C(1) << (shift - 1);
  bias -= a_multiplier * (int32_t) a_zero_point;
  bias -= b_multiplier * (int32_t) b_zero_point;

  for (size_t lane = 0; lane < 4; lane++) {
    params->sse4_mul32.bias[lane] = bias;
    params->sse4_mul32.a_multiplier[lane] = a_multiplier;
    params->sse4_mul32.b_multiplier[lane] = b_multiplier;
  }

  const uint64_t shift_u64 = (uint64_t) shift;
  for (size_t lane = 0; lane < 2; lane++) {
    params->sse4_mul32.shift[lane] = shift_u64;
  }

  const int16_t zero_point_s16 = (int16_t) output_zero_point;
  for (size_t lane = 0; lane < 8; lane++) {
    params->sse4_mul32.output_zero_point[lane] = zero_point_s16;
  }

  // Output bounds are kept at their original 8-bit width.
  for (size_t lane = 0; lane < 16; lane++) {
    params->sse4_mul32.output_min[lane] = output_min;
    params->sse4_mul32.output_max[lane] = output_max;
  }
  return sizeof(params->sse4_mul32);
}
4982*4bdc9457SAndroid Build Coastguard Worker
// Fills in the AVX2 variant of the QS8 addition parameters.
//
// Both scales are converted into signed fixed-point multipliers that share a
// single right shift, chosen from the binary exponent of the larger magnitude.
// The rounding term and both input zero points are folded into one bias.
// The values are then replicated into the fixed-size arrays of params->avx2:
// 8 entries each for the bias and the two 32-bit multipliers, 4 entries for
// the shift (widened to 64 bits), and 16 entries each for the output zero
// point (widened to 16 bits) and the 8-bit output bounds.
//
// The magnitudes of a_output_scale and b_output_scale are expected to lie in
// [2**-10, 2**8); this is enforced only through assert().
//
// Returns sizeof(params->avx2).
size_t xnn_init_qs8_add_minmax_avx2_params(
  union xnn_qs8_add_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int8_t a_zero_point,
  int8_t b_zero_point,
  int8_t output_zero_point,
  float a_output_scale,
  float b_output_scale,
  int8_t output_min,
  int8_t output_max)
{
  // Only magnitudes matter for choosing the fixed-point format; the signs are
  // re-applied to the multipliers further down.
  const float abs_a_output_scale = fabsf(a_output_scale);
  const float abs_b_output_scale = fabsf(b_output_scale);
  assert(abs_a_output_scale >= 0x1.0p-10f);
  assert(abs_b_output_scale >= 0x1.0p-10f);
  assert(abs_a_output_scale < 0x1.0p+8f);
  assert(abs_b_output_scale < 0x1.0p+8f);

  // The larger magnitude determines the shift shared by both multipliers.
  const float max_abs_output_scale = math_max_f32(abs_a_output_scale, abs_b_output_scale);
  assert(max_abs_output_scale >= 0x1.0p-10f);
  assert(max_abs_output_scale < 0x1.0p+8f);

  // Unbiased binary exponent of the larger magnitude, read from its bit pattern.
  const uint32_t largest_scale_bits = float_as_uint32(max_abs_output_scale);
  const int32_t largest_scale_exponent = (int32_t) (largest_scale_bits >> 23) - 127;

  // The asserts below require the shift to be within [12, 30].
  const uint32_t shift = (uint32_t) (20 /* multiplier bits */ - largest_scale_exponent);
  assert(shift <= 30);
  assert(shift >= 12);

  // Adding (shift << 23) to a scale's bit pattern raises its exponent field by
  // `shift`, i.e. multiplies the value by 2**shift, before rounding to integer.
  // Each magnitude must end up at most 2**21, the larger one at least 2**20.
  const uint32_t exponent_bump = shift << 23;
  const float a_scaled = uint32_as_float(float_as_uint32(abs_a_output_scale) + exponent_bump);
  const float b_scaled = uint32_as_float(float_as_uint32(abs_b_output_scale) + exponent_bump);
  const int32_t abs_a_multiplier = (int32_t) lrintf(a_scaled);
  const int32_t abs_b_multiplier = (int32_t) lrintf(b_scaled);
  assert(math_max_s32(abs_a_multiplier, abs_b_multiplier) >= INT32_C(0x00100000));
  assert(abs_a_multiplier <= INT32_C(0x00200000));
  assert(abs_b_multiplier <= INT32_C(0x00200000));

  // Restore the sign of each original scale.
  int32_t a_multiplier = abs_a_multiplier;
  if (signbit(a_output_scale)) {
    a_multiplier = -abs_a_multiplier;
  }
  int32_t b_multiplier = abs_b_multiplier;
  if (signbit(b_output_scale)) {
    b_multiplier = -abs_b_multiplier;
  }

  // Bias = rounding constant (half of 2**shift) minus the zero-point terms.
  int32_t bias = INT32_C(1) << (shift - 1);
  bias -= a_multiplier * (int32_t) a_zero_point;
  bias -= b_multiplier * (int32_t) b_zero_point;

  for (size_t lane = 0; lane < 8; lane++) {
    params->avx2.bias[lane] = bias;
    params->avx2.a_multiplier[lane] = a_multiplier;
    params->avx2.b_multiplier[lane] = b_multiplier;
  }

  const uint64_t shift_u64 = (uint64_t) shift;
  for (size_t lane = 0; lane < 4; lane++) {
    params->avx2.shift[lane] = shift_u64;
  }

  // The zero point is widened to 16 bits; the bounds keep their 8-bit width.
  const int16_t zero_point_s16 = (int16_t) output_zero_point;
  for (size_t lane = 0; lane < 16; lane++) {
    params->avx2.output_zero_point[lane] = zero_point_s16;
    params->avx2.output_min[lane] = output_min;
    params->avx2.output_max[lane] = output_max;
  }
  return sizeof(params->avx2);
}
5039*4bdc9457SAndroid Build Coastguard Worker
// Fills in the AVX512 variant of the QS8 addition parameters.
//
// Both scales are converted into signed fixed-point multipliers that share a
// single right shift, chosen from the binary exponent of the larger magnitude.
// The rounding term and both input zero points are folded into one bias.
// The values are then replicated into the fixed-size arrays of params->avx512:
// 16 entries each for the bias and the two 32-bit multipliers, 8 entries for
// the shift (widened to 64 bits), and 32 entries each for the output zero
// point (widened to 16 bits) and the 8-bit output bounds.
//
// The magnitudes of a_output_scale and b_output_scale are expected to lie in
// [2**-10, 2**8); this is enforced only through assert().
//
// Returns sizeof(params->avx512).
size_t xnn_init_qs8_add_minmax_avx512_params(
  union xnn_qs8_add_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int8_t a_zero_point,
  int8_t b_zero_point,
  int8_t output_zero_point,
  float a_output_scale,
  float b_output_scale,
  int8_t output_min,
  int8_t output_max)
{
  // Only magnitudes matter for choosing the fixed-point format; the signs are
  // re-applied to the multipliers further down.
  const float abs_a_output_scale = fabsf(a_output_scale);
  const float abs_b_output_scale = fabsf(b_output_scale);
  assert(abs_a_output_scale >= 0x1.0p-10f);
  assert(abs_b_output_scale >= 0x1.0p-10f);
  assert(abs_a_output_scale < 0x1.0p+8f);
  assert(abs_b_output_scale < 0x1.0p+8f);

  // The larger magnitude determines the shift shared by both multipliers.
  const float max_abs_output_scale = math_max_f32(abs_a_output_scale, abs_b_output_scale);
  assert(max_abs_output_scale >= 0x1.0p-10f);
  assert(max_abs_output_scale < 0x1.0p+8f);

  // Unbiased binary exponent of the larger magnitude, read from its bit pattern.
  const uint32_t largest_scale_bits = float_as_uint32(max_abs_output_scale);
  const int32_t largest_scale_exponent = (int32_t) (largest_scale_bits >> 23) - 127;

  // The asserts below require the shift to be within [12, 30].
  const uint32_t shift = (uint32_t) (20 /* multiplier bits */ - largest_scale_exponent);
  assert(shift <= 30);
  assert(shift >= 12);

  // Adding (shift << 23) to a scale's bit pattern raises its exponent field by
  // `shift`, i.e. multiplies the value by 2**shift, before rounding to integer.
  // Each magnitude must end up at most 2**21, the larger one at least 2**20.
  const uint32_t exponent_bump = shift << 23;
  const float a_scaled = uint32_as_float(float_as_uint32(abs_a_output_scale) + exponent_bump);
  const float b_scaled = uint32_as_float(float_as_uint32(abs_b_output_scale) + exponent_bump);
  const int32_t abs_a_multiplier = (int32_t) lrintf(a_scaled);
  const int32_t abs_b_multiplier = (int32_t) lrintf(b_scaled);
  assert(math_max_s32(abs_a_multiplier, abs_b_multiplier) >= INT32_C(0x00100000));
  assert(abs_a_multiplier <= INT32_C(0x00200000));
  assert(abs_b_multiplier <= INT32_C(0x00200000));

  // Restore the sign of each original scale.
  int32_t a_multiplier = abs_a_multiplier;
  if (signbit(a_output_scale)) {
    a_multiplier = -abs_a_multiplier;
  }
  int32_t b_multiplier = abs_b_multiplier;
  if (signbit(b_output_scale)) {
    b_multiplier = -abs_b_multiplier;
  }

  // Bias = rounding constant (half of 2**shift) minus the zero-point terms.
  int32_t bias = INT32_C(1) << (shift - 1);
  bias -= a_multiplier * (int32_t) a_zero_point;
  bias -= b_multiplier * (int32_t) b_zero_point;

  for (size_t lane = 0; lane < 16; lane++) {
    params->avx512.bias[lane] = bias;
    params->avx512.a_multiplier[lane] = a_multiplier;
    params->avx512.b_multiplier[lane] = b_multiplier;
  }

  const uint64_t shift_u64 = (uint64_t) shift;
  for (size_t lane = 0; lane < 8; lane++) {
    params->avx512.shift[lane] = shift_u64;
  }

  // The zero point is widened to 16 bits; the bounds keep their 8-bit width.
  const int16_t zero_point_s16 = (int16_t) output_zero_point;
  for (size_t lane = 0; lane < 32; lane++) {
    params->avx512.output_zero_point[lane] = zero_point_s16;
    params->avx512.output_min[lane] = output_min;
    params->avx512.output_max[lane] = output_max;
  }
  return sizeof(params->avx512);
}
5096*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5097*4bdc9457SAndroid Build Coastguard Worker
5098*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Initializes the `neon` variant of the QS8 addition min/max parameters.
//
// Each input scale is turned into a signed integer multiplier; both
// multipliers share one shift amount chosen from the larger scale magnitude.
// The shift is stored negated in `right_shift`, while zero points and clamping
// bounds are stored as passed in.
//
// Both scale magnitudes are asserted to lie in [2**-10, 2**8).
// Returns sizeof(params->neon).
size_t xnn_init_qs8_add_minmax_neon_params(
  union xnn_qs8_add_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int8_t a_zero_point,
  int8_t b_zero_point,
  int8_t output_zero_point,
  float a_output_scale,
  float b_output_scale,
  int8_t output_min,
  int8_t output_max)
{
  const float abs_a_output_scale = fabsf(a_output_scale);
  const float abs_b_output_scale = fabsf(b_output_scale);
  assert(abs_a_output_scale >= 0x1.0p-10f);
  assert(abs_b_output_scale >= 0x1.0p-10f);
  assert(abs_a_output_scale < 0x1.0p+8f);
  assert(abs_b_output_scale < 0x1.0p+8f);

  // The larger of the two magnitudes drives the shared shift amount.
  const float max_abs_output_scale = math_max_f32(abs_a_output_scale, abs_b_output_scale);
  assert(max_abs_output_scale >= 0x1.0p-10f);
  assert(max_abs_output_scale < 0x1.0p+8f);
  const int32_t unbiased_exponent = (int32_t) (float_as_uint32(max_abs_output_scale) >> 23) - 127;

  // Shift is in [12, 30] range.
  const uint32_t shift = (uint32_t) (20 /* multiplier bits */ - unbiased_exponent);
  assert(shift <= 30);
  assert(shift >= 12);

  // Add `shift` to the bits above the 23 low bits of each magnitude, then round to integer.
  // Multipliers are in [0, 2**21) range, largest multiplier is in [2**20, 2**21) range.
  const uint32_t exponent_bump = shift << 23;
  const float scaled_abs_a = uint32_as_float(float_as_uint32(abs_a_output_scale) + exponent_bump);
  const float scaled_abs_b = uint32_as_float(float_as_uint32(abs_b_output_scale) + exponent_bump);
  const int32_t abs_a_multiplier = (int32_t) lrintf(scaled_abs_a);
  const int32_t abs_b_multiplier = (int32_t) lrintf(scaled_abs_b);
  assert(math_max_s32(abs_a_multiplier, abs_b_multiplier) >= INT32_C(0x00100000));
  assert(abs_a_multiplier <= INT32_C(0x00200000));
  assert(abs_b_multiplier <= INT32_C(0x00200000));

  // Re-apply the sign of each original scale to its multiplier.
  int32_t signed_a_multiplier = abs_a_multiplier;
  if (signbit(a_output_scale)) {
    signed_a_multiplier = -abs_a_multiplier;
  }
  int32_t signed_b_multiplier = abs_b_multiplier;
  if (signbit(b_output_scale)) {
    signed_b_multiplier = -abs_b_multiplier;
  }

  // Input side: zero points and multipliers.
  params->neon.a_zero_point = a_zero_point;
  params->neon.a_multiplier = (int32_t) signed_a_multiplier;
  params->neon.b_zero_point = b_zero_point;
  params->neon.b_multiplier = (int32_t) signed_b_multiplier;

  // The shift amount is stored negated.
  params->neon.right_shift = (int32_t) -shift;

  // Output side: zero point and clamping bounds.
  params->neon.output_zero_point = (int16_t) output_zero_point;
  params->neon.output_min = output_min;
  params->neon.output_max = output_max;

  return sizeof(params->neon);
}
5148*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5149*4bdc9457SAndroid Build Coastguard Worker
5150*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Initializes the `wasmsimd` variant of the QS8 addition min/max parameters.
//
// Each input scale is turned into a signed integer multiplier; both
// multipliers share one shift amount chosen from the larger scale magnitude.
// The stored bias is the rounding term 2**(shift - 1) minus each multiplier
// times its input zero point. Bias and multipliers are replicated into 2
// elements, the output zero point into 4, and the clamping bounds into 8.
//
// Both scale magnitudes are asserted to lie in [2**-10, 2**8).
// Returns sizeof(params->wasmsimd).
size_t xnn_init_qs8_add_minmax_wasmsimd_params(
  union xnn_qs8_add_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int8_t a_zero_point,
  int8_t b_zero_point,
  int8_t output_zero_point,
  float a_output_scale,
  float b_output_scale,
  int8_t output_min,
  int8_t output_max)
{
  const float abs_a_output_scale = fabsf(a_output_scale);
  const float abs_b_output_scale = fabsf(b_output_scale);
  assert(abs_a_output_scale >= 0x1.0p-10f);
  assert(abs_b_output_scale >= 0x1.0p-10f);
  assert(abs_a_output_scale < 0x1.0p+8f);
  assert(abs_b_output_scale < 0x1.0p+8f);

  // The larger of the two magnitudes drives the shared shift amount.
  const float max_abs_output_scale = math_max_f32(abs_a_output_scale, abs_b_output_scale);
  assert(max_abs_output_scale >= 0x1.0p-10f);
  assert(max_abs_output_scale < 0x1.0p+8f);
  const uint32_t largest_scale_bits = float_as_uint32(max_abs_output_scale);
  const int32_t largest_scale_exponent = (int32_t) (largest_scale_bits >> 23) - 127;

  // Shift is in [12, 30] range.
  const uint32_t shift = (uint32_t) (20 /* multiplier bits */ - largest_scale_exponent);
  assert(shift <= 30);
  assert(shift >= 12);

  // Multipliers are in [0, 2**21) range, largest multiplier is in [2**20, 2**21) range.
  const uint32_t shifted_a_bits = float_as_uint32(abs_a_output_scale) + (shift << 23);
  const int32_t abs_a_multiplier = (int32_t) lrintf(uint32_as_float(shifted_a_bits));
  const uint32_t shifted_b_bits = float_as_uint32(abs_b_output_scale) + (shift << 23);
  const int32_t abs_b_multiplier = (int32_t) lrintf(uint32_as_float(shifted_b_bits));
  assert(math_max_s32(abs_a_multiplier, abs_b_multiplier) >= INT32_C(0x00100000));
  assert(abs_a_multiplier <= INT32_C(0x00200000));
  assert(abs_b_multiplier <= INT32_C(0x00200000));

  // Re-apply the sign of each original scale to its multiplier.
  int32_t signed_a_multiplier = abs_a_multiplier;
  if (signbit(a_output_scale)) {
    signed_a_multiplier = -abs_a_multiplier;
  }
  int32_t signed_b_multiplier = abs_b_multiplier;
  if (signbit(b_output_scale)) {
    signed_b_multiplier = -abs_b_multiplier;
  }

  // Fold the rounding term and both zero-point corrections into a single bias.
  const int32_t half_unit = INT32_C(1) << (shift - 1);
  const int32_t a_correction = signed_a_multiplier * (int32_t) a_zero_point;
  const int32_t b_correction = signed_b_multiplier * (int32_t) b_zero_point;
  const int32_t combined_bias = half_unit - a_correction - b_correction;

  // 2-element fields.
  params->wasmsimd.bias[0] = combined_bias;
  params->wasmsimd.bias[1] = combined_bias;
  params->wasmsimd.a_multiplier[0] = signed_a_multiplier;
  params->wasmsimd.a_multiplier[1] = signed_a_multiplier;
  params->wasmsimd.b_multiplier[0] = signed_b_multiplier;
  params->wasmsimd.b_multiplier[1] = signed_b_multiplier;

  params->wasmsimd.shift = shift;

  // 4-element output zero point.
  for (uint32_t lane = 0; lane < 4; lane++) {
    params->wasmsimd.output_zero_point[lane] = (int16_t) output_zero_point;
  }

  // 8-element clamping bounds.
  for (uint32_t lane = 0; lane < 8; lane++) {
    params->wasmsimd.output_min[lane] = output_min;
  }
  for (uint32_t lane = 0; lane < 8; lane++) {
    params->wasmsimd.output_max[lane] = output_max;
  }

  return sizeof(params->wasmsimd);
}
5207*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
5208*4bdc9457SAndroid Build Coastguard Worker
// Initializes the `scalar` variant of the QS8 addition min/max parameters.
//
// Each input scale is turned into a signed integer multiplier; both
// multipliers share one shift amount chosen from the larger scale magnitude.
// The stored bias is the rounding term 2**(shift - 1) minus each multiplier
// times its input zero point. The clamping bounds are stored relative to the
// output zero point, which is stored separately.
//
// Both scale magnitudes are asserted to lie in [2**-10, 2**8).
// Returns sizeof(params->scalar).
size_t xnn_init_qs8_add_minmax_scalar_params(
  union xnn_qs8_add_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int8_t a_zero_point,
  int8_t b_zero_point,
  int8_t output_zero_point,
  float a_output_scale,
  float b_output_scale,
  int8_t output_min,
  int8_t output_max)
{
  const float abs_a_output_scale = fabsf(a_output_scale);
  const float abs_b_output_scale = fabsf(b_output_scale);
  assert(abs_a_output_scale >= 0x1.0p-10f);
  assert(abs_b_output_scale >= 0x1.0p-10f);
  assert(abs_a_output_scale < 0x1.0p+8f);
  assert(abs_b_output_scale < 0x1.0p+8f);

  // The larger of the two magnitudes drives the shared shift amount.
  const float max_abs_output_scale = math_max_f32(abs_a_output_scale, abs_b_output_scale);
  assert(max_abs_output_scale >= 0x1.0p-10f);
  assert(max_abs_output_scale < 0x1.0p+8f);
  const int32_t dominant_exponent = (int32_t) (float_as_uint32(max_abs_output_scale) >> 23) - 127;

  // Shift is in [12, 30] range.
  const uint32_t shift = (uint32_t) (20 /* multiplier bits */ - dominant_exponent);
  assert(shift <= 30);
  assert(shift >= 12);

  // Multipliers are in [0, 2**21) range, largest multiplier is in [2**20, 2**21) range.
  const uint32_t exponent_delta = shift << 23;
  const int32_t abs_a_multiplier =
    (int32_t) lrintf(uint32_as_float(float_as_uint32(abs_a_output_scale) + exponent_delta));
  const int32_t abs_b_multiplier =
    (int32_t) lrintf(uint32_as_float(float_as_uint32(abs_b_output_scale) + exponent_delta));
  assert(math_max_s32(abs_a_multiplier, abs_b_multiplier) >= INT32_C(0x00100000));
  assert(abs_a_multiplier <= INT32_C(0x00200000));
  assert(abs_b_multiplier <= INT32_C(0x00200000));

  // Re-apply the sign of each original scale to its multiplier.
  int32_t a_factor = abs_a_multiplier;
  if (signbit(a_output_scale)) {
    a_factor = -abs_a_multiplier;
  }
  int32_t b_factor = abs_b_multiplier;
  if (signbit(b_output_scale)) {
    b_factor = -abs_b_multiplier;
  }

  // Fold the rounding term and both zero-point corrections into a single bias.
  const int32_t half_unit = INT32_C(1) << (shift - 1);
  const int32_t a_correction = a_factor * (int32_t) a_zero_point;
  const int32_t b_correction = b_factor * (int32_t) b_zero_point;
  params->scalar.bias = half_unit - a_correction - b_correction;

  params->scalar.a_multiplier = a_factor;
  params->scalar.b_multiplier = b_factor;
  params->scalar.shift = shift;

  // Clamping bounds are expressed relative to the output zero point.
  const int32_t zero_point = (int32_t) output_zero_point;
  params->scalar.output_min_less_zero_point = (int32_t) output_min - zero_point;
  params->scalar.output_max_less_zero_point = (int32_t) output_max - zero_point;
  params->scalar.output_zero_point = zero_point;

  return sizeof(params->scalar);
}
5258*4bdc9457SAndroid Build Coastguard Worker
// Initializes the `fp32_scalar` variant of the QU8 multiplication min/max
// parameters.
//
// Stores both input zero points (cast to int16_t), the product-to-output scale,
// the clamping bounds relative to the output zero point (as floats), and the
// magic-bias constants.
//
// product_output_scale is asserted to lie in [2**-16, 2**8).
// Returns sizeof(params->fp32_scalar).
size_t xnn_init_qu8_mul_minmax_fp32_scalar_params(
  union xnn_qu8_mul_minmax_params params[XNN_MIN_ELEMENTS(1)],
  uint8_t a_zero_point,
  uint8_t b_zero_point,
  uint8_t output_zero_point,
  float product_output_scale,
  uint8_t output_min,
  uint8_t output_max)
{
  assert(product_output_scale >= 0x1.0p-16f);
  assert(product_output_scale < 0x1.0p+8f);

  // Clamping bounds relative to the output zero point; the difference is taken
  // in unsigned arithmetic and then reinterpreted as signed.
  const uint32_t zero_point_u32 = (uint32_t) output_zero_point;
  const int32_t min_offset = (int32_t) ((uint32_t) output_min - zero_point_u32);
  const int32_t max_offset = (int32_t) ((uint32_t) output_max - zero_point_u32);

  // 12582912.0f is 1.5 * 2**23; its bit pattern is 0x4B400000.
  params->fp32_scalar.magic_bias = 12582912.0f;
  params->fp32_scalar.magic_bias_less_output_zero_point = INT32_C(0x4B400000) - (int32_t) (uint32_t) output_zero_point;

  params->fp32_scalar.output_min_less_zero_point = (float) min_offset;
  params->fp32_scalar.output_max_less_zero_point = (float) max_offset;

  params->fp32_scalar.scale = product_output_scale;
  params->fp32_scalar.a_zero_point = (int16_t) (uint16_t) a_zero_point;
  params->fp32_scalar.b_zero_point = (int16_t) (uint16_t) b_zero_point;

  return sizeof(params->fp32_scalar);
}
5280*4bdc9457SAndroid Build Coastguard Worker
5281*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Initializes the `fp32_neon` variant of the QU8 multiplication min/max
// parameters.
//
// Stores each input zero point into both elements of its 2-element field,
// then the scale, the magic-bias constants, and the clamping bounds.
//
// product_output_scale is asserted to lie in [2**-16, 2**8).
// Returns sizeof(params->fp32_neon).
size_t xnn_init_qu8_mul_minmax_fp32_neon_params(
  union xnn_qu8_mul_minmax_params params[XNN_MIN_ELEMENTS(1)],
  uint8_t a_zero_point,
  uint8_t b_zero_point,
  uint8_t output_zero_point,
  float product_output_scale,
  uint8_t output_min,
  uint8_t output_max)
{
  assert(product_output_scale >= 0x1.0p-16f);
  assert(product_output_scale < 0x1.0p+8f);

  // Each input zero point is written to both elements of its field.
  for (uint32_t slot = 0; slot < 2; slot++) {
    params->fp32_neon.a_zero_point[slot] = a_zero_point;
  }
  for (uint32_t slot = 0; slot < 2; slot++) {
    params->fp32_neon.b_zero_point[slot] = b_zero_point;
  }

  params->fp32_neon.scale = product_output_scale;

  // 12582912.0f is 1.5 * 2**23; its bit pattern is 0x4B400000.
  const int32_t magic_bias_bits = INT32_C(0x4B400000);
  params->fp32_neon.magic_bias = 12582912.0f;
  params->fp32_neon.magic_bias_less_output_zero_point = magic_bias_bits - (int32_t) output_zero_point;

  params->fp32_neon.output_max = output_max;
  params->fp32_neon.output_min = output_min;

  return sizeof(params->fp32_neon);
}
5305*4bdc9457SAndroid Build Coastguard Worker
// Initializes the `fp32_neonv8` variant of the QU8 multiplication min/max
// parameters.
//
// Stores each input zero point into both elements of its 2-element field,
// then the scale, the output zero point (cast to int16_t), and the clamping
// bounds.
//
// product_output_scale is asserted to lie in [2**-16, 2**8).
// Returns sizeof(params->fp32_neonv8).
size_t xnn_init_qu8_mul_minmax_fp32_neonv8_params(
  union xnn_qu8_mul_minmax_params params[XNN_MIN_ELEMENTS(1)],
  uint8_t a_zero_point,
  uint8_t b_zero_point,
  uint8_t output_zero_point,
  float product_output_scale,
  uint8_t output_min,
  uint8_t output_max)
{
  assert(product_output_scale >= 0x1.0p-16f);
  assert(product_output_scale < 0x1.0p+8f);

  // Each input zero point is written to both elements of its field.
  for (uint32_t slot = 0; slot < 2; slot++) {
    params->fp32_neonv8.a_zero_point[slot] = a_zero_point;
    params->fp32_neonv8.b_zero_point[slot] = b_zero_point;
  }

  params->fp32_neonv8.scale = product_output_scale;

  // Output-side quantization: zero point and clamping bounds.
  const int16_t output_zero_point_s16 = (int16_t) output_zero_point;
  params->fp32_neonv8.output_zero_point = output_zero_point_s16;
  params->fp32_neonv8.output_max = output_max;
  params->fp32_neonv8.output_min = output_min;

  return sizeof(params->fp32_neonv8);
}
5328*4bdc9457SAndroid Build Coastguard Worker
// Initializes the `rndnu_neon` variant of the QU8 multiplication min/max
// parameters.
//
// The scale is decomposed into an integer multiplier, built from the low 23
// bits of its encoding with bit 23 set and shifted left by 7, and a shift
// derived from the remaining upper bits. The shift is split into a pre-shift
// and a post-shift of at least 1; both are stored negated.
//
// product_output_scale is asserted to lie in [2**-16, 2**8).
// Returns sizeof(params->rndnu_neon).
size_t xnn_init_qu8_mul_minmax_rndnu_neon_params(
  union xnn_qu8_mul_minmax_params params[XNN_MIN_ELEMENTS(1)],
  uint8_t a_zero_point,
  uint8_t b_zero_point,
  uint8_t output_zero_point,
  float product_output_scale,
  uint8_t output_min,
  uint8_t output_max)
{
  assert(product_output_scale >= 0x1.0p-16f);
  assert(product_output_scale < 0x1.0p+8f);

  // Compute requantization parameters from the encoding of the scale.
  const uint32_t encoded_scale = float_as_uint32(product_output_scale);
  const uint32_t fraction_bits = encoded_scale & UINT32_C(0x007FFFFF);
  const uint32_t upper_bits = encoded_scale >> 23;

  // Multiplier is in [0x40000000, 0x7FFFFF80] range.
  const int32_t multiplier = (int32_t) ((fraction_bits | UINT32_C(0x00800000)) << 7);
  assert(multiplier >= INT32_C(0x40000000));
  assert(multiplier <= INT32_C(0x7FFFFF80));

  // Shift is in [-8, 15] range.
  const int32_t shift = 127 + 31 - 32 - upper_bits;
  assert(shift >= -8);
  assert(shift < 16);

  // Split shift into pre_shift + post_shift, post_shift in [1, 15] range.
  const int32_t after_multiply_shift = math_max_s32(shift, 1);
  const int32_t before_multiply_shift = shift - after_multiply_shift;

  // Each input zero point is written to both elements of its field.
  for (uint32_t slot = 0; slot < 2; slot++) {
    params->rndnu_neon.a_zero_point[slot] = a_zero_point;
  }
  for (uint32_t slot = 0; slot < 2; slot++) {
    params->rndnu_neon.b_zero_point[slot] = b_zero_point;
  }

  // Both shifts are stored negated.
  params->rndnu_neon.left_pre_shift = -before_multiply_shift;
  params->rndnu_neon.multiplier = multiplier;
  params->rndnu_neon.left_post_shift = -after_multiply_shift;

  // Output-side quantization: zero point and clamping bounds.
  params->rndnu_neon.output_zero_point = (int16_t) output_zero_point;
  params->rndnu_neon.output_max = output_max;
  params->rndnu_neon.output_min = output_min;

  return sizeof(params->rndnu_neon);
}
5370*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5371*4bdc9457SAndroid Build Coastguard Worker
5372*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Initializes the `fp32_sse2` variant of the QU8 multiplication min/max
// parameters.
//
// Replicates the three zero points (cast to int16_t) into 8 elements each,
// the scale into 4 elements, and the clamping bounds into 16 elements each.
//
// product_output_scale is asserted to lie in [2**-16, 2**8).
// Returns sizeof(params->fp32_sse2).
size_t xnn_init_qu8_mul_minmax_fp32_sse2_params(
  union xnn_qu8_mul_minmax_params params[XNN_MIN_ELEMENTS(1)],
  uint8_t a_zero_point,
  uint8_t b_zero_point,
  uint8_t output_zero_point,
  float product_output_scale,
  uint8_t output_min,
  uint8_t output_max)
{
  assert(product_output_scale >= 0x1.0p-16f);
  assert(product_output_scale < 0x1.0p+8f);

  // Convert the zero points once; the same value goes into every element.
  const int16_t a_zero_point_s16 = (int16_t) (uint16_t) a_zero_point;
  const int16_t b_zero_point_s16 = (int16_t) (uint16_t) b_zero_point;
  const int16_t output_zero_point_s16 = (int16_t) (uint16_t) output_zero_point;

  // 8-element fields: all three zero points.
  for (uint32_t lane = 0; lane < 8; lane++) {
    params->fp32_sse2.a_zero_point[lane] = a_zero_point_s16;
    params->fp32_sse2.b_zero_point[lane] = b_zero_point_s16;
    params->fp32_sse2.output_zero_point[lane] = output_zero_point_s16;
  }

  // 4-element field: the scale.
  for (uint32_t lane = 0; lane < 4; lane++) {
    params->fp32_sse2.scale[lane] = product_output_scale;
  }

  // 16-element fields: the clamping bounds.
  for (uint32_t lane = 0; lane < 16; lane++) {
    params->fp32_sse2.output_min[lane] = output_min;
  }
  for (uint32_t lane = 0; lane < 16; lane++) {
    params->fp32_sse2.output_max[lane] = output_max;
  }

  return sizeof(params->fp32_sse2);
}
5401*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5402*4bdc9457SAndroid Build Coastguard Worker
5403*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Initializes the `fp32_wasmsimd` variant of the QU8 multiplication min/max
// parameters.
//
// Replicates the input zero points (cast to int16_t) into 4 elements each,
// the scale and magic-bias constants into 2 elements each, and the upper
// clamping bound into 8 elements. The lower bound is not stored directly: it
// is folded into `magic_min`, the bit pattern of the magic bias plus
// (output_min - output_zero_point).
//
// product_output_scale is asserted to lie in [2**-16, 2**8).
// Returns sizeof(params->fp32_wasmsimd).
size_t xnn_init_qu8_mul_minmax_fp32_wasmsimd_params(
  union xnn_qu8_mul_minmax_params params[XNN_MIN_ELEMENTS(1)],
  uint8_t a_zero_point,
  uint8_t b_zero_point,
  uint8_t output_zero_point,
  float product_output_scale,
  uint8_t output_min,
  uint8_t output_max)
{
  assert(product_output_scale >= 0x1.0p-16f);
  assert(product_output_scale < 0x1.0p+8f);

  // Lower clamping bound relative to the output zero point, as a float.
  const int32_t zero_point = (int32_t) output_zero_point;
  const float min_offset = (float) ((int32_t) output_min - zero_point);

  // 12582912.0f is 1.5 * 2**23; its bit pattern is 0x4B400000.
  const int32_t biased_min_bits = (int32_t) float_as_uint32(12582912.0f + min_offset);
  const int32_t bias_bits_less_zero_point = INT32_C(0x4B400000) - (int32_t) output_zero_point;

  // 4-element fields: input zero points.
  const int16_t a_zero_point_s16 = (int16_t) a_zero_point;
  const int16_t b_zero_point_s16 = (int16_t) b_zero_point;
  for (uint32_t lane = 0; lane < 4; lane++) {
    params->fp32_wasmsimd.a_zero_point[lane] = a_zero_point_s16;
    params->fp32_wasmsimd.b_zero_point[lane] = b_zero_point_s16;
  }

  // 2-element fields: scale and magic-bias constants.
  params->fp32_wasmsimd.scale[0] = product_output_scale;
  params->fp32_wasmsimd.scale[1] = product_output_scale;
  params->fp32_wasmsimd.magic_bias[0] = 12582912.0f;
  params->fp32_wasmsimd.magic_bias[1] = 12582912.0f;
  params->fp32_wasmsimd.magic_min[0] = biased_min_bits;
  params->fp32_wasmsimd.magic_min[1] = biased_min_bits;
  params->fp32_wasmsimd.magic_bias_less_output_zero_point[0] = bias_bits_less_zero_point;
  params->fp32_wasmsimd.magic_bias_less_output_zero_point[1] = bias_bits_less_zero_point;

  // 8-element field: upper clamping bound.
  for (uint32_t lane = 0; lane < 8; lane++) {
    params->fp32_wasmsimd.output_max[lane] = output_max;
  }

  return sizeof(params->fp32_wasmsimd);
}
5434*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
5435*4bdc9457SAndroid Build Coastguard Worker
// Initializes the `fp32_scalar` variant of the QS8 multiplication min/max
// parameters.
//
// Stores both input zero points (cast to int16_t), the product-to-output scale,
// the clamping bounds relative to the output zero point (as floats), and the
// magic-bias constants.
//
// product_output_scale is asserted to lie in [2**-16, 2**8).
// Returns sizeof(params->fp32_scalar).
size_t xnn_init_qs8_mul_minmax_fp32_scalar_params(
  union xnn_qs8_mul_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int8_t a_zero_point,
  int8_t b_zero_point,
  int8_t output_zero_point,
  float product_output_scale,
  int8_t output_min,
  int8_t output_max)
{
  assert(product_output_scale >= 0x1.0p-16f);
  assert(product_output_scale < 0x1.0p+8f);

  // Clamping bounds relative to the output zero point.
  const int32_t zero_point = (int32_t) output_zero_point;
  const int32_t min_offset = (int32_t) output_min - zero_point;
  const int32_t max_offset = (int32_t) output_max - zero_point;

  // 12582912.0f is 1.5 * 2**23; its bit pattern is 0x4B400000.
  params->fp32_scalar.magic_bias = 12582912.0f;
  params->fp32_scalar.magic_bias_less_output_zero_point = INT32_C(0x4B400000) - zero_point;

  params->fp32_scalar.output_min_less_zero_point = (float) min_offset;
  params->fp32_scalar.output_max_less_zero_point = (float) max_offset;

  params->fp32_scalar.scale = product_output_scale;
  params->fp32_scalar.a_zero_point = (int16_t) a_zero_point;
  params->fp32_scalar.b_zero_point = (int16_t) b_zero_point;

  return sizeof(params->fp32_scalar);
}
5457*4bdc9457SAndroid Build Coastguard Worker
5458*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Fills the fp32_neon variant of the QS8 multiplication min/max parameters.
//
// Each input zero point is written to both elements of its two-element array.
// The scale, magic bias, magic-bias integer constant less the output zero
// point, and the output clamping bounds are stored as scalars.
//
// product_output_scale is asserted to lie in [2**-16, 2**8).
// Returns sizeof(params->fp32_neon).
size_t xnn_init_qs8_mul_minmax_fp32_neon_params(
  union xnn_qs8_mul_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int8_t a_zero_point,
  int8_t b_zero_point,
  int8_t output_zero_point,
  float product_output_scale,
  int8_t output_min,
  int8_t output_max)
{
  assert(product_output_scale >= 0x1.0p-16f);
  assert(product_output_scale < 0x1.0p+8f);

  for (size_t lane = 0; lane < 2; lane++) {
    params->fp32_neon.a_zero_point[lane] = a_zero_point;
    params->fp32_neon.b_zero_point[lane] = b_zero_point;
  }

  // Clamping bounds are stored as given, without zero-point adjustment.
  params->fp32_neon.output_min = output_min;
  params->fp32_neon.output_max = output_max;

  params->fp32_neon.scale = product_output_scale;
  // 12582912.0f is 0x1.8p+23f, whose single-precision bit pattern is 0x4B400000.
  params->fp32_neon.magic_bias = 12582912.0f;
  params->fp32_neon.magic_bias_less_output_zero_point = INT32_C(0x4B400000) - (int32_t) output_zero_point;
  return sizeof(params->fp32_neon);
}
5482*4bdc9457SAndroid Build Coastguard Worker
// Fills the fp32_neonv8 variant of the QS8 multiplication min/max parameters.
//
// Each input zero point is written to both elements of its two-element array.
// Unlike the fp32_neon variant, no magic-bias constants are stored here; the
// output zero point is stored directly, cast to int16_t.
//
// product_output_scale is asserted to lie in [2**-16, 2**8).
// Returns sizeof(params->fp32_neonv8).
size_t xnn_init_qs8_mul_minmax_fp32_neonv8_params(
  union xnn_qs8_mul_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int8_t a_zero_point,
  int8_t b_zero_point,
  int8_t output_zero_point,
  float product_output_scale,
  int8_t output_min,
  int8_t output_max)
{
  assert(product_output_scale >= 0x1.0p-16f);
  assert(product_output_scale < 0x1.0p+8f);

  for (size_t lane = 0; lane < 2; lane++) {
    params->fp32_neonv8.a_zero_point[lane] = a_zero_point;
    params->fp32_neonv8.b_zero_point[lane] = b_zero_point;
  }

  params->fp32_neonv8.output_min = output_min;
  params->fp32_neonv8.output_max = output_max;
  params->fp32_neonv8.output_zero_point = (int16_t) output_zero_point;
  params->fp32_neonv8.scale = product_output_scale;
  return sizeof(params->fp32_neonv8);
}
5505*4bdc9457SAndroid Build Coastguard Worker
// Fills the rndnu_neon variant of the QS8 multiplication min/max parameters.
//
// The floating-point product_output_scale is decomposed into a fixed-point
// multiplier plus a shift, and the shift is further split into a pre-shift and
// a post-shift. Both shifts are stored negated (as "left" shifts).
//
// product_output_scale is asserted to lie in [2**-16, 2**8).
// Returns sizeof(params->rndnu_neon).
size_t xnn_init_qs8_mul_minmax_rndnu_neon_params(
  union xnn_qs8_mul_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int8_t a_zero_point,
  int8_t b_zero_point,
  int8_t output_zero_point,
  float product_output_scale,
  int8_t output_min,
  int8_t output_max)
{
  assert(product_output_scale >= 0x1.0p-16f);
  assert(product_output_scale < 0x1.0p+8f);

  // Compute requantization parameters.
  const uint32_t scale_bits = float_as_uint32(product_output_scale);

  // Multiplier is in [0x40000000, 0x7FFFFF80] range: the 23 mantissa bits with
  // bit 23 set, shifted left by 7.
  const int32_t multiplier = (int32_t) (((scale_bits & UINT32_C(0x007FFFFF)) | UINT32_C(0x00800000)) << 7);
  assert(multiplier >= INT32_C(0x40000000));
  assert(multiplier <= INT32_C(0x7FFFFF80));

  // Shift is in [-8, 15] range.
  // The exponent field is converted to int32_t *before* the subtraction so the
  // arithmetic is signed. Subtracting the uint32_t value directly would wrap
  // in unsigned arithmetic for negative shifts and then rely on an
  // implementation-defined unsigned-to-signed conversion.
  const int32_t biased_exponent = (int32_t) (scale_bits >> 23);
  const int32_t shift = 127 + 31 - 32 - biased_exponent;
  assert(shift >= -8);
  assert(shift < 16);

  // Split shift into pre_shift + post_shift, post_shift in [1, 15] range.
  const int32_t post_shift = math_max_s32(shift, 1);
  const int32_t pre_shift = shift - post_shift;

  params->rndnu_neon.a_zero_point[0] = a_zero_point;
  params->rndnu_neon.a_zero_point[1] = a_zero_point;
  params->rndnu_neon.b_zero_point[0] = b_zero_point;
  params->rndnu_neon.b_zero_point[1] = b_zero_point;
  params->rndnu_neon.left_pre_shift = -pre_shift;
  params->rndnu_neon.multiplier = multiplier;
  params->rndnu_neon.left_post_shift = -post_shift;
  params->rndnu_neon.output_zero_point = (int16_t) output_zero_point;
  params->rndnu_neon.output_min = output_min;
  params->rndnu_neon.output_max = output_max;
  return sizeof(params->rndnu_neon);
}
5547*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5548*4bdc9457SAndroid Build Coastguard Worker
5549*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Fills the fp32_sse2 variant of the QS8 multiplication min/max parameters.
//
// The input zero points, output zero point and output clamping bounds are each
// cast to int16_t and replicated across 8 array elements; the scale is
// replicated across 4 array elements.
//
// product_output_scale is asserted to lie in [2**-16, 2**8).
// Returns sizeof(params->fp32_sse2).
size_t xnn_init_qs8_mul_minmax_fp32_sse2_params(
  union xnn_qs8_mul_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int8_t a_zero_point,
  int8_t b_zero_point,
  int8_t output_zero_point,
  float product_output_scale,
  int8_t output_min,
  int8_t output_max)
{
  assert(product_output_scale >= 0x1.0p-16f);
  assert(product_output_scale < 0x1.0p+8f);

  // All 8-element fields are filled in a single pass.
  for (size_t lane = 0; lane < 8; lane++) {
    params->fp32_sse2.a_zero_point[lane] = (int16_t) a_zero_point;
    params->fp32_sse2.b_zero_point[lane] = (int16_t) b_zero_point;
    params->fp32_sse2.output_zero_point[lane] = (int16_t) output_zero_point;
    params->fp32_sse2.output_min[lane] = (int16_t) output_min;
    params->fp32_sse2.output_max[lane] = (int16_t) output_max;
  }
  for (size_t lane = 0; lane < 4; lane++) {
    params->fp32_sse2.scale[lane] = product_output_scale;
  }
  return sizeof(params->fp32_sse2);
}
5578*4bdc9457SAndroid Build Coastguard Worker
// Fills the fp32_sse4 variant of the QS8 multiplication min/max parameters.
//
// The input and output zero points are cast to int16_t and replicated across
// 8 array elements, the scale across 4 elements, and the output clamping
// bounds (stored without a cast) across 16 elements.
//
// product_output_scale is asserted to lie in [2**-16, 2**8).
// Returns sizeof(params->fp32_sse4).
size_t xnn_init_qs8_mul_minmax_fp32_sse4_params(
  union xnn_qs8_mul_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int8_t a_zero_point,
  int8_t b_zero_point,
  int8_t output_zero_point,
  float product_output_scale,
  int8_t output_min,
  int8_t output_max)
{
  assert(product_output_scale >= 0x1.0p-16f);
  assert(product_output_scale < 0x1.0p+8f);

  for (size_t lane = 0; lane < 4; lane++) {
    params->fp32_sse4.scale[lane] = product_output_scale;
  }
  for (size_t lane = 0; lane < 8; lane++) {
    params->fp32_sse4.a_zero_point[lane] = (int16_t) a_zero_point;
    params->fp32_sse4.b_zero_point[lane] = (int16_t) b_zero_point;
    params->fp32_sse4.output_zero_point[lane] = (int16_t) output_zero_point;
  }
  for (size_t lane = 0; lane < 16; lane++) {
    params->fp32_sse4.output_min[lane] = output_min;
    params->fp32_sse4.output_max[lane] = output_max;
  }
  return sizeof(params->fp32_sse4);
}
5607*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5608*4bdc9457SAndroid Build Coastguard Worker
5609*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Fills the fp32_wasmsimd variant of the QS8 multiplication min/max parameters.
//
// The input zero points are cast to int16_t and replicated across 4 array
// elements; the scale, magic bias, magic minimum and magic-bias integer
// constant less the output zero point across 2 elements; and the output
// maximum across 8 elements. The output minimum is not stored directly: it is
// folded into magic_min, the bit pattern of (magic bias + (output_min -
// output_zero_point)).
//
// product_output_scale is asserted to lie in [2**-16, 2**8).
// Returns sizeof(params->fp32_wasmsimd).
size_t xnn_init_qs8_mul_minmax_fp32_wasmsimd_params(
  union xnn_qs8_mul_minmax_params params[XNN_MIN_ELEMENTS(1)],
  int8_t a_zero_point,
  int8_t b_zero_point,
  int8_t output_zero_point,
  float product_output_scale,
  int8_t output_min,
  int8_t output_max)
{
  assert(product_output_scale >= 0x1.0p-16f);
  assert(product_output_scale < 0x1.0p+8f);

  const int32_t magic_bias_less_output_zero_point = INT32_C(0x4B400000) - (int32_t) output_zero_point;
  const float output_min_less_zero_point = (float) ((int32_t) output_min - (int32_t) output_zero_point);
  // 12582912.0f is 0x1.8p+23f, whose single-precision bit pattern is 0x4B400000.
  const int32_t magic_min = (int32_t) float_as_uint32(12582912.0f + output_min_less_zero_point);

  for (size_t lane = 0; lane < 8; lane++) {
    params->fp32_wasmsimd.output_max[lane] = output_max;
  }
  for (size_t lane = 0; lane < 4; lane++) {
    params->fp32_wasmsimd.a_zero_point[lane] = (int16_t) a_zero_point;
    params->fp32_wasmsimd.b_zero_point[lane] = (int16_t) b_zero_point;
  }
  for (size_t lane = 0; lane < 2; lane++) {
    params->fp32_wasmsimd.scale[lane] = product_output_scale;
    params->fp32_wasmsimd.magic_bias[lane] = 12582912.0f;
    params->fp32_wasmsimd.magic_min[lane] = magic_min;
    params->fp32_wasmsimd.magic_bias_less_output_zero_point[lane] = magic_bias_less_output_zero_point;
  }
  return sizeof(params->fp32_wasmsimd);
}
5640*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
5641*4bdc9457SAndroid Build Coastguard Worker
// Fills the scalar variant of the F16 -> F32 conversion parameters with fixed
// constants. Takes no inputs besides the destination.
// Returns sizeof(params->scalar).
size_t xnn_init_f16_f32_cvt_scalar_params(
  union xnn_f16_f32_cvt_params params[XNN_MIN_ELEMENTS(1)])
{
  // Floating-point constants.
  params->scalar.exp_scale = 0x1.0p-112f;  // 2**-112
  params->scalar.magic_bias = 0.5f;

  // Integer bit masks and thresholds.
  params->scalar.sign_mask = UINT32_C(0x80000000);  // bit 31 only
  params->scalar.exp_offset = UINT32_C(0x70000000);
  params->scalar.magic_mask = UINT32_C(0x3F000000);  // single-precision bit pattern of 0.5f
  params->scalar.denorm_cutoff = UINT32_C(0x08000000);

  return sizeof(params->scalar);
}
5653*4bdc9457SAndroid Build Coastguard Worker
5654*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Fills the neon variant of the F16 -> F32 conversion parameters. The only
// value stored is exp_scale (2**-112).
// Returns sizeof(params->neon).
size_t xnn_init_f16_f32_cvt_neon_params(
  union xnn_f16_f32_cvt_params params[XNN_MIN_ELEMENTS(1)])
{
  const float exp_scale = 0x1.0p-112f;
  params->neon.exp_scale = exp_scale;
  return sizeof(params->neon);
}
5661*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5662*4bdc9457SAndroid Build Coastguard Worker
5663*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Fills the sse_int16 variant of the F16 -> F32 conversion parameters with
// fixed constants. The 16-bit integer constants are replicated across 8 array
// elements and the float constants across 4 array elements.
// Returns sizeof(params->sse_int16).
size_t xnn_init_f16_f32_cvt_sse_int16_params(
  union xnn_f16_f32_cvt_params params[XNN_MIN_ELEMENTS(1)])
{
  // 8-element fields holding 16-bit constants.
  for (size_t lane = 0; lane < 8; lane++) {
    params->sse_int16.sign_mask[lane] = UINT16_C(0x8000);
    params->sse_int16.exp_offset[lane] = UINT16_C(0x7000);
    params->sse_int16.magic_mask[lane] = UINT16_C(0x3F00);
    params->sse_int16.denorm_cutoff[lane] = INT16_C(0x0400);
  }
  // 4-element fields holding float constants.
  for (size_t lane = 0; lane < 4; lane++) {
    params->sse_int16.exp_scale[lane] = 0x1.0p-112f;
    params->sse_int16.magic_bias[lane] = 0.5f;
  }
  return sizeof(params->sse_int16);
}
5685*4bdc9457SAndroid Build Coastguard Worker
// Fills the sse_int32 variant of the F16 -> F32 conversion parameters with
// fixed constants, each replicated across 4 array elements.
// Note that magic_bias is assigned an integer constant here (0x3F000000, the
// single-precision bit pattern of 0.5f) rather than a float literal.
// Returns sizeof(params->sse_int32).
size_t xnn_init_f16_f32_cvt_sse_int32_params(
  union xnn_f16_f32_cvt_params params[XNN_MIN_ELEMENTS(1)])
{
  size_t lane = 0;
  while (lane < 4) {
    params->sse_int32.sign_mask[lane] = UINT32_C(0x80000000);
    params->sse_int32.exp_offset[lane] = UINT32_C(0x70000000);
    params->sse_int32.exp_scale[lane] = 0x1.0p-112f;
    params->sse_int32.magic_bias[lane] = UINT32_C(0x3F000000);
    params->sse_int32.denorm_cutoff[lane] = INT32_C(0x04000000);
    lane++;
  }
  return sizeof(params->sse_int32);
}
5698*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5699*4bdc9457SAndroid Build Coastguard Worker
5700*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Fills the wasmsimd_int16 variant of the F16 -> F32 conversion parameters
// with fixed constants. The 16-bit integer constants are replicated across 4
// array elements and the float constants across 2 array elements.
// Returns sizeof(params->wasmsimd_int16).
size_t xnn_init_f16_f32_cvt_wasmsimd_int16_params(
  union xnn_f16_f32_cvt_params params[XNN_MIN_ELEMENTS(1)])
{
  // 4-element fields holding 16-bit constants.
  for (size_t lane = 0; lane < 4; lane++) {
    params->wasmsimd_int16.sign_mask[lane] = UINT16_C(0x8000);
    params->wasmsimd_int16.exp_offset[lane] = UINT16_C(0x7000);
    params->wasmsimd_int16.magic_mask[lane] = UINT16_C(0x3F00);
    params->wasmsimd_int16.denorm_cutoff[lane] = INT16_C(0x0400);
  }
  // 2-element fields holding float constants.
  for (size_t lane = 0; lane < 2; lane++) {
    params->wasmsimd_int16.exp_scale[lane] = 0x1.0p-112f;
    params->wasmsimd_int16.magic_bias[lane] = 0.5f;
  }
  return sizeof(params->wasmsimd_int16);
}
5722*4bdc9457SAndroid Build Coastguard Worker
// Fills the wasmsimd_int32 variant of the F16 -> F32 conversion parameters
// with fixed constants, each replicated across 2 array elements.
// Note that magic_bias is assigned an integer constant here (0x3F000000, the
// single-precision bit pattern of 0.5f) rather than a float literal.
// Returns sizeof(params->wasmsimd_int32).
size_t xnn_init_f16_f32_cvt_wasmsimd_int32_params(
  union xnn_f16_f32_cvt_params params[XNN_MIN_ELEMENTS(1)])
{
  size_t lane = 0;
  while (lane < 2) {
    params->wasmsimd_int32.sign_mask[lane] = UINT32_C(0x80000000);
    params->wasmsimd_int32.exp_offset[lane] = UINT32_C(0x70000000);
    params->wasmsimd_int32.exp_scale[lane] = 0x1.0p-112f;
    params->wasmsimd_int32.magic_bias[lane] = UINT32_C(0x3F000000);
    params->wasmsimd_int32.denorm_cutoff[lane] = INT32_C(0x04000000);
    lane++;
  }
  return sizeof(params->wasmsimd_int32);
}
5735*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
5736*4bdc9457SAndroid Build Coastguard Worker
// Fills the scalar_bitcast variant of the F32 -> F16 conversion parameters
// with fixed constants. Takes no inputs besides the destination.
// Returns sizeof(params->scalar_bitcast).
size_t xnn_init_f32_f16_cvt_scalar_bitcast_params(
  union xnn_f32_f16_cvt_params params[XNN_MIN_ELEMENTS(1)])
{
  // Floating-point scale factors.
  params->scalar_bitcast.scale_to_inf = 0x1.0p+112f;   // 2**112
  params->scalar_bitcast.scale_to_zero = 0x1.0p-110f;  // 2**-110

  // 32-bit constants.
  params->scalar_bitcast.nonsign_mask = UINT32_C(0x7FFFFFFF);  // every bit except bit 31
  params->scalar_bitcast.exp_bias = UINT32_C(0x07800000);      // 15 << 23
  params->scalar_bitcast.expw_max = UINT32_C(0x7F800000);      // single-precision exponent field, all ones
  params->scalar_bitcast.bias_min = UINT32_C(0x40000000);

  // 16-bit constants.
  params->scalar_bitcast.exph_mask = UINT16_C(0x7C00);   // half-precision exponent field
  params->scalar_bitcast.manth_mask = UINT16_C(0x0FFF);
  params->scalar_bitcast.nanh = UINT16_C(0x7E00);        // a half-precision quiet NaN encoding

  return sizeof(params->scalar_bitcast);
}
5751*4bdc9457SAndroid Build Coastguard Worker
// Fills the scalar_fabsf variant of the F32 -> F16 conversion parameters with
// fixed constants. The values match those of the scalar_bitcast variant, but
// no nonsign_mask is stored here.
// Returns sizeof(params->scalar_fabsf).
size_t xnn_init_f32_f16_cvt_scalar_fabsf_params(
  union xnn_f32_f16_cvt_params params[XNN_MIN_ELEMENTS(1)])
{
  // Floating-point scale factors.
  params->scalar_fabsf.scale_to_inf = 0x1.0p+112f;   // 2**112
  params->scalar_fabsf.scale_to_zero = 0x1.0p-110f;  // 2**-110

  // 32-bit constants.
  params->scalar_fabsf.exp_bias = UINT32_C(0x07800000);  // 15 << 23
  params->scalar_fabsf.expw_max = UINT32_C(0x7F800000);  // single-precision exponent field, all ones
  params->scalar_fabsf.bias_min = UINT32_C(0x40000000);

  // 16-bit constants.
  params->scalar_fabsf.exph_mask = UINT16_C(0x7C00);   // half-precision exponent field
  params->scalar_fabsf.manth_mask = UINT16_C(0x0FFF);
  params->scalar_fabsf.nanh = UINT16_C(0x7E00);        // a half-precision quiet NaN encoding

  return sizeof(params->scalar_fabsf);
}
5765*4bdc9457SAndroid Build Coastguard Worker
5766*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Fills the neon variant of the F32 -> F16 conversion parameters with fixed
// constants: two float scale factors and two 32-bit integer constants.
// Returns sizeof(params->neon).
size_t xnn_init_f32_f16_cvt_neon_params(
  union xnn_f32_f16_cvt_params params[XNN_MIN_ELEMENTS(1)])
{
  // Floating-point scale factors.
  params->neon.scale_to_inf = 0x1.0p+112f;   // 2**112
  params->neon.scale_to_zero = 0x1.0p-110f;  // 2**-110

  // 32-bit constants.
  params->neon.exp_bias = UINT32_C(0x07800000);  // 15 << 23
  params->neon.expw_max = UINT32_C(0x7F800000);  // single-precision exponent field, all ones

  return sizeof(params->neon);
}
5776*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5777*4bdc9457SAndroid Build Coastguard Worker
5778*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Fills the sse2 variant of the F32 -> F16 conversion parameters with fixed
// constants. Most fields are replicated across 4 array elements; bias_min has
// 8 elements written as four (0x8000, 0x4000) pairs, and nanh is replicated
// across 8 elements.
// Returns sizeof(params->sse2).
size_t xnn_init_f32_f16_cvt_sse2_params(
  union xnn_f32_f16_cvt_params params[XNN_MIN_ELEMENTS(1)])
{
  for (size_t lane = 0; lane < 4; lane++) {
    params->sse2.nonsign_mask[lane] = UINT32_C(0x7FFFFFFF);
    params->sse2.exp_bias[lane] = UINT32_C(0x07800000);
    params->sse2.scale_to_inf[lane] = 0x1.0p+112f;
    params->sse2.expw_max[lane] = UINT32_C(0x7F800000);
    params->sse2.scale_to_zero[lane] = 0x1.0p-110f;
    params->sse2.manth_mask[lane] = UINT32_C(0x00000FFF);
    params->sse2.exph_mask[lane] = UINT32_C(0x00007C00);

    // Even bias_min elements hold 0x8000, odd elements hold 0x4000.
    params->sse2.bias_min[2 * lane] = INT16_C(0x8000);
    params->sse2.bias_min[2 * lane + 1] = INT16_C(0x4000);
  }
  for (size_t lane = 0; lane < 8; lane++) {
    params->sse2.nanh[lane] = UINT16_C(0x7E00);
  }
  return sizeof(params->sse2);
}
5806*4bdc9457SAndroid Build Coastguard Worker
// Fills the f16c variant of the F32 -> F16 conversion parameters. The first 7
// entries of the 14-entry mask_table are set to -1 and the last 7 entries to 0.
// Returns sizeof(params->f16c).
size_t xnn_init_f32_f16_cvt_f16c_params(
  union xnn_f32_f16_cvt_params params[XNN_MIN_ELEMENTS(1)])
{
  for (size_t entry = 0; entry < 14; entry++) {
    if (entry < 7) {
      params->f16c.mask_table[entry] = -1;
    } else {
      params->f16c.mask_table[entry] = 0;
    }
  }
  return sizeof(params->f16c);
}
5818*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5819*4bdc9457SAndroid Build Coastguard Worker
5820*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Fills the wasmsimd variant of the F32 -> F16 conversion parameters with
// fixed constants. Most fields are replicated across 2 array elements;
// bias_min has 4 elements written as two (0x8000, 0x4000) pairs, and nanh is
// replicated across 4 elements.
// Returns sizeof(params->wasmsimd).
size_t xnn_init_f32_f16_cvt_wasmsimd_params(
  union xnn_f32_f16_cvt_params params[XNN_MIN_ELEMENTS(1)])
{
  for (size_t lane = 0; lane < 2; lane++) {
    params->wasmsimd.exp_bias[lane] = UINT32_C(0x07800000);
    params->wasmsimd.scale_to_inf[lane] = 0x1.0p+112f;
    params->wasmsimd.expw_max[lane] = UINT32_C(0x7F800000);
    params->wasmsimd.scale_to_zero[lane] = 0x1.0p-110f;
    params->wasmsimd.manth_mask[lane] = UINT32_C(0x00000FFF);
    params->wasmsimd.exph_mask[lane] = UINT32_C(0x00007C00);

    // Even bias_min elements hold 0x8000, odd elements hold 0x4000.
    params->wasmsimd.bias_min[2 * lane] = INT16_C(0x8000);
    params->wasmsimd.bias_min[2 * lane + 1] = INT16_C(0x4000);
  }
  for (size_t lane = 0; lane < 4; lane++) {
    params->wasmsimd.nanh[lane] = UINT16_C(0x7E00);
  }
  return sizeof(params->wasmsimd);
}
5843*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
5844*4bdc9457SAndroid Build Coastguard Worker
// Initializes the scalar "fmagic" variant of the F32 -> QS8 conversion
// parameters.
//
//   scale              - stored unchanged.
//   output_zero_point  - subtracted from both clamping bounds and from the
//                        integer magic constant.
//   output_min/max     - stored as floats relative to the zero point.
//
// magic_bias is 12582912.0f (0x1.8p+23f); 0x4B400000 is the IEEE-754 bit
// pattern of that same value.
//
// Returns the size, in bytes, of the scalar_fmagic parameter variant.
size_t xnn_init_f32_qs8_cvt_scalar_fmagic_params(
  union xnn_f32_qs8_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  int8_t output_zero_point,
  int8_t output_min,
  int8_t output_max)
{
  const int32_t zero_point = (int32_t) output_zero_point;
  const int32_t min_less_zp = (int32_t) output_min - zero_point;
  const int32_t max_less_zp = (int32_t) output_max - zero_point;

  params->scalar_fmagic.scale = scale;
  params->scalar_fmagic.output_min_less_zero_point = (float) min_less_zp;
  params->scalar_fmagic.output_max_less_zero_point = (float) max_less_zp;
  params->scalar_fmagic.magic_bias = 12582912.0f;
  params->scalar_fmagic.magic_bias_less_zero_point = INT32_C(0x4B400000) - zero_point;
  return sizeof(params->scalar_fmagic);
}
5859*4bdc9457SAndroid Build Coastguard Worker
// Initializes the scalar "imagic" variant of the F32 -> QS8 conversion
// parameters.
//
// The clamping bounds are stored in integer form: magic_min and magic_max are
// the values float_as_uint32() returns for
// (magic_bias + (output_min - output_zero_point)) and
// (magic_bias + (output_max - output_zero_point)), reinterpreted as int32_t.
// 0x4B400000 is the IEEE-754 bit pattern of the magic bias 12582912.0f.
//
// Returns the size, in bytes, of the scalar_imagic parameter variant.
size_t xnn_init_f32_qs8_cvt_scalar_imagic_params(
  union xnn_f32_qs8_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  int8_t output_zero_point,
  int8_t output_min,
  int8_t output_max)
{
  const float magic_bias = 12582912.0f;
  const int32_t zero_point = (int32_t) output_zero_point;
  const float min_less_zp = (float) ((int32_t) output_min - zero_point);
  const float max_less_zp = (float) ((int32_t) output_max - zero_point);

  params->scalar_imagic.scale = scale;
  params->scalar_imagic.magic_bias = magic_bias;
  params->scalar_imagic.magic_min = (int32_t) float_as_uint32(magic_bias + min_less_zp);
  params->scalar_imagic.magic_max = (int32_t) float_as_uint32(magic_bias + max_less_zp);
  params->scalar_imagic.magic_bias_less_zero_point = INT32_C(0x4B400000) - zero_point;
  return sizeof(params->scalar_imagic);
}
5876*4bdc9457SAndroid Build Coastguard Worker
// Initializes the scalar "lrintf" variant of the F32 -> QS8 conversion
// parameters.
//
// Stores the scale, the clamping bounds expressed as floats relative to the
// zero point, and the zero point itself widened to int32_t.
//
// Returns the size, in bytes, of the scalar_lrintf parameter variant.
size_t xnn_init_f32_qs8_cvt_scalar_lrintf_params(
  union xnn_f32_qs8_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  int8_t output_zero_point,
  int8_t output_min,
  int8_t output_max)
{
  const int32_t zero_point = (int32_t) output_zero_point;
  const int32_t min_less_zp = (int32_t) output_min - zero_point;
  const int32_t max_less_zp = (int32_t) output_max - zero_point;

  params->scalar_lrintf.scale = scale;
  params->scalar_lrintf.output_min_less_zero_point = (float) min_less_zp;
  params->scalar_lrintf.output_max_less_zero_point = (float) max_less_zp;
  params->scalar_lrintf.output_zero_point = zero_point;
  return sizeof(params->scalar_lrintf);
}
5890*4bdc9457SAndroid Build Coastguard Worker
5891*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Initializes the NEON variant of the F32 -> QS8 conversion parameters.
//
// Stores the scale, the magic bias 12582912.0f, the integer constant
// (0x4B400000 - output_zero_point), and the output bounds as passed in.
// 0x4B400000 is the IEEE-754 bit pattern of 12582912.0f.
//
// Returns the size, in bytes, of the neon parameter variant.
size_t xnn_init_f32_qs8_cvt_neon_params(
  union xnn_f32_qs8_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  int8_t output_zero_point,
  int8_t output_min,
  int8_t output_max)
{
  const int32_t zero_point = (int32_t) output_zero_point;

  params->neon.scale = scale;
  params->neon.magic_bias = 12582912.0f;
  params->neon.magic_bias_less_zero_point = INT32_C(0x4B400000) - zero_point;
  // Bounds are kept in the quantized domain, not offset by the zero point.
  params->neon.output_min = output_min;
  params->neon.output_max = output_max;
  return sizeof(params->neon);
}
5906*4bdc9457SAndroid Build Coastguard Worker
// Initializes the NEONv8 variant of the F32 -> QS8 conversion parameters.
//
// Stores the scale, the zero point converted to int16_t, and the output
// bounds as passed in.
//
// Returns the size, in bytes, of the neonv8 parameter variant.
size_t xnn_init_f32_qs8_cvt_neonv8_params(
  union xnn_f32_qs8_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  int8_t output_zero_point,
  int8_t output_min,
  int8_t output_max)
{
  const int16_t zero_point = (int16_t) output_zero_point;

  params->neonv8.scale = scale;
  params->neonv8.output_zero_point = zero_point;
  params->neonv8.output_min = output_min;
  params->neonv8.output_max = output_max;
  return sizeof(params->neonv8);
}
5920*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5921*4bdc9457SAndroid Build Coastguard Worker
5922*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Initializes the SSE2 variant of the F32 -> QS8 conversion parameters.
//
// Replicated constants:
//   - scale and (output_max - output_zero_point) as float: 4 entries each;
//   - output_zero_point and output_min converted to int16_t: 8 entries each.
//
// Returns the size, in bytes, of the sse2 parameter variant.
size_t xnn_init_f32_qs8_cvt_sse2_params(
  union xnn_f32_qs8_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  int8_t output_zero_point,
  int8_t output_min,
  int8_t output_max)
{
  const float max_less_zp = (float) ((int32_t) output_max - (int32_t) output_zero_point);
  const int16_t zero_point = (int16_t) output_zero_point;
  const int16_t lower_bound = (int16_t) output_min;

  for (size_t k = 0; k < 4; k++) {
    params->sse2.scale[k] = scale;
    params->sse2.output_max_less_zero_point[k] = max_less_zp;
  }
  for (size_t k = 0; k < 8; k++) {
    params->sse2.output_zero_point[k] = zero_point;
    params->sse2.output_min[k] = lower_bound;
  }
  return sizeof(params->sse2);
}
5941*4bdc9457SAndroid Build Coastguard Worker
// Initializes the SSE4 variant of the F32 -> QS8 conversion parameters.
//
// Replicated constants:
//   - scale and (output_max - output_zero_point) as float: 4 entries each;
//   - output_zero_point converted to int16_t: 8 entries;
//   - output_min as passed in: 16 entries.
//
// Returns the size, in bytes, of the sse4 parameter variant.
size_t xnn_init_f32_qs8_cvt_sse4_params(
  union xnn_f32_qs8_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  int8_t output_zero_point,
  int8_t output_min,
  int8_t output_max)
{
  const float max_less_zp = (float) ((int32_t) output_max - (int32_t) output_zero_point);
  const int16_t zero_point = (int16_t) output_zero_point;

  for (size_t k = 0; k < 4; k++) {
    params->sse4.scale[k] = scale;
    params->sse4.output_max_less_zero_point[k] = max_less_zp;
  }
  for (size_t k = 0; k < 8; k++) {
    params->sse4.output_zero_point[k] = zero_point;
  }
  for (size_t k = 0; k < 16; k++) {
    params->sse4.output_min[k] = output_min;
  }
  return sizeof(params->sse4);
}
5962*4bdc9457SAndroid Build Coastguard Worker
// Initializes the AVX variant of the F32 -> QS8 conversion parameters.
//
// Replicated constants:
//   - scale and (output_max - output_zero_point) as float: 8 entries each;
//   - output_zero_point converted to int16_t: 8 entries;
//   - output_min as passed in: 16 entries;
//   - mask_table: indices [0, 7) set to -1, indices [7, 14) set to 0.
//
// Returns the size, in bytes, of the avx parameter variant.
size_t xnn_init_f32_qs8_cvt_avx_params(
  union xnn_f32_qs8_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  int8_t output_zero_point,
  int8_t output_min,
  int8_t output_max)
{
  const float max_less_zp = (float) ((int32_t) output_max - (int32_t) output_zero_point);
  const int16_t zero_point = (int16_t) output_zero_point;

  // The three 8-entry arrays share one pass.
  for (size_t k = 0; k < 8; k++) {
    params->avx.scale[k] = scale;
    params->avx.output_max_less_zero_point[k] = max_less_zp;
    params->avx.output_zero_point[k] = zero_point;
  }
  for (size_t k = 0; k < 16; k++) {
    params->avx.output_min[k] = output_min;
  }
  for (size_t k = 0; k < 14; k++) {
    params->avx.mask_table[k] = (k < 7) ? -1 : 0;
  }
  return sizeof(params->avx);
}
5989*4bdc9457SAndroid Build Coastguard Worker
// Initializes the AVX2 variant of the F32 -> QS8 conversion parameters.
//
// Replicated constants:
//   - scale and (output_max - output_zero_point) as float: 8 entries each;
//   - output_zero_point converted to int16_t: 16 entries;
//   - output_min as passed in: 32 entries;
//   - shuffle_mask: the 8 indices {0, 4, 1, 5, 2, 6, 3, 7};
//   - mask_table: indices [0, 7) set to -1, indices [7, 14) set to 0.
//
// Returns the size, in bytes, of the avx2 parameter variant.
size_t xnn_init_f32_qs8_cvt_avx2_params(
  union xnn_f32_qs8_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  int8_t output_zero_point,
  int8_t output_min,
  int8_t output_max)
{
  const float max_less_zp = (float) ((int32_t) output_max - (int32_t) output_zero_point);
  const int16_t zero_point = (int16_t) output_zero_point;

  for (size_t k = 0; k < 8; k++) {
    params->avx2.scale[k] = scale;
    params->avx2.output_max_less_zero_point[k] = max_less_zp;
  }
  for (size_t k = 0; k < 16; k++) {
    params->avx2.output_zero_point[k] = zero_point;
  }
  // Interleave the low indices (0..3) with the high indices (4..7):
  // 0, 4, 1, 5, 2, 6, 3, 7.
  for (int pair = 0; pair < 4; pair++) {
    params->avx2.shuffle_mask[2 * pair] = pair;
    params->avx2.shuffle_mask[2 * pair + 1] = pair + 4;
  }
  for (size_t k = 0; k < 32; k++) {
    params->avx2.output_min[k] = output_min;
  }
  for (size_t k = 0; k < 14; k++) {
    params->avx2.mask_table[k] = (k < 7) ? -1 : 0;
  }
  return sizeof(params->avx2);
}
6024*4bdc9457SAndroid Build Coastguard Worker
// Initializes the AVX512 variant of the F32 -> QS8 conversion parameters.
//
// Replicated constants:
//   - scale and (output_max - output_zero_point) as float: 16 entries each;
//   - output_zero_point converted to int16_t: 32 entries;
//   - output_min as passed in: 64 entries.
// Index tables:
//   - shuffle512_mask: {0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15};
//   - shuffle256_mask: {0, 4, 2, 6, 1, 5, 3, 7}.
//
// Returns the size, in bytes, of the avx512 parameter variant.
size_t xnn_init_f32_qs8_cvt_avx512_params(
  union xnn_f32_qs8_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  int8_t output_zero_point,
  int8_t output_min,
  int8_t output_max)
{
  static const int shuffle256_indices[8] = {0, 4, 2, 6, 1, 5, 3, 7};

  const float max_less_zp = (float) ((int32_t) output_max - (int32_t) output_zero_point);
  const int16_t zero_point = (int16_t) output_zero_point;

  for (size_t k = 0; k < 16; k++) {
    params->avx512.scale[k] = scale;
    params->avx512.output_max_less_zero_point[k] = max_less_zp;
  }
  for (size_t k = 0; k < 32; k++) {
    params->avx512.output_zero_point[k] = zero_point;
  }
  for (size_t k = 0; k < 64; k++) {
    params->avx512.output_min[k] = output_min;
  }
  // Entry (4 * row + col) holds (row + 4 * col), i.e. the transpose of a 4x4
  // index grid: 0, 4, 8, 12, 1, 5, 9, 13, ...
  for (int row = 0; row < 4; row++) {
    for (int col = 0; col < 4; col++) {
      params->avx512.shuffle512_mask[4 * row + col] = row + 4 * col;
    }
  }
  for (size_t k = 0; k < 8; k++) {
    params->avx512.shuffle256_mask[k] = shuffle256_indices[k];
  }
  return sizeof(params->avx512);
}
6069*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6070*4bdc9457SAndroid Build Coastguard Worker
6071*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Initializes the WAsm SIMD "cvt" variant of the F32 -> QS8 conversion
// parameters.
//
// Replicated constants:
//   - scale: 2 entries;
//   - output_zero_point converted to int16_t: 4 entries;
//   - output_min and output_max as passed in: 8 entries each.
//
// Returns the size, in bytes, of the wasmsimd_cvt parameter variant.
size_t xnn_init_f32_qs8_cvt_wasmsimd_cvt_params(
  union xnn_f32_qs8_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  int8_t output_zero_point,
  int8_t output_min,
  int8_t output_max)
{
  const int16_t zero_point = (int16_t) output_zero_point;

  for (size_t k = 0; k < 2; k++) {
    params->wasmsimd_cvt.scale[k] = scale;
  }
  for (size_t k = 0; k < 4; k++) {
    params->wasmsimd_cvt.output_zero_point[k] = zero_point;
  }
  for (size_t k = 0; k < 8; k++) {
    params->wasmsimd_cvt.output_min[k] = output_min;
    params->wasmsimd_cvt.output_max[k] = output_max;
  }
  return sizeof(params->wasmsimd_cvt);
}
6091*4bdc9457SAndroid Build Coastguard Worker
// Initializes the WAsm SIMD "magic" variant of the F32 -> QS8 conversion
// parameters.
//
// Replicated constants:
//   - scale, magic_bias (12582912.0f), magic_min and
//     magic_bias_less_zero_point: 2 entries each;
//   - output_max as passed in: 8 entries.
// magic_min is the value float_as_uint32() returns for
// (magic_bias + (output_min - output_zero_point)), reinterpreted as int32_t.
// 0x4B400000 is the IEEE-754 bit pattern of the magic bias.
//
// Returns the size, in bytes, of the wasmsimd_magic parameter variant.
size_t xnn_init_f32_qs8_cvt_wasmsimd_magic_params(
  union xnn_f32_qs8_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  int8_t output_zero_point,
  int8_t output_min,
  int8_t output_max)
{
  const float magic_bias = 12582912.0f;
  const int32_t zero_point = (int32_t) output_zero_point;
  const float min_less_zp = (float) ((int32_t) output_min - zero_point);
  const int32_t lower_bound_bits = (int32_t) float_as_uint32(magic_bias + min_less_zp);
  const int32_t bias_bits_less_zp = INT32_C(0x4B400000) - zero_point;

  for (size_t k = 0; k < 2; k++) {
    params->wasmsimd_magic.scale[k] = scale;
    params->wasmsimd_magic.magic_bias[k] = magic_bias;
    params->wasmsimd_magic.magic_min[k] = lower_bound_bits;
    params->wasmsimd_magic.magic_bias_less_zero_point[k] = bias_bits_less_zp;
  }
  for (size_t k = 0; k < 8; k++) {
    params->wasmsimd_magic.output_max[k] = output_max;
  }
  return sizeof(params->wasmsimd_magic);
}
6113*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
6114*4bdc9457SAndroid Build Coastguard Worker
// Initializes the scalar "fmagic" variant of the F32 -> QU8 conversion
// parameters.
//
//   scale              - stored unchanged.
//   output_zero_point  - subtracted from both clamping bounds and from the
//                        integer magic constant.
//   output_min/max     - stored as floats relative to the zero point.
//
// magic_bias is 12582912.0f (0x1.8p+23f); 0x4B400000 is the IEEE-754 bit
// pattern of that same value.
//
// Returns the size, in bytes, of the scalar_fmagic parameter variant.
size_t xnn_init_f32_qu8_cvt_scalar_fmagic_params(
  union xnn_f32_qu8_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  uint8_t output_zero_point,
  uint8_t output_min,
  uint8_t output_max)
{
  const int32_t zero_point = (int32_t) output_zero_point;
  const int32_t min_less_zp = (int32_t) output_min - zero_point;
  const int32_t max_less_zp = (int32_t) output_max - zero_point;

  params->scalar_fmagic.scale = scale;
  params->scalar_fmagic.output_min_less_zero_point = (float) min_less_zp;
  params->scalar_fmagic.output_max_less_zero_point = (float) max_less_zp;
  params->scalar_fmagic.magic_bias = 12582912.0f;
  params->scalar_fmagic.magic_bias_less_zero_point = INT32_C(0x4B400000) - zero_point;
  return sizeof(params->scalar_fmagic);
}
6129*4bdc9457SAndroid Build Coastguard Worker
// Initializes the scalar "imagic" variant of the F32 -> QU8 conversion
// parameters.
//
// The clamping bounds are stored in integer form: magic_min and magic_max are
// the values float_as_uint32() returns for
// (magic_bias + (output_min - output_zero_point)) and
// (magic_bias + (output_max - output_zero_point)), reinterpreted as int32_t.
// 0x4B400000 is the IEEE-754 bit pattern of the magic bias 12582912.0f.
//
// Returns the size, in bytes, of the scalar_imagic parameter variant.
size_t xnn_init_f32_qu8_cvt_scalar_imagic_params(
  union xnn_f32_qu8_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  uint8_t output_zero_point,
  uint8_t output_min,
  uint8_t output_max)
{
  const float magic_bias = 12582912.0f;
  const int32_t zero_point = (int32_t) output_zero_point;
  const float min_less_zp = (float) ((int32_t) output_min - zero_point);
  const float max_less_zp = (float) ((int32_t) output_max - zero_point);

  params->scalar_imagic.scale = scale;
  params->scalar_imagic.magic_bias = magic_bias;
  params->scalar_imagic.magic_min = (int32_t) float_as_uint32(magic_bias + min_less_zp);
  params->scalar_imagic.magic_max = (int32_t) float_as_uint32(magic_bias + max_less_zp);
  params->scalar_imagic.magic_bias_less_zero_point = INT32_C(0x4B400000) - zero_point;
  return sizeof(params->scalar_imagic);
}
6146*4bdc9457SAndroid Build Coastguard Worker
// Initializes the scalar "lrintf" variant of the F32 -> QU8 conversion
// parameters.
//
// Stores the scale, the clamping bounds expressed as floats relative to the
// zero point, and the zero point itself widened to int32_t.
//
// Returns the size, in bytes, of the scalar_lrintf parameter variant.
size_t xnn_init_f32_qu8_cvt_scalar_lrintf_params(
  union xnn_f32_qu8_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  uint8_t output_zero_point,
  uint8_t output_min,
  uint8_t output_max)
{
  const int32_t zero_point = (int32_t) output_zero_point;
  const int32_t min_less_zp = (int32_t) output_min - zero_point;
  const int32_t max_less_zp = (int32_t) output_max - zero_point;

  params->scalar_lrintf.scale = scale;
  params->scalar_lrintf.output_min_less_zero_point = (float) min_less_zp;
  params->scalar_lrintf.output_max_less_zero_point = (float) max_less_zp;
  params->scalar_lrintf.output_zero_point = zero_point;
  return sizeof(params->scalar_lrintf);
}
6160*4bdc9457SAndroid Build Coastguard Worker
6161*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Initializes the NEON variant of the F32 -> QU8 conversion parameters.
//
// Stores the scale, the magic bias 12582912.0f, the integer constant
// (0x4B400000 - output_zero_point), and the output bounds as passed in.
// 0x4B400000 is the IEEE-754 bit pattern of 12582912.0f.
//
// Returns the size, in bytes, of the neon parameter variant.
size_t xnn_init_f32_qu8_cvt_neon_params(
  union xnn_f32_qu8_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  uint8_t output_zero_point,
  uint8_t output_min,
  uint8_t output_max)
{
  const int32_t zero_point = (int32_t) output_zero_point;

  params->neon.scale = scale;
  params->neon.magic_bias = 12582912.0f;
  params->neon.magic_bias_less_zero_point = INT32_C(0x4B400000) - zero_point;
  // Bounds are kept in the quantized domain, not offset by the zero point.
  params->neon.output_min = output_min;
  params->neon.output_max = output_max;
  return sizeof(params->neon);
}
6176*4bdc9457SAndroid Build Coastguard Worker
// Initializes the NEONv8 variant of the F32 -> QU8 conversion parameters.
//
// Stores the scale, the zero point converted to int16_t, and the output
// bounds as passed in.
//
// Returns the size, in bytes, of the neonv8 parameter variant.
size_t xnn_init_f32_qu8_cvt_neonv8_params(
  union xnn_f32_qu8_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  uint8_t output_zero_point,
  uint8_t output_min,
  uint8_t output_max)
{
  const int16_t zero_point = (int16_t) output_zero_point;

  params->neonv8.scale = scale;
  params->neonv8.output_zero_point = zero_point;
  params->neonv8.output_min = output_min;
  params->neonv8.output_max = output_max;
  return sizeof(params->neonv8);
}
6190*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6191*4bdc9457SAndroid Build Coastguard Worker
6192*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Fills the `sse2` variant of the F32->QU8 conversion parameters.
//
// Each value is replicated across the leading elements of its array:
//   - scale and (output_max - output_zero_point) as float: 4 elements each
//   - output_zero_point cast to int16_t: 8 elements
//   - output_min: 16 elements
//
// Returns the size in bytes of the `sse2` parameter variant.
size_t xnn_init_f32_qu8_cvt_sse2_params(
  union xnn_f32_qu8_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  uint8_t output_zero_point,
  uint8_t output_min,
  uint8_t output_max)
{
  const int32_t max_delta = (int32_t) output_max - (int32_t) output_zero_point;
  const float max_less_zero_point = (float) max_delta;
  const int16_t zero_point = (int16_t) output_zero_point;

  for (size_t k = 0; k < 16; k++) {
    params->sse2.output_min[k] = output_min;
  }
  for (size_t k = 0; k < 8; k++) {
    params->sse2.output_zero_point[k] = zero_point;
  }
  for (size_t k = 0; k < 4; k++) {
    params->sse2.output_max_less_zero_point[k] = max_less_zero_point;
    params->sse2.scale[k] = scale;
  }
  return sizeof(params->sse2);
}
6213*4bdc9457SAndroid Build Coastguard Worker
// Fills the `avx` variant of the F32->QU8 conversion parameters.
//
// Writes 8 copies each of scale, (output_max - output_zero_point) as float and
// the zero point cast to int16_t, 16 copies of output_min, and a 14-entry
// mask_table whose first 7 entries are -1 and last 7 entries are 0.
//
// Returns the size in bytes of the `avx` parameter variant.
size_t xnn_init_f32_qu8_cvt_avx_params(
  union xnn_f32_qu8_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  uint8_t output_zero_point,
  uint8_t output_min,
  uint8_t output_max)
{
  const float max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
  const int16_t zero_point = (int16_t) output_zero_point;

  for (size_t k = 0; k < 8; k++) {
    params->avx.scale[k] = scale;
    params->avx.output_max_less_zero_point[k] = max_less_zero_point;
    params->avx.output_zero_point[k] = zero_point;
  }
  for (size_t k = 0; k < 16; k++) {
    params->avx.output_min[k] = output_min;
  }
  // Entries 0..6 are all-ones (-1), entries 7..13 are zero.
  for (size_t k = 0; k < 14; k++) {
    params->avx.mask_table[k] = (k < 7) ? -1 : 0;
  }
  return sizeof(params->avx);
}
6240*4bdc9457SAndroid Build Coastguard Worker
// Fills the `avx2` variant of the F32->QU8 conversion parameters.
//
// Writes 8 copies each of scale and (output_max - output_zero_point) as float,
// 16 copies of the zero point cast to int16_t, 32 copies of output_min, the
// 8-entry shuffle_mask {0, 4, 1, 5, 2, 6, 3, 7}, and a 14-entry mask_table whose
// first 7 entries are -1 and last 7 entries are 0.
//
// Returns the size in bytes of the `avx2` parameter variant.
size_t xnn_init_f32_qu8_cvt_avx2_params(
  union xnn_f32_qu8_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  uint8_t output_zero_point,
  uint8_t output_min,
  uint8_t output_max)
{
  static const uint8_t shuffle_order[8] = {0, 4, 1, 5, 2, 6, 3, 7};

  const float max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
  const int16_t zero_point = (int16_t) output_zero_point;

  for (size_t k = 0; k < 8; k++) {
    params->avx2.scale[k] = scale;
    params->avx2.output_max_less_zero_point[k] = max_less_zero_point;
    params->avx2.shuffle_mask[k] = shuffle_order[k];
  }
  for (size_t k = 0; k < 16; k++) {
    params->avx2.output_zero_point[k] = zero_point;
  }
  for (size_t k = 0; k < 32; k++) {
    params->avx2.output_min[k] = output_min;
  }
  // Entries 0..6 are all-ones (-1), entries 7..13 are zero.
  for (size_t k = 0; k < 14; k++) {
    params->avx2.mask_table[k] = (k < 7) ? -1 : 0;
  }
  return sizeof(params->avx2);
}
6275*4bdc9457SAndroid Build Coastguard Worker
// Fills the `avx512` variant of the F32->QU8 conversion parameters.
//
// Writes 16 copies each of scale and (output_max - output_zero_point) as float,
// 32 copies of the zero point cast to int16_t, 64 copies of output_min, the
// 16-entry shuffle512_mask {0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15}
// and the 8-entry shuffle256_mask {0, 4, 2, 6, 1, 5, 3, 7}.
//
// Returns the size in bytes of the `avx512` parameter variant.
size_t xnn_init_f32_qu8_cvt_avx512_params(
  union xnn_f32_qu8_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  uint8_t output_zero_point,
  uint8_t output_min,
  uint8_t output_max)
{
  static const uint8_t shuffle512_order[16] = {
    0, 4, 8, 12,
    1, 5, 9, 13,
    2, 6, 10, 14,
    3, 7, 11, 15,
  };
  static const uint8_t shuffle256_order[8] = {0, 4, 2, 6, 1, 5, 3, 7};

  const float max_less_zero_point = (float) ((int32_t) output_max - (int32_t) output_zero_point);
  const int16_t zero_point = (int16_t) output_zero_point;

  for (size_t k = 0; k < 16; k++) {
    params->avx512.scale[k] = scale;
    params->avx512.output_max_less_zero_point[k] = max_less_zero_point;
    params->avx512.shuffle512_mask[k] = shuffle512_order[k];
  }
  for (size_t k = 0; k < 32; k++) {
    params->avx512.output_zero_point[k] = zero_point;
  }
  for (size_t k = 0; k < 64; k++) {
    params->avx512.output_min[k] = output_min;
  }
  for (size_t k = 0; k < 8; k++) {
    params->avx512.shuffle256_mask[k] = shuffle256_order[k];
  }
  return sizeof(params->avx512);
}
6320*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6321*4bdc9457SAndroid Build Coastguard Worker
6322*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Fills the `wasmsimd_cvt` variant of the F32->QU8 conversion parameters.
//
// Writes 2 copies of scale, 4 copies of the zero point cast to int16_t, and
// 8 copies each of output_min and output_max.
//
// Returns the size in bytes of the `wasmsimd_cvt` parameter variant.
size_t xnn_init_f32_qu8_cvt_wasmsimd_cvt_params(
  union xnn_f32_qu8_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  uint8_t output_zero_point,
  uint8_t output_min,
  uint8_t output_max)
{
  const int16_t zero_point = (int16_t) output_zero_point;

  for (size_t k = 0; k < 8; k++) {
    params->wasmsimd_cvt.output_max[k] = output_max;
    params->wasmsimd_cvt.output_min[k] = output_min;
  }
  for (size_t k = 0; k < 4; k++) {
    params->wasmsimd_cvt.output_zero_point[k] = zero_point;
  }
  for (size_t k = 0; k < 2; k++) {
    params->wasmsimd_cvt.scale[k] = scale;
  }
  return sizeof(params->wasmsimd_cvt);
}
6342*4bdc9457SAndroid Build Coastguard Worker
// Fills the `wasmsimd_magic` variant of the F32->QU8 conversion parameters.
//
// Writes 2 copies each of:
//   - scale
//   - the magic bias 12582912.0f (0x1.8p+23)
//   - magic_min: float_as_uint32(magic bias + (output_min - output_zero_point))
//     (float_as_uint32 is declared elsewhere; presumably a bit-cast of the float)
//   - 0x4B400000 (the IEEE-754 bit pattern of the magic bias) minus the zero point
// and 8 copies of output_max.
//
// Returns the size in bytes of the `wasmsimd_magic` parameter variant.
size_t xnn_init_f32_qu8_cvt_wasmsimd_magic_params(
  union xnn_f32_qu8_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  uint8_t output_zero_point,
  uint8_t output_min,
  uint8_t output_max)
{
  const int32_t zero_point = (int32_t) output_zero_point;
  const float min_less_zero_point = (float) ((int32_t) output_min - zero_point);
  const int32_t magic_min = (int32_t) float_as_uint32(12582912.0f + min_less_zero_point);
  const int32_t bias_bits_less_zero_point = INT32_C(0x4B400000) - zero_point;

  for (size_t k = 0; k < 8; k++) {
    params->wasmsimd_magic.output_max[k] = output_max;
  }
  for (size_t k = 0; k < 2; k++) {
    params->wasmsimd_magic.magic_bias_less_zero_point[k] = bias_bits_less_zero_point;
    params->wasmsimd_magic.magic_min[k] = magic_min;
    params->wasmsimd_magic.magic_bias[k] = 12582912.0f;
    params->wasmsimd_magic.scale[k] = scale;
  }
  return sizeof(params->wasmsimd_magic);
}
6364*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
6365*4bdc9457SAndroid Build Coastguard Worker
// Fills the `scalar` variant of the QS8->QS8 conversion parameters.
//
// multiplier = lrintf(256 * input_output_scale), i.e. the scale in fixed point
//              with 8 fractional bits.
// bias       = output_zero_point * 256 - multiplier * input_zero_point + 0x80
//              (0x80 is half of 256; presumably the rounding term for a later
//              right shift by 8 in the kernel - confirm against the kernel).
//
// In builds with assertions enabled, input_output_scale must lie in
// [2**-8, 2**7] and the resulting multiplier in [1, 32768].
//
// Returns the size in bytes of the `scalar` parameter variant.
size_t xnn_init_qs8_cvt_scalar_params(
  union xnn_qs8_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float input_output_scale,
  int8_t input_zero_point,
  int8_t output_zero_point)
{
  assert(input_output_scale >= 0x1.0p-8);
  assert(input_output_scale <= 0x1.0p+7);

  const long multiplier = lrintf(256.0f * input_output_scale);
  assert(multiplier >= 1L);
  assert(multiplier <= 32768L);
  // output_zero_point may be negative, and left-shifting a negative signed value
  // is undefined behavior in C, so scale it by multiplication instead of `<< 8`.
  const int32_t scaled_output_zero_point = (int32_t) output_zero_point * INT32_C(256);
  params->scalar.bias = scaled_output_zero_point - (int32_t) multiplier * (int32_t) input_zero_point + INT32_C(0x80);
  params->scalar.multiplier = (int32_t) multiplier;
  return sizeof(params->scalar);
}
6382*4bdc9457SAndroid Build Coastguard Worker
6383*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM
// Fills the `armsimd32` variant of the QS8->QS8 conversion parameters.
//
// multiplier             = lrintf(131072 * input_output_scale), i.e. the scale in
//                          fixed point with 17 fractional bits.
// minus_input_zero_point = the 16-bit value of -input_zero_point replicated into
//                          both halves of a 32-bit word (multiplying a 16-bit
//                          value by 0x00010001 copies it into the upper half).
// bias                   = output_zero_point * 2 + 1.
//
// In builds with assertions enabled, input_output_scale must lie in
// [2**-8, 2**7] and the resulting multiplier in [512, 16777216].
//
// Returns the size in bytes of the `armsimd32` parameter variant.
size_t xnn_init_qs8_cvt_armsimd32_params(
  union xnn_qs8_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float input_output_scale,
  int8_t input_zero_point,
  int8_t output_zero_point)
{
  assert(input_output_scale >= 0x1.0p-8);
  assert(input_output_scale <= 0x1.0p+7);

  const long multiplier = lrintf(131072.0f * input_output_scale);
  assert(multiplier >= 512L);
  assert(multiplier <= 16777216L);
  const uint16_t minus_input_zero_point = -(int16_t) input_zero_point;
  params->armsimd32.minus_input_zero_point = (uint32_t) minus_input_zero_point * UINT32_C(0x00010001);
  params->armsimd32.multiplier = (int32_t) multiplier;
  // output_zero_point may be negative, and left-shifting a negative signed value
  // is undefined behavior in C, so double it by multiplication instead of `<< 1`.
  params->armsimd32.bias = (int32_t) output_zero_point * INT32_C(2) + INT32_C(1);
  return sizeof(params->armsimd32);
}
6402*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_ARM
6403*4bdc9457SAndroid Build Coastguard Worker
6404*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Fills the `neon` variant of the QS8->QS8 conversion parameters.
//
// The multiplier is lrintf(-256 * input_output_scale), i.e. the negated scale in
// fixed point with 8 fractional bits, stored as int16_t together with both zero
// points (each cast to int16_t).
//
// In builds with assertions enabled, input_output_scale must lie in
// [2**-8, 2**7] and the resulting multiplier in [-32768, -1].
//
// Returns the size in bytes of the `neon` parameter variant.
size_t xnn_init_qs8_cvt_neon_params(
  union xnn_qs8_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float input_output_scale,
  int8_t input_zero_point,
  int8_t output_zero_point)
{
  assert(input_output_scale >= 0x1.0p-8);
  assert(input_output_scale <= 0x1.0p+7);

  const long multiplier = lrintf(-256.0f * input_output_scale);
  assert(multiplier <= -1L);
  assert(multiplier >= -32768L);

  const int16_t multiplier16 = (int16_t) multiplier;
  const int16_t input_zp16 = (int16_t) input_zero_point;
  const int16_t output_zp16 = (int16_t) output_zero_point;

  params->neon.output_zero_point = output_zp16;
  params->neon.multiplier = multiplier16;
  params->neon.input_zero_point = input_zp16;
  return sizeof(params->neon);
}
6422*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6423*4bdc9457SAndroid Build Coastguard Worker
6424*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Fills the `sse2` variant of the QS8->QS8 conversion parameters.
//
// multiplier = lrintf(-256 * input_output_scale), i.e. the negated scale in fixed
//              point with 8 fractional bits; 8 copies are stored as int16_t.
// bias       = output_zero_point * 256 + multiplier * input_zero_point + 0x80;
//              4 copies are stored.
//
// In builds with assertions enabled, input_output_scale must lie in
// [2**-8, 2**7] and the resulting multiplier in [-32768, -1].
//
// Returns the size in bytes of the `sse2` parameter variant.
size_t xnn_init_qs8_cvt_sse2_params(
  union xnn_qs8_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float input_output_scale,
  int8_t input_zero_point,
  int8_t output_zero_point)
{
  assert(input_output_scale >= 0x1.0p-8);
  assert(input_output_scale <= 0x1.0p+7);

  const long multiplier = lrintf(-256.0f * input_output_scale);
  assert(multiplier <= -1L);
  assert(multiplier >= -32768L);
  // output_zero_point may be negative, and left-shifting a negative signed value
  // is undefined behavior in C, so scale it by multiplication instead of `<< 8`.
  const int32_t scaled_output_zero_point = (int32_t) output_zero_point * INT32_C(256);
  const int32_t bias = scaled_output_zero_point + (int32_t) multiplier * (int32_t) input_zero_point + INT32_C(0x80);
  for (uint32_t i = 0; i < 8; i++) {
    params->sse2.multiplier[i] = (int16_t) multiplier;
  }
  for (uint32_t i = 0; i < 4; i++) {
    params->sse2.bias[i] = bias;
  }
  return sizeof(params->sse2);
}
6446*4bdc9457SAndroid Build Coastguard Worker
// Fills the `ssse3` variant of the QS8->QS8 conversion parameters.
//
// Writes 8 copies each of the input zero point, the multiplier
// lrintf(-256 * input_output_scale), and the output zero point, all cast to
// int16_t.
//
// In builds with assertions enabled, input_output_scale must lie in
// [2**-8, 2**7] and the resulting multiplier in [-32768, -1].
//
// Returns the size in bytes of the `ssse3` parameter variant.
size_t xnn_init_qs8_cvt_ssse3_params(
  union xnn_qs8_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float input_output_scale,
  int8_t input_zero_point,
  int8_t output_zero_point)
{
  assert(input_output_scale >= 0x1.0p-8);
  assert(input_output_scale <= 0x1.0p+7);

  const long multiplier = lrintf(-256.0f * input_output_scale);
  assert(multiplier <= -1L);
  assert(multiplier >= -32768L);

  const int16_t multiplier16 = (int16_t) multiplier;
  const int16_t input_zp16 = (int16_t) input_zero_point;
  const int16_t output_zp16 = (int16_t) output_zero_point;
  for (size_t k = 0; k < 8; k++) {
    params->ssse3.output_zero_point[k] = output_zp16;
    params->ssse3.multiplier[k] = multiplier16;
    params->ssse3.input_zero_point[k] = input_zp16;
  }
  return sizeof(params->ssse3);
}
6466*4bdc9457SAndroid Build Coastguard Worker
// Fills the `avx2` variant of the QS8->QS8 conversion parameters.
//
// Writes 16 copies each of the input zero point, the multiplier
// lrintf(-256 * input_output_scale), and the output zero point, all cast to
// int16_t.
//
// In builds with assertions enabled, input_output_scale must lie in
// [2**-8, 2**7] and the resulting multiplier in [-32768, -1].
//
// Returns the size in bytes of the `avx2` parameter variant.
size_t xnn_init_qs8_cvt_avx2_params(
  union xnn_qs8_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float input_output_scale,
  int8_t input_zero_point,
  int8_t output_zero_point)
{
  assert(input_output_scale >= 0x1.0p-8);
  assert(input_output_scale <= 0x1.0p+7);

  const long multiplier = lrintf(-256.0f * input_output_scale);
  assert(multiplier <= -1L);
  assert(multiplier >= -32768L);

  const int16_t multiplier16 = (int16_t) multiplier;
  const int16_t input_zp16 = (int16_t) input_zero_point;
  const int16_t output_zp16 = (int16_t) output_zero_point;
  for (size_t k = 0; k < 16; k++) {
    params->avx2.output_zero_point[k] = output_zp16;
    params->avx2.multiplier[k] = multiplier16;
    params->avx2.input_zero_point[k] = input_zp16;
  }
  return sizeof(params->avx2);
}
6486*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6487*4bdc9457SAndroid Build Coastguard Worker
6488*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Fills the `wasmsimd` variant of the QS8->QS8 conversion parameters.
//
// Writes 4 copies each of the input zero point, the multiplier
// lrintf(-256 * input_output_scale), and the output zero point, all cast to
// int16_t.
//
// In builds with assertions enabled, input_output_scale must lie in
// [2**-8, 2**7] and the resulting multiplier in [-32768, -1].
//
// Returns the size in bytes of the `wasmsimd` parameter variant.
size_t xnn_init_qs8_cvt_wasmsimd_params(
  union xnn_qs8_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float input_output_scale,
  int8_t input_zero_point,
  int8_t output_zero_point)
{
  assert(input_output_scale >= 0x1.0p-8);
  assert(input_output_scale <= 0x1.0p+7);

  const long multiplier = lrintf(-256.0f * input_output_scale);
  assert(multiplier <= -1L);
  assert(multiplier >= -32768L);

  const int16_t multiplier16 = (int16_t) multiplier;
  const int16_t input_zp16 = (int16_t) input_zero_point;
  const int16_t output_zp16 = (int16_t) output_zero_point;
  for (size_t k = 0; k < 4; k++) {
    params->wasmsimd.output_zero_point[k] = output_zp16;
    params->wasmsimd.multiplier[k] = multiplier16;
    params->wasmsimd.input_zero_point[k] = input_zp16;
  }
  return sizeof(params->wasmsimd);
}
6508*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
6509*4bdc9457SAndroid Build Coastguard Worker
// Fills the `scalar` variant of the QS8->F32 conversion parameters with the
// scale and the zero point (cast to int32_t).
//
// Returns the size in bytes of the `scalar` parameter variant.
size_t xnn_init_qs8_f32_cvt_scalar_params(
  union xnn_qs8_f32_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  int8_t zero_point)
{
  const int32_t zero_point32 = (int32_t) zero_point;

  params->scalar.scale = scale;
  params->scalar.zero_point = zero_point32;
  return sizeof(params->scalar);
}
6519*4bdc9457SAndroid Build Coastguard Worker
6520*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Fills the `neon` variant of the QS8->F32 conversion parameters: the negated
// zero point in minus_zero_point[0] and minus_zero_point[1], plus the scale.
//
// Returns the size in bytes of the `neon` parameter variant.
size_t xnn_init_qs8_f32_cvt_neon_params(
  union xnn_qs8_f32_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  int8_t zero_point)
{
  for (size_t k = 0; k < 2; k++) {
    params->neon.minus_zero_point[k] = -(int16_t) zero_point;
  }
  params->neon.scale = scale;
  return sizeof(params->neon);
}
6531*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6532*4bdc9457SAndroid Build Coastguard Worker
6533*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Fills the `sse2` variant of the QS8->F32 conversion parameters.
//
// Writes 16 copies of the byte 0x80 into sign_mask, 8 copies of 0x4B00 into
// magic_exp, and 4 copies each of the scale and of the magic bias
// (float) (0x00800080 + zero_point).
// NOTE(review): how the kernel combines sign_mask, magic_exp and magic_bias is
// not visible here - confirm against the SSE2 kernel before changing constants.
//
// Returns the size in bytes of the `sse2` parameter variant.
size_t xnn_init_qs8_f32_cvt_sse2_params(
  union xnn_qs8_f32_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  int8_t zero_point)
{
  const float magic_bias = (float) (INT32_C(0x00800080) + (int32_t) zero_point);

  for (size_t k = 0; k < 4; k++) {
    params->sse2.scale[k] = scale;
    params->sse2.magic_bias[k] = magic_bias;
  }
  for (size_t k = 0; k < 8; k++) {
    params->sse2.magic_exp[k] = UINT16_C(0x4B00);
  }
  for (size_t k = 0; k < 16; k++) {
    params->sse2.sign_mask[k] = UINT8_C(0x80);
  }
  return sizeof(params->sse2);
}
6552*4bdc9457SAndroid Build Coastguard Worker
// Fills the `sse4` variant of the QS8->F32 conversion parameters with 4 copies
// each of the negated zero point (computed in int32_t) and the scale.
//
// Returns the size in bytes of the `sse4` parameter variant.
size_t xnn_init_qs8_f32_cvt_sse4_params(
  union xnn_qs8_f32_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  int8_t zero_point)
{
  const int32_t minus_zero_point = -(int32_t) zero_point;

  for (size_t k = 0; k < 4; k++) {
    params->sse4.scale[k] = scale;
    params->sse4.minus_zero_point[k] = minus_zero_point;
  }
  return sizeof(params->sse4);
}
6564*4bdc9457SAndroid Build Coastguard Worker
// Initializes the avx member of xnn_qs8_f32_cvt_params: 8 lanes of the negated
// zero point (computed in 32-bit arithmetic) and 8 lanes of scale.
//
// Returns sizeof(params->avx).
size_t xnn_init_qs8_f32_cvt_avx_params(
  union xnn_qs8_f32_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  int8_t zero_point)
{
  // Widen before negating so that zero_point == -128 yields +128.
  const int32_t negated_zero_point = -(int32_t) zero_point;
  for (uint32_t lane = 0; lane < 8; lane++) {
    params->avx.scale[lane] = scale;
    params->avx.minus_zero_point[lane] = negated_zero_point;
  }
  return sizeof(params->avx);
}
6576*4bdc9457SAndroid Build Coastguard Worker
// Initializes the avx512 member of xnn_qs8_f32_cvt_params: 16 lanes of the negated
// zero point (computed in 32-bit arithmetic) and 16 lanes of scale.
//
// Returns sizeof(params->avx512).
size_t xnn_init_qs8_f32_cvt_avx512_params(
  union xnn_qs8_f32_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  int8_t zero_point)
{
  // Widen before negating so that zero_point == -128 yields +128.
  const int32_t negated_zero_point = -(int32_t) zero_point;
  for (uint32_t lane = 0; lane < 16; lane++) {
    params->avx512.scale[lane] = scale;
    params->avx512.minus_zero_point[lane] = negated_zero_point;
  }
  return sizeof(params->avx512);
}
6588*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6589*4bdc9457SAndroid Build Coastguard Worker
6590*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Initializes the wasmsimd member of xnn_qs8_f32_cvt_params: 4 entries of the
// negated zero point and 2 entries of scale.
//
// Returns sizeof(params->wasmsimd).
size_t xnn_init_qs8_f32_cvt_wasmsimd_params(
  union xnn_qs8_f32_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  int8_t zero_point)
{
  // The negation happens in int, so -(-128) == 128, which still fits in int16_t.
  const int16_t negated_zero_point = (int16_t) -(int32_t) zero_point;
  for (uint32_t k = 0; k < 2; k++) {
    params->wasmsimd.scale[k] = scale;
  }
  for (uint32_t k = 0; k < 4; k++) {
    params->wasmsimd.minus_zero_point[k] = negated_zero_point;
  }
  return sizeof(params->wasmsimd);
}
6604*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
6605*4bdc9457SAndroid Build Coastguard Worker
// Initializes the scalar member of xnn_qu8_cvt_params.
//
// input_output_scale must lie in [2^-8, 2^7] (checked by assert). It is converted
// to an integer multiplier lrintf(256 * scale), which therefore lies in [1, 32768].
// The stored bias is
//   (output_zero_point << 8) - multiplier * input_zero_point + 0x80.
//
// Returns sizeof(params->scalar).
size_t xnn_init_qu8_cvt_scalar_params(
  union xnn_qu8_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float input_output_scale,
  uint8_t input_zero_point,
  uint8_t output_zero_point)
{
  assert(input_output_scale >= 0x1.0p-8);
  assert(input_output_scale <= 0x1.0p+7);

  const long multiplier = lrintf(256.0f * input_output_scale);
  assert(multiplier >= 1L);
  assert(multiplier <= 32768L);

  const int32_t multiplier_i32 = (int32_t) multiplier;
  const int32_t shifted_output_zero_point = (int32_t) output_zero_point << 8;
  const int32_t scaled_input_zero_point = multiplier_i32 * (int32_t) input_zero_point;

  params->scalar.multiplier = multiplier_i32;
  // NOTE(review): 0x80 is half of 256; presumably a rounding term for a later
  // shift right by 8 in the kernel - confirm against the kernel.
  params->scalar.bias = shifted_output_zero_point - scaled_input_zero_point + INT32_C(0x80);
  return sizeof(params->scalar);
}
6622*4bdc9457SAndroid Build Coastguard Worker
6623*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM
// Initializes the armsimd32 member of xnn_qu8_cvt_params.
//
// input_output_scale must lie in [2^-8, 2^7] (checked by assert). It is converted
// to an integer multiplier lrintf(131072 * scale), i.e. scaled by 2^17, which
// therefore lies in [512, 16777216].
// minus_input_zero_point holds the 16-bit two's-complement negation of
// input_zero_point replicated into both halves of a 32-bit word.
// bias is (output_zero_point << 1) + 1.
//
// Returns sizeof(params->armsimd32).
size_t xnn_init_qu8_cvt_armsimd32_params(
  union xnn_qu8_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float input_output_scale,
  uint8_t input_zero_point,
  uint8_t output_zero_point)
{
  assert(input_output_scale >= 0x1.0p-8);
  assert(input_output_scale <= 0x1.0p+7);

  const long multiplier = lrintf(131072.0f * input_output_scale);
  assert(multiplier >= 512L);
  assert(multiplier <= 16777216L);

  // Negate in int, then truncate to 16 bits to get the two's-complement pattern.
  const uint32_t negated_half = (uint32_t) (uint16_t) -(int32_t) input_zero_point;

  params->armsimd32.bias = ((int32_t) output_zero_point << 1) + INT32_C(1);
  params->armsimd32.multiplier = (int32_t) multiplier;
  // Same value as negated_half * 0x00010001: the two 16-bit halves never overlap.
  params->armsimd32.minus_input_zero_point = (negated_half << 16) | negated_half;
  return sizeof(params->armsimd32);
}
6642*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_ARM
6643*4bdc9457SAndroid Build Coastguard Worker
6644*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Initializes the neon member of xnn_qu8_cvt_params.
//
// input_output_scale must lie in [2^-8, 2^7] (checked by assert). The stored
// multiplier is the NEGATED scale in integer form, lrintf(-256 * scale), which
// therefore lies in [-32768, -1] and fits in int16_t. Both zero points are stored
// unmodified after widening to 16 bits.
//
// Returns sizeof(params->neon).
size_t xnn_init_qu8_cvt_neon_params(
  union xnn_qu8_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float input_output_scale,
  uint8_t input_zero_point,
  uint8_t output_zero_point)
{
  assert(input_output_scale >= 0x1.0p-8);
  assert(input_output_scale <= 0x1.0p+7);

  const long multiplier = lrintf(-256.0f * input_output_scale);
  assert(multiplier <= -1L);
  assert(multiplier >= -32768L);

  const int16_t narrow_multiplier = (int16_t) multiplier;
  const uint16_t wide_input_zero_point = (uint16_t) input_zero_point;
  const int16_t wide_output_zero_point = (int16_t) output_zero_point;

  params->neon.output_zero_point = wide_output_zero_point;
  params->neon.multiplier = narrow_multiplier;
  params->neon.input_zero_point = wide_input_zero_point;
  return sizeof(params->neon);
}
6662*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6663*4bdc9457SAndroid Build Coastguard Worker
6664*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Initializes the sse2 member of xnn_qu8_cvt_params.
//
// input_output_scale must lie in [2^-8, 2^7] (checked by assert). It is converted
// to an integer multiplier lrintf(256 * scale), which therefore lies in [1, 32768];
// the multiplier is stored as uint16_t in 8 lanes. The bias
//   (output_zero_point << 8) - multiplier * input_zero_point + 0x80
// is stored in 4 lanes.
//
// Returns sizeof(params->sse2).
size_t xnn_init_qu8_cvt_sse2_params(
  union xnn_qu8_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float input_output_scale,
  uint8_t input_zero_point,
  uint8_t output_zero_point)
{
  assert(input_output_scale >= 0x1.0p-8);
  assert(input_output_scale <= 0x1.0p+7);

  const long multiplier = lrintf(256.0f * input_output_scale);
  assert(multiplier >= 1L);
  assert(multiplier <= 32768L);

  const int32_t shifted_output_zero_point = (int32_t) output_zero_point << 8;
  const int32_t scaled_input_zero_point = (int32_t) multiplier * (int32_t) input_zero_point;
  const int32_t combined_bias = shifted_output_zero_point - scaled_input_zero_point + INT32_C(0x80);
  for (uint32_t lane = 0; lane < 4; lane++) {
    params->sse2.bias[lane] = combined_bias;
  }

  // 32768 does not fit in int16_t, hence the unsigned 16-bit narrowing.
  const uint16_t narrow_multiplier = (uint16_t) multiplier;
  for (uint32_t lane = 0; lane < 8; lane++) {
    params->sse2.multiplier[lane] = narrow_multiplier;
  }
  return sizeof(params->sse2);
}
6686*4bdc9457SAndroid Build Coastguard Worker
// Initializes the ssse3 member of xnn_qu8_cvt_params.
//
// input_output_scale must lie in [2^-8, 2^7] (checked by assert). The stored
// multiplier is the NEGATED scale in integer form, lrintf(-256 * scale), which
// therefore lies in [-32768, -1] and fits in int16_t. The input zero point,
// multiplier and output zero point are each replicated across 8 lanes.
//
// Returns sizeof(params->ssse3).
size_t xnn_init_qu8_cvt_ssse3_params(
  union xnn_qu8_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float input_output_scale,
  uint8_t input_zero_point,
  uint8_t output_zero_point)
{
  assert(input_output_scale >= 0x1.0p-8);
  assert(input_output_scale <= 0x1.0p+7);

  const long multiplier = lrintf(-256.0f * input_output_scale);
  assert(multiplier <= -1L);
  assert(multiplier >= -32768L);

  const uint16_t wide_input_zero_point = (uint16_t) input_zero_point;
  const int16_t narrow_multiplier = (int16_t) multiplier;
  const int16_t wide_output_zero_point = (int16_t) output_zero_point;
  for (uint32_t lane = 0; lane < 8; lane++) {
    params->ssse3.output_zero_point[lane] = wide_output_zero_point;
    params->ssse3.multiplier[lane] = narrow_multiplier;
    params->ssse3.input_zero_point[lane] = wide_input_zero_point;
  }
  return sizeof(params->ssse3);
}
6706*4bdc9457SAndroid Build Coastguard Worker
// Initializes the avx2 member of xnn_qu8_cvt_params.
//
// input_output_scale must lie in [2^-8, 2^7] (checked by assert). The stored
// multiplier is the NEGATED scale in integer form, lrintf(-256 * scale), which
// therefore lies in [-32768, -1] and fits in int16_t. The input zero point,
// multiplier and output zero point are each replicated across 16 lanes.
//
// Returns sizeof(params->avx2).
size_t xnn_init_qu8_cvt_avx2_params(
  union xnn_qu8_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float input_output_scale,
  uint8_t input_zero_point,
  uint8_t output_zero_point)
{
  assert(input_output_scale >= 0x1.0p-8);
  assert(input_output_scale <= 0x1.0p+7);

  const long multiplier = lrintf(-256.0f * input_output_scale);
  assert(multiplier <= -1L);
  assert(multiplier >= -32768L);

  const uint16_t wide_input_zero_point = (uint16_t) input_zero_point;
  const int16_t narrow_multiplier = (int16_t) multiplier;
  const int16_t wide_output_zero_point = (int16_t) output_zero_point;
  for (uint32_t lane = 0; lane < 16; lane++) {
    params->avx2.output_zero_point[lane] = wide_output_zero_point;
    params->avx2.multiplier[lane] = narrow_multiplier;
    params->avx2.input_zero_point[lane] = wide_input_zero_point;
  }
  return sizeof(params->avx2);
}
6726*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6727*4bdc9457SAndroid Build Coastguard Worker
6728*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Initializes the wasmsimd member of xnn_qu8_cvt_params.
//
// input_output_scale must lie in [2^-8, 2^7] (checked by assert). The stored
// multiplier is the NEGATED scale in integer form, lrintf(-256 * scale), which
// therefore lies in [-32768, -1] and fits in int16_t. The input zero point,
// multiplier and output zero point are each replicated across 4 entries.
//
// Returns sizeof(params->wasmsimd).
size_t xnn_init_qu8_cvt_wasmsimd_params(
  union xnn_qu8_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float input_output_scale,
  uint8_t input_zero_point,
  uint8_t output_zero_point)
{
  assert(input_output_scale >= 0x1.0p-8);
  assert(input_output_scale <= 0x1.0p+7);

  const long multiplier = lrintf(-256.0f * input_output_scale);
  assert(multiplier <= -1L);
  assert(multiplier >= -32768L);

  const uint16_t wide_input_zero_point = (uint16_t) input_zero_point;
  const int16_t narrow_multiplier = (int16_t) multiplier;
  const int16_t wide_output_zero_point = (int16_t) output_zero_point;
  for (uint32_t k = 0; k < 4; k++) {
    params->wasmsimd.output_zero_point[k] = wide_output_zero_point;
    params->wasmsimd.multiplier[k] = narrow_multiplier;
    params->wasmsimd.input_zero_point[k] = wide_input_zero_point;
  }
  return sizeof(params->wasmsimd);
}
6748*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
6749*4bdc9457SAndroid Build Coastguard Worker
// Initializes the scalar member of xnn_qu8_f32_cvt_params: stores scale as-is and
// the zero point widened to int32_t (NOT negated, unlike the SIMD variants).
//
// Returns sizeof(params->scalar).
size_t xnn_init_qu8_f32_cvt_scalar_params(
  union xnn_qu8_f32_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  uint8_t zero_point)
{
  const int32_t wide_zero_point = (int32_t) zero_point;
  params->scalar.scale = scale;
  params->scalar.zero_point = wide_zero_point;
  return sizeof(params->scalar);
}
6759*4bdc9457SAndroid Build Coastguard Worker
6760*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Initializes the neon member of xnn_qu8_f32_cvt_params: two entries of the negated
// zero point and a single scale value.
//
// Returns sizeof(params->neon).
size_t xnn_init_qu8_f32_cvt_neon_params(
  union xnn_qu8_f32_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  uint8_t zero_point)
{
  // Negated in int; the result is in [-255, 0] and fits in int16_t.
  const int16_t negated_zero_point = (int16_t) -(int32_t) zero_point;
  for (uint32_t k = 0; k < 2; k++) {
    params->neon.minus_zero_point[k] = negated_zero_point;
  }
  params->neon.scale = scale;
  return sizeof(params->neon);
}
6771*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6772*4bdc9457SAndroid Build Coastguard Worker
6773*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Initializes the sse2 member of xnn_qu8_f32_cvt_params.
//
// Stores:
//   - magic_exp[0..7]   = 0x4B00
//   - magic_bias[0..3]  = (float) (0x00800000 + zero_point)
//   - scale[0..3]       = scale
//
// Returns sizeof(params->sse2).
size_t xnn_init_qu8_f32_cvt_sse2_params(
  union xnn_qu8_f32_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  uint8_t zero_point)
{
  // 0x00800000 == 2^23; adding a uint8_t keeps the sum inside [2^23, 2^24),
  // where every integer is exactly representable as a float.
  const float biased_zero_point = (float) (INT32_C(0x00800000) + (int32_t) zero_point);
  for (uint32_t lane = 0; lane < 4; lane++) {
    params->sse2.scale[lane] = scale;
    params->sse2.magic_bias[lane] = biased_zero_point;
  }
  // NOTE(review): 0x4B00 matches the upper 16 bits of the float 0x1.0p+23 bit pattern
  // (0x4B000000); presumably the kernel interleaves it with input bytes - confirm there.
  for (uint32_t lane = 0; lane < 8; lane++) {
    params->sse2.magic_exp[lane] = UINT16_C(0x4B00);
  }
  return sizeof(params->sse2);
}
6789*4bdc9457SAndroid Build Coastguard Worker
// Initializes the sse4 member of xnn_qu8_f32_cvt_params: 4 lanes of the negated
// zero point (computed in 32-bit arithmetic) and 4 lanes of scale.
//
// Returns sizeof(params->sse4).
size_t xnn_init_qu8_f32_cvt_sse4_params(
  union xnn_qu8_f32_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  uint8_t zero_point)
{
  const int32_t negated_zero_point = -(int32_t) zero_point;
  for (uint32_t lane = 0; lane < 4; lane++) {
    params->sse4.scale[lane] = scale;
    params->sse4.minus_zero_point[lane] = negated_zero_point;
  }
  return sizeof(params->sse4);
}
6801*4bdc9457SAndroid Build Coastguard Worker
// Initializes the avx member of xnn_qu8_f32_cvt_params: 8 lanes of the negated
// zero point (computed in 32-bit arithmetic) and 8 lanes of scale.
//
// Returns sizeof(params->avx).
size_t xnn_init_qu8_f32_cvt_avx_params(
  union xnn_qu8_f32_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  uint8_t zero_point)
{
  const int32_t negated_zero_point = -(int32_t) zero_point;
  for (uint32_t lane = 0; lane < 8; lane++) {
    params->avx.scale[lane] = scale;
    params->avx.minus_zero_point[lane] = negated_zero_point;
  }
  return sizeof(params->avx);
}
6813*4bdc9457SAndroid Build Coastguard Worker
// Initializes the avx512 member of xnn_qu8_f32_cvt_params: 16 lanes of the negated
// zero point (computed in 32-bit arithmetic) and 16 lanes of scale.
//
// Returns sizeof(params->avx512).
size_t xnn_init_qu8_f32_cvt_avx512_params(
  union xnn_qu8_f32_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  uint8_t zero_point)
{
  const int32_t negated_zero_point = -(int32_t) zero_point;
  for (uint32_t lane = 0; lane < 16; lane++) {
    params->avx512.scale[lane] = scale;
    params->avx512.minus_zero_point[lane] = negated_zero_point;
  }
  return sizeof(params->avx512);
}
6825*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6826*4bdc9457SAndroid Build Coastguard Worker
6827*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Initializes the wasmsimd member of xnn_qu8_f32_cvt_params: 4 entries of the
// negated zero point and 2 entries of scale.
//
// Returns sizeof(params->wasmsimd).
size_t xnn_init_qu8_f32_cvt_wasmsimd_params(
  union xnn_qu8_f32_cvt_params params[XNN_MIN_ELEMENTS(1)],
  float scale,
  uint8_t zero_point)
{
  // Negated in int; the result is in [-255, 0] and fits in int16_t.
  const int16_t negated_zero_point = (int16_t) -(int32_t) zero_point;
  for (uint32_t k = 0; k < 2; k++) {
    params->wasmsimd.scale[k] = scale;
  }
  for (uint32_t k = 0; k < 4; k++) {
    params->wasmsimd.minus_zero_point[k] = negated_zero_point;
  }
  return sizeof(params->wasmsimd);
}
6841*4bdc9457SAndroid Build Coastguard Worker #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
6842