xref: /aosp_15_r20/external/XNNPACK/src/math/cvt-f32-qu8-wasmsimd.c (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
// Copyright 2022 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
5*4bdc9457SAndroid Build Coastguard Worker 
6*4bdc9457SAndroid Build Coastguard Worker #include <assert.h>
7*4bdc9457SAndroid Build Coastguard Worker #include <stddef.h>
8*4bdc9457SAndroid Build Coastguard Worker #include <stdint.h>
9*4bdc9457SAndroid Build Coastguard Worker 
10*4bdc9457SAndroid Build Coastguard Worker #include <wasm_simd128.h>
11*4bdc9457SAndroid Build Coastguard Worker 
12*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/math-stubs.h>
13*4bdc9457SAndroid Build Coastguard Worker 
14*4bdc9457SAndroid Build Coastguard Worker 
xnn_math_f32_qu8_cvt__wasmsimd(size_t n,const float * input,uint8_t * output,uint8_t output_zero_point)15*4bdc9457SAndroid Build Coastguard Worker void xnn_math_f32_qu8_cvt__wasmsimd(
16*4bdc9457SAndroid Build Coastguard Worker     size_t n,
17*4bdc9457SAndroid Build Coastguard Worker     const float* input,
18*4bdc9457SAndroid Build Coastguard Worker     uint8_t* output,
19*4bdc9457SAndroid Build Coastguard Worker     uint8_t output_zero_point)
20*4bdc9457SAndroid Build Coastguard Worker {
21*4bdc9457SAndroid Build Coastguard Worker   assert(n % (16 * sizeof(uint8_t)) == 0);
22*4bdc9457SAndroid Build Coastguard Worker 
23*4bdc9457SAndroid Build Coastguard Worker   const v128_t vmin = wasm_f32x4_splat(12582912.0f - (float) (int32_t) output_zero_point);
24*4bdc9457SAndroid Build Coastguard Worker   const v128_t vfmagic = wasm_f32x4_const_splat(12582912.0f);
25*4bdc9457SAndroid Build Coastguard Worker   const v128_t vimagic = wasm_i32x4_splat(INT32_C(0x4B400000) - (int32_t) output_zero_point);
26*4bdc9457SAndroid Build Coastguard Worker   for (; n != 0; n -= 16 * sizeof(uint8_t)) {
27*4bdc9457SAndroid Build Coastguard Worker     const v128_t vx_ll = wasm_v128_load(input);
28*4bdc9457SAndroid Build Coastguard Worker     const v128_t vx_lh = wasm_v128_load(input + 4);
29*4bdc9457SAndroid Build Coastguard Worker     const v128_t vx_hl = wasm_v128_load(input + 8);
30*4bdc9457SAndroid Build Coastguard Worker     const v128_t vx_hh = wasm_v128_load(input + 12);
31*4bdc9457SAndroid Build Coastguard Worker     input += 16;
32*4bdc9457SAndroid Build Coastguard Worker 
33*4bdc9457SAndroid Build Coastguard Worker     v128_t vy_ll = wasm_f32x4_add(vx_ll, vfmagic);
34*4bdc9457SAndroid Build Coastguard Worker     v128_t vy_lh = wasm_f32x4_add(vx_lh, vfmagic);
35*4bdc9457SAndroid Build Coastguard Worker     v128_t vy_hl = wasm_f32x4_add(vx_hl, vfmagic);
36*4bdc9457SAndroid Build Coastguard Worker     v128_t vy_hh = wasm_f32x4_add(vx_hh, vfmagic);
37*4bdc9457SAndroid Build Coastguard Worker 
38*4bdc9457SAndroid Build Coastguard Worker     vy_ll = wasm_i32x4_max(vy_ll, vmin);
39*4bdc9457SAndroid Build Coastguard Worker     vy_lh = wasm_i32x4_max(vy_lh, vmin);
40*4bdc9457SAndroid Build Coastguard Worker     vy_hl = wasm_i32x4_max(vy_hl, vmin);
41*4bdc9457SAndroid Build Coastguard Worker     vy_hh = wasm_i32x4_max(vy_hh, vmin);
42*4bdc9457SAndroid Build Coastguard Worker 
43*4bdc9457SAndroid Build Coastguard Worker     vy_ll = wasm_i32x4_sub(vy_ll, vimagic);
44*4bdc9457SAndroid Build Coastguard Worker     vy_lh = wasm_i32x4_sub(vy_lh, vimagic);
45*4bdc9457SAndroid Build Coastguard Worker     vy_hl = wasm_i32x4_sub(vy_hl, vimagic);
46*4bdc9457SAndroid Build Coastguard Worker     vy_hh = wasm_i32x4_sub(vy_hh, vimagic);
47*4bdc9457SAndroid Build Coastguard Worker 
48*4bdc9457SAndroid Build Coastguard Worker     const v128_t vy_lo = wasm_i16x8_narrow_i32x4(vy_ll, vy_lh);
49*4bdc9457SAndroid Build Coastguard Worker     const v128_t vy_hi = wasm_i16x8_narrow_i32x4(vy_hl, vy_hh);
50*4bdc9457SAndroid Build Coastguard Worker 
51*4bdc9457SAndroid Build Coastguard Worker     const v128_t vout = wasm_u8x16_narrow_i16x8(vy_lo, vy_hi);
52*4bdc9457SAndroid Build Coastguard Worker     wasm_v128_store(output, vout);
53*4bdc9457SAndroid Build Coastguard Worker     output += 16;
54*4bdc9457SAndroid Build Coastguard Worker   }
55*4bdc9457SAndroid Build Coastguard Worker }
56