// xref: /aosp_15_r20/external/XNNPACK/src/x32-packx/x4-wasmsimd.c (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
// Copyright 2020 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#include <wasm_simd128.h>

#include <xnnpack/packx.h>


xnn_x32_packx_ukernel_4x__wasmsimd(size_t m,size_t k,const uint32_t * restrict x_ptr,size_t x_stride,uint32_t * restrict y_ptr)13*4bdc9457SAndroid Build Coastguard Worker void xnn_x32_packx_ukernel_4x__wasmsimd(
14*4bdc9457SAndroid Build Coastguard Worker     size_t m,
15*4bdc9457SAndroid Build Coastguard Worker     size_t k,
16*4bdc9457SAndroid Build Coastguard Worker     const uint32_t* restrict x_ptr,
17*4bdc9457SAndroid Build Coastguard Worker     size_t x_stride,
18*4bdc9457SAndroid Build Coastguard Worker     uint32_t* restrict y_ptr)
19*4bdc9457SAndroid Build Coastguard Worker {
20*4bdc9457SAndroid Build Coastguard Worker   assert(m != 0);
21*4bdc9457SAndroid Build Coastguard Worker   assert(k != 0);
22*4bdc9457SAndroid Build Coastguard Worker 
23*4bdc9457SAndroid Build Coastguard Worker   const float* x0 = (const float*) x_ptr;
24*4bdc9457SAndroid Build Coastguard Worker   const float* x1 = (const float*) ((uintptr_t) x0 + x_stride);
25*4bdc9457SAndroid Build Coastguard Worker   if (m < 2) {
26*4bdc9457SAndroid Build Coastguard Worker     x1 = x0;
27*4bdc9457SAndroid Build Coastguard Worker   }
28*4bdc9457SAndroid Build Coastguard Worker   const float* x2 = (const float*) ((uintptr_t) x1 + x_stride);
29*4bdc9457SAndroid Build Coastguard Worker   if (m <= 2) {
30*4bdc9457SAndroid Build Coastguard Worker     x2 = x1;
31*4bdc9457SAndroid Build Coastguard Worker   }
32*4bdc9457SAndroid Build Coastguard Worker   const float* x3 = (const float*) ((uintptr_t) x2 + x_stride);
33*4bdc9457SAndroid Build Coastguard Worker   if (m != 4) {
34*4bdc9457SAndroid Build Coastguard Worker     x3 = x2;
35*4bdc9457SAndroid Build Coastguard Worker   }
36*4bdc9457SAndroid Build Coastguard Worker   float* y = (float*) y_ptr;
37*4bdc9457SAndroid Build Coastguard Worker 
38*4bdc9457SAndroid Build Coastguard Worker   for (; k >= 4; k -= 4) {
39*4bdc9457SAndroid Build Coastguard Worker     const v128_t vx0 = wasm_v128_load(x0);
40*4bdc9457SAndroid Build Coastguard Worker     x0 += 4;
41*4bdc9457SAndroid Build Coastguard Worker     const v128_t vx1 = wasm_v128_load(x1);
42*4bdc9457SAndroid Build Coastguard Worker     x1 += 4;
43*4bdc9457SAndroid Build Coastguard Worker     const v128_t vx2 = wasm_v128_load(x2);
44*4bdc9457SAndroid Build Coastguard Worker     x2 += 4;
45*4bdc9457SAndroid Build Coastguard Worker     const v128_t vx3 = wasm_v128_load(x3);
46*4bdc9457SAndroid Build Coastguard Worker     x3 += 4;
47*4bdc9457SAndroid Build Coastguard Worker 
48*4bdc9457SAndroid Build Coastguard Worker     const v128_t vt0 = wasm_v32x4_shuffle(vx0, vx1, 0, 4, 1, 5);
49*4bdc9457SAndroid Build Coastguard Worker     const v128_t vt1 = wasm_v32x4_shuffle(vx0, vx1, 2, 6, 3, 7);
50*4bdc9457SAndroid Build Coastguard Worker     const v128_t vt2 = wasm_v32x4_shuffle(vx2, vx3, 0, 4, 1, 5);
51*4bdc9457SAndroid Build Coastguard Worker     const v128_t vt3 = wasm_v32x4_shuffle(vx2, vx3, 2, 6, 3, 7);
52*4bdc9457SAndroid Build Coastguard Worker 
53*4bdc9457SAndroid Build Coastguard Worker     const v128_t vy0 = wasm_v32x4_shuffle(vt0, vt2, 0, 1, 4, 5);
54*4bdc9457SAndroid Build Coastguard Worker     wasm_v128_store(y, vy0);
55*4bdc9457SAndroid Build Coastguard Worker 
56*4bdc9457SAndroid Build Coastguard Worker     const v128_t vy1 = wasm_v32x4_shuffle(vt0, vt2, 2, 3, 6, 7);
57*4bdc9457SAndroid Build Coastguard Worker     wasm_v128_store(y + 4, vy1);
58*4bdc9457SAndroid Build Coastguard Worker 
59*4bdc9457SAndroid Build Coastguard Worker     const v128_t vy2 = wasm_v32x4_shuffle(vt1, vt3, 0, 1, 4, 5);
60*4bdc9457SAndroid Build Coastguard Worker     wasm_v128_store(y + 8, vy2);
61*4bdc9457SAndroid Build Coastguard Worker 
62*4bdc9457SAndroid Build Coastguard Worker     const v128_t vy3 = wasm_v32x4_shuffle(vt1, vt3, 2, 3, 6, 7);
63*4bdc9457SAndroid Build Coastguard Worker     wasm_v128_store(y + 12, vy3);
64*4bdc9457SAndroid Build Coastguard Worker 
65*4bdc9457SAndroid Build Coastguard Worker     y += 16;
66*4bdc9457SAndroid Build Coastguard Worker   }
67*4bdc9457SAndroid Build Coastguard Worker   if XNN_UNLIKELY(k != 0) {
68*4bdc9457SAndroid Build Coastguard Worker     do {
69*4bdc9457SAndroid Build Coastguard Worker       const float vx0 = *x0++;
70*4bdc9457SAndroid Build Coastguard Worker       const float vx1 = *x1++;
71*4bdc9457SAndroid Build Coastguard Worker       const float vx2 = *x2++;
72*4bdc9457SAndroid Build Coastguard Worker       const float vx3 = *x3++;
73*4bdc9457SAndroid Build Coastguard Worker       y[0] = vx0;
74*4bdc9457SAndroid Build Coastguard Worker       y[1] = vx1;
75*4bdc9457SAndroid Build Coastguard Worker       y[2] = vx2;
76*4bdc9457SAndroid Build Coastguard Worker       y[3] = vx3;
77*4bdc9457SAndroid Build Coastguard Worker       y += 4;
78*4bdc9457SAndroid Build Coastguard Worker     } while (--k != 0);
79*4bdc9457SAndroid Build Coastguard Worker   }
80*4bdc9457SAndroid Build Coastguard Worker }
81