xref: /aosp_15_r20/external/XNNPACK/src/cs16-bfly4/samples1-neon.c (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
// Copyright 2022 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
5*4bdc9457SAndroid Build Coastguard Worker 
6*4bdc9457SAndroid Build Coastguard Worker #include <assert.h>
7*4bdc9457SAndroid Build Coastguard Worker #include <stddef.h>
8*4bdc9457SAndroid Build Coastguard Worker #include <stdint.h>
9*4bdc9457SAndroid Build Coastguard Worker 
10*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/math.h>
11*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/fft.h>
12*4bdc9457SAndroid Build Coastguard Worker 
13*4bdc9457SAndroid Build Coastguard Worker #include <arm_neon.h>
14*4bdc9457SAndroid Build Coastguard Worker 
15*4bdc9457SAndroid Build Coastguard Worker 
// In-place 4-point butterfly over the 8 int16 elements at data[0..7].
//
// The variable naming in the original kernel (…r / …i) suggests the elements
// are four complex values stored as interleaved (real, imag) pairs:
// x0 = data[0..1], x1 = data[2..3], x2 = data[4..5], x3 = data[6..7].
//
// Parameters:
//   samples - must be 1 (asserted); not otherwise used.
//   data    - 8 int16 elements, read and then overwritten with the result.
//   stride  - must be non-zero (asserted); not otherwise used.
//   twiddle - must be non-NULL (asserted); never dereferenced here.
void xnn_cs16_bfly4_samples1_ukernel__neon(
    size_t samples,
    int16_t* data,
    const size_t stride,
    const int16_t* twiddle)
{
  assert(samples == 1);
  assert(data != NULL);
  assert(stride != 0);
  assert(twiddle != NULL);

  // Load all 8 elements and pre-scale them. vqrdmulhq_s16 is a saturating,
  // rounding, doubling multiply that keeps the high half, so multiplying by
  // 8191 scales each element by 8191/32768, i.e. roughly 1/4.
  const int16x8_t vinput = vld1q_s16(data);
  const int16x8_t vscaled = vqrdmulhq_s16(vinput, vdupq_n_s16(8191));

  const int16x4_t vlo = vget_low_s16(vscaled);   // elements 0..3 (x0, x1)
  const int16x4_t vhi = vget_high_s16(vscaled);  // elements 4..7 (x2, x3)

  // First stage: lanes 0-1 hold x0 +/- x2, lanes 2-3 hold x1 +/- x3.
  const int16x4_t vsum = vadd_s16(vlo, vhi);
  const int16x4_t vdiff = vsub_s16(vlo, vhi);

  // Rotate by two lanes so that (x1 + x3) lines up with (x0 + x2) in lanes 0-1.
  const int16x4_t vsum_rot = vext_s16(vsum, vsum, 2);
  const int16x4_t vy0 = vadd_s16(vsum, vsum_rot);
  const int16x4_t vy2 = vsub_s16(vsum, vsum_rot);

  // Reversing the lanes puts the imag/real parts of (x1 - x3) under the
  // real/imag parts of (x0 - x2). After the add/sub below:
  //   vy1r3i lane 0 = real part of output 1, lane 1 = imag part of output 3
  //   vy3r1i lane 0 = real part of output 3, lane 1 = imag part of output 1
  const int16x4_t vdiff_rev = vrev64_s16(vdiff);
  const int16x4_t vy1r3i = vadd_s16(vdiff, vdiff_rev);
  const int16x4_t vy3r1i = vsub_s16(vdiff, vdiff_rev);

  // Write the four results back. Outputs 0 and 2 are stored as one 32-bit lane
  // (two int16 elements) each; outputs 1 and 3 are assembled from single lanes.
  vst1_lane_u32((void*) &data[0], vreinterpret_u32_s16(vy0), 0);
  vst1_lane_s16(&data[2], vy1r3i, 0);
  vst1_lane_s16(&data[3], vy3r1i, 1);
  vst1_lane_u32((void*) &data[4], vreinterpret_u32_s16(vy2), 0);
  vst1_lane_s16(&data[6], vy3r1i, 0);
  vst1_lane_s16(&data[7], vy1r3i, 1);
}
51