xref: /aosp_15_r20/external/XNNPACK/src/x8-zip/x3-neon.c (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
8*4bdc9457SAndroid Build Coastguard Worker 
9*4bdc9457SAndroid Build Coastguard Worker #include <arm_neon.h>
10*4bdc9457SAndroid Build Coastguard Worker 
11*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/zip.h>
12*4bdc9457SAndroid Build Coastguard Worker 
13*4bdc9457SAndroid Build Coastguard Worker 
// Interleaves three byte streams into a single output stream.
//
// `input` is read as three back-to-back arrays of `n` bytes each: x starts at
// `input`, y starts `n` bytes later, and z starts `2 * n` bytes later. For
// every index i in [0, n) the kernel stores
//   output[3 * i + 0] = x[i]
//   output[3 * i + 1] = y[i]
//   output[3 * i + 2] = z[i]
//
// n      - number of bytes in each of the three input streams.
// input  - start of the x stream (3 * n bytes are read in total).
// output - destination for the 3 * n interleaved bytes.
//
// A request with n == 0 is a no-op.
void xnn_x8_zip_x3_ukernel__neon(
    size_t n,
    const uint8_t* input,
    uint8_t* output)
{
  // Nothing to interleave. Without this guard the scalar do/while below would
  // execute once and then decrement n past zero, wrapping to SIZE_MAX and
  // reading/writing far out of bounds.
  if (n == 0) {
    return;
  }

  const uint8_t* x = input;
  const uint8_t* y = (const uint8_t*) ((uintptr_t) x + n);
  const uint8_t* z = (const uint8_t*) ((uintptr_t) y + n);
  uint8_t* o = output;

  if (n >= 8) {
    // Main loop: take 8 bytes from each stream per iteration; vst3_u8 stores
    // the three vectors element-interleaved as 24 output bytes.
    do {
      uint8x8x3_t vxyz;
      vxyz.val[0] = vld1_u8(x); x += 8;
      vxyz.val[1] = vld1_u8(y); y += 8;
      vxyz.val[2] = vld1_u8(z); z += 8;
      vst3_u8(o, vxyz); o += 24;
      n -= 8;
    } while (n >= 8);
    if (n != 0) {
      // 1..7 elements remain. Rather than a scalar tail, process one more
      // full 8-wide block positioned so that it ends exactly at the end of
      // each stream; it overlaps (and re-stores) elements the loop above has
      // already handled.
      //
      // n - 8 wraps around as an unsigned value; adding it to an address is
      // therefore a modular step back by (8 - n) bytes, and by three times
      // that amount in the interleaved output.
      const size_t address_increment = n - 8;
      uint8x8x3_t vxyz;
      vxyz.val[0] = vld1_u8((const uint8_t*) ((uintptr_t) x + address_increment));
      vxyz.val[1] = vld1_u8((const uint8_t*) ((uintptr_t) y + address_increment));
      vxyz.val[2] = vld1_u8((const uint8_t*) ((uintptr_t) z + address_increment));
      vst3_u8((uint8_t*) ((uintptr_t) o + address_increment * 3), vxyz);
    }
  } else {
    // Fewer than 8 elements in total: an 8-wide access would run outside the
    // streams, so interleave one element at a time.
    do {
      const uint8_t vx = *x++;
      const uint8_t vy = *y++;
      const uint8_t vz = *z++;
      o[0] = vx;
      o[1] = vy;
      o[2] = vz;
      o += 3;
    } while (--n != 0);
  }
}
53