1 // Copyright (c) Facebook, Inc. and its affiliates. 2 // All rights reserved. 3 // 4 // Copyright 2019 Google LLC 5 // 6 // This source code is licensed under the BSD-style license found in the 7 // LICENSE file in the root directory of this source tree. 8 9 #include <arm_neon.h> 10 11 #include <xnnpack/zip.h> 12 13 xnn_x8_zip_x3_ukernel__neon(size_t n,const uint8_t * input,uint8_t * output)14void xnn_x8_zip_x3_ukernel__neon( 15 size_t n, 16 const uint8_t* input, 17 uint8_t* output) 18 { 19 const uint8_t* x = input; 20 const uint8_t* y = (const uint8_t*) ((uintptr_t) x + n); 21 const uint8_t* z = (const uint8_t*) ((uintptr_t) y + n); 22 uint8_t* o = output; 23 24 if (n >= 8) { 25 do { 26 uint8x8x3_t vxyz; 27 vxyz.val[0] = vld1_u8(x); x += 8; 28 vxyz.val[1] = vld1_u8(y); y += 8; 29 vxyz.val[2] = vld1_u8(z); z += 8; 30 vst3_u8(o, vxyz); o += 24; 31 n -= 8; 32 } while (n >= 8); 33 if (n != 0) { 34 const size_t address_increment = n - 8; 35 uint8x8x3_t vxyz; 36 vxyz.val[0] = vld1_u8((const uint8_t*) ((uintptr_t) x + address_increment)); 37 vxyz.val[1] = vld1_u8((const uint8_t*) ((uintptr_t) y + address_increment)); 38 vxyz.val[2] = vld1_u8((const uint8_t*) ((uintptr_t) z + address_increment)); 39 vst3_u8((uint8_t*) ((uintptr_t) o + address_increment * 3), vxyz); 40 } 41 } else { 42 do { 43 const uint8_t vx = *x++; 44 const uint8_t vy = *y++; 45 const uint8_t vz = *z++; 46 o[0] = vx; 47 o[1] = vy; 48 o[2] = vz; 49 o += 3; 50 } while (--n != 0); 51 } 52 } 53