1*4bdc9457SAndroid Build Coastguard Worker // Copyright (c) Facebook, Inc. and its affiliates. 2*4bdc9457SAndroid Build Coastguard Worker // All rights reserved. 3*4bdc9457SAndroid Build Coastguard Worker // 4*4bdc9457SAndroid Build Coastguard Worker // Copyright 2019 Google LLC 5*4bdc9457SAndroid Build Coastguard Worker // 6*4bdc9457SAndroid Build Coastguard Worker // This source code is licensed under the BSD-style license found in the 7*4bdc9457SAndroid Build Coastguard Worker // LICENSE file in the root directory of this source tree. 8*4bdc9457SAndroid Build Coastguard Worker 9*4bdc9457SAndroid Build Coastguard Worker #include <arm_neon.h> 10*4bdc9457SAndroid Build Coastguard Worker 11*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/zip.h> 12*4bdc9457SAndroid Build Coastguard Worker 13*4bdc9457SAndroid Build Coastguard Worker xnn_x8_zip_x3_ukernel__neon(size_t n,const uint8_t * input,uint8_t * output)14*4bdc9457SAndroid Build Coastguard Workervoid xnn_x8_zip_x3_ukernel__neon( 15*4bdc9457SAndroid Build Coastguard Worker size_t n, 16*4bdc9457SAndroid Build Coastguard Worker const uint8_t* input, 17*4bdc9457SAndroid Build Coastguard Worker uint8_t* output) 18*4bdc9457SAndroid Build Coastguard Worker { 19*4bdc9457SAndroid Build Coastguard Worker const uint8_t* x = input; 20*4bdc9457SAndroid Build Coastguard Worker const uint8_t* y = (const uint8_t*) ((uintptr_t) x + n); 21*4bdc9457SAndroid Build Coastguard Worker const uint8_t* z = (const uint8_t*) ((uintptr_t) y + n); 22*4bdc9457SAndroid Build Coastguard Worker uint8_t* o = output; 23*4bdc9457SAndroid Build Coastguard Worker 24*4bdc9457SAndroid Build Coastguard Worker if (n >= 8) { 25*4bdc9457SAndroid Build Coastguard Worker do { 26*4bdc9457SAndroid Build Coastguard Worker uint8x8x3_t vxyz; 27*4bdc9457SAndroid Build Coastguard Worker vxyz.val[0] = vld1_u8(x); x += 8; 28*4bdc9457SAndroid Build Coastguard Worker vxyz.val[1] = vld1_u8(y); y += 8; 29*4bdc9457SAndroid Build Coastguard Worker vxyz.val[2] = vld1_u8(z); z += 8; 30*4bdc9457SAndroid Build Coastguard Worker vst3_u8(o, vxyz); o += 24; 31*4bdc9457SAndroid Build Coastguard Worker n -= 8; 32*4bdc9457SAndroid Build Coastguard Worker } while (n >= 8); 33*4bdc9457SAndroid Build Coastguard Worker if (n != 0) { 34*4bdc9457SAndroid Build Coastguard Worker const size_t address_increment = n - 8; 35*4bdc9457SAndroid Build Coastguard Worker uint8x8x3_t vxyz; 36*4bdc9457SAndroid Build Coastguard Worker vxyz.val[0] = vld1_u8((const uint8_t*) ((uintptr_t) x + address_increment)); 37*4bdc9457SAndroid Build Coastguard Worker vxyz.val[1] = vld1_u8((const uint8_t*) ((uintptr_t) y + address_increment)); 38*4bdc9457SAndroid Build Coastguard Worker vxyz.val[2] = vld1_u8((const uint8_t*) ((uintptr_t) z + address_increment)); 39*4bdc9457SAndroid Build Coastguard Worker vst3_u8((uint8_t*) ((uintptr_t) o + address_increment * 3), vxyz); 40*4bdc9457SAndroid Build Coastguard Worker } 41*4bdc9457SAndroid Build Coastguard Worker } else { 42*4bdc9457SAndroid Build Coastguard Worker do { 43*4bdc9457SAndroid Build Coastguard Worker const uint8_t vx = *x++; 44*4bdc9457SAndroid Build Coastguard Worker const uint8_t vy = *y++; 45*4bdc9457SAndroid Build Coastguard Worker const uint8_t vz = *z++; 46*4bdc9457SAndroid Build Coastguard Worker o[0] = vx; 47*4bdc9457SAndroid Build Coastguard Worker o[1] = vy; 48*4bdc9457SAndroid Build Coastguard Worker o[2] = vz; 49*4bdc9457SAndroid Build Coastguard Worker o += 3; 50*4bdc9457SAndroid Build Coastguard Worker } while (--n != 0); 51*4bdc9457SAndroid Build Coastguard Worker } 52*4bdc9457SAndroid Build Coastguard Worker } 53