1*4bdc9457SAndroid Build Coastguard Worker // Copyright 2019 Google LLC 2*4bdc9457SAndroid Build Coastguard Worker // 3*4bdc9457SAndroid Build Coastguard Worker // This source code is licensed under the BSD-style license found in the 4*4bdc9457SAndroid Build Coastguard Worker // LICENSE file in the root directory of this source tree. 5*4bdc9457SAndroid Build Coastguard Worker 6*4bdc9457SAndroid Build Coastguard Worker #include <assert.h> 7*4bdc9457SAndroid Build Coastguard Worker 8*4bdc9457SAndroid Build Coastguard Worker #include <arm_neon.h> 9*4bdc9457SAndroid Build Coastguard Worker 10*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/zip.h> 11*4bdc9457SAndroid Build Coastguard Worker 12*4bdc9457SAndroid Build Coastguard Worker xnn_x32_zip_x2_ukernel__neon(size_t n,const uint32_t * input,uint32_t * output)13*4bdc9457SAndroid Build Coastguard Workervoid xnn_x32_zip_x2_ukernel__neon( 14*4bdc9457SAndroid Build Coastguard Worker size_t n, 15*4bdc9457SAndroid Build Coastguard Worker const uint32_t* input, 16*4bdc9457SAndroid Build Coastguard Worker uint32_t* output) 17*4bdc9457SAndroid Build Coastguard Worker { 18*4bdc9457SAndroid Build Coastguard Worker assert(n != 0); 19*4bdc9457SAndroid Build Coastguard Worker assert(n % 4 == 0); 20*4bdc9457SAndroid Build Coastguard Worker 21*4bdc9457SAndroid Build Coastguard Worker const uint32_t* x = input; 22*4bdc9457SAndroid Build Coastguard Worker const uint32_t* y = (const uint32_t*) ((uintptr_t) x + n); 23*4bdc9457SAndroid Build Coastguard Worker uint32_t* o = output; 24*4bdc9457SAndroid Build Coastguard Worker 25*4bdc9457SAndroid Build Coastguard Worker while (n >= 16) { 26*4bdc9457SAndroid Build Coastguard Worker uint32x4x2_t vxy; 27*4bdc9457SAndroid Build Coastguard Worker vxy.val[0] = vld1q_u32(x); x += 4; 28*4bdc9457SAndroid Build Coastguard Worker vxy.val[1] = vld1q_u32(y); y += 4; 29*4bdc9457SAndroid Build Coastguard Worker vst2q_u32(o, vxy); o += 8; 30*4bdc9457SAndroid Build Coastguard Worker n -= 16; 31*4bdc9457SAndroid Build Coastguard Worker } 32*4bdc9457SAndroid Build Coastguard Worker if XNN_UNLIKELY(n != 0) { 33*4bdc9457SAndroid Build Coastguard Worker if (n & 8) { 34*4bdc9457SAndroid Build Coastguard Worker uint32x2x2_t vxy; 35*4bdc9457SAndroid Build Coastguard Worker vxy.val[0] = vld1_u32(x); x += 2; 36*4bdc9457SAndroid Build Coastguard Worker vxy.val[1] = vld1_u32(y); y += 2; 37*4bdc9457SAndroid Build Coastguard Worker vst2_u32(o, vxy); o += 4; 38*4bdc9457SAndroid Build Coastguard Worker } 39*4bdc9457SAndroid Build Coastguard Worker if (n & 4) { 40*4bdc9457SAndroid Build Coastguard Worker uint32x2_t vxy = vld1_dup_u32(x); 41*4bdc9457SAndroid Build Coastguard Worker vxy = vld1_lane_u32(y, vxy, 1); 42*4bdc9457SAndroid Build Coastguard Worker vst1_u32(o, vxy); 43*4bdc9457SAndroid Build Coastguard Worker } 44*4bdc9457SAndroid Build Coastguard Worker } 45*4bdc9457SAndroid Build Coastguard Worker } 46