1 /*
2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <arm_neon.h>
12 #include <string.h>
13
14 #include "./vpx_dsp_rtcd.h"
15 #include "vpx/vpx_integer.h"
16
/*
 * Unfiltered block copy behind the convolve function signature: rows of a
 * fixed byte width are moved from src to dst, stepping by src_stride and
 * dst_stride between rows. The filter and the q4 position/step arguments are
 * accepted for signature compatibility and are not read.
 *
 * The number of bytes copied per row depends only on w:
 *   w < 8        ->  4 bytes (memcpy)
 *   w == 8       ->  8 bytes
 *   8 < w < 32   -> 16 bytes
 *   w == 32      -> 32 bytes
 *   otherwise    -> 64 bytes
 *
 * All paths except the 64-byte one copy two rows per pass and stop only when
 * h reaches exactly zero, so they rely on h being a positive even number.
 * The 64-byte path copies one row per pass and relies on h >= 1.
 *
 * NOTE(review): callers presumably pass w in {4, 8, 16, 32, 64} only -
 * confirm against the call sites.
 */
void vpx_convolve_copy_neon(const uint8_t *src, ptrdiff_t src_stride,
                            uint8_t *dst, ptrdiff_t dst_stride,
                            const InterpKernel *filter, int x0_q4,
                            int x_step_q4, int y0_q4, int y_step_q4, int w,
                            int h) {
  (void)filter;
  (void)x0_q4;
  (void)x_step_q4;
  (void)y0_q4;
  (void)y_step_q4;

  if (w < 8) {
    // 4-byte rows, two rows per pass.
    for (;;) {
      const uint8_t *const src_row1 = src + src_stride;
      uint8_t *const dst_row1 = dst + dst_stride;

      memcpy(dst, src, 4);
      memcpy(dst_row1, src_row1, 4);

      src = src_row1 + src_stride;
      dst = dst_row1 + dst_stride;
      h -= 2;
      if (h == 0) break;
    }
    return;
  }

  if (w == 8) {
    // 8-byte rows, two rows per pass. Both rows are loaded before any store.
    for (;;) {
      const uint8_t *const src_row1 = src + src_stride;
      uint8_t *const dst_row1 = dst + dst_stride;
      const uint8x8_t row0 = vld1_u8(src);
      const uint8x8_t row1 = vld1_u8(src_row1);

      vst1_u8(dst, row0);
      vst1_u8(dst_row1, row1);

      src = src_row1 + src_stride;
      dst = dst_row1 + dst_stride;
      h -= 2;
      if (h == 0) break;
    }
    return;
  }

  if (w < 32) {
    // 16-byte rows, two rows per pass. Both rows are loaded before any store.
    for (;;) {
      const uint8_t *const src_row1 = src + src_stride;
      uint8_t *const dst_row1 = dst + dst_stride;
      const uint8x16_t row0 = vld1q_u8(src);
      const uint8x16_t row1 = vld1q_u8(src_row1);

      vst1q_u8(dst, row0);
      vst1q_u8(dst_row1, row1);

      src = src_row1 + src_stride;
      dst = dst_row1 + dst_stride;
      h -= 2;
      if (h == 0) break;
    }
    return;
  }

  if (w == 32) {
    // 32-byte rows (two 16-byte halves), two rows per pass. All four loads
    // are issued before the first store.
    for (;;) {
      const uint8_t *const src_row1 = src + src_stride;
      uint8_t *const dst_row1 = dst + dst_stride;
      const uint8x16_t row0_lo = vld1q_u8(src);
      const uint8x16_t row0_hi = vld1q_u8(src + 16);
      const uint8x16_t row1_lo = vld1q_u8(src_row1);
      const uint8x16_t row1_hi = vld1q_u8(src_row1 + 16);

      vst1q_u8(dst, row0_lo);
      vst1q_u8(dst + 16, row0_hi);
      vst1q_u8(dst_row1, row1_lo);
      vst1q_u8(dst_row1 + 16, row1_hi);

      src = src_row1 + src_stride;
      dst = dst_row1 + dst_stride;
      h -= 2;
      if (h == 0) break;
    }
    return;
  }

  // 64-byte rows (four 16-byte quarters), one row per pass. The whole row is
  // loaded before it is stored.
  for (;;) {
    const uint8x16_t q0 = vld1q_u8(src);
    const uint8x16_t q1 = vld1q_u8(src + 16);
    const uint8x16_t q2 = vld1q_u8(src + 32);
    const uint8x16_t q3 = vld1q_u8(src + 48);

    vst1q_u8(dst, q0);
    vst1q_u8(dst + 16, q1);
    vst1q_u8(dst + 32, q2);
    vst1q_u8(dst + 48, q3);

    src += src_stride;
    dst += dst_stride;
    if (--h == 0) break;
  }
}
101