xref: /aosp_15_r20/external/libvpx/config/arm-neon/vpx_dsp/arm/vpx_convolve_avg_neon_asm.asm.S (revision fb1b10ab9aebc7c7068eedab379b749d7e3900be)
1@ This file was created from a .asm file
2@  using the ads2gas.pl script.
3.syntax unified
4@
5@  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
6@
7@  Use of this source code is governed by a BSD-style license
8@  that can be found in the LICENSE file in the root of the source
9@  tree. An additional intellectual property rights grant can be found
10@  in the file PATENTS.  All contributing project authors may
11@  be found in the AUTHORS file in the root of the source tree.
12@
13
14    .global vpx_convolve_avg_neon
15    .type vpx_convolve_avg_neon, function
16    .arm
17    .eabi_attribute 24, 1 @Tag_ABI_align_needed
18    .eabi_attribute 25, 1 @Tag_ABI_align_preserved
19
20    .text
21    .p2align 2
22
@ -----------------------------------------------------------------------
@ vpx_convolve_avg_neon
@
@ Replaces each destination byte with the rounding average of itself and
@ the matching source byte: dst = (dst + src + 1) >> 1 (vrhadd.u8).
@
@ Register use, as established by the code below:
@   r0  source read pointer, advanced by r1 after each row
@   r1  source row stride in bytes
@   r2  destination write pointer, advanced by r3 after each row
@   r3  destination row stride in bytes
@   r4  word loaded from [sp, #36] after the push; selects the width path
@       (reused as a stride adjustment in the 64-wide path)
@   r5  word loaded from [sp, #40] after the push; remaining-row counter
@   r6  destination read pointer (copy of r2)
@   lr  scratch stride adjustment in the 64-wide path
@ NOTE(review): r4/r5 are presumably the w and h arguments of the C
@ prototype; the prototype is not visible in this file -- confirm.
@
@ NEON registers q0-q3 and q8-q11 are overwritten and not restored.
@ Destination accesses carry alignment qualifiers (:128, :64 or :32);
@ source accesses carry none.
@ Every loop tests its counter after the body, so one iteration always
@ runs; all paths except the 64-wide one handle two rows per iteration.
@ -----------------------------------------------------------------------
23vpx_convolve_avg_neon: @ PROC
24    push                {r4-r6, lr}   @ save r4-r6 and the return address (16 bytes)
25    ldrd                r4, r5, [sp, #36]   @ r4, r5 = stack words at entry sp+20 and sp+24
26    mov                 r6, r2   @ r6 reads dst while r2 writes it
27
@ Dispatch on r4 (signed compares). The value is not validated: anything
@ above 32 takes the 64-wide path, 9..31 the 16-wide path, below 8 the
@ 4-wide path.
28    cmp                 r4, #32
29    bgt                 avg64   @ r4 > 32
30    beq                 avg32   @ r4 == 32
31    cmp                 r4, #8
32    bgt                 avg16   @ 8 < r4 < 32
33    beq                 avg8   @ r4 == 8
34    b                   avg4   @ r4 < 8
35
@ 64 bytes per row, one row per iteration.
36avg64:
37    sub                 lr, r1, #32   @ lr = src stride - 32: the first load of a row already advances r0 by 32
38    sub                 r4, r3, #32   @ r4 = dst stride - 32: same adjustment for r6 and r2
39avg64_h:
40    pld                 [r0, r1, lsl #1]   @ prefetch hint: src + 2 * stride
41    vld1.8              {q0-q1}, [r0]!   @ src bytes 0-31, r0 += 32
42    vld1.8              {q2-q3}, [r0], lr   @ src bytes 32-63, r0 now at the next src row
43    pld                 [r2, r3]   @ prefetch hint: dst + stride
44    vld1.8              {q8-q9},   [r6,:128]!   @ dst bytes 0-31, r6 += 32
45    vld1.8              {q10-q11}, [r6,:128], r4   @ dst bytes 32-63, r6 now at the next dst row
46    vrhadd.u8           q0, q0, q8   @ per-byte (src + dst + 1) >> 1
47    vrhadd.u8           q1, q1, q9
48    vrhadd.u8           q2, q2, q10
49    vrhadd.u8           q3, q3, q11
50    vst1.8              {q0-q1}, [r2,:128]!   @ write averaged bytes 0-31, r2 += 32
51    vst1.8              {q2-q3}, [r2,:128], r4   @ write bytes 32-63, r2 now at the next dst row
52    subs                r5, r5, #1   @ one row consumed
53    bgt                 avg64_h   @ repeat while the counter is still positive
54    pop                 {r4-r6, pc}   @ restore r4-r6 and return
55
@ 32 bytes per row, two rows per iteration.
56avg32:
57    vld1.8              {q0-q1}, [r0], r1   @ src row 0
58    vld1.8              {q2-q3}, [r0], r1   @ src row 1
59    vld1.8              {q8-q9},   [r6,:128], r3   @ dst row 0
60    vld1.8              {q10-q11}, [r6,:128], r3   @ dst row 1
61    pld                 [r0]   @ prefetch hints for the next two src rows ...
62    vrhadd.u8           q0, q0, q8
63    pld                 [r0, r1]
64    vrhadd.u8           q1, q1, q9
65    pld                 [r6]   @ ... and the next two dst rows, interleaved with the averaging
66    vrhadd.u8           q2, q2, q10
67    pld                 [r6, r3]
68    vrhadd.u8           q3, q3, q11
69    vst1.8              {q0-q1}, [r2,:128], r3   @ write averaged row 0
70    vst1.8              {q2-q3}, [r2,:128], r3   @ write averaged row 1
71    subs                r5, r5, #2   @ two rows consumed
72    bgt                 avg32
73    pop                 {r4-r6, pc}   @ restore r4-r6 and return
74
@ 16 bytes per row, two rows per iteration.
75avg16:
76    vld1.8              {q0}, [r0], r1   @ src row 0
77    vld1.8              {q1}, [r0], r1   @ src row 1
78    vld1.8              {q2}, [r6,:128], r3   @ dst row 0
79    vld1.8              {q3}, [r6,:128], r3   @ dst row 1
80    pld                 [r0]   @ prefetch hints for the next two src rows
81    pld                 [r0, r1]
82    vrhadd.u8           q0, q0, q2
83    pld                 [r6]   @ prefetch hints for the next two dst rows
84    pld                 [r6, r3]
85    vrhadd.u8           q1, q1, q3
86    vst1.8              {q0}, [r2,:128], r3   @ write averaged row 0
87    vst1.8              {q1}, [r2,:128], r3   @ write averaged row 1
88    subs                r5, r5, #2   @ two rows consumed
89    bgt                 avg16
90    pop                 {r4-r6, pc}   @ restore r4-r6 and return
91
@ 8 bytes per row, two rows per iteration. The two src rows fill d0/d1
@ (= q0) and the two dst rows fill d2/d3 (= q1), so a single q-register
@ vrhadd averages both rows.
92avg8:
93    vld1.8              {d0}, [r0], r1   @ src row 0 -> low half of q0
94    vld1.8              {d1}, [r0], r1   @ src row 1 -> high half of q0
95    vld1.8              {d2}, [r6,:64], r3   @ dst row 0 -> low half of q1
96    vld1.8              {d3}, [r6,:64], r3   @ dst row 1 -> high half of q1
97    pld                 [r0]   @ prefetch hints for the next two src rows
98    pld                 [r0, r1]
99    vrhadd.u8           q0, q0, q1   @ both rows averaged at once
100    pld                 [r6]   @ prefetch hints for the next two dst rows
101    pld                 [r6, r3]
102    vst1.8              {d0}, [r2,:64], r3   @ write averaged row 0
103    vst1.8              {d1}, [r2,:64], r3   @ write averaged row 1
104    subs                r5, r5, #2   @ two rows consumed
105    bgt                 avg8
106    pop                 {r4-r6, pc}   @ restore r4-r6 and return
107
@ 4 bytes per row, two rows per iteration. Each row occupies one 32-bit
@ lane: d0 holds the two src rows, d2 the two dst rows. No prefetch hints
@ are issued on this path.
108avg4:
109    vld1.32             {d0[0]}, [r0], r1   @ src row 0 -> lane 0
110    vld1.32             {d0[1]}, [r0], r1   @ src row 1 -> lane 1
111    vld1.32             {d2[0]}, [r6,:32], r3   @ dst row 0 -> lane 0
112    vld1.32             {d2[1]}, [r6,:32], r3   @ dst row 1 -> lane 1
113    vrhadd.u8           d0, d0, d2   @ both rows averaged at once
114    vst1.32             {d0[0]}, [r2,:32], r3   @ write averaged row 0
115    vst1.32             {d0[1]}, [r2,:32], r3   @ write averaged row 1
116    subs                r5, r5, #2   @ two rows consumed
117    bgt                 avg4
118    pop                 {r4-r6, pc}   @ restore r4-r6 and return
119.size vpx_convolve_avg_neon, .-vpx_convolve_avg_neon    @ ENDP
120
121    .section .note.GNU-stack,"",%progbits
122