// xref: /aosp_15_r20/external/libdav1d/src/arm/32/cdef16.S (revision c09093415860a1c2373dacd84c4fde00c507cdfd)
/*
 * Copyright © 2018, VideoLAN and dav1d authors
 * Copyright © 2020, Martin Storsjo
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "src/arm/asm.S"
#include "util.S"
#include "cdef_tmpl.S"

// pad_top_bot_16: copy two rows of 16-bit pixels, together with the two
// pixels to the left and to the right of each row, from the source rows at
// \s1 and \s2 to the destination rows at r0 and r0 + 2*\stride bytes.
//
// Macro parameters:
//   s1, s2  core registers holding the addresses of the first and second
//           source row
//   w       row width in pixels (the right-hand pair sits at byte offset
//           2*\w from the start of a row)
//   stride  destination row pitch in 16-bit elements
//   r1, r2  NEON registers that carry the two rows:
//           r1 = d0/q0
//           r2 = d2/q1
//   align   alignment hint, in bits, applied to the row loads and stores
//   ret     non-zero: every path ends with "pop {r4-r8,pc}", i.e. it returns
//           from the enclosing function;
//           zero: r0 is advanced past the second row and execution continues
//           at the local label 3 that closes the macro
//
// Register expectations:
//   r0   destination pointer (modified)
//   r7   edge flags; bit 0 = CDEF_HAVE_LEFT, bit 1 = CDEF_HAVE_RIGHT
//   s12  fill value stored in place of a missing left/right pair
//        (padding_func_16 sets every 16-bit lane of q3 to 0x8000)
//
// Clobbers: s8-s11, \r1, \r2, r0 and the condition flags.
// Defines the numeric local labels 1, 2 and 3.
.macro pad_top_bot_16 s1, s2, w, stride, r1, r2, align, ret
        tst             r7,  #1 // CDEF_HAVE_LEFT
        beq             2f
        // CDEF_HAVE_LEFT
        tst             r7,  #2 // CDEF_HAVE_RIGHT
        beq             1f
        // CDEF_HAVE_LEFT+CDEF_HAVE_RIGHT
        // Both side pairs come from the source: s8/s9 = left/right of row 1,
        // s10/s11 = left/right of row 2.
        vldr            s8,  [\s1, #-4]
        vld1.16         {\r1}, [\s1, :\align]
        vldr            s9,  [\s1, #2*\w]
        vldr            s10, [\s2, #-4]
        vld1.16         {\r2}, [\s2, :\align]
        vldr            s11, [\s2, #2*\w]
        vstr            s8,  [r0, #-4]
        vst1.16         {\r1}, [r0, :\align]
        vstr            s9,  [r0, #2*\w]
        add             r0,  r0,  #2*\stride
        vstr            s10, [r0, #-4]
        vst1.16         {\r2}, [r0, :\align]
        vstr            s11, [r0, #2*\w]
.if \ret
        pop             {r4-r8,pc}
.else
        add             r0,  r0,  #2*\stride
        b               3f
.endif

1:
        // CDEF_HAVE_LEFT+!CDEF_HAVE_RIGHT
        // Left pairs come from the source (s8, s9); the right pairs are
        // written from the fill value in s12.
        vldr            s8,  [\s1, #-4]
        vld1.16         {\r1}, [\s1, :\align]
        vldr            s9,  [\s2, #-4]
        vld1.16         {\r2}, [\s2, :\align]
        vstr            s8,  [r0, #-4]
        vst1.16         {\r1}, [r0, :\align]
        vstr            s12, [r0, #2*\w]
        add             r0,  r0,  #2*\stride
        vstr            s9,  [r0, #-4]
        vst1.16         {\r2}, [r0, :\align]
        vstr            s12, [r0, #2*\w]
.if \ret
        pop             {r4-r8,pc}
.else
        add             r0,  r0,  #2*\stride
        b               3f
.endif

2:
        // !CDEF_HAVE_LEFT
        tst             r7,  #2 // CDEF_HAVE_RIGHT
        beq             1f
        // !CDEF_HAVE_LEFT+CDEF_HAVE_RIGHT
        // Right pairs come from the source (s8, s9); the left pairs are
        // written from the fill value in s12.
        vld1.16         {\r1}, [\s1, :\align]
        vldr            s8,  [\s1, #2*\w]
        vld1.16         {\r2}, [\s2, :\align]
        vldr            s9,  [\s2, #2*\w]
        vstr            s12, [r0, #-4]
        vst1.16         {\r1}, [r0, :\align]
        vstr            s8,  [r0, #2*\w]
        add             r0,  r0,  #2*\stride
        vstr            s12, [r0, #-4]
        vst1.16         {\r2}, [r0, :\align]
        vstr            s9,  [r0, #2*\w]
.if \ret
        pop             {r4-r8,pc}
.else
        add             r0,  r0,  #2*\stride
        b               3f
.endif

1:
        // !CDEF_HAVE_LEFT+!CDEF_HAVE_RIGHT
        // Only the rows themselves are read; both side pairs are written
        // from the fill value in s12.
        vld1.16         {\r1}, [\s1, :\align]
        vld1.16         {\r2}, [\s2, :\align]
        vstr            s12, [r0, #-4]
        vst1.16         {\r1}, [r0, :\align]
        vstr            s12, [r0, #2*\w]
        add             r0,  r0,  #2*\stride
        vstr            s12, [r0, #-4]
        vst1.16         {\r2}, [r0, :\align]
        vstr            s12, [r0, #2*\w]
.if \ret
        pop             {r4-r8,pc}
.else
        // Last variant: no branch needed, falls through to label 3 below.
        add             r0,  r0,  #2*\stride
.endif
3:
.endm

// void dav1d_cdef_paddingX_16bpc_neon(uint16_t *tmp, const pixel *src,
//                                     ptrdiff_t src_stride, const pixel (*left)[2],
//                                     const pixel *const top,
//                                     const pixel *const bottom, int h,
//                                     enum CdefEdgeFlags edges);
//
// padding_func_16 emits one such function, cdef_padding\w\()_16bpc_neon.
// The function writes, in order: two rows above the block, h rows of the
// block itself, and two rows below it. Every row is written together with
// the two 16-bit values on its left and on its right. Whatever the edge
// flags mark as unavailable is written as 0x8000 instead of being read.
//
// Macro parameters:
//   w       block width in pixels
//   stride  row pitch of tmp in 16-bit elements
//   r1, r2  NEON registers used to carry rows:
//           r1 = d0/q0
//           r2 = d2/q1
//   align   alignment hint, in bits, for the row loads and stores
//
// Register use after the prologue (argument registers per the prototype
// above, assuming the standard AAPCS argument order):
//   r0  = tmp, advanced row by row
//   r1  = src, advanced by r2 after every row
//   r2  = src_stride; added to addresses as-is, so it is a byte count
//   r3  = left, advanced by 4 bytes per row
//   r4  = top, r5 = bottom, r6 = h, r7 = edges (loaded from the stack)
//   r8  = address of the second top/bottom source row
//   r12 = write pointer for the 0x8000 fills
//   q3  = 0x8000 in every 16-bit lane for the whole function; s12 is its
//         lowest 32 bits and supplies a missing left/right pair
.macro padding_func_16 w, stride, r1, r2, align
function cdef_padding\w\()_16bpc_neon, export=1
        push            {r4-r8,lr}
        // Six registers were pushed (24 bytes), so the first stack argument
        // is now at [sp, #24].
        ldrd            r4,  r5,  [sp, #24]
        ldrd            r6,  r7,  [sp, #32]
        vmov.i16        q3,  #0x8000
        tst             r7,  #4 // CDEF_HAVE_TOP
        bne             1f
        // !CDEF_HAVE_TOP
        // Fill 2*\stride elements with 0x8000, starting two rows above and
        // two elements to the left of tmp. Each store writes 32 bytes
        // (16 elements). r0 itself is left untouched.
        sub             r12, r0,  #2*(2*\stride+2)
        vmov.i16        q2,  #0x8000
        vst1.16         {q2,q3}, [r12]!
.if \w == 8
        vst1.16         {q2,q3}, [r12]!
.endif
        // Resolves to the label 3 that closes the pad_top_bot_16 expansion
        // below, which falls straight through to the middle section.
        b               3f
1:
        // CDEF_HAVE_TOP
        add             r8,  r4,  r2
        // Step back two rows; with ret=0 pad_top_bot_16 advances r0 by two
        // rows again, so r0 is back at tmp when the middle section starts.
        sub             r0,  r0,  #2*(2*\stride)
        pad_top_bot_16  r4,  r8,  \w, \stride, \r1, \r2, \align, 0

        // Middle section
        // One of four loops runs, selected by the left/right edge flags.
        // Each loop body executes before h is tested, so at least one row
        // is always written.
3:
        tst             r7,  #1 // CDEF_HAVE_LEFT
        beq             2f
        // CDEF_HAVE_LEFT
        tst             r7,  #2 // CDEF_HAVE_RIGHT
        beq             1f
        // CDEF_HAVE_LEFT+CDEF_HAVE_RIGHT
0:
        // s4 = left pair from the left[] array, s5 = right pair read from
        // src before r1 is post-incremented by the row load.
        vld1.32         {d2[]}, [r3, :32]!
        vldr            s5,  [r1, #2*\w]
        vld1.16         {\r1}, [r1, :\align], r2
        subs            r6,  r6,  #1
        vstr            s4,  [r0, #-4]
        vst1.16         {\r1}, [r0, :\align]
        vstr            s5,  [r0, #2*\w]
        add             r0,  r0,  #2*\stride
        bgt             0b
        b               3f
1:
        // CDEF_HAVE_LEFT+!CDEF_HAVE_RIGHT
        vld1.32         {d2[]}, [r3, :32]!
        vld1.16         {\r1}, [r1, :\align], r2
        subs            r6,  r6,  #1
        vstr            s4,  [r0, #-4]
        vst1.16         {\r1}, [r0, :\align]
        vstr            s12, [r0, #2*\w]
        add             r0,  r0,  #2*\stride
        bgt             1b
        b               3f
2:
        tst             r7,  #2 // CDEF_HAVE_RIGHT
        beq             1f
        // !CDEF_HAVE_LEFT+CDEF_HAVE_RIGHT
0:
        vldr            s4,  [r1, #2*\w]
        vld1.16         {\r1}, [r1, :\align], r2
        subs            r6,  r6,  #1
        vstr            s12, [r0, #-4]
        vst1.16         {\r1}, [r0, :\align]
        vstr            s4,  [r0, #2*\w]
        add             r0,  r0,  #2*\stride
        bgt             0b
        b               3f
1:
        // !CDEF_HAVE_LEFT+!CDEF_HAVE_RIGHT
        vld1.16         {\r1}, [r1, :\align], r2
        subs            r6,  r6,  #1
        vstr            s12, [r0, #-4]
        vst1.16         {\r1}, [r0, :\align]
        vstr            s12, [r0, #2*\w]
        add             r0,  r0,  #2*\stride
        bgt             1b

3:
        tst             r7,  #8 // CDEF_HAVE_BOTTOM
        bne             1f
        // !CDEF_HAVE_BOTTOM
        // r0 now points at the row after the last middle row; fill
        // 2*\stride elements starting two elements to its left.
        sub             r12, r0,  #4
        // q2 overlaps s8-s11, which the top-row copy may have overwritten,
        // so it is set again here.
        vmov.i16        q2,  #0x8000
        vst1.16         {q2,q3}, [r12]!
.if \w == 8
        vst1.16         {q2,q3}, [r12]!
.endif
        pop             {r4-r8,pc}
1:
        // CDEF_HAVE_BOTTOM
        add             r8,  r5,  r2
        // ret=1: every path of the expansion returns with pop {r4-r8,pc}.
        pad_top_bot_16  r5,  r8,  \w, \stride, \r1, \r2, \align, 1
endfunc
.endm

// 8-pixel-wide variant: tmp stride 16, rows carried in q0/q1, 128-bit alignment.
padding_func_16 8, 16, q0, q1, 128
// 4-pixel-wide variant: tmp stride 8, rows carried in d0/d2, 64-bit alignment.
padding_func_16 4, 8,  d0, d2, 64

// The macros invoked below are not defined in this file; presumably they come
// from cdef_tmpl.S (included above) -- confirm there before changing arguments.
tables

filter 8, 16
filter 4, 16

find_dir 16

234