// xref: /aosp_15_r20/external/libdav1d/src/arm/64/cdef16.S (revision c09093415860a1c2373dacd84c4fde00c507cdfd)
/*
 * Copyright © 2018, VideoLAN and dav1d authors
 * Copyright © 2020, Martin Storsjo
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

// NOTE(review): the function/endfunc, tables, filter and find_dir macros used
// further down are not defined in this file; presumably they come from these
// headers - confirm against the headers themselves.
#include "src/arm/asm.S"
#include "util.S"
#include "cdef_tmpl.S"

// pad_top_bot_16 s1, s2, w, stride, reg, ret
//
// Writes two consecutive rows of the padded 16-bit buffer at x0 from two
// source rows. padding_func_16 uses it once for the two rows above the block
// and once for the two rows below it.
//
// Each destination row is laid out as
//     [2 left pixels][w block pixels][2 right pixels]
// (2 bytes per pixel). Pixel pairs on a side whose edge flag is clear are
// written from s31 (the low 32 bits of v31) instead of being read from memory.
//
// Macro arguments:
//   s1, s2  General-purpose registers holding the byte addresses of the first
//           and second source row. When CDEF_HAVE_LEFT is set both are moved
//           back by 4 bytes (2 pixels), so they are modified.
//   w       Block width in pixels; 2*w is used as a byte offset.
//   stride  Destination row pitch in 16-bit elements (x0 advances by 2*stride
//           bytes per row).
//   reg     SIMD register size prefix (q or d in this file) used for the
//           w-pixel part of each row; one reg-sized access must cover exactly
//           w pixels.
//   ret     Nonzero: every path ends with ret. Zero: x0 is advanced past the
//           second row and execution continues at the local label 3 that the
//           macro defines as its last statement.
//
// Register inputs: x0 = destination, w7 = edge flags (bit 0 CDEF_HAVE_LEFT,
//                  bit 1 CDEF_HAVE_RIGHT), v31 = fill pattern.
// Clobbers:        x0, the s1/s2 argument registers, v0-v3, condition flags.
//
// Note that the bare names s1 and s3 in the body are the 32-bit SIMD scalar
// registers, whereas the backslash-prefixed s1/s2 are the macro arguments.
.macro pad_top_bot_16 s1, s2, w, stride, reg, ret
        tst             w7,  #1 // CDEF_HAVE_LEFT
        b.eq            2f
        // CDEF_HAVE_LEFT
        // Step back 2 pixels so that each load starts at the left border.
        sub             \s1,  \s1,  #4
        sub             \s2,  \s2,  #4
        tst             w7,  #2 // CDEF_HAVE_RIGHT
        b.eq            1f
        // CDEF_HAVE_LEFT+CDEF_HAVE_RIGHT
        // w pixels starting at the left border, followed by the remaining
        // 4 pixels (last 2 block pixels + 2 right border pixels).
        ldr             \reg\()0, [\s1]
        ldr             d1,       [\s1, #2*\w]
        ldr             \reg\()2, [\s2]
        ldr             d3,       [\s2, #2*\w]
        str             \reg\()0, [x0]
        str             d1,       [x0, #2*\w]
        add             x0,  x0,  #2*\stride
        str             \reg\()2, [x0]
        str             d3,       [x0, #2*\w]
.if \ret
        ret
.else
        add             x0,  x0,  #2*\stride
        b               3f
.endif

1:
        // CDEF_HAVE_LEFT+!CDEF_HAVE_RIGHT
        // Same as above, but only the last 2 block pixels are read after the
        // first w pixels; the 2 right border pixels come from the fill value.
        ldr             \reg\()0, [\s1]
        ldr             s1,       [\s1, #2*\w]
        ldr             \reg\()2, [\s2]
        ldr             s3,       [\s2, #2*\w]
        str             \reg\()0, [x0]
        str             s1,       [x0, #2*\w]
        str             s31,      [x0, #2*\w+4]
        add             x0,  x0,  #2*\stride
        str             \reg\()2, [x0]
        str             s3,       [x0, #2*\w]
        str             s31,      [x0, #2*\w+4]
.if \ret
        ret
.else
        add             x0,  x0,  #2*\stride
        b               3f
.endif

2:
        // !CDEF_HAVE_LEFT
        tst             w7,  #2 // CDEF_HAVE_RIGHT
        b.eq            1f
        // !CDEF_HAVE_LEFT+CDEF_HAVE_RIGHT
        // Left border from the fill value, then w block pixels and the
        // 2 right border pixels read from the source rows.
        ldr             \reg\()0, [\s1]
        ldr             s1,       [\s1, #2*\w]
        ldr             \reg\()2, [\s2]
        ldr             s3,       [\s2, #2*\w]
        str             s31, [x0]
        // stur: the #4 offset is not a multiple of the d/q access size, so
        // the unscaled-offset store form is used.
        stur            \reg\()0, [x0, #4]
        str             s1,       [x0, #4+2*\w]
        add             x0,  x0,  #2*\stride
        str             s31, [x0]
        stur            \reg\()2, [x0, #4]
        str             s3,       [x0, #4+2*\w]
.if \ret
        ret
.else
        add             x0,  x0,  #2*\stride
        b               3f
.endif

1:
        // !CDEF_HAVE_LEFT+!CDEF_HAVE_RIGHT
        // Only the w block pixels are read; both borders are filled.
        ldr             \reg\()0, [\s1]
        ldr             \reg\()1, [\s2]
        str             s31,      [x0]
        stur            \reg\()0, [x0, #4]
        str             s31,      [x0, #4+2*\w]
        add             x0,  x0,  #2*\stride
        str             s31,      [x0]
        stur            \reg\()1, [x0, #4]
        str             s31,      [x0, #4+2*\w]
.if \ret
        ret
.else
        add             x0,  x0,  #2*\stride
.endif
        // Common continuation point for the ret == 0 paths.
3:
.endm

// load_n_incr_16 dst, src, incr, w
//
// Loads one row of w 16-bit elements from the address in src into the vector
// register dst and then post-increments src by the register incr.
//   dst   Vector register name without arrangement suffix (e.g. v0).
//   src   General-purpose register holding the load address; updated.
//   incr  General-purpose register holding the post-increment in bytes.
//   w     4 selects a 4-halfword (64-bit) load; any other value selects an
//         8-halfword (128-bit) load.
.macro load_n_incr_16 dst, src, incr, w
.if \w == 4
        ld1             {\dst\().4h}, [\src], \incr
.else
        ld1             {\dst\().8h}, [\src], \incr
.endif
.endm

// void dav1d_cdef_paddingX_16bpc_neon(uint16_t *tmp, const pixel *src,
//                                     ptrdiff_t src_stride, const pixel (*left)[2],
//                                     const pixel *const top,
//                                     const pixel *const bottom, int h,
//                                     enum CdefEdgeFlags edges);
//
// padding_func_16 w, stride, reg
//
// Emits the exported function cdef_padding<w>_16bpc_neon, which fills a padded
// copy of a w x h block of 16-bit pixels: 2 extra rows above and below and
// 2 extra pixels left and right of the block. Border pixels whose edge flag is
// clear are set to the fill value 0x8000.
//
// Macro arguments:
//   w       Block width in pixels (instantiated with 8 and 4 in this file).
//   stride  Row pitch of tmp in 16-bit elements.
//   reg     SIMD register size prefix (q or d) that holds one w-pixel row.
//
// Register use (arguments in prototype order):
//   x0 = tmp         Points at the first block pixel; rewound on entry by
//                    2 rows and 2 pixels to the start of the padded area.
//   x1 = src         Current source row; advanced by x2 per row.
//   x2 = src_stride  Added directly to byte addresses.
//   x3 = left        2 pixels (4 bytes) are consumed per row.
//   x4 = top         First of two rows above the block (second at x4 + x2).
//   x5 = bottom      First of two rows below the block (second at x5 + x2).
//   w6 = h           Row count; the row loops test it only after the first
//                    iteration, so it must be at least 1.
//   w7 = edges       Bit 0 LEFT, bit 1 RIGHT, bit 2 TOP, bit 3 BOTTOM.
//   x9               Scratch: address of the second top/bottom row.
//   v30, v31         Fill pattern, 0x8000 in every 16-bit lane.
//   v0-v3            Row data scratch.
.macro padding_func_16 w, stride, reg
function cdef_padding\w\()_16bpc_neon, export=1
        movi            v30.8h,  #0x80, lsl #8
        mov             v31.16b, v30.16b
        // Rewind tmp by 2 rows + 2 pixels (2 bytes per element).
        sub             x0,  x0,  #2*(2*\stride+2)
        tst             w7,  #4 // CDEF_HAVE_TOP
        b.ne            1f
        // !CDEF_HAVE_TOP
        // Fill both top rows: 32 bytes for w == 4, 64 bytes for w == 8, which
        // is two full rows for the stride values this file instantiates
        // (8 and 16 elements).
        st1             {v30.8h, v31.8h}, [x0], #32
.if \w == 8
        st1             {v30.8h, v31.8h}, [x0], #32
.endif
        b               3f
1:
        // CDEF_HAVE_TOP
        add             x9,  x4,  x2
        pad_top_bot_16  x4,  x9, \w, \stride, \reg, 0

        // Middle section
        // One of four row loops is selected by the LEFT/RIGHT flags. In each
        // of them the flags set by subs survive up to b.gt, because the
        // stores and the plain add in between do not modify them.
3:
        tst             w7,  #1 // CDEF_HAVE_LEFT
        b.eq            2f
        // CDEF_HAVE_LEFT
        tst             w7,  #2 // CDEF_HAVE_RIGHT
        b.eq            1f
        // CDEF_HAVE_LEFT+CDEF_HAVE_RIGHT
0:
        ld1             {v0.s}[0], [x3], #4
        // The right border must be read before load_n_incr_16 advances x1.
        ldr             s2,       [x1, #2*\w]
        load_n_incr_16  v1,  x1,  x2,  \w
        subs            w6,  w6,  #1
        str             s0,       [x0]
        stur            \reg\()1, [x0, #4]
        str             s2,       [x0, #4+2*\w]
        add             x0,  x0,  #2*\stride
        b.gt            0b
        b               3f
1:
        // CDEF_HAVE_LEFT+!CDEF_HAVE_RIGHT
        ld1             {v0.s}[0], [x3], #4
        load_n_incr_16  v1,  x1,  x2,  \w
        subs            w6,  w6,  #1
        str             s0,       [x0]
        stur            \reg\()1, [x0, #4]
        str             s31,      [x0, #4+2*\w]
        add             x0,  x0,  #2*\stride
        b.gt            1b
        b               3f
2:
        tst             w7,  #2 // CDEF_HAVE_RIGHT
        b.eq            1f
        // !CDEF_HAVE_LEFT+CDEF_HAVE_RIGHT
0:
        // The right border must be read before load_n_incr_16 advances x1.
        ldr             s1,       [x1, #2*\w]
        load_n_incr_16  v0,  x1,  x2,  \w
        subs            w6,  w6,  #1
        str             s31,      [x0]
        stur            \reg\()0, [x0, #4]
        str             s1,       [x0, #4+2*\w]
        add             x0,  x0,  #2*\stride
        b.gt            0b
        b               3f
1:
        // !CDEF_HAVE_LEFT+!CDEF_HAVE_RIGHT
        load_n_incr_16  v0,  x1,  x2,  \w
        subs            w6,  w6,  #1
        str             s31,      [x0]
        stur            \reg\()0, [x0, #4]
        str             s31,      [x0, #4+2*\w]
        add             x0,  x0,  #2*\stride
        b.gt            1b

3:
        tst             w7,  #8 // CDEF_HAVE_BOTTOM
        b.ne            1f
        // !CDEF_HAVE_BOTTOM
        // Fill both bottom rows, same sizes as for the top rows.
        st1             {v30.8h, v31.8h}, [x0], #32
.if \w == 8
        st1             {v30.8h, v31.8h}, [x0], #32
.endif
        ret
1:
        // CDEF_HAVE_BOTTOM
        // ret == 1: every path of the macro returns from the function.
        add             x9,  x5,  x2
        pad_top_bot_16  x5,  x9, \w, \stride, \reg, 1
endfunc
.endm

// Instantiate the padding functions: 8-pixel-wide blocks use a 16-element row
// pitch and q registers, 4-pixel-wide blocks an 8-element row pitch and
// d registers.
padding_func_16 8, 16, q
padding_func_16 4, 8,  d

// NOTE(review): tables, filter and find_dir are macros that are not defined
// in this file; presumably they are provided by cdef_tmpl.S and expand to the
// shared tables and the 16 bpc filter / direction-search functions - confirm.
tables

filter 8, 16
filter 4, 16

find_dir 16