xref: /aosp_15_r20/external/libdav1d/src/arm/32/cdef.S (revision c09093415860a1c2373dacd84c4fde00c507cdfd)
1*c0909341SAndroid Build Coastguard Worker/*
2*c0909341SAndroid Build Coastguard Worker * Copyright © 2018, VideoLAN and dav1d authors
3*c0909341SAndroid Build Coastguard Worker * Copyright © 2019, Martin Storsjo
4*c0909341SAndroid Build Coastguard Worker * All rights reserved.
5*c0909341SAndroid Build Coastguard Worker *
6*c0909341SAndroid Build Coastguard Worker * Redistribution and use in source and binary forms, with or without
7*c0909341SAndroid Build Coastguard Worker * modification, are permitted provided that the following conditions are met:
8*c0909341SAndroid Build Coastguard Worker *
9*c0909341SAndroid Build Coastguard Worker * 1. Redistributions of source code must retain the above copyright notice, this
10*c0909341SAndroid Build Coastguard Worker *    list of conditions and the following disclaimer.
11*c0909341SAndroid Build Coastguard Worker *
12*c0909341SAndroid Build Coastguard Worker * 2. Redistributions in binary form must reproduce the above copyright notice,
13*c0909341SAndroid Build Coastguard Worker *    this list of conditions and the following disclaimer in the documentation
14*c0909341SAndroid Build Coastguard Worker *    and/or other materials provided with the distribution.
15*c0909341SAndroid Build Coastguard Worker *
16*c0909341SAndroid Build Coastguard Worker * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17*c0909341SAndroid Build Coastguard Worker * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18*c0909341SAndroid Build Coastguard Worker * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19*c0909341SAndroid Build Coastguard Worker * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20*c0909341SAndroid Build Coastguard Worker * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21*c0909341SAndroid Build Coastguard Worker * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22*c0909341SAndroid Build Coastguard Worker * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23*c0909341SAndroid Build Coastguard Worker * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24*c0909341SAndroid Build Coastguard Worker * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25*c0909341SAndroid Build Coastguard Worker * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26*c0909341SAndroid Build Coastguard Worker */
27*c0909341SAndroid Build Coastguard Worker
28*c0909341SAndroid Build Coastguard Worker#include "src/arm/asm.S"
29*c0909341SAndroid Build Coastguard Worker#include "util.S"
30*c0909341SAndroid Build Coastguard Worker#include "cdef_tmpl.S"
31*c0909341SAndroid Build Coastguard Worker
32*c0909341SAndroid Build Coastguard Worker// n1 = s0/d0
33*c0909341SAndroid Build Coastguard Worker// w1 = d0/q0
34*c0909341SAndroid Build Coastguard Worker// n2 = s4/d2
35*c0909341SAndroid Build Coastguard Worker// w2 = d2/q1
36*c0909341SAndroid Build Coastguard Worker.macro pad_top_bottom s1, s2, w, stride, n1, w1, n2, w2, align, ret
37*c0909341SAndroid Build Coastguard Worker        tst             r7,  #1 // CDEF_HAVE_LEFT
38*c0909341SAndroid Build Coastguard Worker        beq             2f
39*c0909341SAndroid Build Coastguard Worker        // CDEF_HAVE_LEFT
40*c0909341SAndroid Build Coastguard Worker        tst             r7,  #2 // CDEF_HAVE_RIGHT
41*c0909341SAndroid Build Coastguard Worker        beq             1f
42*c0909341SAndroid Build Coastguard Worker        // CDEF_HAVE_LEFT+CDEF_HAVE_RIGHT
43*c0909341SAndroid Build Coastguard Worker        ldrh            r12, [\s1, #-2]
44*c0909341SAndroid Build Coastguard Worker        vldr            \n1, [\s1]
45*c0909341SAndroid Build Coastguard Worker        vdup.16         d4,  r12
46*c0909341SAndroid Build Coastguard Worker        ldrh            r12, [\s1, #\w]
47*c0909341SAndroid Build Coastguard Worker        vmov.16         d4[1], r12
48*c0909341SAndroid Build Coastguard Worker        ldrh            r12, [\s2, #-2]
49*c0909341SAndroid Build Coastguard Worker        vldr            \n2, [\s2]
50*c0909341SAndroid Build Coastguard Worker        vmov.16         d4[2], r12
51*c0909341SAndroid Build Coastguard Worker        ldrh            r12, [\s2, #\w]
52*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q0,  d0
53*c0909341SAndroid Build Coastguard Worker        vmov.16         d4[3], r12
54*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q1,  d2
55*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q2,  d4
56*c0909341SAndroid Build Coastguard Worker        vstr            s8,  [r0, #-4]
57*c0909341SAndroid Build Coastguard Worker        vst1.16         {\w1}, [r0, :\align]
58*c0909341SAndroid Build Coastguard Worker        vstr            s9,  [r0, #2*\w]
59*c0909341SAndroid Build Coastguard Worker        add             r0,  r0,  #2*\stride
60*c0909341SAndroid Build Coastguard Worker        vstr            s10, [r0, #-4]
61*c0909341SAndroid Build Coastguard Worker        vst1.16         {\w2}, [r0, :\align]
62*c0909341SAndroid Build Coastguard Worker        vstr            s11, [r0, #2*\w]
63*c0909341SAndroid Build Coastguard Worker.if \ret
64*c0909341SAndroid Build Coastguard Worker        pop             {r4-r8,pc}
65*c0909341SAndroid Build Coastguard Worker.else
66*c0909341SAndroid Build Coastguard Worker        add             r0,  r0,  #2*\stride
67*c0909341SAndroid Build Coastguard Worker        b               3f
68*c0909341SAndroid Build Coastguard Worker.endif
69*c0909341SAndroid Build Coastguard Worker
70*c0909341SAndroid Build Coastguard Worker1:
71*c0909341SAndroid Build Coastguard Worker        // CDEF_HAVE_LEFT+!CDEF_HAVE_RIGHT
72*c0909341SAndroid Build Coastguard Worker        ldrh            r12, [\s1, #-2]
73*c0909341SAndroid Build Coastguard Worker        vldr            \n1, [\s1]
74*c0909341SAndroid Build Coastguard Worker        vdup.16         d4,  r12
75*c0909341SAndroid Build Coastguard Worker        ldrh            r12, [\s2, #-2]
76*c0909341SAndroid Build Coastguard Worker        vldr            \n2, [\s2]
77*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q0,  d0
78*c0909341SAndroid Build Coastguard Worker        vmov.16         d4[1], r12
79*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q1,  d2
80*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q2,  d4
81*c0909341SAndroid Build Coastguard Worker        vstr            s8,  [r0, #-4]
82*c0909341SAndroid Build Coastguard Worker        vst1.16         {\w1}, [r0, :\align]
83*c0909341SAndroid Build Coastguard Worker        vstr            s12, [r0, #2*\w]
84*c0909341SAndroid Build Coastguard Worker        add             r0,  r0,  #2*\stride
85*c0909341SAndroid Build Coastguard Worker        vstr            s9,  [r0, #-4]
86*c0909341SAndroid Build Coastguard Worker        vst1.16         {\w2}, [r0, :\align]
87*c0909341SAndroid Build Coastguard Worker        vstr            s12, [r0, #2*\w]
88*c0909341SAndroid Build Coastguard Worker.if \ret
89*c0909341SAndroid Build Coastguard Worker        pop             {r4-r8,pc}
90*c0909341SAndroid Build Coastguard Worker.else
91*c0909341SAndroid Build Coastguard Worker        add             r0,  r0,  #2*\stride
92*c0909341SAndroid Build Coastguard Worker        b               3f
93*c0909341SAndroid Build Coastguard Worker.endif
94*c0909341SAndroid Build Coastguard Worker
95*c0909341SAndroid Build Coastguard Worker2:
96*c0909341SAndroid Build Coastguard Worker        // !CDEF_HAVE_LEFT
97*c0909341SAndroid Build Coastguard Worker        tst             r7,  #2 // CDEF_HAVE_RIGHT
98*c0909341SAndroid Build Coastguard Worker        beq             1f
99*c0909341SAndroid Build Coastguard Worker        // !CDEF_HAVE_LEFT+CDEF_HAVE_RIGHT
100*c0909341SAndroid Build Coastguard Worker        vldr            \n1, [\s1]
101*c0909341SAndroid Build Coastguard Worker        ldrh            r12, [\s1, #\w]
102*c0909341SAndroid Build Coastguard Worker        vldr            \n2, [\s2]
103*c0909341SAndroid Build Coastguard Worker        vdup.16         d4,  r12
104*c0909341SAndroid Build Coastguard Worker        ldrh            r12, [\s2, #\w]
105*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q0,  d0
106*c0909341SAndroid Build Coastguard Worker        vmov.16         d4[1], r12
107*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q1,  d2
108*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q2,  d4
109*c0909341SAndroid Build Coastguard Worker        vstr            s12, [r0, #-4]
110*c0909341SAndroid Build Coastguard Worker        vst1.16         {\w1}, [r0, :\align]
111*c0909341SAndroid Build Coastguard Worker        vstr            s8,  [r0, #2*\w]
112*c0909341SAndroid Build Coastguard Worker        add             r0,  r0,  #2*\stride
113*c0909341SAndroid Build Coastguard Worker        vstr            s12, [r0, #-4]
114*c0909341SAndroid Build Coastguard Worker        vst1.16         {\w2}, [r0, :\align]
115*c0909341SAndroid Build Coastguard Worker        vstr            s9,  [r0, #2*\w]
116*c0909341SAndroid Build Coastguard Worker.if \ret
117*c0909341SAndroid Build Coastguard Worker        pop             {r4-r8,pc}
118*c0909341SAndroid Build Coastguard Worker.else
119*c0909341SAndroid Build Coastguard Worker        add             r0,  r0,  #2*\stride
120*c0909341SAndroid Build Coastguard Worker        b               3f
121*c0909341SAndroid Build Coastguard Worker.endif
122*c0909341SAndroid Build Coastguard Worker
123*c0909341SAndroid Build Coastguard Worker1:
124*c0909341SAndroid Build Coastguard Worker        // !CDEF_HAVE_LEFT+!CDEF_HAVE_RIGHT
125*c0909341SAndroid Build Coastguard Worker        vldr            \n1, [\s1]
126*c0909341SAndroid Build Coastguard Worker        vldr            \n2, [\s2]
127*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q0,  d0
128*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q1,  d2
129*c0909341SAndroid Build Coastguard Worker        vstr            s12, [r0, #-4]
130*c0909341SAndroid Build Coastguard Worker        vst1.16         {\w1}, [r0, :\align]
131*c0909341SAndroid Build Coastguard Worker        vstr            s12, [r0, #2*\w]
132*c0909341SAndroid Build Coastguard Worker        add             r0,  r0,  #2*\stride
133*c0909341SAndroid Build Coastguard Worker        vstr            s12, [r0, #-4]
134*c0909341SAndroid Build Coastguard Worker        vst1.16         {\w2}, [r0, :\align]
135*c0909341SAndroid Build Coastguard Worker        vstr            s12, [r0, #2*\w]
136*c0909341SAndroid Build Coastguard Worker.if \ret
137*c0909341SAndroid Build Coastguard Worker        pop             {r4-r8,pc}
138*c0909341SAndroid Build Coastguard Worker.else
139*c0909341SAndroid Build Coastguard Worker        add             r0,  r0,  #2*\stride
140*c0909341SAndroid Build Coastguard Worker.endif
141*c0909341SAndroid Build Coastguard Worker3:
142*c0909341SAndroid Build Coastguard Worker.endm
143*c0909341SAndroid Build Coastguard Worker
144*c0909341SAndroid Build Coastguard Worker.macro load_n_incr dst, src, incr, w
145*c0909341SAndroid Build Coastguard Worker.if \w == 4
146*c0909341SAndroid Build Coastguard Worker        vld1.32         {\dst\()[0]}, [\src, :32], \incr
147*c0909341SAndroid Build Coastguard Worker.else
148*c0909341SAndroid Build Coastguard Worker        vld1.8          {\dst\()},    [\src, :64], \incr
149*c0909341SAndroid Build Coastguard Worker.endif
150*c0909341SAndroid Build Coastguard Worker.endm
151*c0909341SAndroid Build Coastguard Worker
152*c0909341SAndroid Build Coastguard Worker// void dav1d_cdef_paddingX_8bpc_neon(uint16_t *tmp, const pixel *src,
153*c0909341SAndroid Build Coastguard Worker//                                    ptrdiff_t src_stride, const pixel (*left)[2],
154*c0909341SAndroid Build Coastguard Worker//                                    const pixel *const top,
155*c0909341SAndroid Build Coastguard Worker//                                    const pixel *const bottom, int h,
156*c0909341SAndroid Build Coastguard Worker//                                    enum CdefEdgeFlags edges);
157*c0909341SAndroid Build Coastguard Worker
158*c0909341SAndroid Build Coastguard Worker// n1 = s0/d0
159*c0909341SAndroid Build Coastguard Worker// w1 = d0/q0
160*c0909341SAndroid Build Coastguard Worker// n2 = s4/d2
161*c0909341SAndroid Build Coastguard Worker// w2 = d2/q1
162*c0909341SAndroid Build Coastguard Worker.macro padding_func w, stride, n1, w1, n2, w2, align
163*c0909341SAndroid Build Coastguard Workerfunction cdef_padding\w\()_8bpc_neon, export=1
164*c0909341SAndroid Build Coastguard Worker        push            {r4-r8,lr}
165*c0909341SAndroid Build Coastguard Worker        ldrd            r4,  r5,  [sp, #24]
166*c0909341SAndroid Build Coastguard Worker        ldrd            r6,  r7,  [sp, #32]
167*c0909341SAndroid Build Coastguard Worker        cmp             r7,  #0xf // fully edged
168*c0909341SAndroid Build Coastguard Worker        beq             cdef_padding\w\()_edged_8bpc_neon
169*c0909341SAndroid Build Coastguard Worker        vmov.i16        q3,  #0x8000
170*c0909341SAndroid Build Coastguard Worker        tst             r7,  #4 // CDEF_HAVE_TOP
171*c0909341SAndroid Build Coastguard Worker        bne             1f
172*c0909341SAndroid Build Coastguard Worker        // !CDEF_HAVE_TOP
173*c0909341SAndroid Build Coastguard Worker        sub             r12, r0,  #2*(2*\stride+2)
174*c0909341SAndroid Build Coastguard Worker        vmov.i16        q2,  #0x8000
175*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2,q3}, [r12]!
176*c0909341SAndroid Build Coastguard Worker.if \w == 8
177*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2,q3}, [r12]!
178*c0909341SAndroid Build Coastguard Worker.endif
179*c0909341SAndroid Build Coastguard Worker        b               3f
180*c0909341SAndroid Build Coastguard Worker1:
181*c0909341SAndroid Build Coastguard Worker        // CDEF_HAVE_TOP
182*c0909341SAndroid Build Coastguard Worker        add             r8,  r4,  r2
183*c0909341SAndroid Build Coastguard Worker        sub             r0,  r0,  #2*(2*\stride)
184*c0909341SAndroid Build Coastguard Worker        pad_top_bottom  r4,  r8,  \w, \stride, \n1, \w1, \n2, \w2, \align, 0
185*c0909341SAndroid Build Coastguard Worker
186*c0909341SAndroid Build Coastguard Worker        // Middle section
187*c0909341SAndroid Build Coastguard Worker3:
188*c0909341SAndroid Build Coastguard Worker        tst             r7,  #1 // CDEF_HAVE_LEFT
189*c0909341SAndroid Build Coastguard Worker        beq             2f
190*c0909341SAndroid Build Coastguard Worker        // CDEF_HAVE_LEFT
191*c0909341SAndroid Build Coastguard Worker        tst             r7,  #2 // CDEF_HAVE_RIGHT
192*c0909341SAndroid Build Coastguard Worker        beq             1f
193*c0909341SAndroid Build Coastguard Worker        // CDEF_HAVE_LEFT+CDEF_HAVE_RIGHT
194*c0909341SAndroid Build Coastguard Worker0:
195*c0909341SAndroid Build Coastguard Worker        vld1.16         {d2[]}, [r3, :16]!
196*c0909341SAndroid Build Coastguard Worker        ldrh            r12, [r1, #\w]
197*c0909341SAndroid Build Coastguard Worker        load_n_incr     d0,  r1,  r2,  \w
198*c0909341SAndroid Build Coastguard Worker        subs            r6,  r6,  #1
199*c0909341SAndroid Build Coastguard Worker        vmov.16         d2[1], r12
200*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q0,  d0
201*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q1,  d2
202*c0909341SAndroid Build Coastguard Worker        vstr            s4,  [r0, #-4]
203*c0909341SAndroid Build Coastguard Worker        vst1.16         {\w1}, [r0, :\align]
204*c0909341SAndroid Build Coastguard Worker        vstr            s5,  [r0, #2*\w]
205*c0909341SAndroid Build Coastguard Worker        add             r0,  r0,  #2*\stride
206*c0909341SAndroid Build Coastguard Worker        bgt             0b
207*c0909341SAndroid Build Coastguard Worker        b               3f
208*c0909341SAndroid Build Coastguard Worker1:
209*c0909341SAndroid Build Coastguard Worker        // CDEF_HAVE_LEFT+!CDEF_HAVE_RIGHT
210*c0909341SAndroid Build Coastguard Worker        vld1.16         {d2[]}, [r3, :16]!
211*c0909341SAndroid Build Coastguard Worker        load_n_incr     d0,  r1,  r2,  \w
212*c0909341SAndroid Build Coastguard Worker        subs            r6,  r6,  #1
213*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q0,  d0
214*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q1,  d2
215*c0909341SAndroid Build Coastguard Worker        vstr            s4,  [r0, #-4]
216*c0909341SAndroid Build Coastguard Worker        vst1.16         {\w1}, [r0, :\align]
217*c0909341SAndroid Build Coastguard Worker        vstr            s12, [r0, #2*\w]
218*c0909341SAndroid Build Coastguard Worker        add             r0,  r0,  #2*\stride
219*c0909341SAndroid Build Coastguard Worker        bgt             1b
220*c0909341SAndroid Build Coastguard Worker        b               3f
221*c0909341SAndroid Build Coastguard Worker2:
222*c0909341SAndroid Build Coastguard Worker        tst             r7,  #2 // CDEF_HAVE_RIGHT
223*c0909341SAndroid Build Coastguard Worker        beq             1f
224*c0909341SAndroid Build Coastguard Worker        // !CDEF_HAVE_LEFT+CDEF_HAVE_RIGHT
225*c0909341SAndroid Build Coastguard Worker0:
226*c0909341SAndroid Build Coastguard Worker        ldrh            r12, [r1, #\w]
227*c0909341SAndroid Build Coastguard Worker        load_n_incr     d0,  r1,  r2,  \w
228*c0909341SAndroid Build Coastguard Worker        vdup.16         d2,  r12
229*c0909341SAndroid Build Coastguard Worker        subs            r6,  r6,  #1
230*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q0,  d0
231*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q1,  d2
232*c0909341SAndroid Build Coastguard Worker        vstr            s12, [r0, #-4]
233*c0909341SAndroid Build Coastguard Worker        vst1.16         {\w1}, [r0, :\align]
234*c0909341SAndroid Build Coastguard Worker        vstr            s4,  [r0, #2*\w]
235*c0909341SAndroid Build Coastguard Worker        add             r0,  r0,  #2*\stride
236*c0909341SAndroid Build Coastguard Worker        bgt             0b
237*c0909341SAndroid Build Coastguard Worker        b               3f
238*c0909341SAndroid Build Coastguard Worker1:
239*c0909341SAndroid Build Coastguard Worker        // !CDEF_HAVE_LEFT+!CDEF_HAVE_RIGHT
240*c0909341SAndroid Build Coastguard Worker        load_n_incr     d0,  r1,  r2,  \w
241*c0909341SAndroid Build Coastguard Worker        subs            r6,  r6,  #1
242*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q0,  d0
243*c0909341SAndroid Build Coastguard Worker        vstr            s12, [r0, #-4]
244*c0909341SAndroid Build Coastguard Worker        vst1.16         {\w1}, [r0, :\align]
245*c0909341SAndroid Build Coastguard Worker        vstr            s12, [r0, #2*\w]
246*c0909341SAndroid Build Coastguard Worker        add             r0,  r0,  #2*\stride
247*c0909341SAndroid Build Coastguard Worker        bgt             1b
248*c0909341SAndroid Build Coastguard Worker
249*c0909341SAndroid Build Coastguard Worker3:
250*c0909341SAndroid Build Coastguard Worker        tst             r7,  #8 // CDEF_HAVE_BOTTOM
251*c0909341SAndroid Build Coastguard Worker        bne             1f
252*c0909341SAndroid Build Coastguard Worker        // !CDEF_HAVE_BOTTOM
253*c0909341SAndroid Build Coastguard Worker        sub             r12, r0,  #4
254*c0909341SAndroid Build Coastguard Worker        vmov.i16        q2,  #0x8000
255*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2,q3}, [r12]!
256*c0909341SAndroid Build Coastguard Worker.if \w == 8
257*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2,q3}, [r12]!
258*c0909341SAndroid Build Coastguard Worker.endif
259*c0909341SAndroid Build Coastguard Worker        pop             {r4-r8,pc}
260*c0909341SAndroid Build Coastguard Worker1:
261*c0909341SAndroid Build Coastguard Worker        // CDEF_HAVE_BOTTOM
262*c0909341SAndroid Build Coastguard Worker        add             r8,  r5,  r2
263*c0909341SAndroid Build Coastguard Worker        pad_top_bottom  r5,  r8,  \w, \stride, \n1, \w1, \n2, \w2, \align, 1
264*c0909341SAndroid Build Coastguard Workerendfunc
265*c0909341SAndroid Build Coastguard Worker.endm
266*c0909341SAndroid Build Coastguard Worker
267*c0909341SAndroid Build Coastguard Workerpadding_func 8, 16, d0, q0, d2, q1, 128
268*c0909341SAndroid Build Coastguard Workerpadding_func 4, 8,  s0, d0, s4, d2, 64
269*c0909341SAndroid Build Coastguard Worker
270*c0909341SAndroid Build Coastguard Worker// void cdef_paddingX_edged_8bpc_neon(uint16_t *tmp, const pixel *src,
271*c0909341SAndroid Build Coastguard Worker//                                    ptrdiff_t src_stride, const pixel (*left)[2],
272*c0909341SAndroid Build Coastguard Worker//                                    const pixel *const top,
273*c0909341SAndroid Build Coastguard Worker//                                    const pixel *const bottom, int h,
274*c0909341SAndroid Build Coastguard Worker//                                    enum CdefEdgeFlags edges);
275*c0909341SAndroid Build Coastguard Worker
276*c0909341SAndroid Build Coastguard Worker.macro padding_func_edged w, stride, reg, align
277*c0909341SAndroid Build Coastguard Workerfunction cdef_padding\w\()_edged_8bpc_neon
278*c0909341SAndroid Build Coastguard Worker        sub             r0,  r0,  #(2*\stride)
279*c0909341SAndroid Build Coastguard Worker
280*c0909341SAndroid Build Coastguard Worker        ldrh            r12, [r4, #-2]
281*c0909341SAndroid Build Coastguard Worker        vldr            \reg, [r4]
282*c0909341SAndroid Build Coastguard Worker        add             r8,  r4,  r2
283*c0909341SAndroid Build Coastguard Worker        strh            r12, [r0, #-2]
284*c0909341SAndroid Build Coastguard Worker        ldrh            r12, [r4, #\w]
285*c0909341SAndroid Build Coastguard Worker        vstr            \reg, [r0]
286*c0909341SAndroid Build Coastguard Worker        strh            r12, [r0, #\w]
287*c0909341SAndroid Build Coastguard Worker
288*c0909341SAndroid Build Coastguard Worker        ldrh            r12, [r8, #-2]
289*c0909341SAndroid Build Coastguard Worker        vldr            \reg, [r8]
290*c0909341SAndroid Build Coastguard Worker        strh            r12, [r0, #\stride-2]
291*c0909341SAndroid Build Coastguard Worker        ldrh            r12, [r8, #\w]
292*c0909341SAndroid Build Coastguard Worker        vstr            \reg, [r0, #\stride]
293*c0909341SAndroid Build Coastguard Worker        strh            r12, [r0, #\stride+\w]
294*c0909341SAndroid Build Coastguard Worker        add             r0,  r0,  #2*\stride
295*c0909341SAndroid Build Coastguard Worker
296*c0909341SAndroid Build Coastguard Worker0:
297*c0909341SAndroid Build Coastguard Worker        ldrh            r12, [r3], #2
298*c0909341SAndroid Build Coastguard Worker        vldr            \reg, [r1]
299*c0909341SAndroid Build Coastguard Worker        str             r12, [r0, #-2]
300*c0909341SAndroid Build Coastguard Worker        ldrh            r12, [r1, #\w]
301*c0909341SAndroid Build Coastguard Worker        add             r1,  r1,  r2
302*c0909341SAndroid Build Coastguard Worker        subs            r6,  r6,  #1
303*c0909341SAndroid Build Coastguard Worker        vstr            \reg, [r0]
304*c0909341SAndroid Build Coastguard Worker        str             r12, [r0, #\w]
305*c0909341SAndroid Build Coastguard Worker        add             r0,  r0,  #\stride
306*c0909341SAndroid Build Coastguard Worker        bgt             0b
307*c0909341SAndroid Build Coastguard Worker
308*c0909341SAndroid Build Coastguard Worker        ldrh            r12, [r5, #-2]
309*c0909341SAndroid Build Coastguard Worker        vldr            \reg, [r5]
310*c0909341SAndroid Build Coastguard Worker        add             r8,  r5,  r2
311*c0909341SAndroid Build Coastguard Worker        strh            r12, [r0, #-2]
312*c0909341SAndroid Build Coastguard Worker        ldrh            r12, [r5, #\w]
313*c0909341SAndroid Build Coastguard Worker        vstr            \reg, [r0]
314*c0909341SAndroid Build Coastguard Worker        strh            r12, [r0, #\w]
315*c0909341SAndroid Build Coastguard Worker
316*c0909341SAndroid Build Coastguard Worker        ldrh            r12, [r8, #-2]
317*c0909341SAndroid Build Coastguard Worker        vldr            \reg, [r8]
318*c0909341SAndroid Build Coastguard Worker        strh            r12, [r0, #\stride-2]
319*c0909341SAndroid Build Coastguard Worker        ldrh            r12, [r8, #\w]
320*c0909341SAndroid Build Coastguard Worker        vstr            \reg, [r0, #\stride]
321*c0909341SAndroid Build Coastguard Worker        strh            r12, [r0, #\stride+\w]
322*c0909341SAndroid Build Coastguard Worker
323*c0909341SAndroid Build Coastguard Worker        pop             {r4-r8,pc}
324*c0909341SAndroid Build Coastguard Workerendfunc
325*c0909341SAndroid Build Coastguard Worker.endm
326*c0909341SAndroid Build Coastguard Worker
327*c0909341SAndroid Build Coastguard Workerpadding_func_edged 8, 16, d0, 64
328*c0909341SAndroid Build Coastguard Workerpadding_func_edged 4, 8,  s0, 32
329*c0909341SAndroid Build Coastguard Worker
330*c0909341SAndroid Build Coastguard Workertables
331*c0909341SAndroid Build Coastguard Worker
332*c0909341SAndroid Build Coastguard Workerfilter 8, 8
333*c0909341SAndroid Build Coastguard Workerfilter 4, 8
334*c0909341SAndroid Build Coastguard Worker
335*c0909341SAndroid Build Coastguard Workerfind_dir 8
336*c0909341SAndroid Build Coastguard Worker
337*c0909341SAndroid Build Coastguard Worker.macro load_px_8 d11, d12, d21, d22, w
338*c0909341SAndroid Build Coastguard Worker.if \w == 8
339*c0909341SAndroid Build Coastguard Worker        add             r6,  r2,  r9         // x + off
340*c0909341SAndroid Build Coastguard Worker        sub             r9,  r2,  r9         // x - off
341*c0909341SAndroid Build Coastguard Worker        vld1.8          {\d11}, [r6]         // p0
342*c0909341SAndroid Build Coastguard Worker        add             r6,  r6,  #16        // += stride
343*c0909341SAndroid Build Coastguard Worker        vld1.8          {\d21}, [r9]         // p1
344*c0909341SAndroid Build Coastguard Worker        add             r9,  r9,  #16        // += stride
345*c0909341SAndroid Build Coastguard Worker        vld1.8          {\d12}, [r6]         // p0
346*c0909341SAndroid Build Coastguard Worker        vld1.8          {\d22}, [r9]         // p1
347*c0909341SAndroid Build Coastguard Worker.else
348*c0909341SAndroid Build Coastguard Worker        add             r6,  r2,  r9         // x + off
349*c0909341SAndroid Build Coastguard Worker        sub             r9,  r2,  r9         // x - off
350*c0909341SAndroid Build Coastguard Worker        vld1.32         {\d11[0]}, [r6]      // p0
351*c0909341SAndroid Build Coastguard Worker        add             r6,  r6,  #8         // += stride
352*c0909341SAndroid Build Coastguard Worker        vld1.32         {\d21[0]}, [r9]      // p1
353*c0909341SAndroid Build Coastguard Worker        add             r9,  r9,  #8         // += stride
354*c0909341SAndroid Build Coastguard Worker        vld1.32         {\d11[1]}, [r6]      // p0
355*c0909341SAndroid Build Coastguard Worker        add             r6,  r6,  #8         // += stride
356*c0909341SAndroid Build Coastguard Worker        vld1.32         {\d21[1]}, [r9]      // p1
357*c0909341SAndroid Build Coastguard Worker        add             r9,  r9,  #8         // += stride
358*c0909341SAndroid Build Coastguard Worker        vld1.32         {\d12[0]}, [r6]      // p0
359*c0909341SAndroid Build Coastguard Worker        add             r6,  r6,  #8         // += stride
360*c0909341SAndroid Build Coastguard Worker        vld1.32         {\d22[0]}, [r9]      // p1
361*c0909341SAndroid Build Coastguard Worker        add             r9,  r9,  #8         // += stride
362*c0909341SAndroid Build Coastguard Worker        vld1.32         {\d12[1]}, [r6]      // p0
363*c0909341SAndroid Build Coastguard Worker        vld1.32         {\d22[1]}, [r9]      // p1
364*c0909341SAndroid Build Coastguard Worker.endif
365*c0909341SAndroid Build Coastguard Worker.endm
366*c0909341SAndroid Build Coastguard Worker.macro handle_pixel_8 s1, s2, thresh_vec, shift, tap, min
367*c0909341SAndroid Build Coastguard Worker.if \min
368*c0909341SAndroid Build Coastguard Worker        vmin.u8         q3,  q3,  \s1
369*c0909341SAndroid Build Coastguard Worker        vmax.u8         q4,  q4,  \s1
370*c0909341SAndroid Build Coastguard Worker        vmin.u8         q3,  q3,  \s2
371*c0909341SAndroid Build Coastguard Worker        vmax.u8         q4,  q4,  \s2
372*c0909341SAndroid Build Coastguard Worker.endif
373*c0909341SAndroid Build Coastguard Worker        vabd.u8         q8,  q0,  \s1        // abs(diff)
374*c0909341SAndroid Build Coastguard Worker        vabd.u8         q11, q0,  \s2        // abs(diff)
375*c0909341SAndroid Build Coastguard Worker        vshl.u8         q9,  q8,  \shift     // abs(diff) >> shift
376*c0909341SAndroid Build Coastguard Worker        vshl.u8         q12, q11, \shift     // abs(diff) >> shift
377*c0909341SAndroid Build Coastguard Worker        vqsub.u8        q9,  \thresh_vec, q9 // clip = imax(0, threshold - (abs(diff) >> shift))
378*c0909341SAndroid Build Coastguard Worker        vqsub.u8        q12, \thresh_vec, q12// clip = imax(0, threshold - (abs(diff) >> shift))
379*c0909341SAndroid Build Coastguard Worker        vcgt.u8         q10, q0,  \s1        // px > p0
380*c0909341SAndroid Build Coastguard Worker        vcgt.u8         q13, q0,  \s2        // px > p1
381*c0909341SAndroid Build Coastguard Worker        vmin.u8         q9,  q9,  q8         // imin(abs(diff), clip)
382*c0909341SAndroid Build Coastguard Worker        vmin.u8         q12, q12, q11        // imin(abs(diff), clip)
383*c0909341SAndroid Build Coastguard Worker        vneg.s8         q8,  q9              // -imin()
384*c0909341SAndroid Build Coastguard Worker        vneg.s8         q11, q12             // -imin()
385*c0909341SAndroid Build Coastguard Worker        vbsl            q10, q8,  q9         // constrain() = imax(imin(diff, clip), -clip)
386*c0909341SAndroid Build Coastguard Worker        vdup.8          d18, \tap            // taps[k]
387*c0909341SAndroid Build Coastguard Worker        vbsl            q13, q11, q12        // constrain() = imax(imin(diff, clip), -clip)
388*c0909341SAndroid Build Coastguard Worker        vmlal.s8        q1,  d20, d18        // sum += taps[k] * constrain()
389*c0909341SAndroid Build Coastguard Worker        vmlal.s8        q1,  d26, d18        // sum += taps[k] * constrain()
390*c0909341SAndroid Build Coastguard Worker        vmlal.s8        q2,  d21, d18        // sum += taps[k] * constrain()
391*c0909341SAndroid Build Coastguard Worker        vmlal.s8        q2,  d27, d18        // sum += taps[k] * constrain()
392*c0909341SAndroid Build Coastguard Worker.endm
393*c0909341SAndroid Build Coastguard Worker
394*c0909341SAndroid Build Coastguard Worker// void cdef_filterX_edged_neon(pixel *dst, ptrdiff_t dst_stride,
395*c0909341SAndroid Build Coastguard Worker//                              const uint16_t *tmp, int pri_strength,
396*c0909341SAndroid Build Coastguard Worker//                              int sec_strength, int dir, int damping,
397*c0909341SAndroid Build Coastguard Worker//                              int h, size_t edges);
398*c0909341SAndroid Build Coastguard Worker.macro filter_func_8 w, pri, sec, min, suffix
399*c0909341SAndroid Build Coastguard Workerfunction cdef_filter\w\suffix\()_edged_neon
400*c0909341SAndroid Build Coastguard Worker.if \pri
401*c0909341SAndroid Build Coastguard Worker        movrel_local    r8,  pri_taps
402*c0909341SAndroid Build Coastguard Worker        and             r9,  r3,  #1
403*c0909341SAndroid Build Coastguard Worker        add             r8,  r8,  r9, lsl #1
404*c0909341SAndroid Build Coastguard Worker.endif
405*c0909341SAndroid Build Coastguard Worker        movrel_local    r9,  directions\w
406*c0909341SAndroid Build Coastguard Worker        add             r5,  r9,  r5, lsl #1
407*c0909341SAndroid Build Coastguard Worker        vmov.u8         d17, #7
408*c0909341SAndroid Build Coastguard Worker        vdup.8          d16, r6              // damping
409*c0909341SAndroid Build Coastguard Worker
410*c0909341SAndroid Build Coastguard Worker        vmov.8          d8[0], r3
411*c0909341SAndroid Build Coastguard Worker        vmov.8          d8[1], r4
412*c0909341SAndroid Build Coastguard Worker        vclz.i8         d8,  d8              // clz(threshold)
413*c0909341SAndroid Build Coastguard Worker        vsub.i8         d8,  d17, d8         // ulog2(threshold)
414*c0909341SAndroid Build Coastguard Worker        vqsub.u8        d8,  d16, d8         // shift = imax(0, damping - ulog2(threshold))
415*c0909341SAndroid Build Coastguard Worker        vneg.s8         d8,  d8              // -shift
416*c0909341SAndroid Build Coastguard Worker.if \sec
417*c0909341SAndroid Build Coastguard Worker        vdup.8          q6,  d8[1]
418*c0909341SAndroid Build Coastguard Worker.endif
419*c0909341SAndroid Build Coastguard Worker.if \pri
420*c0909341SAndroid Build Coastguard Worker        vdup.8          q5,  d8[0]
421*c0909341SAndroid Build Coastguard Worker.endif
422*c0909341SAndroid Build Coastguard Worker
423*c0909341SAndroid Build Coastguard Worker1:
424*c0909341SAndroid Build Coastguard Worker.if \w == 8
425*c0909341SAndroid Build Coastguard Worker        add             r12, r2,  #16
426*c0909341SAndroid Build Coastguard Worker        vld1.8          {d0},  [r2,  :64]    // px
427*c0909341SAndroid Build Coastguard Worker        vld1.8          {d1},  [r12, :64]    // px
428*c0909341SAndroid Build Coastguard Worker.else
429*c0909341SAndroid Build Coastguard Worker        add             r12, r2,  #8
430*c0909341SAndroid Build Coastguard Worker        vld1.32         {d0[0]},  [r2,  :32] // px
431*c0909341SAndroid Build Coastguard Worker        add             r9,  r2,  #2*8
432*c0909341SAndroid Build Coastguard Worker        vld1.32         {d0[1]},  [r12, :32] // px
433*c0909341SAndroid Build Coastguard Worker        add             r12, r12, #2*8
434*c0909341SAndroid Build Coastguard Worker        vld1.32         {d1[0]},  [r9,  :32] // px
435*c0909341SAndroid Build Coastguard Worker        vld1.32         {d1[1]},  [r12, :32] // px
436*c0909341SAndroid Build Coastguard Worker.endif
437*c0909341SAndroid Build Coastguard Worker
438*c0909341SAndroid Build Coastguard Worker        vmov.u8         q1,  #0              // sum
439*c0909341SAndroid Build Coastguard Worker        vmov.u8         q2,  #0              // sum
440*c0909341SAndroid Build Coastguard Worker.if \min
441*c0909341SAndroid Build Coastguard Worker        vmov.u16        q3,  q0              // min
442*c0909341SAndroid Build Coastguard Worker        vmov.u16        q4,  q0              // max
443*c0909341SAndroid Build Coastguard Worker.endif
444*c0909341SAndroid Build Coastguard Worker
445*c0909341SAndroid Build Coastguard Worker        // Instead of loading sec_taps 2, 1 from memory, just set it
446*c0909341SAndroid Build Coastguard Worker        // to 2 initially and decrease for the second round.
447*c0909341SAndroid Build Coastguard Worker        // This is also used as loop counter.
448*c0909341SAndroid Build Coastguard Worker        mov             lr,  #2              // sec_taps[0]
449*c0909341SAndroid Build Coastguard Worker
450*c0909341SAndroid Build Coastguard Worker2:
451*c0909341SAndroid Build Coastguard Worker.if \pri
452*c0909341SAndroid Build Coastguard Worker        ldrsb           r9,  [r5]            // off1
453*c0909341SAndroid Build Coastguard Worker
454*c0909341SAndroid Build Coastguard Worker        load_px_8       d28, d29, d30, d31, \w
455*c0909341SAndroid Build Coastguard Worker.endif
456*c0909341SAndroid Build Coastguard Worker
457*c0909341SAndroid Build Coastguard Worker.if \sec
458*c0909341SAndroid Build Coastguard Worker        add             r5,  r5,  #4         // +2*2
459*c0909341SAndroid Build Coastguard Worker        ldrsb           r9,  [r5]            // off2
460*c0909341SAndroid Build Coastguard Worker.endif
461*c0909341SAndroid Build Coastguard Worker
462*c0909341SAndroid Build Coastguard Worker.if \pri
463*c0909341SAndroid Build Coastguard Worker        ldrb            r12, [r8]            // *pri_taps
464*c0909341SAndroid Build Coastguard Worker        vdup.8          q7,  r3              // threshold
465*c0909341SAndroid Build Coastguard Worker
466*c0909341SAndroid Build Coastguard Worker        handle_pixel_8  q14, q15, q7,  q5,  r12, \min
467*c0909341SAndroid Build Coastguard Worker.endif
468*c0909341SAndroid Build Coastguard Worker
469*c0909341SAndroid Build Coastguard Worker.if \sec
470*c0909341SAndroid Build Coastguard Worker        load_px_8       d28, d29, d30, d31, \w
471*c0909341SAndroid Build Coastguard Worker
472*c0909341SAndroid Build Coastguard Worker        add             r5,  r5,  #8         // +2*4
473*c0909341SAndroid Build Coastguard Worker        ldrsb           r9,  [r5]            // off3
474*c0909341SAndroid Build Coastguard Worker
475*c0909341SAndroid Build Coastguard Worker        vdup.8          q7,  r4              // threshold
476*c0909341SAndroid Build Coastguard Worker
477*c0909341SAndroid Build Coastguard Worker        handle_pixel_8  q14, q15, q7,  q6,  lr, \min
478*c0909341SAndroid Build Coastguard Worker
479*c0909341SAndroid Build Coastguard Worker        load_px_8       d28, d29, d30, d31, \w
480*c0909341SAndroid Build Coastguard Worker
481*c0909341SAndroid Build Coastguard Worker        handle_pixel_8  q14, q15, q7,  q6,  lr, \min
482*c0909341SAndroid Build Coastguard Worker
483*c0909341SAndroid Build Coastguard Worker        sub             r5,  r5,  #11        // r5 -= 2*(2+4); r5 += 1;
484*c0909341SAndroid Build Coastguard Worker.else
485*c0909341SAndroid Build Coastguard Worker        add             r5,  r5,  #1         // r5 += 1
486*c0909341SAndroid Build Coastguard Worker.endif
487*c0909341SAndroid Build Coastguard Worker        subs            lr,  lr,  #1         // sec_tap-- (value)
488*c0909341SAndroid Build Coastguard Worker.if \pri
489*c0909341SAndroid Build Coastguard Worker        add             r8,  r8,  #1         // pri_taps++ (pointer)
490*c0909341SAndroid Build Coastguard Worker.endif
491*c0909341SAndroid Build Coastguard Worker        bne             2b
492*c0909341SAndroid Build Coastguard Worker
493*c0909341SAndroid Build Coastguard Worker        vshr.s16        q14, q1,  #15        // -(sum < 0)
494*c0909341SAndroid Build Coastguard Worker        vshr.s16        q15, q2,  #15        // -(sum < 0)
495*c0909341SAndroid Build Coastguard Worker        vadd.i16        q1,  q1,  q14        // sum - (sum < 0)
496*c0909341SAndroid Build Coastguard Worker        vadd.i16        q2,  q2,  q15        // sum - (sum < 0)
497*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q1,  q1,  #4         // (8 + sum - (sum < 0)) >> 4
498*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q2,  q2,  #4         // (8 + sum - (sum < 0)) >> 4
499*c0909341SAndroid Build Coastguard Worker        vaddw.u8        q1,  q1,  d0         // px + (8 + sum ...) >> 4
500*c0909341SAndroid Build Coastguard Worker        vaddw.u8        q2,  q2,  d1         // px + (8 + sum ...) >> 4
501*c0909341SAndroid Build Coastguard Worker        vqmovun.s16     d0,  q1
502*c0909341SAndroid Build Coastguard Worker        vqmovun.s16     d1,  q2
503*c0909341SAndroid Build Coastguard Worker.if \min
504*c0909341SAndroid Build Coastguard Worker        vmin.u8         q0,  q0,  q4
505*c0909341SAndroid Build Coastguard Worker        vmax.u8         q0,  q0,  q3         // iclip(px + .., min, max)
506*c0909341SAndroid Build Coastguard Worker.endif
507*c0909341SAndroid Build Coastguard Worker.if \w == 8
508*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0}, [r0, :64], r1
509*c0909341SAndroid Build Coastguard Worker        add             r2,  r2,  #2*16      // tmp += 2*tmp_stride
510*c0909341SAndroid Build Coastguard Worker        subs            r7,  r7,  #2         // h -= 2
511*c0909341SAndroid Build Coastguard Worker        vst1.8          {d1}, [r0, :64], r1
512*c0909341SAndroid Build Coastguard Worker.else
513*c0909341SAndroid Build Coastguard Worker        vst1.32         {d0[0]}, [r0, :32], r1
514*c0909341SAndroid Build Coastguard Worker        add             r2,  r2,  #4*8       // tmp += 4*tmp_stride
515*c0909341SAndroid Build Coastguard Worker        vst1.32         {d0[1]}, [r0, :32], r1
516*c0909341SAndroid Build Coastguard Worker        subs            r7,  r7,  #4         // h -= 4
517*c0909341SAndroid Build Coastguard Worker        vst1.32         {d1[0]}, [r0, :32], r1
518*c0909341SAndroid Build Coastguard Worker        vst1.32         {d1[1]}, [r0, :32], r1
519*c0909341SAndroid Build Coastguard Worker.endif
520*c0909341SAndroid Build Coastguard Worker
521*c0909341SAndroid Build Coastguard Worker        // Reset pri_taps and directions back to the original point
522*c0909341SAndroid Build Coastguard Worker        sub             r5,  r5,  #2
523*c0909341SAndroid Build Coastguard Worker.if \pri
524*c0909341SAndroid Build Coastguard Worker        sub             r8,  r8,  #2
525*c0909341SAndroid Build Coastguard Worker.endif
526*c0909341SAndroid Build Coastguard Worker
527*c0909341SAndroid Build Coastguard Worker        bgt             1b
528*c0909341SAndroid Build Coastguard Worker        vpop            {q4-q7}
529*c0909341SAndroid Build Coastguard Worker        pop             {r4-r9,pc}
530*c0909341SAndroid Build Coastguard Workerendfunc
531*c0909341SAndroid Build Coastguard Worker.endm
532*c0909341SAndroid Build Coastguard Worker
533*c0909341SAndroid Build Coastguard Worker.macro filter_8 w
534*c0909341SAndroid Build Coastguard Workerfilter_func_8 \w, pri=1, sec=0, min=0, suffix=_pri
535*c0909341SAndroid Build Coastguard Workerfilter_func_8 \w, pri=0, sec=1, min=0, suffix=_sec
536*c0909341SAndroid Build Coastguard Workerfilter_func_8 \w, pri=1, sec=1, min=1, suffix=_pri_sec
537*c0909341SAndroid Build Coastguard Worker.endm
538*c0909341SAndroid Build Coastguard Worker
539*c0909341SAndroid Build Coastguard Workerfilter_8 8
540*c0909341SAndroid Build Coastguard Workerfilter_8 4
541