xref: /aosp_15_r20/external/libdav1d/src/arm/32/mc.S (revision c09093415860a1c2373dacd84c4fde00c507cdfd)
1*c0909341SAndroid Build Coastguard Worker/*
2*c0909341SAndroid Build Coastguard Worker * Copyright © 2018, VideoLAN and dav1d authors
3*c0909341SAndroid Build Coastguard Worker * Copyright © 2018, Janne Grunau
4*c0909341SAndroid Build Coastguard Worker * Copyright © 2018, Martin Storsjo
5*c0909341SAndroid Build Coastguard Worker * All rights reserved.
6*c0909341SAndroid Build Coastguard Worker *
7*c0909341SAndroid Build Coastguard Worker * Redistribution and use in source and binary forms, with or without
8*c0909341SAndroid Build Coastguard Worker * modification, are permitted provided that the following conditions are met:
9*c0909341SAndroid Build Coastguard Worker *
10*c0909341SAndroid Build Coastguard Worker * 1. Redistributions of source code must retain the above copyright notice, this
11*c0909341SAndroid Build Coastguard Worker *    list of conditions and the following disclaimer.
12*c0909341SAndroid Build Coastguard Worker *
13*c0909341SAndroid Build Coastguard Worker * 2. Redistributions in binary form must reproduce the above copyright notice,
14*c0909341SAndroid Build Coastguard Worker *    this list of conditions and the following disclaimer in the documentation
15*c0909341SAndroid Build Coastguard Worker *    and/or other materials provided with the distribution.
16*c0909341SAndroid Build Coastguard Worker *
17*c0909341SAndroid Build Coastguard Worker * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18*c0909341SAndroid Build Coastguard Worker * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19*c0909341SAndroid Build Coastguard Worker * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20*c0909341SAndroid Build Coastguard Worker * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
21*c0909341SAndroid Build Coastguard Worker * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22*c0909341SAndroid Build Coastguard Worker * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23*c0909341SAndroid Build Coastguard Worker * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24*c0909341SAndroid Build Coastguard Worker * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25*c0909341SAndroid Build Coastguard Worker * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26*c0909341SAndroid Build Coastguard Worker * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27*c0909341SAndroid Build Coastguard Worker */
28*c0909341SAndroid Build Coastguard Worker
29*c0909341SAndroid Build Coastguard Worker#include "src/arm/asm.S"
30*c0909341SAndroid Build Coastguard Worker#include "util.S"
31*c0909341SAndroid Build Coastguard Worker
// avg: produce 16 output pixels as the rounded average of two 16-bit inputs.
//   dst0, dst1  d registers, each receiving 8 unsigned 8-bit results
//   t0-t3       q registers used as scratch (all four are overwritten)
// Reads 16 halfwords from [r2] and 16 from [r3]; both addresses carry the
// :128 alignment qualifier and both pointers are post-incremented past the
// data that was read.
32*c0909341SAndroid Build Coastguard Worker.macro avg dst0, dst1, t0, t1, t2, t3
33*c0909341SAndroid Build Coastguard Worker        vld1.16         {\t0,\t1},   [r2, :128]!
34*c0909341SAndroid Build Coastguard Worker        vld1.16         {\t2,\t3},   [r3, :128]!
35*c0909341SAndroid Build Coastguard Worker        vadd.i16        \t0,   \t0,  \t2    // a + b (16-bit lanes)
36*c0909341SAndroid Build Coastguard Worker        vadd.i16        \t1,   \t1,  \t3
37*c0909341SAndroid Build Coastguard Worker        vqrshrun.s16    \dst0, \t0,  #5     // (a + b + 16) >> 5, saturated to unsigned 8-bit
38*c0909341SAndroid Build Coastguard Worker        vqrshrun.s16    \dst1, \t1,  #5
39*c0909341SAndroid Build Coastguard Worker.endm
40*c0909341SAndroid Build Coastguard Worker
// w_avg: produce 16 output pixels as a weighted blend of two 16-bit inputs.
//   dst0, dst1  d registers, each receiving 8 unsigned 8-bit results
//   t0-t3       q registers used as scratch (all four are overwritten)
// Expects q15 to hold the 16-bit multiplier in every lane; bidir_fn in this
// file sets it to (-weight) << 11 before using this macro. With that value
// the vqdmulh below yields ((a - b) * weight) >> 4, where a is read from [r2]
// and b from [r3], so the result is
//   sat_u8((b + (((a - b) * weight) >> 4) + 8) >> 4).
// Both r2 and r3 are post-incremented past the 16 halfwords read.
41*c0909341SAndroid Build Coastguard Worker.macro w_avg dst0, dst1, t0, t1, t2, t3
42*c0909341SAndroid Build Coastguard Worker        vld1.16         {\t0,\t1},   [r2, :128]!
43*c0909341SAndroid Build Coastguard Worker        vld1.16         {\t2,\t3},   [r3, :128]!
44*c0909341SAndroid Build Coastguard Worker        vsub.i16        \t0,   \t2,  \t0    // b - a
45*c0909341SAndroid Build Coastguard Worker        vsub.i16        \t1,   \t3,  \t1
46*c0909341SAndroid Build Coastguard Worker        vqdmulh.s16     \t0,   \t0,  q15    // ((b - a) * q15 * 2) >> 16
47*c0909341SAndroid Build Coastguard Worker        vqdmulh.s16     \t1,   \t1,  q15
48*c0909341SAndroid Build Coastguard Worker        vadd.i16        \t0,   \t2,  \t0    // b + scaled difference
49*c0909341SAndroid Build Coastguard Worker        vadd.i16        \t1,   \t3,  \t1
50*c0909341SAndroid Build Coastguard Worker        vqrshrun.s16    \dst0, \t0,  #4     // (x + 8) >> 4, saturated to unsigned 8-bit
51*c0909341SAndroid Build Coastguard Worker        vqrshrun.s16    \dst1, \t1,  #4
52*c0909341SAndroid Build Coastguard Worker.endm
53*c0909341SAndroid Build Coastguard Worker
// mask: produce 16 output pixels as a per-pixel weighted blend of two 16-bit
// inputs, with the weights read from a byte array.
//   dst0, dst1  d registers, each receiving 8 unsigned 8-bit results
//   t0-t3       q registers used as scratch (all four are overwritten)
// Reads 16 mask bytes from [lr], 16 halfwords from [r2] (a) and 16 from [r3]
// (b); all three pointers are post-incremented. Also overwrites q13 and q14.
// Expects q15 to hold the byte multiplier; bidir_fn in this file sets every
// byte to 256-2, i.e. -2 modulo 256.
// NOTE(review): the arithmetic notes below assume each mask byte m is within
// 0..64 so that -(m << 9) fits a signed 16-bit lane -- confirm against caller.
54*c0909341SAndroid Build Coastguard Worker.macro mask dst0, dst1, t0, t1, t2, t3
55*c0909341SAndroid Build Coastguard Worker        vld1.8          {q14}, [lr, :128]!
56*c0909341SAndroid Build Coastguard Worker        vld1.16         {\t0,\t1},   [r2, :128]!
57*c0909341SAndroid Build Coastguard Worker        vmul.i8         q14,   q14,  q15    // -2 * m (modulo 256)
58*c0909341SAndroid Build Coastguard Worker        vld1.16         {\t2,\t3},   [r3, :128]!
59*c0909341SAndroid Build Coastguard Worker        vshll.i8        q13,   d28,  #8     // widen to 16 bit and shift: -(m << 9) for mask bytes 0-7
60*c0909341SAndroid Build Coastguard Worker        vshll.i8        q14,   d29,  #8     // same for mask bytes 8-15
61*c0909341SAndroid Build Coastguard Worker        vsub.i16        \t0,   \t2,  \t0    // b - a
62*c0909341SAndroid Build Coastguard Worker        vsub.i16        \t1,   \t3,  \t1
63*c0909341SAndroid Build Coastguard Worker        vqdmulh.s16     \t0,   \t0,  q13    // ((b - a) * -(m << 9) * 2) >> 16 = ((a - b) * m) >> 6
64*c0909341SAndroid Build Coastguard Worker        vqdmulh.s16     \t1,   \t1,  q14
65*c0909341SAndroid Build Coastguard Worker        vadd.i16        \t0,   \t2,  \t0    // b + scaled difference
66*c0909341SAndroid Build Coastguard Worker        vadd.i16        \t1,   \t3,  \t1
67*c0909341SAndroid Build Coastguard Worker        vqrshrun.s16    \dst0, \t0,  #4     // (x + 8) >> 4, saturated to unsigned 8-bit
68*c0909341SAndroid Build Coastguard Worker        vqrshrun.s16    \dst1, \t1,  #4
69*c0909341SAndroid Build Coastguard Worker.endm
70*c0909341SAndroid Build Coastguard Worker
// bidir_fn type: emit the exported function <type>_8bpc_neon. The argument is
// used both in the function name and as the name of the macro invoked to
// compute pixels, so it must be one of avg, w_avg or mask defined above.
//
// Register roles as established by the code below:
//   r0      destination pointer (8-bit pixels)
//   r1      destination stride in bytes
//   r2, r3  the two 16-bit input buffers consumed by the type macro
//   r4      first stack-passed word: block width, turned into a table index
//   r5      second stack-passed word: number of rows
//   lr      third stack-passed word (not loaded for avg): the weight for
//           w_avg, the mask byte pointer for mask
//   r6      second destination pointer (next row, or a later part of the row)
//   r12     jump table base, then branch target
//   q15     per-type constant consumed by the w_avg / mask macros
//
// Every invocation of the type macro yields 16 output pixels. One invocation
// is always issued before dispatching and one at the tail of every loop
// iteration, so each loop body starts with d16/d17 (q8) already computed.
// NOTE(review): the table index is clz(width) - 24, which is only in range
// 0..5 for widths 128, 64, 32, 16, 8 and 4 -- presumably the only widths
// callers pass; confirm against the C caller.
71*c0909341SAndroid Build Coastguard Worker.macro bidir_fn type
72*c0909341SAndroid Build Coastguard Workerfunction \type\()_8bpc_neon, export=1
73*c0909341SAndroid Build Coastguard Worker        push            {r4-r6,lr}
74*c0909341SAndroid Build Coastguard Worker        ldrd            r4,  r5,  [sp, #16]     // 16 bytes were just pushed, so this is the first two stack words
75*c0909341SAndroid Build Coastguard Worker        clz             r4,  r4
76*c0909341SAndroid Build Coastguard Worker.ifnc \type, avg
77*c0909341SAndroid Build Coastguard Worker        ldr             lr, [sp, #24]           // third stack word: weight or mask pointer
78*c0909341SAndroid Build Coastguard Worker.endif
79*c0909341SAndroid Build Coastguard Worker.ifc \type, w_avg
80*c0909341SAndroid Build Coastguard Worker        vdup.s16        q15, lr
81*c0909341SAndroid Build Coastguard Worker        vneg.s16        q15, q15
82*c0909341SAndroid Build Coastguard Worker        vshl.i16        q15, q15, #11           // q15 = (-weight) << 11 in every 16-bit lane
83*c0909341SAndroid Build Coastguard Worker.endif
84*c0909341SAndroid Build Coastguard Worker.ifc \type, mask
85*c0909341SAndroid Build Coastguard Worker        vmov.i8         q15, #256-2             // every byte = 254, i.e. -2 modulo 256
86*c0909341SAndroid Build Coastguard Worker.endif
87*c0909341SAndroid Build Coastguard Worker        adr             r12, L(\type\()_tbl)
88*c0909341SAndroid Build Coastguard Worker        sub             r4,  r4,  #24           // clz(128) = 24 maps to table entry 0
89*c0909341SAndroid Build Coastguard Worker        ldr             r4,  [r12, r4, lsl #2]  // offset of the width handler relative to the table
90*c0909341SAndroid Build Coastguard Worker        \type           d16, d17, q0,  q1,  q2,  q3     // first 16 pixels, computed before the dispatch
91*c0909341SAndroid Build Coastguard Worker        add             r12, r12, r4
92*c0909341SAndroid Build Coastguard Worker        bx              r12
93*c0909341SAndroid Build Coastguard Worker
        // Table of handler offsets relative to the table itself, ordered from
        // width 128 down to width 4. CONFIG_THUMB is defined outside this
        // view; presumably it is 1 in Thumb builds so that the low bit of the
        // bx target is set -- confirm in the build configuration.
94*c0909341SAndroid Build Coastguard Worker        .align 2
95*c0909341SAndroid Build Coastguard WorkerL(\type\()_tbl):
96*c0909341SAndroid Build Coastguard Worker        .word 1280f - L(\type\()_tbl) + CONFIG_THUMB
97*c0909341SAndroid Build Coastguard Worker        .word 640f  - L(\type\()_tbl) + CONFIG_THUMB
98*c0909341SAndroid Build Coastguard Worker        .word 320f  - L(\type\()_tbl) + CONFIG_THUMB
99*c0909341SAndroid Build Coastguard Worker        .word 160f  - L(\type\()_tbl) + CONFIG_THUMB
100*c0909341SAndroid Build Coastguard Worker        .word 80f   - L(\type\()_tbl) + CONFIG_THUMB
101*c0909341SAndroid Build Coastguard Worker        .word 4f    - L(\type\()_tbl) + CONFIG_THUMB
102*c0909341SAndroid Build Coastguard Worker
        // Width 4: 16 pixels are four rows. r0 walks rows 0, 2, ... and r6
        // rows 1, 3, ... because the stride in r1 is doubled. Fully unrolled:
        // returns after 4 rows when r5 == 4, after 8 rows when r5 == 8, and
        // otherwise writes 16 rows in total.
103*c0909341SAndroid Build Coastguard Worker4:
104*c0909341SAndroid Build Coastguard Worker        add             r6,  r0,  r1
105*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1
106*c0909341SAndroid Build Coastguard Worker        cmp             r5,  #4                 // flags are consumed by the beq after the stores
107*c0909341SAndroid Build Coastguard Worker        vst1.32         {d16[0]},  [r0, :32], r1
108*c0909341SAndroid Build Coastguard Worker        vst1.32         {d16[1]},  [r6, :32], r1
109*c0909341SAndroid Build Coastguard Worker        vst1.32         {d17[0]},  [r0, :32], r1
110*c0909341SAndroid Build Coastguard Worker        vst1.32         {d17[1]},  [r6, :32], r1
111*c0909341SAndroid Build Coastguard Worker        beq             0f
112*c0909341SAndroid Build Coastguard Worker        \type           d18, d19,  q0,  q1,  q2,  q3
113*c0909341SAndroid Build Coastguard Worker        cmp             r5,  #8
114*c0909341SAndroid Build Coastguard Worker        vst1.32         {d18[0]},  [r0, :32], r1
115*c0909341SAndroid Build Coastguard Worker        vst1.32         {d18[1]},  [r6, :32], r1
116*c0909341SAndroid Build Coastguard Worker        vst1.32         {d19[0]},  [r0, :32], r1
117*c0909341SAndroid Build Coastguard Worker        vst1.32         {d19[1]},  [r6, :32], r1
118*c0909341SAndroid Build Coastguard Worker        beq             0f
119*c0909341SAndroid Build Coastguard Worker        \type           d16, d17, q0,  q1,  q2,  q3
120*c0909341SAndroid Build Coastguard Worker        vst1.32         {d16[0]},  [r0, :32], r1
121*c0909341SAndroid Build Coastguard Worker        vst1.32         {d16[1]},  [r6, :32], r1
122*c0909341SAndroid Build Coastguard Worker        \type           d18, d19,  q0,  q1,  q2,  q3
123*c0909341SAndroid Build Coastguard Worker        vst1.32         {d17[0]},  [r0, :32], r1
124*c0909341SAndroid Build Coastguard Worker        vst1.32         {d17[1]},  [r6, :32], r1
125*c0909341SAndroid Build Coastguard Worker        vst1.32         {d18[0]},  [r0, :32], r1
126*c0909341SAndroid Build Coastguard Worker        vst1.32         {d18[1]},  [r6, :32], r1
127*c0909341SAndroid Build Coastguard Worker        vst1.32         {d19[0]},  [r0, :32], r1
128*c0909341SAndroid Build Coastguard Worker        vst1.32         {d19[1]},  [r6, :32], r1
129*c0909341SAndroid Build Coastguard Worker        pop             {r4-r6,pc}
        // Width 8: 16 pixels are two rows; each loop iteration writes four
        // rows, alternating between r0 (even rows) and r6 (odd rows).
130*c0909341SAndroid Build Coastguard Worker80:
131*c0909341SAndroid Build Coastguard Worker        add             r6,  r0,  r1
132*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1
133*c0909341SAndroid Build Coastguard Worker8:
134*c0909341SAndroid Build Coastguard Worker        vst1.8          {d16},  [r0, :64], r1
135*c0909341SAndroid Build Coastguard Worker        \type           d18, d19, q0,  q1,  q2,  q3
136*c0909341SAndroid Build Coastguard Worker        vst1.8          {d17},  [r6, :64], r1
137*c0909341SAndroid Build Coastguard Worker        vst1.8          {d18},  [r0, :64], r1
138*c0909341SAndroid Build Coastguard Worker        subs            r5,  r5,  #4
139*c0909341SAndroid Build Coastguard Worker        vst1.8          {d19},  [r6, :64], r1
140*c0909341SAndroid Build Coastguard Worker        ble             0f
141*c0909341SAndroid Build Coastguard Worker        \type           d16, d17, q0,  q1,  q2,  q3     // first two rows of the next iteration
142*c0909341SAndroid Build Coastguard Worker        b               8b
        // Width 16: 16 pixels are one row; each loop iteration writes four
        // rows (q8 to q11), alternating between r0 and r6.
143*c0909341SAndroid Build Coastguard Worker160:
144*c0909341SAndroid Build Coastguard Worker        add             r6,  r0,  r1
145*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1
146*c0909341SAndroid Build Coastguard Worker16:
147*c0909341SAndroid Build Coastguard Worker        \type           d18, d19, q0, q1, q2, q3
148*c0909341SAndroid Build Coastguard Worker        vst1.8          {q8},  [r0, :128], r1
149*c0909341SAndroid Build Coastguard Worker        \type           d20, d21, q0, q1, q2, q3
150*c0909341SAndroid Build Coastguard Worker        vst1.8          {q9},  [r6, :128], r1
151*c0909341SAndroid Build Coastguard Worker        \type           d22, d23, q0, q1, q2, q3
152*c0909341SAndroid Build Coastguard Worker        vst1.8          {q10}, [r0, :128], r1
153*c0909341SAndroid Build Coastguard Worker        subs            r5,  r5,  #4
154*c0909341SAndroid Build Coastguard Worker        vst1.8          {q11}, [r6, :128], r1
155*c0909341SAndroid Build Coastguard Worker        ble             0f
156*c0909341SAndroid Build Coastguard Worker        \type           d16, d17, q0, q1, q2, q3
157*c0909341SAndroid Build Coastguard Worker        b               16b
        // Width 32: two macro invocations per row; each loop iteration writes
        // two rows, q8/q9 through r0 and q10/q11 through r6.
158*c0909341SAndroid Build Coastguard Worker320:
159*c0909341SAndroid Build Coastguard Worker        add             r6,  r0,  r1
160*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1
161*c0909341SAndroid Build Coastguard Worker32:
162*c0909341SAndroid Build Coastguard Worker        \type           d18, d19, q0, q1, q2, q3
163*c0909341SAndroid Build Coastguard Worker        \type           d20, d21, q0, q1, q2, q3
164*c0909341SAndroid Build Coastguard Worker        vst1.8          {q8,  q9},  [r0, :128], r1
165*c0909341SAndroid Build Coastguard Worker        \type           d22, d23, q0, q1, q2, q3
166*c0909341SAndroid Build Coastguard Worker        subs            r5,  r5,  #2
167*c0909341SAndroid Build Coastguard Worker        vst1.8          {q10, q11}, [r6, :128], r1
168*c0909341SAndroid Build Coastguard Worker        ble             0f
169*c0909341SAndroid Build Coastguard Worker        \type           d16, d17, q0, q1, q2, q3
170*c0909341SAndroid Build Coastguard Worker        b               32b
        // Width 64: r6 points 32 bytes into the same row, and the stride is
        // left undoubled, so r0 writes bytes 0-31 and r6 bytes 32-63 of every
        // row. Each loop iteration writes two rows.
171*c0909341SAndroid Build Coastguard Worker640:
172*c0909341SAndroid Build Coastguard Worker        add             r6,  r0,  #32
173*c0909341SAndroid Build Coastguard Worker64:
174*c0909341SAndroid Build Coastguard Worker        \type           d18, d19, q0, q1, q2, q3
175*c0909341SAndroid Build Coastguard Worker        \type           d20, d21, q0, q1, q2, q3
176*c0909341SAndroid Build Coastguard Worker        \type           d22, d23, q0, q1, q2, q3
177*c0909341SAndroid Build Coastguard Worker        vst1.8          {q8,  q9},  [r0, :128], r1
178*c0909341SAndroid Build Coastguard Worker        \type           d16, d17, q0, q1, q2, q3
179*c0909341SAndroid Build Coastguard Worker        vst1.8          {q10, q11}, [r6, :128], r1
180*c0909341SAndroid Build Coastguard Worker        \type           d18, d19, q0, q1, q2, q3
181*c0909341SAndroid Build Coastguard Worker        \type           d20, d21, q0, q1, q2, q3
182*c0909341SAndroid Build Coastguard Worker        vst1.8          {q8,  q9},  [r0, :128], r1
183*c0909341SAndroid Build Coastguard Worker        \type           d22, d23, q0, q1, q2, q3
184*c0909341SAndroid Build Coastguard Worker        subs            r5,  r5,  #2
185*c0909341SAndroid Build Coastguard Worker        vst1.8          {q10, q11}, [r6, :128], r1
186*c0909341SAndroid Build Coastguard Worker        ble             0f
187*c0909341SAndroid Build Coastguard Worker        \type           d16, d17, q0, q1, q2, q3
188*c0909341SAndroid Build Coastguard Worker        b               64b
        // Width 128: r0 covers bytes 0-63 and r6 bytes 64-127 of each row.
        // The first store through each pointer advances it by 32 with
        // writeback, so the stride is reduced by 32 up front to make the
        // second store land the pointer on the next row. One row per
        // iteration.
189*c0909341SAndroid Build Coastguard Worker1280:
190*c0909341SAndroid Build Coastguard Worker        sub             r1,  r1,  #32
191*c0909341SAndroid Build Coastguard Worker        add             r6,  r0,  #64
192*c0909341SAndroid Build Coastguard Worker128:
193*c0909341SAndroid Build Coastguard Worker        \type           d18, d19, q0, q1, q2, q3
194*c0909341SAndroid Build Coastguard Worker        \type           d20, d21, q0, q1, q2, q3
195*c0909341SAndroid Build Coastguard Worker        \type           d22, d23, q0, q1, q2, q3
196*c0909341SAndroid Build Coastguard Worker        vst1.8          {q8,  q9},  [r0, :128]!
197*c0909341SAndroid Build Coastguard Worker        \type           d16, d17, q0, q1, q2, q3
198*c0909341SAndroid Build Coastguard Worker        vst1.8          {q10, q11}, [r0, :128], r1
199*c0909341SAndroid Build Coastguard Worker        \type           d18, d19, q0, q1, q2, q3
200*c0909341SAndroid Build Coastguard Worker        \type           d20, d21, q0, q1, q2, q3
201*c0909341SAndroid Build Coastguard Worker        vst1.8          {q8,  q9},  [r6, :128]!
202*c0909341SAndroid Build Coastguard Worker        \type           d22, d23, q0, q1, q2, q3
203*c0909341SAndroid Build Coastguard Worker        subs            r5,  r5,  #1
204*c0909341SAndroid Build Coastguard Worker        vst1.8          {q10, q11}, [r6, :128], r1
205*c0909341SAndroid Build Coastguard Worker        ble             0f
206*c0909341SAndroid Build Coastguard Worker        \type           d16, d17, q0, q1, q2, q3
207*c0909341SAndroid Build Coastguard Worker        b               128b
208*c0909341SAndroid Build Coastguard Worker
        // Common exit: restore r4-r6 and return by popping the saved lr into pc.
209*c0909341SAndroid Build Coastguard Worker0:
210*c0909341SAndroid Build Coastguard Worker        pop             {r4-r6,pc}
211*c0909341SAndroid Build Coastguard Workerendfunc
212*c0909341SAndroid Build Coastguard Worker.endm
213*c0909341SAndroid Build Coastguard Worker
// Instantiate the three exported functions avg_8bpc_neon, w_avg_8bpc_neon and
// mask_8bpc_neon from the bidir_fn template.
214*c0909341SAndroid Build Coastguard Workerbidir_fn avg
215*c0909341SAndroid Build Coastguard Workerbidir_fn w_avg
216*c0909341SAndroid Build Coastguard Workerbidir_fn mask
217*c0909341SAndroid Build Coastguard Worker
218*c0909341SAndroid Build Coastguard Worker
219*c0909341SAndroid Build Coastguard Worker.macro w_mask_fn type
220*c0909341SAndroid Build Coastguard Workerfunction w_mask_\type\()_8bpc_neon, export=1
221*c0909341SAndroid Build Coastguard Worker        push            {r4-r9,lr}
222*c0909341SAndroid Build Coastguard Worker        ldrd            r4,  r5,  [sp, #28]
223*c0909341SAndroid Build Coastguard Worker        ldrd            r6,  r7,  [sp, #36]
224*c0909341SAndroid Build Coastguard Worker        clz             r8,  r4
225*c0909341SAndroid Build Coastguard Worker        adr             r9,  L(w_mask_\type\()_tbl)
226*c0909341SAndroid Build Coastguard Worker        sub             r8,  r8,  #24
227*c0909341SAndroid Build Coastguard Worker        ldr             r8,  [r9,  r8,  lsl #2]
228*c0909341SAndroid Build Coastguard Worker        add             r9,  r9,  r8
229*c0909341SAndroid Build Coastguard Worker        movw            r12, #6903
230*c0909341SAndroid Build Coastguard Worker        vdup.16         q14, r12
231*c0909341SAndroid Build Coastguard Worker.if \type == 444
232*c0909341SAndroid Build Coastguard Worker        vmov.i8         q15, #64
233*c0909341SAndroid Build Coastguard Worker.elseif \type == 422
234*c0909341SAndroid Build Coastguard Worker        vdup.8          d0,  r7         // d0[] <- sign
235*c0909341SAndroid Build Coastguard Worker        vmov.i8         d30, #129
236*c0909341SAndroid Build Coastguard Worker        vsub.i8         d30, d30, d0    // 129 - sign
237*c0909341SAndroid Build Coastguard Worker.elseif \type == 420
238*c0909341SAndroid Build Coastguard Worker        vdup.16         q0,  r7         // d0[] <- sign
239*c0909341SAndroid Build Coastguard Worker        vmov.i16        q15, #256
240*c0909341SAndroid Build Coastguard Worker        vsub.i16        q15, q15, q0    // 256 - sign
241*c0909341SAndroid Build Coastguard Worker.endif
242*c0909341SAndroid Build Coastguard Worker        add             r12, r0,  r1
243*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1
244*c0909341SAndroid Build Coastguard Worker        bx              r9
245*c0909341SAndroid Build Coastguard Worker
246*c0909341SAndroid Build Coastguard Worker        .align 2
247*c0909341SAndroid Build Coastguard WorkerL(w_mask_\type\()_tbl):
248*c0909341SAndroid Build Coastguard Worker        .word 1280f  - L(w_mask_\type\()_tbl) + CONFIG_THUMB
249*c0909341SAndroid Build Coastguard Worker        .word 640f   - L(w_mask_\type\()_tbl) + CONFIG_THUMB
250*c0909341SAndroid Build Coastguard Worker        .word 320f   - L(w_mask_\type\()_tbl) + CONFIG_THUMB
251*c0909341SAndroid Build Coastguard Worker        .word 160f   - L(w_mask_\type\()_tbl) + CONFIG_THUMB
252*c0909341SAndroid Build Coastguard Worker        .word 8f     - L(w_mask_\type\()_tbl) + CONFIG_THUMB
253*c0909341SAndroid Build Coastguard Worker        .word 4f     - L(w_mask_\type\()_tbl) + CONFIG_THUMB
254*c0909341SAndroid Build Coastguard Worker
255*c0909341SAndroid Build Coastguard Worker4:
256*c0909341SAndroid Build Coastguard Worker        vld1.16         {d0,  d1,  d2,  d3},  [r2,  :128]! // tmp1 (four rows at once)
257*c0909341SAndroid Build Coastguard Worker        vld1.16         {d4,  d5,  d6,  d7},  [r3,  :128]! // tmp2 (four rows at once)
258*c0909341SAndroid Build Coastguard Worker        subs            r5,  r5,  #4
259*c0909341SAndroid Build Coastguard Worker        vsub.i16        q8,  q2,  q0    // tmp2-tmp1
260*c0909341SAndroid Build Coastguard Worker        vsub.i16        q9,  q3,  q1
261*c0909341SAndroid Build Coastguard Worker        vabd.s16        q10, q0,  q2    // (abs(tmp1[x] - tmp2[x]))
262*c0909341SAndroid Build Coastguard Worker        vabd.s16        q11, q1,  q3
263*c0909341SAndroid Build Coastguard Worker        vqsub.u16       q10, q14, q10   // 6903 - abs ()
264*c0909341SAndroid Build Coastguard Worker        vqsub.u16       q11, q14, q11
265*c0909341SAndroid Build Coastguard Worker        vshr.s16        q10, q10, #8    // 64-m = (6903 - abs()) >> 8
266*c0909341SAndroid Build Coastguard Worker        vshr.s16        q11, q11, #8
267*c0909341SAndroid Build Coastguard Worker        vshl.s16        q12, q10, #9    // (64-m)<<9
268*c0909341SAndroid Build Coastguard Worker        vshl.s16        q13, q11, #9
269*c0909341SAndroid Build Coastguard Worker        vqdmulh.s16     q12, q12, q8    // ((tmp2-tmp1)*(64-m)<<9)>>15
270*c0909341SAndroid Build Coastguard Worker        vqdmulh.s16     q13, q13, q9
271*c0909341SAndroid Build Coastguard Worker        vadd.i16        q12, q12, q0    // (((tmp2-tmp1)*(64-m)<<9)>>15) + tmp1
272*c0909341SAndroid Build Coastguard Worker        vadd.i16        q13, q13, q1
273*c0909341SAndroid Build Coastguard Worker        vqrshrun.s16    d24, q12, #4    // (((((tmp2-tmp1)*(64-m)<<9)>>15) + tmp1) + 8) >> 4
274*c0909341SAndroid Build Coastguard Worker        vqrshrun.s16    d25, q13, #4
275*c0909341SAndroid Build Coastguard Worker.if \type == 444
276*c0909341SAndroid Build Coastguard Worker        vmovn.u16       d20, q10        // 64 - m
277*c0909341SAndroid Build Coastguard Worker        vmovn.u16       d21, q11
278*c0909341SAndroid Build Coastguard Worker        vsub.i8         q10, q15, q10   // m
279*c0909341SAndroid Build Coastguard Worker        vst1.8          {d20, d21}, [r6,  :128]!
280*c0909341SAndroid Build Coastguard Worker.elseif \type == 422
281*c0909341SAndroid Build Coastguard Worker        vpadd.s16       d20, d20, d21   // (64 - m) + (64 - n) (column wise addition)
282*c0909341SAndroid Build Coastguard Worker        vpadd.s16       d21, d22, d23
283*c0909341SAndroid Build Coastguard Worker        vmovn.s16       d6,  q10
284*c0909341SAndroid Build Coastguard Worker        vhsub.u8        d6,  d30, d6    // ((129 - sign) - ((64 - m) + (64 - n))) >> 1
285*c0909341SAndroid Build Coastguard Worker        vst1.8          {d6},  [r6,  :64]!
286*c0909341SAndroid Build Coastguard Worker.elseif \type == 420
287*c0909341SAndroid Build Coastguard Worker        vadd.s16        d20, d20, d21   // (64 - my1) + (64 - my2) (row wise addition)
288*c0909341SAndroid Build Coastguard Worker        vadd.s16        d21, d22, d23
289*c0909341SAndroid Build Coastguard Worker        vpadd.s16       d20, d20, d21   // (128 - m) + (128 - n) (column wise addition)
290*c0909341SAndroid Build Coastguard Worker        vsub.s16        d20, d30, d20   // (256 - sign) - ((128 - m) + (128 - n))
291*c0909341SAndroid Build Coastguard Worker        vrshrn.u16      d20, q10,  #2   // ((256 - sign) - ((128 - m) + (128 - n)) + 2) >> 2
292*c0909341SAndroid Build Coastguard Worker        vst1.32         {d20[0]}, [r6,  :32]!
293*c0909341SAndroid Build Coastguard Worker.endif
294*c0909341SAndroid Build Coastguard Worker        vst1.32         {d24[0]}, [r0,  :32], r1
295*c0909341SAndroid Build Coastguard Worker        vst1.32         {d24[1]}, [r12, :32], r1
296*c0909341SAndroid Build Coastguard Worker        vst1.32         {d25[0]}, [r0,  :32], r1
297*c0909341SAndroid Build Coastguard Worker        vst1.32         {d25[1]}, [r12, :32], r1
298*c0909341SAndroid Build Coastguard Worker        bgt             4b
299*c0909341SAndroid Build Coastguard Worker        pop             {r4-r9,pc}
300*c0909341SAndroid Build Coastguard Worker8:
301*c0909341SAndroid Build Coastguard Worker        vld1.16         {d0,  d1,  d2,  d3},  [r2,  :128]! // tmp1y1, tmp1y2
302*c0909341SAndroid Build Coastguard Worker        vld1.16         {d4,  d5,  d6,  d7},  [r3,  :128]! // tmp2y1, tmp2y2
303*c0909341SAndroid Build Coastguard Worker        subs            r5,  r5,  #2
304*c0909341SAndroid Build Coastguard Worker        vsub.i16        q8,  q2,  q0    // tmp2y1 - tmp1y1
305*c0909341SAndroid Build Coastguard Worker        vsub.i16        q9,  q3,  q1    // tmp2y2 - tmp1y2
306*c0909341SAndroid Build Coastguard Worker        vabd.s16        q10, q0,  q2    // abs(tmp1y1 - tmp2y1)
307*c0909341SAndroid Build Coastguard Worker        vabd.s16        q11, q1,  q3    // abs(tmp1y2 - tmp2y2)
308*c0909341SAndroid Build Coastguard Worker        vqsub.u16       q10, q14, q10   // 6903 - abs(tmp1y1 - tmp2y1)
309*c0909341SAndroid Build Coastguard Worker        vqsub.u16       q11, q14, q11   // 6903 - abs(tmp1y2 - tmp2y2)
310*c0909341SAndroid Build Coastguard Worker        vshr.s16        q10, q10, #8    // 64 - my1 = 6903 - abs(tmp1y1 - tmp2y1) >> 8
311*c0909341SAndroid Build Coastguard Worker        vshr.s16        q11, q11, #8    // 64 - my2 = 6903 - abs(tmp1y2 - tmp2y2) >> 8
312*c0909341SAndroid Build Coastguard Worker        vshl.s16        q12, q10, #9    // (64 - my1) << 9
313*c0909341SAndroid Build Coastguard Worker        vshl.s16        q13, q11, #9    // (64 - my2) << 9
314*c0909341SAndroid Build Coastguard Worker        vqdmulh.s16     q12, q12, q8    // ((tmp2y1 - tmp1y1) * (64 - my1) << 9) >> 15
315*c0909341SAndroid Build Coastguard Worker        vqdmulh.s16     q13, q13, q9    // ((tmp2y2 - tmp1y2) * (64 - my2) << 9) >> 15
316*c0909341SAndroid Build Coastguard Worker        vadd.s16        q12, q12, q0    // (((tmp2y1 - tmp1y1) * (64 - my1) << 9) >> 15) + tmp1y1
317*c0909341SAndroid Build Coastguard Worker        vadd.s16        q13, q13, q1    // (((tmp2y2 - tmp1y2) * (64 - my2) << 9) >> 15) + tmp1y2
318*c0909341SAndroid Build Coastguard Worker        vqrshrun.s16    d24, q12, #4    // (((((tmp2y1 - tmp1y1) * (64 - my1) << 9) >> 15) + tmp1y1) + 8) >> 4
319*c0909341SAndroid Build Coastguard Worker        vqrshrun.s16    d25, q13, #4    // (((((tmp2y2 - tmp1y2) * (64 - my2) << 9) >> 15) + tmp1y2) + 8) >> 4
320*c0909341SAndroid Build Coastguard Worker.if \type == 444
321*c0909341SAndroid Build Coastguard Worker        vmovn.u16       d20, q10        // 64 - m
322*c0909341SAndroid Build Coastguard Worker        vmovn.u16       d21, q11
323*c0909341SAndroid Build Coastguard Worker        vsub.i8         q10, q15, q10   // m
324*c0909341SAndroid Build Coastguard Worker        vst1.8          {d20, d21}, [r6,  :128]!
325*c0909341SAndroid Build Coastguard Worker.elseif \type == 422
326*c0909341SAndroid Build Coastguard Worker        vpadd.s16       d20, d20, d21   // (64 - my1) + (64 - ny1) (column wise addition)
327*c0909341SAndroid Build Coastguard Worker        vpadd.s16       d21, d22, d23   // (64 - my2) + (64 - ny2)
328*c0909341SAndroid Build Coastguard Worker        vmovn.s16       d20, q10
329*c0909341SAndroid Build Coastguard Worker        vhsub.u8        d20, d30, d20   // ((129 - sign) - ((64 - my1/y2) + (64 - ny1/y2))) >> 1
330*c0909341SAndroid Build Coastguard Worker        vst1.8          {d20}, [r6,  :64]!
331*c0909341SAndroid Build Coastguard Worker.elseif \type == 420
332*c0909341SAndroid Build Coastguard Worker        vadd.s16        q10, q10, q11   // (64 - my1) + (64 - my2) (row wise addition)
333*c0909341SAndroid Build Coastguard Worker        vpadd.s16       d20, d20, d21   // (128 - m) + (128 - n) (column wise addition)
334*c0909341SAndroid Build Coastguard Worker        vsub.s16        d20, d30, d20   // (256 - sign) - ((128 - m) + (128 - n))
335*c0909341SAndroid Build Coastguard Worker        vrshrn.u16      d20, q10, #2    // ((256 - sign) - ((128 - m) + (128 - n)) + 2) >> 2
336*c0909341SAndroid Build Coastguard Worker        vst1.32         {d20[0]}, [r6,  :32]!
337*c0909341SAndroid Build Coastguard Worker.endif
338*c0909341SAndroid Build Coastguard Worker        vst1.16         {d24}, [r0,  :64], r1
339*c0909341SAndroid Build Coastguard Worker        vst1.16         {d25}, [r12, :64], r1
340*c0909341SAndroid Build Coastguard Worker        bgt             8b
341*c0909341SAndroid Build Coastguard Worker        pop             {r4-r9,pc}
342*c0909341SAndroid Build Coastguard Worker1280:
343*c0909341SAndroid Build Coastguard Worker640:
344*c0909341SAndroid Build Coastguard Worker320:
345*c0909341SAndroid Build Coastguard Worker160:
346*c0909341SAndroid Build Coastguard Worker        sub             r1,  r1,  r4
347*c0909341SAndroid Build Coastguard Worker.if \type == 444
348*c0909341SAndroid Build Coastguard Worker        add             lr,  r6,  r4
349*c0909341SAndroid Build Coastguard Worker.elseif \type == 422
350*c0909341SAndroid Build Coastguard Worker        add             lr,  r6,  r4,  lsr #1
351*c0909341SAndroid Build Coastguard Worker.endif
352*c0909341SAndroid Build Coastguard Worker        add             r9,  r3,  r4,  lsl #1
353*c0909341SAndroid Build Coastguard Worker        add             r7,  r2,  r4,  lsl #1
354*c0909341SAndroid Build Coastguard Worker161:
355*c0909341SAndroid Build Coastguard Worker        mov             r8,  r4
356*c0909341SAndroid Build Coastguard Worker16:
357*c0909341SAndroid Build Coastguard Worker        vld1.16         {d0,  d1,  d2,  d3},  [r2,  :128]! // tmp1y1
358*c0909341SAndroid Build Coastguard Worker        vld1.16         {d4,  d5,  d6,  d7},  [r3,  :128]! // tmp2y1
359*c0909341SAndroid Build Coastguard Worker        vld1.16         {d16, d17, d18, d19}, [r7,  :128]! // tmp1y2
360*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #16
361*c0909341SAndroid Build Coastguard Worker        vsub.i16        q2,  q2,  q0    // tmp2y1 - tmp1y1
362*c0909341SAndroid Build Coastguard Worker        vsub.i16        q3,  q3,  q1
363*c0909341SAndroid Build Coastguard Worker        vabs.s16        q10, q2         // abs(tm2y1 - tmp1y1)
364*c0909341SAndroid Build Coastguard Worker        vabs.s16        q11, q3
365*c0909341SAndroid Build Coastguard Worker        vqsub.u16       q10, q14, q10   // 6903 - abs(tmp1y1 - tmp2y1)
366*c0909341SAndroid Build Coastguard Worker        vqsub.u16       q11, q14, q11
367*c0909341SAndroid Build Coastguard Worker        vshr.s16        q10, q10, #8    // 64 - my1 = 6903 - abs(tmp1y1 - tmp2y1) >> 8
368*c0909341SAndroid Build Coastguard Worker        vshr.s16        q11, q11, #8
369*c0909341SAndroid Build Coastguard Worker        vshl.s16        q12, q10, #9    // (64 - my1) << 9
370*c0909341SAndroid Build Coastguard Worker        vshl.s16        q13, q11, #9
371*c0909341SAndroid Build Coastguard Worker        vqdmulh.s16     q12, q12, q2    // ((tmp2y1 - tmp1y1) * (64 - my1) << 9) >> 15
372*c0909341SAndroid Build Coastguard Worker        vqdmulh.s16     q13, q13, q3
373*c0909341SAndroid Build Coastguard Worker        vadd.i16        q12, q12, q0    // (((tmp2y1 - tmp1y1) * (64 - my1) << 9) >> 15) + tmp1y1
374*c0909341SAndroid Build Coastguard Worker        vadd.i16        q13, q13, q1
375*c0909341SAndroid Build Coastguard Worker        vld1.16         {d0,  d1,  d2,  d3},  [r9,  :128]! // tmp2h2
376*c0909341SAndroid Build Coastguard Worker.if \type == 444
377*c0909341SAndroid Build Coastguard Worker        vmovn.u16       d20, q10        // 64 - my1
378*c0909341SAndroid Build Coastguard Worker        vmovn.u16       d21, q11
379*c0909341SAndroid Build Coastguard Worker        vsub.i8         q10, q15, q10   // my1
380*c0909341SAndroid Build Coastguard Worker        vst1.8          {d20, d21}, [r6,  :128]!
381*c0909341SAndroid Build Coastguard Worker.elseif \type == 422
382*c0909341SAndroid Build Coastguard Worker        vpadd.s16       d20, d20, d21   // (64 - my1) + (64 - ny1) (column wise addition)
383*c0909341SAndroid Build Coastguard Worker        vpadd.s16       d21, d22, d23
384*c0909341SAndroid Build Coastguard Worker        vmovn.s16       d20, q10
385*c0909341SAndroid Build Coastguard Worker        vhsub.u8        d20, d30, d20   // ((129 - sign) - ((64 - my1) + (64 - ny1))) >> 1
386*c0909341SAndroid Build Coastguard Worker        vst1.8          {d20}, [r6,  :64]!
387*c0909341SAndroid Build Coastguard Worker.endif
388*c0909341SAndroid Build Coastguard Worker        vqrshrun.s16    d24, q12, #4    // (((((tmp2y1 - tmp1y1)*(64 - my1) << 9) >> 15) + tmp1y1) + 8) >> 4
389*c0909341SAndroid Build Coastguard Worker        vqrshrun.s16    d25, q13, #4
390*c0909341SAndroid Build Coastguard Worker        vsub.i16        q0,  q0,  q8    // tmp2y2 - tmp1y2
391*c0909341SAndroid Build Coastguard Worker        vsub.i16        q1,  q1,  q9
392*c0909341SAndroid Build Coastguard Worker        vst1.16         {d24, d25}, [r0,  :128]!    // store dsty1
393*c0909341SAndroid Build Coastguard Worker        vabs.s16        q2,  q0         // abs(tmp2y2 - tmp1y2)
394*c0909341SAndroid Build Coastguard Worker        vabs.s16        q3,  q1
395*c0909341SAndroid Build Coastguard Worker        vqsub.u16       q2,  q14, q2    // 6903 - abs(tmp2y2 - tmp1y2)
396*c0909341SAndroid Build Coastguard Worker        vqsub.u16       q3,  q14, q3
397*c0909341SAndroid Build Coastguard Worker        vshr.s16        q2,  q2,  #8    // (6903 - abs(tmp2y2 - tmp1y2)) >> 8
398*c0909341SAndroid Build Coastguard Worker        vshr.s16        q3,  q3,  #8
399*c0909341SAndroid Build Coastguard Worker        vshl.s16        q12, q2,  #9    // (64 - my2) << 9
400*c0909341SAndroid Build Coastguard Worker        vshl.s16        q13, q3,  #9
401*c0909341SAndroid Build Coastguard Worker.if \type == 444
402*c0909341SAndroid Build Coastguard Worker        vmovn.u16       d4,  q2         // 64 - my2
403*c0909341SAndroid Build Coastguard Worker        vmovn.u16       d5,  q3
404*c0909341SAndroid Build Coastguard Worker        vsub.i8         q2,  q15, q2    // my2
405*c0909341SAndroid Build Coastguard Worker        vst1.8          {d4,  d5},  [lr,  :128]!
406*c0909341SAndroid Build Coastguard Worker.elseif \type == 422
407*c0909341SAndroid Build Coastguard Worker        vpadd.s16       d4,  d4,  d5    // (64 - my2) + (64 - ny2) (column wise addition)
408*c0909341SAndroid Build Coastguard Worker        vpadd.s16       d5,  d6,  d7
409*c0909341SAndroid Build Coastguard Worker        vmovn.s16       d4,  q2
410*c0909341SAndroid Build Coastguard Worker        vhsub.u8        d4,  d30, d4    // ((129 - sign) - ((64 - my2) + (64 - ny2))) >> 1
411*c0909341SAndroid Build Coastguard Worker        vst1.8          {d4},  [lr,  :64]!
412*c0909341SAndroid Build Coastguard Worker.elseif \type == 420
413*c0909341SAndroid Build Coastguard Worker        vadd.s16        q10, q10, q2    // (64 - my1) + (64 - my2) (row wise addition)
414*c0909341SAndroid Build Coastguard Worker        vadd.s16        q11, q11, q3
415*c0909341SAndroid Build Coastguard Worker        vpadd.s16       d20, d20, d21   // (128 - m) + (128 - n) (column wise addition)
416*c0909341SAndroid Build Coastguard Worker        vpadd.s16       d21, d22, d23
417*c0909341SAndroid Build Coastguard Worker        vsub.s16        q10, q15, q10   // (256 - sign) - ((128 - m) + (128 - n))
418*c0909341SAndroid Build Coastguard Worker        vrshrn.u16      d20, q10, #2    // ((256 - sign) - ((128 - m) + (128 - n)) + 2) >> 2
419*c0909341SAndroid Build Coastguard Worker        vst1.8          {d20}, [r6,  :64]!
420*c0909341SAndroid Build Coastguard Worker.endif
421*c0909341SAndroid Build Coastguard Worker        vqdmulh.s16     q12, q12, q0    // ((tmp2y2 - tmp1y2) * (64 - my2) << 9) >> 15
422*c0909341SAndroid Build Coastguard Worker        vqdmulh.s16     q13, q13, q1
423*c0909341SAndroid Build Coastguard Worker        vadd.i16        q12, q12, q8    // (((tmp2y2 - tmp1y2) * (64 - my2) << 9) >> 15) + tmp1y2
424*c0909341SAndroid Build Coastguard Worker        vadd.i16        q13, q13, q9
425*c0909341SAndroid Build Coastguard Worker        vqrshrun.s16    d24, q12, #4    // (((((tmp2y2 - tmp1y2)*(64 - my2) << 9) >> 15) + tmp1y2) + 8) >> 4
426*c0909341SAndroid Build Coastguard Worker        vqrshrun.s16    d25, q13, #4
427*c0909341SAndroid Build Coastguard Worker        vst1.16         {d24, d25}, [r12, :128]!   // store dsty2
428*c0909341SAndroid Build Coastguard Worker        bgt             16b
429*c0909341SAndroid Build Coastguard Worker        subs            r5,  r5,  #2
430*c0909341SAndroid Build Coastguard Worker        add             r2,  r2,  r4,  lsl #1
431*c0909341SAndroid Build Coastguard Worker        add             r3,  r3,  r4,  lsl #1
432*c0909341SAndroid Build Coastguard Worker        add             r7,  r7,  r4,  lsl #1
433*c0909341SAndroid Build Coastguard Worker        add             r9,  r9,  r4,  lsl #1
434*c0909341SAndroid Build Coastguard Worker.if \type == 444
435*c0909341SAndroid Build Coastguard Worker        add             r6,  r6,  r4
436*c0909341SAndroid Build Coastguard Worker        add             lr,  lr,  r4
437*c0909341SAndroid Build Coastguard Worker.elseif \type == 422
438*c0909341SAndroid Build Coastguard Worker        add             r6,  r6,  r4,  lsr #1
439*c0909341SAndroid Build Coastguard Worker        add             lr,  lr,  r4,  lsr #1
440*c0909341SAndroid Build Coastguard Worker.endif
441*c0909341SAndroid Build Coastguard Worker        add             r0,  r0,  r1
442*c0909341SAndroid Build Coastguard Worker        add             r12, r12, r1
443*c0909341SAndroid Build Coastguard Worker        bgt             161b
444*c0909341SAndroid Build Coastguard Worker        pop             {r4-r9,pc}
445*c0909341SAndroid Build Coastguard Workerendfunc
446*c0909341SAndroid Build Coastguard Worker.endm
447*c0909341SAndroid Build Coastguard Worker
// Instantiate the w_mask macro once per \type value. Inside the macro the
// .if \type == 444 / 422 / 420 branches select how the mask is written out:
// 444 stores one mask byte per pixel, 422 first adds horizontally adjacent
// pairs, and 420 adds both the two rows and horizontally adjacent pairs.
448*c0909341SAndroid Build Coastguard Workerw_mask_fn 444
449*c0909341SAndroid Build Coastguard Workerw_mask_fn 422
450*c0909341SAndroid Build Coastguard Workerw_mask_fn 420
451*c0909341SAndroid Build Coastguard Worker
452*c0909341SAndroid Build Coastguard Worker
// blend_8bpc_neon: blend a block of 8-bit pixels in place using a per-pixel
// mask:
//     dst = (tmp * m + dst * (64 - m) + 32) >> 6
//
// Register roles as used below:
//   r0 = dst pointer (read and written), r1 = dst stride in bytes
//   r2 = tmp pointer, read contiguously with post-increment
//   r3 = width; only used to pick a code path (4, 8, 16 or 32) via clz
//   r4 = number of rows (first stack argument)
//   r5 = mask pointer (second stack argument), read contiguously
// The 4/8/16 wide paths handle two rows per iteration (r12 = second row
// pointer, r1 doubled); the 32 wide path handles one row per iteration.
453*c0909341SAndroid Build Coastguard Workerfunction blend_8bpc_neon, export=1
454*c0909341SAndroid Build Coastguard Worker        push            {r4-r5,lr}
455*c0909341SAndroid Build Coastguard Worker        ldrd            r4,  r5,  [sp, #12]    // stack args sit above the 12 bytes just pushed
456*c0909341SAndroid Build Coastguard Worker        clz             lr,  r3
457*c0909341SAndroid Build Coastguard Worker        adr             r3,  L(blend_tbl)
458*c0909341SAndroid Build Coastguard Worker        sub             lr,  lr,  #26          // clz(32) - 26 = 0 ... clz(4) - 26 = 3
459*c0909341SAndroid Build Coastguard Worker        ldr             lr,  [r3, lr, lsl #2]  // table entry = offset of the path from the table
460*c0909341SAndroid Build Coastguard Worker        add             r3,  r3,  lr
461*c0909341SAndroid Build Coastguard Worker        bx              r3
462*c0909341SAndroid Build Coastguard Worker
463*c0909341SAndroid Build Coastguard Worker        .align 2
// Jump table, ordered from widest to narrowest path.
// NOTE(review): CONFIG_THUMB is presumably 1 in Thumb builds so that bx stays
// in Thumb state - confirm against the build configuration.
464*c0909341SAndroid Build Coastguard WorkerL(blend_tbl):
465*c0909341SAndroid Build Coastguard Worker        .word 320f  - L(blend_tbl) + CONFIG_THUMB
466*c0909341SAndroid Build Coastguard Worker        .word 160f  - L(blend_tbl) + CONFIG_THUMB
467*c0909341SAndroid Build Coastguard Worker        .word 80f   - L(blend_tbl) + CONFIG_THUMB
468*c0909341SAndroid Build Coastguard Worker        .word 40f   - L(blend_tbl) + CONFIG_THUMB
469*c0909341SAndroid Build Coastguard Worker
// Width 4: two rows (2 x 4 pixels) per iteration, packed into one d register.
470*c0909341SAndroid Build Coastguard Worker40:
471*c0909341SAndroid Build Coastguard Worker        vmov.i8         d22, #64
472*c0909341SAndroid Build Coastguard Worker        add             r12, r0,  r1           // r12 = second dst row
473*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1           // step two rows at a time
474*c0909341SAndroid Build Coastguard Worker4:
475*c0909341SAndroid Build Coastguard Worker        vld1.u8         {d2},     [r5,  :64]!  // 8 mask bytes
476*c0909341SAndroid Build Coastguard Worker        vld1.u8         {d1},     [r2,  :64]!  // 8 tmp bytes
477*c0909341SAndroid Build Coastguard Worker        vld1.32         {d0[]},   [r0,  :32]   // dst row 0 (replicated to both lanes)
478*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #2
479*c0909341SAndroid Build Coastguard Worker        vld1.32         {d0[1]},  [r12, :32]   // dst row 1 into the upper lane
480*c0909341SAndroid Build Coastguard Worker        vsub.i8         d3,  d22, d2           // 64 - m
481*c0909341SAndroid Build Coastguard Worker        vmull.u8        q8,  d1,  d2           // tmp * m
482*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q8,  d0,  d3           // + dst * (64 - m)
483*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d20, q8,  #6           // rounding shift right by 6, narrow to 8 bit
484*c0909341SAndroid Build Coastguard Worker        vst1.32         {d20[0]}, [r0,  :32], r1
485*c0909341SAndroid Build Coastguard Worker        vst1.32         {d20[1]}, [r12, :32], r1
486*c0909341SAndroid Build Coastguard Worker        bgt             4b
487*c0909341SAndroid Build Coastguard Worker        pop             {r4-r5,pc}
// Width 8: two rows per iteration, one d register per row.
488*c0909341SAndroid Build Coastguard Worker80:
489*c0909341SAndroid Build Coastguard Worker        vmov.i8         d16, #64
490*c0909341SAndroid Build Coastguard Worker        add             r12, r0,  r1
491*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1
492*c0909341SAndroid Build Coastguard Worker8:
493*c0909341SAndroid Build Coastguard Worker        vld1.u8         {q1},  [r5,  :128]!    // mask for both rows (d2, d3)
494*c0909341SAndroid Build Coastguard Worker        vld1.u8         {q2},  [r2,  :128]!    // tmp for both rows (d4, d5)
495*c0909341SAndroid Build Coastguard Worker        vld1.u8         {d0},  [r0,  :64]      // dst row 0
496*c0909341SAndroid Build Coastguard Worker        vsub.i8         d17, d16, d2           // 64 - m, row 0
497*c0909341SAndroid Build Coastguard Worker        vld1.u8         {d1},  [r12, :64]      // dst row 1
498*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #2
499*c0909341SAndroid Build Coastguard Worker        vsub.i8         d18, d16, d3           // 64 - m, row 1
500*c0909341SAndroid Build Coastguard Worker        vmull.u8        q3,  d2,  d4
501*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q3,  d0,  d17
502*c0909341SAndroid Build Coastguard Worker        vmull.u8        q10, d3,  d5
503*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q10, d1,  d18
504*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d22, q3,  #6
505*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d23, q10, #6
506*c0909341SAndroid Build Coastguard Worker        vst1.u8         {d22}, [r0,  :64], r1
507*c0909341SAndroid Build Coastguard Worker        vst1.u8         {d23}, [r12, :64], r1
508*c0909341SAndroid Build Coastguard Worker        bgt             8b
509*c0909341SAndroid Build Coastguard Worker        pop             {r4-r5,pc}
// Width 16: two rows per iteration, one q register per row.
510*c0909341SAndroid Build Coastguard Worker160:
511*c0909341SAndroid Build Coastguard Worker        vmov.i8         q12, #64
512*c0909341SAndroid Build Coastguard Worker        add             r12, r0,  r1
513*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1
514*c0909341SAndroid Build Coastguard Worker16:
515*c0909341SAndroid Build Coastguard Worker        vld1.u8         {q1,  q2},  [r5,  :128]!   // mask: q1 = row 0, q2 = row 1
516*c0909341SAndroid Build Coastguard Worker        vld1.u8         {q8,  q9},  [r2,  :128]!   // tmp:  q8 = row 0, q9 = row 1
517*c0909341SAndroid Build Coastguard Worker        vld1.u8         {q0},  [r0,  :128]         // dst row 0
518*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #2
519*c0909341SAndroid Build Coastguard Worker        vsub.i8         q15, q12, q1               // 64 - m, row 0
520*c0909341SAndroid Build Coastguard Worker        vld1.u8         {q13}, [r12, :128]         // dst row 1
521*c0909341SAndroid Build Coastguard Worker        vmull.u8        q3,  d16, d2
522*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q3,  d0,  d30
523*c0909341SAndroid Build Coastguard Worker        vmull.u8        q14, d17, d3
524*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q14, d1,  d31
525*c0909341SAndroid Build Coastguard Worker        vsub.i8         q15, q12, q2               // 64 - m, row 1 (q15 reused)
526*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d20, q3,  #6
527*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d21, q14, #6
528*c0909341SAndroid Build Coastguard Worker        vmull.u8        q3,  d18, d4
529*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q3,  d26, d30
530*c0909341SAndroid Build Coastguard Worker        vmull.u8        q14, d19, d5
531*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q14, d27, d31
532*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d22, q3,  #6
533*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d23, q14, #6
534*c0909341SAndroid Build Coastguard Worker        vst1.u8         {q10}, [r0,  :128], r1
535*c0909341SAndroid Build Coastguard Worker        vst1.u8         {q11}, [r12, :128], r1
536*c0909341SAndroid Build Coastguard Worker        bgt             16b
537*c0909341SAndroid Build Coastguard Worker        pop             {r4-r5,pc}
// Width 32: one full row per iteration, so r1 is used undoubled and r12 is
// not needed.
538*c0909341SAndroid Build Coastguard Worker320:
539*c0909341SAndroid Build Coastguard Worker        vmov.i8         q10, #64
540*c0909341SAndroid Build Coastguard Worker32:
541*c0909341SAndroid Build Coastguard Worker        vld1.u8         {q2,  q3},  [r5,  :128]!   // mask, 32 bytes
542*c0909341SAndroid Build Coastguard Worker        vld1.u8         {q8,  q9},  [r2,  :128]!   // tmp, 32 bytes
543*c0909341SAndroid Build Coastguard Worker        vld1.u8         {q0,  q1},  [r0,  :128]    // dst, 32 bytes
544*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #1
545*c0909341SAndroid Build Coastguard Worker        vsub.i8         q11, q10, q2               // 64 - m, pixels 0-15
546*c0909341SAndroid Build Coastguard Worker        vmull.u8        q15, d16, d4
547*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q15, d0,  d22
548*c0909341SAndroid Build Coastguard Worker        vmull.u8        q14, d17, d5
549*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q14, d1,  d23
550*c0909341SAndroid Build Coastguard Worker        vsub.i8         q11, q10, q3               // 64 - m, pixels 16-31 (q11 reused)
551*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d24, q15, #6
552*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d25, q14, #6
553*c0909341SAndroid Build Coastguard Worker        vmull.u8        q15, d18, d6
554*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q15, d2,  d22
555*c0909341SAndroid Build Coastguard Worker        vmull.u8        q14, d19, d7
556*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q14, d3,  d23
557*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d26, q15, #6
558*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d27, q14, #6
559*c0909341SAndroid Build Coastguard Worker        vst1.u8         {q12, q13}, [r0,  :128],  r1
560*c0909341SAndroid Build Coastguard Worker        bgt             32b
561*c0909341SAndroid Build Coastguard Worker        pop             {r4-r5,pc}
562*c0909341SAndroid Build Coastguard Workerendfunc
563*c0909341SAndroid Build Coastguard Worker
// blend_h_8bpc_neon: blend a block of 8-bit pixels in place using one mask
// value per row, taken from the obmc_masks table:
//     dst = (tmp * m + dst * (64 - m) + 32) >> 6
//
// Register roles as used below:
//   r0 = dst pointer (read and written), r1 = dst stride in bytes
//   r2 = tmp pointer, read contiguously with post-increment
//   r3 = width; selects a code path (2 ... 128) via clz and is the inner
//        loop count in the wide path
//   r4 = h (first stack argument); replaced by h - (h >> 2), the number of
//        rows actually processed
//   r5 = obmc_masks + h, one mask byte consumed per row
564*c0909341SAndroid Build Coastguard Workerfunction blend_h_8bpc_neon, export=1
565*c0909341SAndroid Build Coastguard Worker        push            {r4-r5,lr}
566*c0909341SAndroid Build Coastguard Worker        ldr             r4,  [sp, #12]         // first stack arg, above the 12 bytes just pushed
567*c0909341SAndroid Build Coastguard Worker        movrel          r5,  X(obmc_masks)
568*c0909341SAndroid Build Coastguard Worker        add             r5,  r5,  r4           // mask row for this height starts at offset h
569*c0909341SAndroid Build Coastguard Worker        sub             r4,  r4,  r4,  lsr #2  // rows to process = h - h/4
570*c0909341SAndroid Build Coastguard Worker        clz             lr,  r3
571*c0909341SAndroid Build Coastguard Worker        adr             r12, L(blend_h_tbl)
572*c0909341SAndroid Build Coastguard Worker        sub             lr,  lr,  #24          // clz(128) - 24 = 0 ... clz(2) - 24 = 6
573*c0909341SAndroid Build Coastguard Worker        ldr             lr,  [r12, lr, lsl #2]
574*c0909341SAndroid Build Coastguard Worker        add             r12, r12, lr
575*c0909341SAndroid Build Coastguard Worker        bx              r12
576*c0909341SAndroid Build Coastguard Worker
577*c0909341SAndroid Build Coastguard Worker        .align 2
// Jump table, ordered from widest to narrowest path; widths 32, 64 and 128
// share one implementation (labels 320/640/1280 below).
578*c0909341SAndroid Build Coastguard WorkerL(blend_h_tbl):
579*c0909341SAndroid Build Coastguard Worker        .word 1280f  - L(blend_h_tbl) + CONFIG_THUMB
580*c0909341SAndroid Build Coastguard Worker        .word 640f   - L(blend_h_tbl) + CONFIG_THUMB
581*c0909341SAndroid Build Coastguard Worker        .word 320f   - L(blend_h_tbl) + CONFIG_THUMB
582*c0909341SAndroid Build Coastguard Worker        .word 160f   - L(blend_h_tbl) + CONFIG_THUMB
583*c0909341SAndroid Build Coastguard Worker        .word 80f    - L(blend_h_tbl) + CONFIG_THUMB
584*c0909341SAndroid Build Coastguard Worker        .word 40f    - L(blend_h_tbl) + CONFIG_THUMB
585*c0909341SAndroid Build Coastguard Worker        .word 20f    - L(blend_h_tbl) + CONFIG_THUMB
586*c0909341SAndroid Build Coastguard Worker
// Width 2: two rows (2 x 2 pixels) per iteration.
587*c0909341SAndroid Build Coastguard Worker20:
588*c0909341SAndroid Build Coastguard Worker        vmov.i8         d22, #64
589*c0909341SAndroid Build Coastguard Worker        add             r12, r0,  r1           // r12 = second dst row
590*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1           // step two rows at a time
591*c0909341SAndroid Build Coastguard Worker2:
592*c0909341SAndroid Build Coastguard Worker        vld1.16         {d2[], d3[]},  [r5,  :16]! // mask bytes m0,m1 replicated in every 16 bit lane
593*c0909341SAndroid Build Coastguard Worker        vld1.32         {d1[]},  [r2,  :32]!   // tmp: 2 rows x 2 pixels
594*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #2
595*c0909341SAndroid Build Coastguard Worker        vld1.16         {d0[]},   [r0,  :16]   // dst row 0
596*c0909341SAndroid Build Coastguard Worker        vzip.8          d2,  d3                // d2 = m0,m0,m1,m1,... to line up with the two rows
597*c0909341SAndroid Build Coastguard Worker        vsub.i8         d4,  d22, d2           // 64 - m
598*c0909341SAndroid Build Coastguard Worker        vld1.16         {d0[1]},  [r12, :16]   // dst row 1
599*c0909341SAndroid Build Coastguard Worker        vmull.u8        q8,  d1,  d2           // tmp * m
600*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q8,  d0,  d4           // + dst * (64 - m)
601*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d20, q8,  #6           // rounding shift right by 6, narrow to 8 bit
602*c0909341SAndroid Build Coastguard Worker        vst1.16         {d20[0]}, [r0,  :16], r1
603*c0909341SAndroid Build Coastguard Worker        vst1.16         {d20[1]}, [r12, :16], r1
604*c0909341SAndroid Build Coastguard Worker        bgt             2b
605*c0909341SAndroid Build Coastguard Worker        pop             {r4-r5,pc}
// Width 4: two rows (2 x 4 pixels) per iteration, packed into one d register.
606*c0909341SAndroid Build Coastguard Worker40:
607*c0909341SAndroid Build Coastguard Worker        vmov.i8         d22, #64
608*c0909341SAndroid Build Coastguard Worker        add             r12, r0,  r1
609*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1
610*c0909341SAndroid Build Coastguard Worker4:
611*c0909341SAndroid Build Coastguard Worker        vld2.u8         {d2[],  d3[]},   [r5,  :16]!   // d2 = m0 in all lanes, d3 = m1 in all lanes
612*c0909341SAndroid Build Coastguard Worker        vld1.u8         {d1},     [r2,  :64]!
613*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #2
614*c0909341SAndroid Build Coastguard Worker        vext.u8         d2,  d2,  d3,   #4     // d2 = m0 x4, m1 x4
615*c0909341SAndroid Build Coastguard Worker        vld1.32         {d0[]},   [r0,  :32]
616*c0909341SAndroid Build Coastguard Worker        vsub.i8         d6,  d22, d2
617*c0909341SAndroid Build Coastguard Worker        vld1.32         {d0[1]},  [r12, :32]
618*c0909341SAndroid Build Coastguard Worker        vmull.u8        q8,  d1,  d2
619*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q8,  d0,  d6
620*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d20, q8,  #6
621*c0909341SAndroid Build Coastguard Worker        vst1.32         {d20[0]}, [r0,  :32], r1
622*c0909341SAndroid Build Coastguard Worker        vst1.32         {d20[1]}, [r12, :32], r1
623*c0909341SAndroid Build Coastguard Worker        bgt             4b
624*c0909341SAndroid Build Coastguard Worker        pop             {r4-r5,pc}
// Width 8: two rows per iteration, one d register per row.
625*c0909341SAndroid Build Coastguard Worker80:
626*c0909341SAndroid Build Coastguard Worker        vmov.i8         q8, #64
627*c0909341SAndroid Build Coastguard Worker        add             r12, r0,  r1
628*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1
629*c0909341SAndroid Build Coastguard Worker8:
630*c0909341SAndroid Build Coastguard Worker        vld2.u8         {d2[],  d3[]},  [r5,  :16]!    // d2 = row 0 mask, d3 = row 1 mask
631*c0909341SAndroid Build Coastguard Worker        vld1.u8         {d4,  d5},  [r2,  :128]!       // tmp: d4 = row 0, d5 = row 1
632*c0909341SAndroid Build Coastguard Worker        vld1.u8         {d0},   [r0,  :64]
633*c0909341SAndroid Build Coastguard Worker        vsub.i8         q9,  q8,  q1           // 64 - m for both rows (d18, d19)
634*c0909341SAndroid Build Coastguard Worker        vld1.u8         {d1},   [r12, :64]
635*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #2
636*c0909341SAndroid Build Coastguard Worker        vmull.u8        q3,  d2,  d4
637*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q3,  d0,  d18
638*c0909341SAndroid Build Coastguard Worker        vmull.u8        q10, d3,  d5
639*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q10, d1,  d19
640*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d22, q3,  #6
641*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d23, q10, #6
642*c0909341SAndroid Build Coastguard Worker        vst1.u8         {d22}, [r0,  :64], r1
643*c0909341SAndroid Build Coastguard Worker        vst1.u8         {d23}, [r12, :64], r1
644*c0909341SAndroid Build Coastguard Worker        bgt             8b
645*c0909341SAndroid Build Coastguard Worker        pop             {r4-r5,pc}
// Width 16: two rows per iteration, one q register per row.
646*c0909341SAndroid Build Coastguard Worker160:
647*c0909341SAndroid Build Coastguard Worker        vmov.i8         q12, #64
648*c0909341SAndroid Build Coastguard Worker        add             r12, r0,  r1
649*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1
650*c0909341SAndroid Build Coastguard Worker16:
651*c0909341SAndroid Build Coastguard Worker        vld2.u8         {d28[], d29[]}, [r5,  :16]!    // d28 = row 0 mask, d29 = row 1 mask
652*c0909341SAndroid Build Coastguard Worker        vld1.u8         {d2,  d3,  d4,  d5},  [r2,  :128]! // tmp: q1 = row 0, q2 = row 1
653*c0909341SAndroid Build Coastguard Worker        vsub.i8         q15, q12, q14          // d30 = 64 - m0, d31 = 64 - m1
654*c0909341SAndroid Build Coastguard Worker        vld1.u8         {q0},  [r0,  :128]     // dst row 0
655*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #2
656*c0909341SAndroid Build Coastguard Worker        vld1.u8         {q13}, [r12, :128]     // dst row 1
657*c0909341SAndroid Build Coastguard Worker        vmull.u8        q3,  d2,  d28
658*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q3,  d0,  d30
659*c0909341SAndroid Build Coastguard Worker        vmull.u8        q8,  d3,  d28
660*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q8,  d1,  d30
661*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d18, q3,  #6
662*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d19, q8,  #6
663*c0909341SAndroid Build Coastguard Worker        vmull.u8        q3,  d4,  d29
664*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q3,  d26, d31
665*c0909341SAndroid Build Coastguard Worker        vmull.u8        q8,  d5,  d29
666*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q8,  d27, d31
667*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d20, q3,  #6
668*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d21, q8,  #6
669*c0909341SAndroid Build Coastguard Worker        vst1.u8         {q9},  [r0,  :128], r1
670*c0909341SAndroid Build Coastguard Worker        vst1.u8         {q10}, [r12, :128], r1
671*c0909341SAndroid Build Coastguard Worker        bgt             16b
672*c0909341SAndroid Build Coastguard Worker        pop             {r4-r5,pc}
// Widths 32, 64 and 128: one row per outer iteration (321), processed in
// chunks of 32 pixels by the inner loop (32) with r12 counting down from w.
673*c0909341SAndroid Build Coastguard Worker320:
674*c0909341SAndroid Build Coastguard Worker640:
675*c0909341SAndroid Build Coastguard Worker1280:
676*c0909341SAndroid Build Coastguard Worker        vmov.i8         d20, #64
677*c0909341SAndroid Build Coastguard Worker        sub             r1,  r1,  r3           // stride - w, since r0 advances by w within a row
678*c0909341SAndroid Build Coastguard Worker321:
679*c0909341SAndroid Build Coastguard Worker        vld1.u8         {d6[]},  [r5]!         // this row's mask value in all lanes
680*c0909341SAndroid Build Coastguard Worker        vsub.i8         d7,  d20, d6           // 64 - m
681*c0909341SAndroid Build Coastguard Worker        mov             r12, r3                // pixels left in this row
682*c0909341SAndroid Build Coastguard Worker32:
683*c0909341SAndroid Build Coastguard Worker        vld1.u8         {q8,  q9},  [r2,  :128]!   // tmp, 32 bytes
684*c0909341SAndroid Build Coastguard Worker        vld1.u8         {q0,  q1},  [r0,  :128]    // dst, 32 bytes
685*c0909341SAndroid Build Coastguard Worker        vmull.u8        q15, d16, d6
686*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q15, d0,  d7
687*c0909341SAndroid Build Coastguard Worker        vmull.u8        q14, d17, d6
688*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q14, d1,  d7
689*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d0,  q15, #6
690*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d1,  q14, #6
691*c0909341SAndroid Build Coastguard Worker        vmull.u8        q15, d18, d6
692*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q15, d2,  d7
693*c0909341SAndroid Build Coastguard Worker        vmull.u8        q14, d19, d6
694*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q14, d3,  d7
695*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d2,  q15, #6
696*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d3,  q14, #6
697*c0909341SAndroid Build Coastguard Worker        subs            r12, r12, #32
698*c0909341SAndroid Build Coastguard Worker        vst1.u8         {q0,  q1},  [r0,  :128]!
699*c0909341SAndroid Build Coastguard Worker        bgt             32b
700*c0909341SAndroid Build Coastguard Worker        add             r0,  r0,  r1           // move to the start of the next dst row
701*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #1
702*c0909341SAndroid Build Coastguard Worker        bgt             321b
703*c0909341SAndroid Build Coastguard Worker        pop             {r4-r5,pc}
704*c0909341SAndroid Build Coastguard Workerendfunc
705*c0909341SAndroid Build Coastguard Worker
706*c0909341SAndroid Build Coastguard Workerfunction blend_v_8bpc_neon, export=1
707*c0909341SAndroid Build Coastguard Worker        push            {r4,lr}
708*c0909341SAndroid Build Coastguard Worker        ldr             r4,  [sp, #8]
709*c0909341SAndroid Build Coastguard Worker        movrel          lr,  X(obmc_masks)
710*c0909341SAndroid Build Coastguard Worker        add             lr,  lr,  r3
711*c0909341SAndroid Build Coastguard Worker        clz             r12, r3
712*c0909341SAndroid Build Coastguard Worker        adr             r3,  L(blend_v_tbl)
713*c0909341SAndroid Build Coastguard Worker        sub             r12, r12, #26
714*c0909341SAndroid Build Coastguard Worker        ldr             r12, [r3, r12, lsl #2]
715*c0909341SAndroid Build Coastguard Worker        add             r3,  r3,  r12
716*c0909341SAndroid Build Coastguard Worker        bx              r3
717*c0909341SAndroid Build Coastguard Worker
718*c0909341SAndroid Build Coastguard Worker        .align 2
719*c0909341SAndroid Build Coastguard WorkerL(blend_v_tbl):
720*c0909341SAndroid Build Coastguard Worker        .word 320f  - L(blend_v_tbl) + CONFIG_THUMB
721*c0909341SAndroid Build Coastguard Worker        .word 160f  - L(blend_v_tbl) + CONFIG_THUMB
722*c0909341SAndroid Build Coastguard Worker        .word 80f   - L(blend_v_tbl) + CONFIG_THUMB
723*c0909341SAndroid Build Coastguard Worker        .word 40f   - L(blend_v_tbl) + CONFIG_THUMB
724*c0909341SAndroid Build Coastguard Worker        .word 20f   - L(blend_v_tbl) + CONFIG_THUMB
725*c0909341SAndroid Build Coastguard Worker
726*c0909341SAndroid Build Coastguard Worker20:
727*c0909341SAndroid Build Coastguard Worker        vmov.i8         d22, #64
728*c0909341SAndroid Build Coastguard Worker        vld1.8          {d2[]},   [lr]
729*c0909341SAndroid Build Coastguard Worker        add             r12, r0,  r1
730*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1
731*c0909341SAndroid Build Coastguard Worker        vsub.i8         d3,  d22, d2
732*c0909341SAndroid Build Coastguard Worker2:
733*c0909341SAndroid Build Coastguard Worker        vld1.16         {d1[0]},  [r2,  :16]!
734*c0909341SAndroid Build Coastguard Worker        vld1.8          {d0[]},   [r0]
735*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #2
736*c0909341SAndroid Build Coastguard Worker        vld1.8          {d1[1]},  [r2]
737*c0909341SAndroid Build Coastguard Worker        vld1.8          {d0[1]},  [r12]
738*c0909341SAndroid Build Coastguard Worker        vmull.u8        q2,  d1,  d2
739*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q2,  d0,  d3
740*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d6,  q2,  #6
741*c0909341SAndroid Build Coastguard Worker        add             r2,  r2,  #2
742*c0909341SAndroid Build Coastguard Worker        vst1.8          {d6[0]},  [r0],  r1
743*c0909341SAndroid Build Coastguard Worker        vst1.8          {d6[1]},  [r12], r1
744*c0909341SAndroid Build Coastguard Worker        bgt             2b
745*c0909341SAndroid Build Coastguard Worker        pop             {r4,pc}
746*c0909341SAndroid Build Coastguard Worker40:
747*c0909341SAndroid Build Coastguard Worker        vmov.i8         d22, #64
748*c0909341SAndroid Build Coastguard Worker        vld1.32         {d4[]},   [lr,  :32]
749*c0909341SAndroid Build Coastguard Worker        add             r12, r0,  r1
750*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1
751*c0909341SAndroid Build Coastguard Worker        vsub.i8         d5,  d22, d4
752*c0909341SAndroid Build Coastguard Worker        sub             r1,  r1,  #2
753*c0909341SAndroid Build Coastguard Worker4:
754*c0909341SAndroid Build Coastguard Worker        vld1.u8         {d2},     [r2,  :64]!
755*c0909341SAndroid Build Coastguard Worker        vld1.32         {d0[]},   [r0,  :32]
756*c0909341SAndroid Build Coastguard Worker        vld1.32         {d0[1]},  [r12, :32]
757*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #2
758*c0909341SAndroid Build Coastguard Worker        vmull.u8        q3,  d2,  d4
759*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q3,  d0,  d5
760*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d20, q3,  #6
761*c0909341SAndroid Build Coastguard Worker        vst1.16         {d20[0]}, [r0,  :16]!
762*c0909341SAndroid Build Coastguard Worker        vst1.16         {d20[2]}, [r12, :16]!
763*c0909341SAndroid Build Coastguard Worker        vst1.8          {d20[2]}, [r0],  r1
764*c0909341SAndroid Build Coastguard Worker        vst1.8          {d20[6]}, [r12], r1
765*c0909341SAndroid Build Coastguard Worker        bgt             4b
766*c0909341SAndroid Build Coastguard Worker        pop             {r4,pc}
767*c0909341SAndroid Build Coastguard Worker80:
768*c0909341SAndroid Build Coastguard Worker        vmov.i8         d16, #64
769*c0909341SAndroid Build Coastguard Worker        vld1.u8         {d2},  [lr,  :64]
770*c0909341SAndroid Build Coastguard Worker        add             r12, r0,  r1
771*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1
772*c0909341SAndroid Build Coastguard Worker        vsub.i8         d17, d16, d2
773*c0909341SAndroid Build Coastguard Worker        sub             r1,  r1,  #4
774*c0909341SAndroid Build Coastguard Worker8:
775*c0909341SAndroid Build Coastguard Worker        vld1.u8         {d4,  d5},  [r2,  :128]!
776*c0909341SAndroid Build Coastguard Worker        vld1.u8         {d0},  [r0,  :64]
777*c0909341SAndroid Build Coastguard Worker        vld1.u8         {d1},  [r12, :64]
778*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #2
779*c0909341SAndroid Build Coastguard Worker        vmull.u8        q3,  d2,  d4
780*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q3,  d0,  d17
781*c0909341SAndroid Build Coastguard Worker        vmull.u8        q10, d2,  d5
782*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q10, d1,  d17
783*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d22, q3,  #6
784*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d23, q10, #6
785*c0909341SAndroid Build Coastguard Worker        vst1.32         {d22[0]}, [r0,  :32]!
786*c0909341SAndroid Build Coastguard Worker        vst1.32         {d23[0]}, [r12, :32]!
787*c0909341SAndroid Build Coastguard Worker        vst1.16         {d22[2]}, [r0,  :16], r1
788*c0909341SAndroid Build Coastguard Worker        vst1.16         {d23[2]}, [r12, :16], r1
789*c0909341SAndroid Build Coastguard Worker        bgt             8b
790*c0909341SAndroid Build Coastguard Worker        pop             {r4,pc}
791*c0909341SAndroid Build Coastguard Worker160:
792*c0909341SAndroid Build Coastguard Worker        vmov.i8         q12, #64
793*c0909341SAndroid Build Coastguard Worker        vld1.u8         {q14}, [lr,  :128]
794*c0909341SAndroid Build Coastguard Worker        add             r12, r0,  r1
795*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1
796*c0909341SAndroid Build Coastguard Worker        vsub.i8         q11, q12, q14
797*c0909341SAndroid Build Coastguard Worker        sub             r1,  r1,  #8
798*c0909341SAndroid Build Coastguard Worker16:
799*c0909341SAndroid Build Coastguard Worker        vld1.u8         {q1,  q2},  [r2,  :128]!
800*c0909341SAndroid Build Coastguard Worker        vld1.u8         {q0},  [r0,  :128]
801*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #2
802*c0909341SAndroid Build Coastguard Worker        vld1.u8         {q13}, [r12, :128]
803*c0909341SAndroid Build Coastguard Worker        vmull.u8        q3,  d2,  d28
804*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q3,  d0,  d22
805*c0909341SAndroid Build Coastguard Worker        vmull.u8        q8,  d3,  d29
806*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q8,  d1,  d23
807*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d18, q3,  #6
808*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d19, q8,  #6
809*c0909341SAndroid Build Coastguard Worker        vmull.u8        q3,  d4,  d28
810*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q3,  d26, d22
811*c0909341SAndroid Build Coastguard Worker        vmull.u8        q8,  d5,  d29
812*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q8,  d27, d23
813*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d20, q3,  #6
814*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d21, q8,  #6
815*c0909341SAndroid Build Coastguard Worker        vst1.u8         {d18},    [r0,  :64]!
816*c0909341SAndroid Build Coastguard Worker        vst1.u8         {d20},    [r12, :64]!
817*c0909341SAndroid Build Coastguard Worker        vst1.32         {d19[0]}, [r0,  :32], r1
818*c0909341SAndroid Build Coastguard Worker        vst1.32         {d21[0]}, [r12, :32], r1
819*c0909341SAndroid Build Coastguard Worker        bgt             16b
820*c0909341SAndroid Build Coastguard Worker        pop             {r4,pc}
821*c0909341SAndroid Build Coastguard Worker320:
822*c0909341SAndroid Build Coastguard Worker        vmov.i8         q10, #64
823*c0909341SAndroid Build Coastguard Worker        vld1.u8         {q2,  q3},  [lr,  :128]
824*c0909341SAndroid Build Coastguard Worker        vsub.i8         q11, q10, q2
825*c0909341SAndroid Build Coastguard Worker        vsub.i8         d24, d20, d6
826*c0909341SAndroid Build Coastguard Worker32:
827*c0909341SAndroid Build Coastguard Worker        vld1.u8         {q8,  q9},  [r2,  :128]!
828*c0909341SAndroid Build Coastguard Worker        vld1.u8         {d0,  d1,  d2},  [r0,  :64]
829*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #1
830*c0909341SAndroid Build Coastguard Worker        vmull.u8        q15, d16, d4
831*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q15, d0,  d22
832*c0909341SAndroid Build Coastguard Worker        vmull.u8        q14, d17, d5
833*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q14, d1,  d23
834*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d0,  q15, #6
835*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d1,  q14, #6
836*c0909341SAndroid Build Coastguard Worker        vmull.u8        q15, d18, d6
837*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q15, d2,  d24
838*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d2,  q15, #6
839*c0909341SAndroid Build Coastguard Worker        vst1.u8         {d0,  d1,  d2},  [r0,  :64],  r1
840*c0909341SAndroid Build Coastguard Worker        bgt             32b
841*c0909341SAndroid Build Coastguard Worker        pop             {r4,pc}
842*c0909341SAndroid Build Coastguard Workerendfunc
843*c0909341SAndroid Build Coastguard Worker
844*c0909341SAndroid Build Coastguard Worker
// put_neon: unfiltered block copy from src to dst, dispatched on block width.
//
// This has got the same signature as the put_8tap functions,
// assumes that the caller has loaded the h argument into r5,
// and assumes that r8 is set to (clz(w)-24).
//
// Registers as used by the code below:
//   r0 = dst pointer          r1 = dst stride in bytes
//   r2 = src pointer          r3 = src stride in bytes
//   r5 = rows left to copy    r8, r9 = scratch (r8 is consumed by the dispatch)
// Every path ends in "pop {r4-r11,pc}", i.e. it returns on behalf of a caller
// that has pushed r4-r11 and lr before branching here.
// Stores to dst carry alignment hints (:16 up to :128); loads from src do not.
// The 2, 4, 8 and 16 byte wide paths copy two rows per iteration, so an odd h
// would make them copy one row too many.
function put_neon
        // Jump-table dispatch: entries are offsets relative to the table base.
        adr             r9,  L(put_tbl)
        ldr             r8,  [r9, r8, lsl #2]
        add             r9,  r9,  r8
        bx              r9

        // Keep the .word entries 4-byte aligned.
        .align 2
L(put_tbl):
        // Index = clz(w)-24, so entry 0 is w=128 and entry 6 is w=2.
        // CONFIG_THUMB is defined elsewhere; presumably it sets bit 0 of the
        // target so that bx stays in Thumb state in Thumb builds.
        .word 1280f - L(put_tbl) + CONFIG_THUMB
        .word 640f  - L(put_tbl) + CONFIG_THUMB
        .word 32f   - L(put_tbl) + CONFIG_THUMB
        .word 160f  - L(put_tbl) + CONFIG_THUMB
        .word 8f    - L(put_tbl) + CONFIG_THUMB
        .word 4f    - L(put_tbl) + CONFIG_THUMB
        .word 2f    - L(put_tbl) + CONFIG_THUMB

        // w = 2: two rows of one 16-bit element per iteration.
2:
        vld1.16         {d0[]}, [r2], r3
        vld1.16         {d1[]}, [r2], r3
        subs            r5,  r5,  #2
        vst1.16         {d0[0]}, [r0, :16], r1
        vst1.16         {d1[0]}, [r0, :16], r1
        bgt             2b
        pop             {r4-r11,pc}
        // w = 4: two rows of one 32-bit element per iteration.
4:
        vld1.32         {d0[]}, [r2], r3
        vld1.32         {d1[]}, [r2], r3
        subs            r5,  r5,  #2
        vst1.32         {d0[0]}, [r0, :32], r1
        vst1.32         {d1[0]}, [r0, :32], r1
        bgt             4b
        pop             {r4-r11,pc}
        // w = 8: two rows of one D register per iteration.
8:
        vld1.8          {d0}, [r2], r3
        vld1.8          {d1}, [r2], r3
        subs            r5,  r5,  #2
        vst1.8          {d0}, [r0, :64], r1
        vst1.8          {d1}, [r0, :64], r1
        bgt             8b
        pop             {r4-r11,pc}
        // w = 16: r8/r9 point at the odd rows of dst/src and both strides are
        // doubled, so even and odd rows advance through independent pointers.
160:
        add             r8,  r0,  r1
        lsl             r1,  r1,  #1
        add             r9,  r2,  r3
        lsl             r3,  r3,  #1
16:
        vld1.8          {q0}, [r2], r3
        vld1.8          {q1}, [r9], r3
        subs            r5,  r5,  #2
        vst1.8          {q0}, [r0, :128], r1
        vst1.8          {q1}, [r8, :128], r1
        bgt             16b
        pop             {r4-r11,pc}
        // w = 32: one 32-byte row per iteration.
32:
        vld1.8          {q0,  q1},  [r2], r3
        subs            r5,  r5,  #1
        vst1.8          {q0,  q1},  [r0, :128], r1
        bgt             32b
        pop             {r4-r11,pc}
        // w = 64: one row per iteration as 2 x 32 bytes. The first access
        // post-increments by 32, so both strides are reduced by 32 up front.
640:
        sub             r1,  r1,  #32
        sub             r3,  r3,  #32
64:
        vld1.8          {q0,  q1},  [r2]!
        vst1.8          {q0,  q1},  [r0, :128]!
        vld1.8          {q2,  q3},  [r2], r3
        subs            r5,  r5,  #1
        vst1.8          {q2,  q3},  [r0, :128], r1
        bgt             64b
        pop             {r4-r11,pc}
        // w = 128: one row per iteration as 4 x 32 bytes. The first three
        // accesses post-increment by 96 in total, hence the stride adjustment.
1280:
        sub             r1,  r1,  #96
        sub             r3,  r3,  #96
128:
        vld1.8          {q8,  q9},  [r2]!
        vst1.8          {q8,  q9},  [r0, :128]!
        vld1.8          {q10, q11}, [r2]!
        vst1.8          {q10, q11}, [r0, :128]!
        vld1.8          {q12, q13}, [r2]!
        vst1.8          {q12, q13}, [r0, :128]!
        vld1.8          {q14, q15}, [r2], r3
        subs            r5,  r5,  #1
        vst1.8          {q14, q15}, [r0, :128], r1
        bgt             128b
        pop             {r4-r11,pc}
endfunc
934*c0909341SAndroid Build Coastguard Worker
935*c0909341SAndroid Build Coastguard Worker
// prep_neon: widen a block of 8-bit samples to 16 bit, scaled by 16
// (vshll.u8 #4), and store the result at r0, dispatched on block width.
//
// This has got the same signature as the put_8tap functions,
// assumes that the caller has loaded the h argument into r4,
// and assumes that r8 is set to (clz(w)-24), and r7 to w*2.
// NOTE(review): the register layout used here (r0 = output, r1 = src,
// r2 = src stride) differs from put_neon, so "put_8tap" above is probably
// meant to read "prep_8tap" - confirm against the callers.
//
// Registers as used by the code below:
//   r0 = output pointer (16-bit elements, no stride argument)
//   r1 = src pointer          r2 = src stride in bytes
//   r4 = rows left            r7 = w*2 = output row size in bytes
//   r3 = only read on the w = 32 path, see the note there
//   r6, r8, r9 = scratch
// Every path ends in "pop {r4-r11,pc}", i.e. it returns on behalf of a caller
// that has pushed r4-r11 and lr before branching here.
// The 4, 8, 16 and 32 wide paths handle two rows per iteration, so an odd h
// would make them process one row too many.
function prep_neon
        // Jump-table dispatch: entries are offsets relative to the table base.
        adr             r9,  L(prep_tbl)
        ldr             r8,  [r9, r8, lsl #2]
        add             r9,  r9,  r8
        bx              r9

        // Keep the .word entries 4-byte aligned.
        .align 2
L(prep_tbl):
        // Index = clz(w)-24, so entry 0 is w=128 and entry 5 is w=4.
        // There is no w=2 entry in this table.
        .word 1280f - L(prep_tbl) + CONFIG_THUMB
        .word 640f  - L(prep_tbl) + CONFIG_THUMB
        .word 320f  - L(prep_tbl) + CONFIG_THUMB
        .word 160f  - L(prep_tbl) + CONFIG_THUMB
        .word 8f    - L(prep_tbl) + CONFIG_THUMB
        .word 4f    - L(prep_tbl) + CONFIG_THUMB

        // w = 4: each row is loaded replicated across a D register. After
        // widening, d1 holds row 0 and d2 holds row 1, so {d1, d2} is the two
        // rows back to back.
4:
        vld1.32         {d0[]}, [r1], r2
        vld1.32         {d2[]}, [r1], r2
        subs            r4,  r4,  #2
        vshll.u8        q0,  d0,  #4
        vshll.u8        q1,  d2,  #4
        vst1.16         {d1, d2}, [r0, :64]!
        bgt             4b
        pop             {r4-r11,pc}
        // w = 8: two rows per iteration, 32 bytes of output.
8:
        vld1.8          {d0}, [r1], r2
        vld1.8          {d2}, [r1], r2
        subs            r4,  r4,  #2
        vshll.u8        q0,  d0,  #4
        vshll.u8        q1,  d2,  #4
        vst1.16         {q0, q1}, [r0, :128]!
        bgt             8b
        pop             {r4-r11,pc}
        // w = 16: r9/r8 point at the odd rows of src/output; the src stride
        // and the output row size are doubled so each pointer skips a row.
160:
        add             r9,  r1,  r2
        lsl             r2,  r2,  #1
        add             r8,  r0,  r7
        lsl             r7,  r7,  #1
16:
        vld1.8          {q2}, [r1], r2
        vld1.8          {q3}, [r9], r2
        subs            r4,  r4,  #2
        // Order matters: q2 and q3 are overwritten in place only after their
        // source halves (d4, d5, then d6) have been consumed.
        vshll.u8        q0,  d4,  #4
        vshll.u8        q1,  d5,  #4
        vshll.u8        q2,  d6,  #4
        vshll.u8        q3,  d7,  #4
        vst1.16         {q0, q1}, [r0, :128], r7
        vst1.16         {q2, q3}, [r8, :128], r7
        bgt             16b
        pop             {r4-r11,pc}
        // w = 32: r0 writes the first 32 bytes of each output row and r8 the
        // second 32 bytes; both step by r7 per row.
        // NOTE(review): r8 = r0 + r3 only lands on the second half of the row
        // if r3 is 32 here, presumably because r3 still holds w - confirm.
320:
        add             r8,  r0,  r3
32:
        vld1.8          {q0,  q1},  [r1], r2
        subs            r4,  r4,  #2
        vshll.u8        q8,  d0,  #4
        vshll.u8        q9,  d1,  #4
        vld1.8          {q2,  q3},  [r1], r2
        vshll.u8        q10, d2,  #4
        vshll.u8        q11, d3,  #4
        vshll.u8        q12, d4,  #4
        vst1.16         {q8,  q9},  [r0, :128], r7
        vshll.u8        q13, d5,  #4
        vst1.16         {q10, q11}, [r8, :128], r7
        vshll.u8        q14, d6,  #4
        vst1.16         {q12, q13}, [r0, :128], r7
        vshll.u8        q15, d7,  #4
        vst1.16         {q14, q15}, [r8, :128], r7
        bgt             32b
        pop             {r4-r11,pc}
        // w = 64: one row per iteration (64 bytes in, 128 bytes out).
        // r0 and r8 = r0 + 32 alternate 32-byte stores, each stepping by
        // r6 = 64, which fills the output contiguously. The src stride is
        // reduced by 32 for the first post-incremented load.
640:
        sub             r2,  r2,  #32
        add             r8,  r0,  #32
        mov             r6,  #64
64:
        vld1.8          {q0,  q1},  [r1]!
        subs            r4,  r4,  #1
        vshll.u8        q8,  d0,  #4
        vshll.u8        q9,  d1,  #4
        vld1.8          {q2,  q3},  [r1], r2
        vshll.u8        q10, d2,  #4
        vshll.u8        q11, d3,  #4
        vshll.u8        q12, d4,  #4
        vst1.16         {q8,  q9},  [r0, :128], r6
        vshll.u8        q13, d5,  #4
        vshll.u8        q14, d6,  #4
        vst1.16         {q10, q11}, [r8, :128], r6
        vshll.u8        q15, d7,  #4
        vst1.16         {q12, q13}, [r0, :128], r6
        vst1.16         {q14, q15}, [r8, :128], r6
        bgt             64b
        pop             {r4-r11,pc}
        // w = 128: one row per iteration (128 bytes in, 256 bytes out), using
        // the same alternating r0/r8 store scheme as the w = 64 path. The src
        // stride is reduced by 96 for the three post-incremented loads.
        // Registers are reused as soon as their contents have been stored or
        // widened, so the instruction order below must be preserved.
1280:
        sub             r2,  r2,  #96
        add             r8,  r0,  #32
        mov             r6,  #64
128:
        vld1.8          {q0,  q1},  [r1]!
        vld1.8          {q2,  q3},  [r1]!
        vshll.u8        q10, d0,  #4
        vshll.u8        q11, d1,  #4
        vshll.u8        q12, d2,  #4
        vshll.u8        q13, d3,  #4
        vshll.u8        q14, d4,  #4
        vshll.u8        q15, d5,  #4
        vld1.8          {q8,  q9},  [r1]!
        vst1.16         {q10, q11}, [r0, :128], r6
        vst1.16         {q12, q13}, [r8, :128], r6
        vshll.u8        q0,  d6,  #4
        vshll.u8        q1,  d7,  #4
        vshll.u8        q2,  d16, #4
        vshll.u8        q3,  d17, #4
        vshll.u8        q8,  d18, #4
        vshll.u8        q9,  d19, #4
        vld1.8          {q10, q11}, [r1], r2
        vst1.16         {q14, q15}, [r0, :128], r6
        vst1.16         {q0,  q1},  [r8, :128], r6
        vshll.u8        q12, d20, #4
        vshll.u8        q13, d21, #4
        vshll.u8        q14, d22, #4
        vshll.u8        q15, d23, #4
        subs            r4,  r4,  #1
        vst1.16         {q2,  q3},  [r0, :128], r6
        vst1.16         {q8,  q9},  [r8, :128], r6
        vst1.16         {q12, q13}, [r0, :128], r6
        vst1.16         {q14, q15}, [r8, :128], r6
        bgt             128b
        pop             {r4-r11,pc}
endfunc
1068*c0909341SAndroid Build Coastguard Worker
1069*c0909341SAndroid Build Coastguard Worker
// load_slice: load one wd-bit element per row, replicated to all lanes of the
// destination register, alternating between the two row pointers s0 and s1.
// Each load post-increments its pointer by strd.
//   s0, s1  row pointers (d0, d2, d4, d6 come from s0; d1, d3, d5 from s1)
//   strd    register added to the pointer after each load
//   wd      element size in bits, pasted into the vld1 mnemonic
//   d0, d1  always loaded
//   d2, d3  loaded as a pair when d2 is given
//   d4..d6  each loaded only when given
.macro load_slice s0, s1, strd, wd, d0, d1, d2, d3, d4, d5, d6
        vld1.\wd        {\d0[]}, [\s0], \strd
        vld1.\wd        {\d1[]}, [\s1], \strd
.ifnb \d2
        vld1.\wd        {\d2[]}, [\s0], \strd
        vld1.\wd        {\d3[]}, [\s1], \strd
.endif
.ifnb \d4
        vld1.\wd        {\d4[]}, [\s0], \strd
.endif
.ifnb \d5
        vld1.\wd        {\d5[]}, [\s1], \strd
.endif
.ifnb \d6
        vld1.\wd        {\d6[]}, [\s0], \strd
.endif
.endm
// load_reg: load whole registers, one per row, alternating between the two
// row pointers s0 and s1. Each load post-increments its pointer by strd.
//   s0, s1  row pointers (d0, d2, d4, d6 come from s0; d1, d3, d5 from s1)
//   strd    register added to the pointer after each load
//   d0, d1  always loaded
//   d2, d3  loaded as a pair when d2 is given
//   d4..d6  each loaded only when given
// The register width (D or Q) is whatever the caller passes in.
.macro load_reg s0, s1, strd, d0, d1, d2, d3, d4, d5, d6
        vld1.8          {\d0}, [\s0], \strd
        vld1.8          {\d1}, [\s1], \strd
.ifnb \d2
        vld1.8          {\d2}, [\s0], \strd
        vld1.8          {\d3}, [\s1], \strd
.endif
.ifnb \d4
        vld1.8          {\d4}, [\s0], \strd
.endif
.ifnb \d5
        vld1.8          {\d5}, [\s1], \strd
.endif
.ifnb \d6
        vld1.8          {\d6}, [\s0], \strd
.endif
.endm
// load_16: load_slice with a fixed element size of 16 bits (one 16-bit
// element per row, replicated to all lanes). Arguments are forwarded as is.
.macro load_16 s0, s1, strd, d0, d1, d2, d3, d4, d5, d6
        load_slice      \s0, \s1, \strd, 16, \d0, \d1, \d2, \d3, \d4, \d5, \d6
.endm
// load_32: load_slice with a fixed element size of 32 bits (one 32-bit
// element per row, replicated to all lanes). Arguments are forwarded as is.
.macro load_32 s0, s1, strd, d0, d1, d2, d3, d4, d5, d6
        load_slice      \s0, \s1, \strd, 32, \d0, \d1, \d2, \d3, \d4, \d5, \d6
.endm
// interleave_1_16: replace each register with a byte extract, at offset 6,
// of itself concatenated with the following register. With 64-bit D
// operands, rN becomes its own top 2 bytes followed by the low 6 bytes of
// the next register.
//   r0, r1  always updated; r2 is then only read
//   r2, r3  also updated when r3 is given; r4 must then be given too and is
//           only read
.macro interleave_1_16 r0, r1, r2, r3, r4
        vext.8          \r0, \r0, \r1, #6
        vext.8          \r1, \r1, \r2, #6
.ifnb \r3
        vext.8          \r2, \r2, \r3, #6
        vext.8          \r3, \r3, \r4, #6
.endif
.endm
// interleave_1_32: replace each register with a byte extract, at offset 4,
// of itself concatenated with the following register. With 64-bit D
// operands, rN becomes its own top 4 bytes followed by the low 4 bytes of
// the next register.
//   r0, r1  always updated; r2 is then only read
//   r2, r3  also updated when r3 is given; r4 must then be given too and is
//           only read
.macro interleave_1_32 r0, r1, r2, r3, r4
        vext.8          \r0, \r0, \r1, #4
        vext.8          \r1, \r1, \r2, #4
.ifnb \r3
        vext.8          \r2, \r2, \r3, #4
        vext.8          \r3, \r3, \r4, #4
.endif
.endm
// vmovl_u8: zero-extend up to seven vectors of unsigned 8-bit lanes to
// 16-bit lanes. Arguments come in (destination, source) pairs qN, dN.
//   q0/d0, q1/d1  always converted
//   q2/d2, q3/d3  converted as a pair when q2 is given
//   q4..q6        each converted only when given
.macro vmovl_u8 q0, d0, q1, d1, q2, d2, q3, d3, q4, d4, q5, d5, q6, d6
        vmovl.u8        \q0, \d0
        vmovl.u8        \q1, \d1
.ifnb \q2
        vmovl.u8        \q2, \d2
        vmovl.u8        \q3, \d3
.endif
.ifnb \q4
        vmovl.u8        \q4, \d4
.endif
.ifnb \q5
        vmovl.u8        \q5, \d5
.endif
.ifnb \q6
        vmovl.u8        \q6, \d6
.endif
.endm
// mul_mla_4: 4-tap multiply-accumulate on 16-bit lanes:
//   d = s0*c0 + s1*c1 + s2*c2 + s3*c3
// The coefficients c0..c3 are read from lanes 0..3 of NEON register d0
// (the literal register, not a macro argument), so the destination must not
// overlap it. The arithmetic is non-saturating.
.macro mul_mla_4 d, s0, s1, s2, s3
        vmul.s16        \d,  \s0,  d0[0]
        vmla.s16        \d,  \s1,  d0[1]
        vmla.s16        \d,  \s2,  d0[2]
        vmla.s16        \d,  \s3,  d0[3]
.endm
// mul_mla_8_0: 8-tap multiply-accumulate on 16-bit lanes, one output:
//   out = s0*c0 + s1*c1 + ... + s7*c7
// The coefficients c0..c3 are lanes 0..3 of NEON register d0 and c4..c7 are
// lanes 0..3 of NEON register d1. Note the naming: the macro argument d0 is
// the output register, while the unescaped d0[n] and d1[n] operands below are
// the literal coefficient registers. The output must not overlap them.
// The arithmetic is non-saturating.
.macro mul_mla_8_0 d0, s0, s1, s2, s3, s4, s5, s6, s7
        vmul.s16        \d0, \s0,  d0[0]
        vmla.s16        \d0, \s1,  d0[1]
        vmla.s16        \d0, \s2,  d0[2]
        vmla.s16        \d0, \s3,  d0[3]
        vmla.s16        \d0, \s4,  d1[0]
        vmla.s16        \d0, \s5,  d1[1]
        vmla.s16        \d0, \s6,  d1[2]
        vmla.s16        \d0, \s7,  d1[3]
.endm
// mul_mla_8_1: 8-tap multiply-accumulate on 16-bit lanes, two outputs whose
// input windows are one source register apart:
//   first output  = s0*c0 + s1*c1 + ... + s7*c7
//   second output = s1*c0 + s2*c1 + ... + s8*c7
// The coefficients c0..c3 are lanes 0..3 of NEON register d0 and c4..c7 are
// lanes 0..3 of NEON register d1. Note the naming: the macro arguments d0 and
// d1 are the output registers, while the unescaped d0[n] and d1[n] operands
// below are the literal coefficient registers. The outputs must not overlap
// them, and the first output must not overlap s1..s8, which are read again
// for the second output. The arithmetic is non-saturating.
.macro mul_mla_8_1 d0, d1, s0, s1, s2, s3, s4, s5, s6, s7, s8
        vmul.s16        \d0, \s0, d0[0]
        vmla.s16        \d0, \s1, d0[1]
        vmla.s16        \d0, \s2, d0[2]
        vmla.s16        \d0, \s3, d0[3]
        vmla.s16        \d0, \s4, d1[0]
        vmla.s16        \d0, \s5, d1[1]
        vmla.s16        \d0, \s6, d1[2]
        vmla.s16        \d0, \s7, d1[3]
        vmul.s16        \d1, \s1, d0[0]
        vmla.s16        \d1, \s2, d0[1]
        vmla.s16        \d1, \s3, d0[2]
        vmla.s16        \d1, \s4, d0[3]
        vmla.s16        \d1, \s5, d1[0]
        vmla.s16        \d1, \s6, d1[1]
        vmla.s16        \d1, \s7, d1[2]
        vmla.s16        \d1, \s8, d1[3]
.endm
// mul_mla_8_2: 8-tap multiply-accumulate on 16-bit lanes, two outputs whose
// input windows are two source registers apart:
//   first output  = s0*c0 + s1*c1 + ... + s7*c7
//   second output = s2*c0 + s3*c1 + ... + s9*c7
// The coefficients c0..c3 are lanes 0..3 of NEON register d0 and c4..c7 are
// lanes 0..3 of NEON register d1. Note the naming: the macro arguments d0 and
// d1 are the output registers, while the unescaped d0[n] and d1[n] operands
// below are the literal coefficient registers. The outputs must not overlap
// them, and the first output must not overlap s2..s9, which are read again
// for the second output. The arithmetic is non-saturating.
.macro mul_mla_8_2 d0, d1, s0, s1, s2, s3, s4, s5, s6, s7, s8, s9
        vmul.s16        \d0, \s0, d0[0]
        vmla.s16        \d0, \s1, d0[1]
        vmla.s16        \d0, \s2, d0[2]
        vmla.s16        \d0, \s3, d0[3]
        vmla.s16        \d0, \s4, d1[0]
        vmla.s16        \d0, \s5, d1[1]
        vmla.s16        \d0, \s6, d1[2]
        vmla.s16        \d0, \s7, d1[3]
        vmul.s16        \d1, \s2, d0[0]
        vmla.s16        \d1, \s3, d0[1]
        vmla.s16        \d1, \s4, d0[2]
        vmla.s16        \d1, \s5, d0[3]
        vmla.s16        \d1, \s6, d1[0]
        vmla.s16        \d1, \s7, d1[1]
        vmla.s16        \d1, \s8, d1[2]
        vmla.s16        \d1, \s9, d1[3]
.endm
// vqrshrun_s16: apply vqrshrun.s16 (saturating rounding shift right with
// unsigned narrowing) by an immediate shift to one, two or four (q, d) pairs.
//   shift           immediate shift amount
//   q0, d0          first source/destination pair, always processed
//   q1, d1          optional second pair, processed when q1 is non-blank
//   q2, d2, q3, d3  optional third and fourth pairs; both are emitted under
//                   the single test on q2, so they must be supplied together
1195*c0909341SAndroid Build Coastguard Worker.macro vqrshrun_s16 shift, q0, d0, q1, d1, q2, d2, q3, d3
1196*c0909341SAndroid Build Coastguard Worker        vqrshrun.s16    \d0, \q0, #\shift
1197*c0909341SAndroid Build Coastguard Worker.ifnb \q1
1198*c0909341SAndroid Build Coastguard Worker        vqrshrun.s16    \d1, \q1, #\shift
1199*c0909341SAndroid Build Coastguard Worker.endif
1200*c0909341SAndroid Build Coastguard Worker.ifnb \q2
1201*c0909341SAndroid Build Coastguard Worker        vqrshrun.s16    \d2, \q2, #\shift
1202*c0909341SAndroid Build Coastguard Worker        vqrshrun.s16    \d3, \q3, #\shift
1203*c0909341SAndroid Build Coastguard Worker.endif
1204*c0909341SAndroid Build Coastguard Worker.endm
// vrshr_s16: in-place rounding shift right (vrshr.s16) by an immediate shift
// on one, two or four registers.
//   shift   immediate shift amount
//   r0      always shifted
//   r1      optional, shifted when non-blank
//   r2, r3  optional; both are emitted under the single test on r2, so they
//           must be supplied together
// The r0..r3 names are macro arguments (vector registers chosen by the
// caller), not the core registers r0..r3.
1205*c0909341SAndroid Build Coastguard Worker.macro vrshr_s16 shift, r0, r1, r2, r3
1206*c0909341SAndroid Build Coastguard Worker        vrshr.s16       \r0, \r0, #\shift
1207*c0909341SAndroid Build Coastguard Worker.ifnb \r1
1208*c0909341SAndroid Build Coastguard Worker        vrshr.s16       \r1, \r1, #\shift
1209*c0909341SAndroid Build Coastguard Worker.endif
1210*c0909341SAndroid Build Coastguard Worker.ifnb \r2
1211*c0909341SAndroid Build Coastguard Worker        vrshr.s16       \r2, \r2, #\shift
1212*c0909341SAndroid Build Coastguard Worker        vrshr.s16       \r3, \r3, #\shift
1213*c0909341SAndroid Build Coastguard Worker.endif
1214*c0909341SAndroid Build Coastguard Worker.endm
// st_16: store 16-bit lanes of reg, alternating between the two output
// pointers held in core registers r0 and r8 (hard-coded here, not macro
// arguments). Each store post-increments its pointer by strd.
//   strd   core register added to the pointer after each store
//   reg    d register holding the lanes to store
//   lanes  lanes 0 and 1 are always stored; lanes 2 and 3 only when lanes > 2
// Store order: lane 0 -> [r0], lane 1 -> [r8], lane 2 -> [r0], lane 3 -> [r8].
// NOTE(review): r0/r8 are presumably the two interleaved destination row
// pointers of the calling filter function - confirm against the callers.
1215*c0909341SAndroid Build Coastguard Worker.macro st_16 strd, reg, lanes
1216*c0909341SAndroid Build Coastguard Worker        vst1.16         {\reg[0]}, [r0, :16], \strd
1217*c0909341SAndroid Build Coastguard Worker        vst1.16         {\reg[1]}, [r8, :16], \strd
1218*c0909341SAndroid Build Coastguard Worker.if \lanes > 2
1219*c0909341SAndroid Build Coastguard Worker        vst1.16         {\reg[2]}, [r0, :16], \strd
1220*c0909341SAndroid Build Coastguard Worker        vst1.16         {\reg[3]}, [r8, :16], \strd
1221*c0909341SAndroid Build Coastguard Worker.endif
1222*c0909341SAndroid Build Coastguard Worker.endm
// st_32: store the two 32-bit lanes of one or two d registers, alternating
// between the output pointers in core registers r0 and r8. Each store
// post-increments its pointer by strd.
//   strd  core register added to the pointer after each store
//   r0    (macro argument) first d register, always stored
//   r1    (macro argument) optional second d register, stored when non-blank
// Beware the naming: the r0 macro argument is the vector register being
// stored, while the literal r0 inside the brackets is the core register used
// as the address.
1223*c0909341SAndroid Build Coastguard Worker.macro st_32 strd, r0, r1
1224*c0909341SAndroid Build Coastguard Worker        vst1.32         {\r0[0]}, [r0, :32], \strd
1225*c0909341SAndroid Build Coastguard Worker        vst1.32         {\r0[1]}, [r8, :32], \strd
1226*c0909341SAndroid Build Coastguard Worker.ifnb \r1
1227*c0909341SAndroid Build Coastguard Worker        vst1.32         {\r1[0]}, [r0, :32], \strd
1228*c0909341SAndroid Build Coastguard Worker        vst1.32         {\r1[1]}, [r8, :32], \strd
1229*c0909341SAndroid Build Coastguard Worker.endif
1230*c0909341SAndroid Build Coastguard Worker.endm
// st_reg: store two, four or eight whole vector registers, alternating
// between the output pointers in core registers r0 and r8. Each store
// post-increments its pointer by strd.
//   strd    core register added to the pointer after each store
//   align   alignment qualifier placed in the address (e.g. :64 or :128)
//   r0, r1  (macro arguments) always stored: r0 -> [r0], r1 -> [r8]
//   r2, r3  optional, stored when r2 is non-blank
//   r4..r7  optional, stored when r4 is non-blank (all four together)
// Beware the naming: the r0..r7 macro arguments are vector registers, while
// the literal r0 and r8 inside the brackets are the core address registers.
1231*c0909341SAndroid Build Coastguard Worker.macro st_reg strd, align, r0, r1, r2, r3, r4, r5, r6, r7
1232*c0909341SAndroid Build Coastguard Worker        vst1.8          {\r0}, [r0, \align], \strd
1233*c0909341SAndroid Build Coastguard Worker        vst1.8          {\r1}, [r8, \align], \strd
1234*c0909341SAndroid Build Coastguard Worker.ifnb \r2
1235*c0909341SAndroid Build Coastguard Worker        vst1.8          {\r2}, [r0, \align], \strd
1236*c0909341SAndroid Build Coastguard Worker        vst1.8          {\r3}, [r8, \align], \strd
1237*c0909341SAndroid Build Coastguard Worker.endif
1238*c0909341SAndroid Build Coastguard Worker.ifnb \r4
1239*c0909341SAndroid Build Coastguard Worker        vst1.8          {\r4}, [r0, \align], \strd
1240*c0909341SAndroid Build Coastguard Worker        vst1.8          {\r5}, [r8, \align], \strd
1241*c0909341SAndroid Build Coastguard Worker        vst1.8          {\r6}, [r0, \align], \strd
1242*c0909341SAndroid Build Coastguard Worker        vst1.8          {\r7}, [r8, \align], \strd
1243*c0909341SAndroid Build Coastguard Worker.endif
1244*c0909341SAndroid Build Coastguard Worker.endm
// shift_store_4: final shift and store for two q registers of 16-bit results.
//   type = put:  narrow q0 into d0 and q1 into d2 with vqrshrun by 6, then
//                store the four 32-bit lanes d0[0], d0[1], d2[0], d2[1]
//                through st_32 (the d1 and d3 arguments are unused here).
//   otherwise:   rounding shift right by 2 in place, then store d0, d1, d2,
//                d3 as 64-bit rows through st_reg.
// Stores go alternately to [r0] and [r8], post-incremented by strd (see
// st_32 / st_reg).
// NOTE(review): the non-put branch stores d0..d3 after shifting q0/q1, so it
// presumably expects d0,d1 to be the halves of q0 and d2,d3 the halves of q1
// - confirm against the callers.
1245*c0909341SAndroid Build Coastguard Worker.macro shift_store_4 type, strd, q0, d0, d1, q1, d2, d3
1246*c0909341SAndroid Build Coastguard Worker.ifc \type, put
1247*c0909341SAndroid Build Coastguard Worker        vqrshrun_s16    6,     \q0, \d0, \q1, \d2
1248*c0909341SAndroid Build Coastguard Worker        st_32           \strd, \d0, \d2
1249*c0909341SAndroid Build Coastguard Worker.else
1250*c0909341SAndroid Build Coastguard Worker        vrshr_s16       2,          \q0, \q1
1251*c0909341SAndroid Build Coastguard Worker        st_reg          \strd, :64, \d0, \d1, \d2, \d3
1252*c0909341SAndroid Build Coastguard Worker.endif
1253*c0909341SAndroid Build Coastguard Worker.endm
// shift_store_8: final shift and store for up to four q registers of 16-bit
// results.
//   type = put:  narrow each q register into its paired d register with
//                vqrshrun by 6, then store d0..d3 with :64 alignment.
//   otherwise:   rounding shift right by 2 in place, then store q0..q3 with
//                :128 alignment.
// The (q1, d1) and (q2, d2, q3, d3) groups may be left blank; the helper
// macros skip blank groups. Stores go alternately to [r0] and [r8],
// post-incremented by strd (see st_reg).
1254*c0909341SAndroid Build Coastguard Worker.macro shift_store_8 type, strd, q0, d0, q1, d1, q2, d2, q3, d3
1255*c0909341SAndroid Build Coastguard Worker.ifc \type, put
1256*c0909341SAndroid Build Coastguard Worker        vqrshrun_s16    6,          \q0, \d0, \q1, \d1, \q2, \d2, \q3, \d3
1257*c0909341SAndroid Build Coastguard Worker        st_reg          \strd, :64, \d0, \d1, \d2, \d3
1258*c0909341SAndroid Build Coastguard Worker.else
1259*c0909341SAndroid Build Coastguard Worker        vrshr_s16       2,          \q0, \q1, \q2, \q3
1260*c0909341SAndroid Build Coastguard Worker        st_reg          \strd, :128,\q0, \q1, \q2, \q3
1261*c0909341SAndroid Build Coastguard Worker.endif
1262*c0909341SAndroid Build Coastguard Worker.endm
// shift_store_16: final shift and store for four q registers of 16-bit
// results forming two output rows.
//   type = put:  narrow q0 -> d0, q1 -> d1, q2 -> d4, q3 -> d5 with vqrshrun
//                by 6, then store q0 to [r0] and q2 to [r8] (:128) via st_reg.
//   otherwise:   rounding shift right by 2 in place, then store the pair
//                {q0, q1} to [r0] and {q2, q3} to [r8].
// Both pointers are post-incremented by strd.
// NOTE(review): the put branch stores q0 and q2 after writing d0/d1 and
// d4/d5, so it presumably expects d0,d1 to be the halves of q0 and d4,d5 the
// halves of q2 - confirm against the callers.
1263*c0909341SAndroid Build Coastguard Worker.macro shift_store_16 type, strd, q0, d0, d1, q1, q2, d4, d5, q3
1264*c0909341SAndroid Build Coastguard Worker.ifc \type, put
1265*c0909341SAndroid Build Coastguard Worker        vqrshrun.s16    \d0,   \q0, #6
1266*c0909341SAndroid Build Coastguard Worker        vqrshrun.s16    \d1,   \q1, #6
1267*c0909341SAndroid Build Coastguard Worker        vqrshrun.s16    \d4,   \q2, #6
1268*c0909341SAndroid Build Coastguard Worker        vqrshrun.s16    \d5,   \q3, #6
1269*c0909341SAndroid Build Coastguard Worker        st_reg          \strd, :128, \q0, \q2
1270*c0909341SAndroid Build Coastguard Worker.else
1271*c0909341SAndroid Build Coastguard Worker        vrshr_s16       2,     \q0, \q1, \q2, \q3
1272*c0909341SAndroid Build Coastguard Worker        vst1.16         {\q0, \q1}, [r0, :128], \strd
1273*c0909341SAndroid Build Coastguard Worker        vst1.16         {\q2, \q3}, [r8, :128], \strd
1274*c0909341SAndroid Build Coastguard Worker.endif
1275*c0909341SAndroid Build Coastguard Worker.endm
1276*c0909341SAndroid Build Coastguard Worker
// make_8tap_fn: emit one exported entry point named
// <op>_8tap_<type>_8bpc_neon that selects a horizontal/vertical filter type
// pair and jumps to the shared <op>_8tap_neon body.
//   op      function family prefix (the instantiations below pass the
//           filter_fn type argument)
//   type    filter-combination name used in the symbol (regular, sharp, ...)
//   type_h  constant loaded into r8 (horizontal filter type)
//   type_v  constant loaded into r9 (vertical filter type)
// The entry point pushes r4-r11 and lr (nine words, 36 bytes) and does not
// return itself: it branches to the shared body, which reads its stack
// arguments starting at sp+36 and returns with pop {r4-r11,pc}.
// NOTE(review): function/endfunc are helper macros defined outside this
// section - see their definition for the exact symbol decoration.
1277*c0909341SAndroid Build Coastguard Worker.macro make_8tap_fn op, type, type_h, type_v
1278*c0909341SAndroid Build Coastguard Workerfunction \op\()_8tap_\type\()_8bpc_neon, export=1
1279*c0909341SAndroid Build Coastguard Worker        push            {r4-r11,lr}
1280*c0909341SAndroid Build Coastguard Worker        movw            r8,  \type_h
1281*c0909341SAndroid Build Coastguard Worker        movw            r9,  \type_v
1282*c0909341SAndroid Build Coastguard Worker        b               \op\()_8tap_neon
1283*c0909341SAndroid Build Coastguard Workerendfunc
1284*c0909341SAndroid Build Coastguard Worker.endm
1285*c0909341SAndroid Build Coastguard Worker
// Filter type constants passed to make_8tap_fn as type_h/type_v and loaded
// into r8/r9. Each packs two 7-bit offsets that the shared 8tap body adds to
// the replicated mx/my value:
//   bits 7..13: offset used when the block dimension is greater than 4
//   bits 0..6:  offset used when the block dimension is 4 or less
// (the _8tap_h and _8tap_v paths pick one field with ubfx/and/movgt).
// REGULAR and SHARP share the same low field (3*15); SMOOTH uses 4*15.
// NOTE(review): the factor 15 is presumably the number of entries per filter
// set in mc_subpel_filters - confirm against the table definition.
1286*c0909341SAndroid Build Coastguard Worker// No spaces in these expressions, due to gas-preprocessor.
1287*c0909341SAndroid Build Coastguard Worker#define REGULAR ((0*15<<7)|3*15)
1288*c0909341SAndroid Build Coastguard Worker#define SMOOTH  ((1*15<<7)|4*15)
1289*c0909341SAndroid Build Coastguard Worker#define SHARP   ((2*15<<7)|3*15)
1290*c0909341SAndroid Build Coastguard Worker
1291*c0909341SAndroid Build Coastguard Worker.macro filter_fn type, dst, d_strd, src, s_strd, w, h, mx, my, ds2, sr2, shift_hv
1292*c0909341SAndroid Build Coastguard Workermake_8tap_fn \type, regular,        REGULAR, REGULAR
1293*c0909341SAndroid Build Coastguard Workermake_8tap_fn \type, regular_smooth, REGULAR, SMOOTH
1294*c0909341SAndroid Build Coastguard Workermake_8tap_fn \type, regular_sharp,  REGULAR, SHARP
1295*c0909341SAndroid Build Coastguard Workermake_8tap_fn \type, smooth,         SMOOTH,  SMOOTH
1296*c0909341SAndroid Build Coastguard Workermake_8tap_fn \type, smooth_regular, SMOOTH,  REGULAR
1297*c0909341SAndroid Build Coastguard Workermake_8tap_fn \type, smooth_sharp,   SMOOTH,  SHARP
1298*c0909341SAndroid Build Coastguard Workermake_8tap_fn \type, sharp,          SHARP,   SHARP
1299*c0909341SAndroid Build Coastguard Workermake_8tap_fn \type, sharp_regular,  SHARP,   REGULAR
1300*c0909341SAndroid Build Coastguard Workermake_8tap_fn \type, sharp_smooth,   SHARP,   SMOOTH
1301*c0909341SAndroid Build Coastguard Worker
1302*c0909341SAndroid Build Coastguard Workerfunction \type\()_8tap_neon
1303*c0909341SAndroid Build Coastguard Worker        ldrd            r4,  r5,  [sp, #36]
1304*c0909341SAndroid Build Coastguard Worker        ldrd            r6,  r7,  [sp, #44]
1305*c0909341SAndroid Build Coastguard Worker        movw            r10,  #0x4081  // (1 << 14) | (1 << 7) | (1 << 0)
1306*c0909341SAndroid Build Coastguard Worker        mul             \mx,  \mx, r10
1307*c0909341SAndroid Build Coastguard Worker        mul             \my,  \my, r10
1308*c0909341SAndroid Build Coastguard Worker        add             \mx,  \mx, r8 // mx, 8tap_h, 4tap_h
1309*c0909341SAndroid Build Coastguard Worker        add             \my,  \my, r9 // my, 8tap_v, 4tap_v
1310*c0909341SAndroid Build Coastguard Worker.ifc \type, prep
1311*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd, \w, #1
1312*c0909341SAndroid Build Coastguard Worker.endif
1313*c0909341SAndroid Build Coastguard Worker
1314*c0909341SAndroid Build Coastguard Worker        clz             r8,  \w
1315*c0909341SAndroid Build Coastguard Worker        tst             \mx, #(0x7f << 14)
1316*c0909341SAndroid Build Coastguard Worker        sub             r8,  r8,  #24
1317*c0909341SAndroid Build Coastguard Worker        movrel          r10, X(mc_subpel_filters), -8
1318*c0909341SAndroid Build Coastguard Worker        bne             L(\type\()_8tap_h)
1319*c0909341SAndroid Build Coastguard Worker        tst             \my, #(0x7f << 14)
1320*c0909341SAndroid Build Coastguard Worker        bne             L(\type\()_8tap_v)
1321*c0909341SAndroid Build Coastguard Worker        b               \type\()_neon
1322*c0909341SAndroid Build Coastguard Worker
1323*c0909341SAndroid Build Coastguard WorkerL(\type\()_8tap_h):
1324*c0909341SAndroid Build Coastguard Worker        cmp             \w,  #4
1325*c0909341SAndroid Build Coastguard Worker        ubfx            r9,  \mx, #7, #7
1326*c0909341SAndroid Build Coastguard Worker        and             \mx, \mx, #0x7f
1327*c0909341SAndroid Build Coastguard Worker        it              gt
1328*c0909341SAndroid Build Coastguard Worker        movgt           \mx,  r9
1329*c0909341SAndroid Build Coastguard Worker        tst             \my,  #(0x7f << 14)
1330*c0909341SAndroid Build Coastguard Worker        add             \mx, r10, \mx, lsl #3
1331*c0909341SAndroid Build Coastguard Worker        bne             L(\type\()_8tap_hv)
1332*c0909341SAndroid Build Coastguard Worker
1333*c0909341SAndroid Build Coastguard Worker        adr             r9,  L(\type\()_8tap_h_tbl)
1334*c0909341SAndroid Build Coastguard Worker        ldr             r8,  [r9, r8, lsl #2]
1335*c0909341SAndroid Build Coastguard Worker        add             r9,  r9,  r8
1336*c0909341SAndroid Build Coastguard Worker        bx              r9
1337*c0909341SAndroid Build Coastguard Worker
1338*c0909341SAndroid Build Coastguard Worker        .align 2
1339*c0909341SAndroid Build Coastguard WorkerL(\type\()_8tap_h_tbl):
1340*c0909341SAndroid Build Coastguard Worker        .word 1280f - L(\type\()_8tap_h_tbl) + CONFIG_THUMB
1341*c0909341SAndroid Build Coastguard Worker        .word 640f  - L(\type\()_8tap_h_tbl) + CONFIG_THUMB
1342*c0909341SAndroid Build Coastguard Worker        .word 320f  - L(\type\()_8tap_h_tbl) + CONFIG_THUMB
1343*c0909341SAndroid Build Coastguard Worker        .word 160f  - L(\type\()_8tap_h_tbl) + CONFIG_THUMB
1344*c0909341SAndroid Build Coastguard Worker        .word 80f   - L(\type\()_8tap_h_tbl) + CONFIG_THUMB
1345*c0909341SAndroid Build Coastguard Worker        .word 40f   - L(\type\()_8tap_h_tbl) + CONFIG_THUMB
1346*c0909341SAndroid Build Coastguard Worker        .word 20f   - L(\type\()_8tap_h_tbl) + CONFIG_THUMB
1347*c0909341SAndroid Build Coastguard Worker
1348*c0909341SAndroid Build Coastguard Worker20:     // 2xN h
1349*c0909341SAndroid Build Coastguard Worker.ifc \type, put
1350*c0909341SAndroid Build Coastguard Worker        add             \mx,  \mx,  #2
1351*c0909341SAndroid Build Coastguard Worker        vld1.32         {d0[]}, [\mx]
1352*c0909341SAndroid Build Coastguard Worker        sub             \src,  \src,  #1
1353*c0909341SAndroid Build Coastguard Worker        add             \ds2,  \dst,  \d_strd
1354*c0909341SAndroid Build Coastguard Worker        add             \sr2,  \src,  \s_strd
1355*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd,  \d_strd,  #1
1356*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd,  \s_strd,  #1
1357*c0909341SAndroid Build Coastguard Worker        vmovl.s8        q0,  d0
1358*c0909341SAndroid Build Coastguard Worker2:
1359*c0909341SAndroid Build Coastguard Worker        vld1.8          {d4},  [\src], \s_strd
1360*c0909341SAndroid Build Coastguard Worker        vld1.8          {d6},  [\sr2], \s_strd
1361*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q2,  d4
1362*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q3,  d6
1363*c0909341SAndroid Build Coastguard Worker        vext.8          d5,  d4,  d5,  #2
1364*c0909341SAndroid Build Coastguard Worker        vext.8          d7,  d6,  d7,  #2
1365*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2
1366*c0909341SAndroid Build Coastguard Worker        vtrn.32         d4,  d6
1367*c0909341SAndroid Build Coastguard Worker        vtrn.32         d5,  d7
1368*c0909341SAndroid Build Coastguard Worker        vmul.s16        d2,  d4,  d0[0]
1369*c0909341SAndroid Build Coastguard Worker        vmla.s16        d2,  d5,  d0[1]
1370*c0909341SAndroid Build Coastguard Worker        vmla.s16        d2,  d6,  d0[2]
1371*c0909341SAndroid Build Coastguard Worker        vmla.s16        d2,  d7,  d0[3]
1372*c0909341SAndroid Build Coastguard Worker        vrshr.s16       d2,  d2,  #2
1373*c0909341SAndroid Build Coastguard Worker        vqrshrun.s16    d2,  q1,  #4
1374*c0909341SAndroid Build Coastguard Worker        vst1.16         {d2[0]}, [\dst, :16], \d_strd
1375*c0909341SAndroid Build Coastguard Worker        vst1.16         {d2[1]}, [\ds2, :16], \d_strd
1376*c0909341SAndroid Build Coastguard Worker        bgt             2b
1377*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}
1378*c0909341SAndroid Build Coastguard Worker.endif
1379*c0909341SAndroid Build Coastguard Worker
1380*c0909341SAndroid Build Coastguard Worker40:     // 4xN h
1381*c0909341SAndroid Build Coastguard Worker        add             \mx,  \mx,  #2
1382*c0909341SAndroid Build Coastguard Worker        vld1.32         {d0[]}, [\mx]
1383*c0909341SAndroid Build Coastguard Worker        sub             \src,  \src,  #1
1384*c0909341SAndroid Build Coastguard Worker        add             \ds2,  \dst,  \d_strd
1385*c0909341SAndroid Build Coastguard Worker        add             \sr2,  \src,  \s_strd
1386*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd,  \d_strd,  #1
1387*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd,  \s_strd,  #1
1388*c0909341SAndroid Build Coastguard Worker        vmovl.s8        q0,  d0
1389*c0909341SAndroid Build Coastguard Worker4:
1390*c0909341SAndroid Build Coastguard Worker        vld1.8          {d16}, [\src], \s_strd
1391*c0909341SAndroid Build Coastguard Worker        vld1.8          {d24}, [\sr2], \s_strd
1392*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q8,  d16
1393*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q12, d24
1394*c0909341SAndroid Build Coastguard Worker        vext.8          d18, d16, d17, #2
1395*c0909341SAndroid Build Coastguard Worker        vext.8          d20, d16, d17, #4
1396*c0909341SAndroid Build Coastguard Worker        vext.8          d22, d16, d17, #6
1397*c0909341SAndroid Build Coastguard Worker        vext.8          d26, d24, d25, #2
1398*c0909341SAndroid Build Coastguard Worker        vext.8          d28, d24, d25, #4
1399*c0909341SAndroid Build Coastguard Worker        vext.8          d30, d24, d25, #6
1400*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2
1401*c0909341SAndroid Build Coastguard Worker        vmul.s16        d4,  d16, d0[0]
1402*c0909341SAndroid Build Coastguard Worker        vmla.s16        d4,  d18, d0[1]
1403*c0909341SAndroid Build Coastguard Worker        vmla.s16        d4,  d20, d0[2]
1404*c0909341SAndroid Build Coastguard Worker        vmla.s16        d4,  d22, d0[3]
1405*c0909341SAndroid Build Coastguard Worker        vmul.s16        d5,  d24, d0[0]
1406*c0909341SAndroid Build Coastguard Worker        vmla.s16        d5,  d26, d0[1]
1407*c0909341SAndroid Build Coastguard Worker        vmla.s16        d5,  d28, d0[2]
1408*c0909341SAndroid Build Coastguard Worker        vmla.s16        d5,  d30, d0[3]
1409*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q2,  q2,  #2
1410*c0909341SAndroid Build Coastguard Worker.ifc \type, put
1411*c0909341SAndroid Build Coastguard Worker        vqrshrun.s16    d4,  q2,  #4
1412*c0909341SAndroid Build Coastguard Worker        vst1.32         {d4[0]}, [\dst, :32], \d_strd
1413*c0909341SAndroid Build Coastguard Worker        vst1.32         {d4[1]}, [\ds2, :32], \d_strd
1414*c0909341SAndroid Build Coastguard Worker.else
1415*c0909341SAndroid Build Coastguard Worker        vst1.16         {d4}, [\dst, :64], \d_strd
1416*c0909341SAndroid Build Coastguard Worker        vst1.16         {d5}, [\ds2, :64], \d_strd
1417*c0909341SAndroid Build Coastguard Worker.endif
1418*c0909341SAndroid Build Coastguard Worker        bgt             4b
1419*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}
1420*c0909341SAndroid Build Coastguard Worker
1421*c0909341SAndroid Build Coastguard Worker80:     // 8xN h
1422*c0909341SAndroid Build Coastguard Worker        vld1.8          {d0}, [\mx, :64]
1423*c0909341SAndroid Build Coastguard Worker        sub             \src,  \src,  #3
1424*c0909341SAndroid Build Coastguard Worker        add             \ds2,  \dst,  \d_strd
1425*c0909341SAndroid Build Coastguard Worker        add             \sr2,  \src,  \s_strd
1426*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd,  \d_strd,  #1
1427*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd,  \s_strd,  #1
1428*c0909341SAndroid Build Coastguard Worker        vmovl.s8        q0,  d0
1429*c0909341SAndroid Build Coastguard Worker8:
1430*c0909341SAndroid Build Coastguard Worker        vld1.8          {q8},  [\src], \s_strd
1431*c0909341SAndroid Build Coastguard Worker        vld1.8          {q12}, [\sr2], \s_strd
1432*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q9,  d17
1433*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q8,  d16
1434*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q13, d25
1435*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q12, d24
1436*c0909341SAndroid Build Coastguard Worker
1437*c0909341SAndroid Build Coastguard Worker        vmul.s16        q10, q8,  d0[0]
1438*c0909341SAndroid Build Coastguard Worker        vmul.s16        q14, q12, d0[0]
1439*c0909341SAndroid Build Coastguard Worker.irpc i, 1234567
1440*c0909341SAndroid Build Coastguard Worker        vext.8          q11, q8,  q9,  #(2*\i)
1441*c0909341SAndroid Build Coastguard Worker        vext.8          q15, q12, q13, #(2*\i)
1442*c0909341SAndroid Build Coastguard Worker.if \i < 4
1443*c0909341SAndroid Build Coastguard Worker        vmla.s16        q10, q11, d0[\i]
1444*c0909341SAndroid Build Coastguard Worker        vmla.s16        q14, q15, d0[\i]
1445*c0909341SAndroid Build Coastguard Worker.else
1446*c0909341SAndroid Build Coastguard Worker        vmla.s16        q10, q11, d1[\i-4]
1447*c0909341SAndroid Build Coastguard Worker        vmla.s16        q14, q15, d1[\i-4]
1448*c0909341SAndroid Build Coastguard Worker.endif
1449*c0909341SAndroid Build Coastguard Worker.endr
1450*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2
1451*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q10, q10, #2
1452*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q14, q14, #2
1453*c0909341SAndroid Build Coastguard Worker.ifc \type, put
1454*c0909341SAndroid Build Coastguard Worker        vqrshrun.s16    d20, q10, #4
1455*c0909341SAndroid Build Coastguard Worker        vqrshrun.s16    d28, q14, #4
1456*c0909341SAndroid Build Coastguard Worker        vst1.8          {d20}, [\dst, :64], \d_strd
1457*c0909341SAndroid Build Coastguard Worker        vst1.8          {d28}, [\ds2, :64], \d_strd
1458*c0909341SAndroid Build Coastguard Worker.else
1459*c0909341SAndroid Build Coastguard Worker        vst1.16         {q10}, [\dst, :128], \d_strd
1460*c0909341SAndroid Build Coastguard Worker        vst1.16         {q14}, [\ds2, :128], \d_strd
1461*c0909341SAndroid Build Coastguard Worker.endif
1462*c0909341SAndroid Build Coastguard Worker        bgt             8b
1463*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}
1464*c0909341SAndroid Build Coastguard Worker
1465*c0909341SAndroid Build Coastguard Worker160:
1466*c0909341SAndroid Build Coastguard Worker320:
1467*c0909341SAndroid Build Coastguard Worker640:
1468*c0909341SAndroid Build Coastguard Worker1280:   // 16xN, 32xN, ... h
1469*c0909341SAndroid Build Coastguard Worker        // This could be done without touching q4-q6, by using only
1470*c0909341SAndroid Build Coastguard Worker        // one temporary for vext in the loop. That's slower on A7 and A53,
1471*c0909341SAndroid Build Coastguard Worker        // (but surprisingly, marginally faster on A8 and A73).
1472*c0909341SAndroid Build Coastguard Worker        vpush           {q4-q6}
1473*c0909341SAndroid Build Coastguard Worker        vld1.8          {d0}, [\mx, :64]
1474*c0909341SAndroid Build Coastguard Worker        sub             \src,  \src,  #3
1475*c0909341SAndroid Build Coastguard Worker        add             \ds2,  \dst,  \d_strd
1476*c0909341SAndroid Build Coastguard Worker        add             \sr2,  \src,  \s_strd
1477*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd,  \s_strd,  #1
1478*c0909341SAndroid Build Coastguard Worker        vmovl.s8        q0,  d0
1479*c0909341SAndroid Build Coastguard Worker
1480*c0909341SAndroid Build Coastguard Worker        sub             \s_strd,  \s_strd,  \w
1481*c0909341SAndroid Build Coastguard Worker        sub             \s_strd,  \s_strd,  #8
1482*c0909341SAndroid Build Coastguard Worker.ifc \type, put
1483*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd,  \d_strd,  #1
1484*c0909341SAndroid Build Coastguard Worker        sub             \d_strd,  \d_strd,  \w
1485*c0909341SAndroid Build Coastguard Worker.endif
1486*c0909341SAndroid Build Coastguard Worker161:
1487*c0909341SAndroid Build Coastguard Worker        vld1.8          {d16, d17, d18},  [\src]!
1488*c0909341SAndroid Build Coastguard Worker        vld1.8          {d24, d25, d26},  [\sr2]!
1489*c0909341SAndroid Build Coastguard Worker        mov             \mx, \w
1490*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q10, d18
1491*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q9,  d17
1492*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q8,  d16
1493*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q14, d26
1494*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q13, d25
1495*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q12, d24
1496*c0909341SAndroid Build Coastguard Worker
1497*c0909341SAndroid Build Coastguard Worker16:
1498*c0909341SAndroid Build Coastguard Worker        vmul.s16        q1,  q8,  d0[0]
1499*c0909341SAndroid Build Coastguard Worker        vmul.s16        q2,  q9,  d0[0]
1500*c0909341SAndroid Build Coastguard Worker        vmul.s16        q3,  q12, d0[0]
1501*c0909341SAndroid Build Coastguard Worker        vmul.s16        q4,  q13, d0[0]
1502*c0909341SAndroid Build Coastguard Worker.irpc i, 1234567
1503*c0909341SAndroid Build Coastguard Worker        vext.8          q5,  q8,  q9,  #(2*\i)
1504*c0909341SAndroid Build Coastguard Worker        vext.8          q6,  q9,  q10, #(2*\i)
1505*c0909341SAndroid Build Coastguard Worker        vext.8          q11, q12, q13, #(2*\i)
1506*c0909341SAndroid Build Coastguard Worker        vext.8          q15, q13, q14, #(2*\i)
1507*c0909341SAndroid Build Coastguard Worker.if \i < 4
1508*c0909341SAndroid Build Coastguard Worker        vmla.s16        q1,  q5,  d0[\i]
1509*c0909341SAndroid Build Coastguard Worker        vmla.s16        q2,  q6,  d0[\i]
1510*c0909341SAndroid Build Coastguard Worker        vmla.s16        q3,  q11, d0[\i]
1511*c0909341SAndroid Build Coastguard Worker        vmla.s16        q4,  q15, d0[\i]
1512*c0909341SAndroid Build Coastguard Worker.else
1513*c0909341SAndroid Build Coastguard Worker        vmla.s16        q1,  q5,  d1[\i-4]
1514*c0909341SAndroid Build Coastguard Worker        vmla.s16        q2,  q6,  d1[\i-4]
1515*c0909341SAndroid Build Coastguard Worker        vmla.s16        q3,  q11, d1[\i-4]
1516*c0909341SAndroid Build Coastguard Worker        vmla.s16        q4,  q15, d1[\i-4]
1517*c0909341SAndroid Build Coastguard Worker.endif
1518*c0909341SAndroid Build Coastguard Worker.endr
1519*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q1,  q1,  #2
1520*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q2,  q2,  #2
1521*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q3,  q3,  #2
1522*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q4,  q4,  #2
1523*c0909341SAndroid Build Coastguard Worker        subs            \mx, \mx, #16
1524*c0909341SAndroid Build Coastguard Worker.ifc \type, put
1525*c0909341SAndroid Build Coastguard Worker        vqrshrun.s16    d2,  q1,  #4
1526*c0909341SAndroid Build Coastguard Worker        vqrshrun.s16    d3,  q2,  #4
1527*c0909341SAndroid Build Coastguard Worker        vqrshrun.s16    d4,  q3,  #4
1528*c0909341SAndroid Build Coastguard Worker        vqrshrun.s16    d5,  q4,  #4
1529*c0909341SAndroid Build Coastguard Worker        vst1.8          {q1}, [\dst, :128]!
1530*c0909341SAndroid Build Coastguard Worker        vst1.8          {q2}, [\ds2, :128]!
1531*c0909341SAndroid Build Coastguard Worker.else
1532*c0909341SAndroid Build Coastguard Worker        vst1.16         {q1, q2}, [\dst, :128]!
1533*c0909341SAndroid Build Coastguard Worker        vst1.16         {q3, q4}, [\ds2, :128]!
1534*c0909341SAndroid Build Coastguard Worker.endif
1535*c0909341SAndroid Build Coastguard Worker        ble             9f
1536*c0909341SAndroid Build Coastguard Worker
1537*c0909341SAndroid Build Coastguard Worker        vmov            q8,  q10
1538*c0909341SAndroid Build Coastguard Worker        vmov            q12, q14
1539*c0909341SAndroid Build Coastguard Worker        vld1.8          {d18, d19}, [\src]!
1540*c0909341SAndroid Build Coastguard Worker        vld1.8          {d26, d27}, [\sr2]!
1541*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q10, d19
1542*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q9,  d18
1543*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q14, d27
1544*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q13, d26
1545*c0909341SAndroid Build Coastguard Worker        b               16b
1546*c0909341SAndroid Build Coastguard Worker
1547*c0909341SAndroid Build Coastguard Worker9:
1548*c0909341SAndroid Build Coastguard Worker        add             \dst,  \dst,  \d_strd
1549*c0909341SAndroid Build Coastguard Worker        add             \ds2,  \ds2,  \d_strd
1550*c0909341SAndroid Build Coastguard Worker        add             \src,  \src,  \s_strd
1551*c0909341SAndroid Build Coastguard Worker        add             \sr2,  \sr2,  \s_strd
1552*c0909341SAndroid Build Coastguard Worker
1553*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2
1554*c0909341SAndroid Build Coastguard Worker        bgt             161b
1555*c0909341SAndroid Build Coastguard Worker        vpop            {q4-q6}
1556*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}
1557*c0909341SAndroid Build Coastguard Worker
1558*c0909341SAndroid Build Coastguard WorkerL(\type\()_8tap_v):
1559*c0909341SAndroid Build Coastguard Worker        cmp             \h,  #4
1560*c0909341SAndroid Build Coastguard Worker        ubfx            r9,  \my, #7, #7
1561*c0909341SAndroid Build Coastguard Worker        and             \my, \my, #0x7f
1562*c0909341SAndroid Build Coastguard Worker        it              gt
1563*c0909341SAndroid Build Coastguard Worker        movgt           \my, r9
1564*c0909341SAndroid Build Coastguard Worker        add             \my, r10, \my, lsl #3
1565*c0909341SAndroid Build Coastguard Worker
1566*c0909341SAndroid Build Coastguard Worker        adr             r9,  L(\type\()_8tap_v_tbl)
1567*c0909341SAndroid Build Coastguard Worker        ldr             r8,  [r9, r8, lsl #2]
1568*c0909341SAndroid Build Coastguard Worker        add             r9,  r9,  r8
1569*c0909341SAndroid Build Coastguard Worker        bx              r9
1570*c0909341SAndroid Build Coastguard Worker
1571*c0909341SAndroid Build Coastguard Worker        .align 2
1572*c0909341SAndroid Build Coastguard WorkerL(\type\()_8tap_v_tbl):
1573*c0909341SAndroid Build Coastguard Worker        .word 1280f - L(\type\()_8tap_v_tbl) + CONFIG_THUMB
1574*c0909341SAndroid Build Coastguard Worker        .word 640f  - L(\type\()_8tap_v_tbl) + CONFIG_THUMB
1575*c0909341SAndroid Build Coastguard Worker        .word 320f  - L(\type\()_8tap_v_tbl) + CONFIG_THUMB
1576*c0909341SAndroid Build Coastguard Worker        .word 160f  - L(\type\()_8tap_v_tbl) + CONFIG_THUMB
1577*c0909341SAndroid Build Coastguard Worker        .word 80f   - L(\type\()_8tap_v_tbl) + CONFIG_THUMB
1578*c0909341SAndroid Build Coastguard Worker        .word 40f   - L(\type\()_8tap_v_tbl) + CONFIG_THUMB
1579*c0909341SAndroid Build Coastguard Worker        .word 20f   - L(\type\()_8tap_v_tbl) + CONFIG_THUMB
1580*c0909341SAndroid Build Coastguard Worker
// 2-pixel-wide vertical filter. The body sits inside .ifc type, put and is
// therefore only assembled for the put variant.
1581*c0909341SAndroid Build Coastguard Worker20:     // 2xN v
1582*c0909341SAndroid Build Coastguard Worker.ifc \type, put
        // h > 4 (flags still come from the cmp at the _8tap_v entry): take
        // the 8-tap path at 28:.
1583*c0909341SAndroid Build Coastguard Worker        bgt             28f
1584*c0909341SAndroid Build Coastguard Worker
        // h <= 4: 4-tap path. This compare feeds the bgt 24f further down, so
        // nothing in between may change the flags.
1585*c0909341SAndroid Build Coastguard Worker        cmp             \h,  #2
        // Load the 4 bytes at offset 2 of the 8-byte filter row (its middle
        // four taps), replicated into both 32-bit lanes of d0.
1586*c0909341SAndroid Build Coastguard Worker        add             \my, \my, #2
1587*c0909341SAndroid Build Coastguard Worker        vld1.32         {d0[]}, [\my]
        // Start one row above the block. sr2 and ds2 point one row below src
        // and dst, and both strides are doubled, so each pointer of a pair
        // steps over every second row.
1588*c0909341SAndroid Build Coastguard Worker        sub             \src,  \src,  \s_strd
1589*c0909341SAndroid Build Coastguard Worker        add             \ds2,  \dst,  \d_strd
1590*c0909341SAndroid Build Coastguard Worker        add             \sr2,  \src,  \s_strd
1591*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd,  \s_strd,  #1
1592*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd,  \d_strd,  #1
        // Sign-extend the taps from 8 to 16 bit.
1593*c0909341SAndroid Build Coastguard Worker        vmovl.s8        q0,  d0
1594*c0909341SAndroid Build Coastguard Worker
1595*c0909341SAndroid Build Coastguard Worker        // 2x2 v
        // Five input rows are fetched for the two output rows (load_16 and
        // interleave_1_16 are helper macros defined outside this chunk).
1596*c0909341SAndroid Build Coastguard Worker        load_16         \src, \sr2, \s_strd, d1, d2, d3, d4, d5
1597*c0909341SAndroid Build Coastguard Worker        interleave_1_16 d1, d2, d3, d4, d5
        // h == 4 (flags from cmp h, #2 above): continue at 24:.
1598*c0909341SAndroid Build Coastguard Worker        bgt             24f
1599*c0909341SAndroid Build Coastguard Worker        vmovl_u8        q8, d1, q9, d2, q10, d3, q11, d4
1600*c0909341SAndroid Build Coastguard Worker        mul_mla_4       d6, d16, d18, d20, d22
        // NOTE(review): presumably a rounding right shift by 6 with unsigned
        // saturating narrowing to 8 bit, going by the macro name - confirm.
1601*c0909341SAndroid Build Coastguard Worker        vqrshrun_s16    6,   q3,  d6
1602*c0909341SAndroid Build Coastguard Worker        st_16           \d_strd, d6, 2
        // Return, restoring the core registers saved by the prologue, which
        // lies outside this chunk.
1603*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}
1604*c0909341SAndroid Build Coastguard Worker
1605*c0909341SAndroid Build Coastguard Worker24:     // 2x4 v
        // Two more input rows, seven in total, for four output rows.
1606*c0909341SAndroid Build Coastguard Worker        load_16         \sr2, \src, \s_strd, d6, d7
1607*c0909341SAndroid Build Coastguard Worker        interleave_1_16 d5, d6, d7
1608*c0909341SAndroid Build Coastguard Worker        vmovl_u8        q8, d1, q9, d2, q10, d3, q11, d4, q12, d5, q13, d6
        // Copy the low halves of q10..q13 into the high halves of q8..q11 so
        // that a single q-register mul_mla_4 covers all four output rows.
1609*c0909341SAndroid Build Coastguard Worker        vmov            d17, d20
1610*c0909341SAndroid Build Coastguard Worker        vmov            d19, d22
1611*c0909341SAndroid Build Coastguard Worker        vmov            d21, d24
1612*c0909341SAndroid Build Coastguard Worker        vmov            d23, d26
1613*c0909341SAndroid Build Coastguard Worker        mul_mla_4       q3, q8, q9, q10, q11
1614*c0909341SAndroid Build Coastguard Worker        vqrshrun_s16    6,   q3,  d6
1615*c0909341SAndroid Build Coastguard Worker        st_16           \d_strd, d6, 4
1616*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}
1617*c0909341SAndroid Build Coastguard Worker
1618*c0909341SAndroid Build Coastguard Worker28:     // 2x6, 2x8, 2x12, 2x16 v
        // q4-q7 (d8-d15) are used below and are callee-saved under the AAPCS,
        // so save them first. They are restored at 0:.
1619*c0909341SAndroid Build Coastguard Worker        vpush           {q4-q7}
        // Load the full 8-byte filter row. The :64 qualifier asserts that my
        // is 8-byte aligned.
1620*c0909341SAndroid Build Coastguard Worker        vld1.8          {d0}, [\my, :64]
        // sr2 = src - 2 rows, then src = src - 3 rows. ds2 = dst + 1 row.
        // Strides are doubled as in the 4-tap path.
1621*c0909341SAndroid Build Coastguard Worker        sub             \sr2,  \src,  \s_strd, lsl #1
1622*c0909341SAndroid Build Coastguard Worker        add             \ds2,  \dst,  \d_strd
1623*c0909341SAndroid Build Coastguard Worker        sub             \src,  \sr2,  \s_strd
1624*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd,  \d_strd,  #1
1625*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd,  \s_strd,  #1
1626*c0909341SAndroid Build Coastguard Worker        vmovl.s8        q0,  d0
1627*c0909341SAndroid Build Coastguard Worker
        // Prime the window with seven rows of history before the loop.
1628*c0909341SAndroid Build Coastguard Worker        load_16         \src, \sr2, \s_strd, d2,  d4,  d6,  d8,  d10, d12, d14
1629*c0909341SAndroid Build Coastguard Worker        interleave_1_16 d2,  d4,  d6,  d8,  d10
1630*c0909341SAndroid Build Coastguard Worker        interleave_1_16 d10, d12, d14
1631*c0909341SAndroid Build Coastguard Worker        vmovl_u8        q1,  d2,  q2,  d4,  q3,  d6,  q4,  d8,  q5,  d10, q6,  d12
        // Pack pairs of widened rows: the high half of q1..q4 receives the
        // low half of q3..q6.
1632*c0909341SAndroid Build Coastguard Worker        vmov            d3,  d6
1633*c0909341SAndroid Build Coastguard Worker        vmov            d5,  d8
1634*c0909341SAndroid Build Coastguard Worker        vmov            d7,  d10
1635*c0909341SAndroid Build Coastguard Worker        vmov            d9,  d12
        // Main loop: each pass loads four new rows and stores four output
        // rows.
1636*c0909341SAndroid Build Coastguard Worker216:
        // The flags from this subs are tested by ble 0f after the store.
1637*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #4
1638*c0909341SAndroid Build Coastguard Worker        load_16         \sr2, \src, \s_strd, d16, d18, d20, d22
1639*c0909341SAndroid Build Coastguard Worker        interleave_1_16 d14, d16, d18, d20, d22
1640*c0909341SAndroid Build Coastguard Worker        vmovl_u8        q7,  d14, q8,  d16, q9,  d18, q10, d20
1641*c0909341SAndroid Build Coastguard Worker        vmov            d11, d14
1642*c0909341SAndroid Build Coastguard Worker        vmov            d13, d16
1643*c0909341SAndroid Build Coastguard Worker        vmov            d15, d18
1644*c0909341SAndroid Build Coastguard Worker        vmov            d17, d20
1645*c0909341SAndroid Build Coastguard Worker        mul_mla_8_0     q1,  q1,  q2,  q3,  q4,  q5,  q6,  q7, q8
1646*c0909341SAndroid Build Coastguard Worker        vqrshrun_s16    6,   q1,  d2
1647*c0909341SAndroid Build Coastguard Worker        st_16           \d_strd, d2, 4
        // No rows left: finish.
1648*c0909341SAndroid Build Coastguard Worker        ble             0f
        // Decide between the two-row tail and another full pass. The vmovs
        // below do not touch the flags, so beq still sees this compare.
1649*c0909341SAndroid Build Coastguard Worker        cmp             \h,  #2
        // Slide the row window down by four rows for the next pass.
1650*c0909341SAndroid Build Coastguard Worker        vmov            q1,  q5
1651*c0909341SAndroid Build Coastguard Worker        vmov            q2,  q6
1652*c0909341SAndroid Build Coastguard Worker        vmov            q3,  q7
1653*c0909341SAndroid Build Coastguard Worker        vmov            q4,  q8
1654*c0909341SAndroid Build Coastguard Worker        vmov            q5,  q9
1655*c0909341SAndroid Build Coastguard Worker        vmov            q6,  q10
1656*c0909341SAndroid Build Coastguard Worker        vmov            d14, d22
1657*c0909341SAndroid Build Coastguard Worker        beq             26f
1658*c0909341SAndroid Build Coastguard Worker        b               216b
        // Tail: exactly two output rows remain, so only two more input rows
        // are loaded and d-register arithmetic is enough.
1659*c0909341SAndroid Build Coastguard Worker26:
1660*c0909341SAndroid Build Coastguard Worker        load_16         \sr2, \src, \s_strd, d16, d18
1661*c0909341SAndroid Build Coastguard Worker        interleave_1_16 d14, d16, d18
1662*c0909341SAndroid Build Coastguard Worker        vmovl_u8        q7,  d14, q8,  d16
1663*c0909341SAndroid Build Coastguard Worker        vmov            d11, d14
1664*c0909341SAndroid Build Coastguard Worker        vmov            d13, d16
1665*c0909341SAndroid Build Coastguard Worker        mul_mla_8_0     d2,  d2,  d4,  d6,  d8,  d10, d12, d14, d16
1666*c0909341SAndroid Build Coastguard Worker        vqrshrun_s16    6,   q1,  d2
1667*c0909341SAndroid Build Coastguard Worker        st_16           \d_strd, d2, 2
        // Common exit of the 8-tap path: undo the vpush from 28:, then return.
1668*c0909341SAndroid Build Coastguard Worker0:
1669*c0909341SAndroid Build Coastguard Worker        vpop            {q4-q7}
1670*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}
1671*c0909341SAndroid Build Coastguard Worker.endif
1672*c0909341SAndroid Build Coastguard Worker
// 4-pixel-wide vertical filter entry. Heights above 4 branch to the 8-tap
// loop at 480:. Heights 2 and 4 are handled here with a 4-tap filter.
1673*c0909341SAndroid Build Coastguard Worker40:
        // Flags still come from the cmp h, #4 at the _8tap_v entry.
1674*c0909341SAndroid Build Coastguard Worker        bgt            480f
1675*c0909341SAndroid Build Coastguard Worker
1676*c0909341SAndroid Build Coastguard Worker        // 4x2, 4x4 v
        // This compare is consumed by the ble 0f after the first store, so
        // the instructions and macros in between must leave the flags alone.
1677*c0909341SAndroid Build Coastguard Worker        cmp             \h,  #2
        // Load the middle four taps (bytes 2..5 of the 8-byte filter row)
        // into both 32-bit lanes of d0.
1678*c0909341SAndroid Build Coastguard Worker        add             \my, \my, #2
1679*c0909341SAndroid Build Coastguard Worker        vld1.32         {d0[]}, [\my]
        // Start one row above the block. sr2 and ds2 sit one row below src
        // and dst, and the doubled strides make each pointer skip a row.
1680*c0909341SAndroid Build Coastguard Worker        sub             \src, \src, \s_strd
1681*c0909341SAndroid Build Coastguard Worker        add             \ds2, \dst, \d_strd
1682*c0909341SAndroid Build Coastguard Worker        add             \sr2, \src, \s_strd
1683*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd, \s_strd, #1
1684*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd, \d_strd, #1
        // Sign-extend the taps from 8 to 16 bit.
1685*c0909341SAndroid Build Coastguard Worker        vmovl.s8        q0,  d0
1686*c0909341SAndroid Build Coastguard Worker
        // First two output rows, computed from five input rows (load_32,
        // interleave_1_32, mul_mla_4 and shift_store_4 are macros defined
        // outside this chunk).
1687*c0909341SAndroid Build Coastguard Worker        load_32         \src, \sr2, \s_strd, d1, d2, d3, d4, d5
1688*c0909341SAndroid Build Coastguard Worker        interleave_1_32 d1,  d2,  d3,  d4,  d5
1689*c0909341SAndroid Build Coastguard Worker        vmovl_u8        q8,  d1,  q9,  d2,  q10, d3,  q11, d4
1690*c0909341SAndroid Build Coastguard Worker        mul_mla_4       q3,  q8,  q9,  q10, q11
1691*c0909341SAndroid Build Coastguard Worker        shift_store_4   \type, \d_strd, q3, d6, d7
        // h == 2: done.
1692*c0909341SAndroid Build Coastguard Worker        ble             0f
        // h == 4: two more input rows give the remaining two output rows.
1693*c0909341SAndroid Build Coastguard Worker        load_32         \sr2, \src, \s_strd, d6, d7
1694*c0909341SAndroid Build Coastguard Worker        interleave_1_32 d5,  d6,  d7
1695*c0909341SAndroid Build Coastguard Worker        vmovl_u8        q12, d5,  q13, d6
1696*c0909341SAndroid Build Coastguard Worker        mul_mla_4       q3,  q10, q11, q12, q13
1697*c0909341SAndroid Build Coastguard Worker        shift_store_4   \type, \d_strd, q3, d6, d7
1698*c0909341SAndroid Build Coastguard Worker0:
        // No NEON registers were pushed on this path, so a plain return is
        // enough.
1699*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}
1700*c0909341SAndroid Build Coastguard Worker
// 4-pixel-wide, 8-tap vertical filter for heights above 4.
1701*c0909341SAndroid Build Coastguard Worker480:    // 4x6, 4x8, 4x12, 4x16 v
        // q4 (d8/d9) is used below and is callee-saved under the AAPCS. It is
        // restored at 0:.
1702*c0909341SAndroid Build Coastguard Worker        vpush           {q4}
        // Load the full 8-byte filter row. The :64 qualifier asserts 8-byte
        // alignment.
1703*c0909341SAndroid Build Coastguard Worker        vld1.8          {d0}, [\my, :64]
        // sr2 = src - 2 rows, then src = src - 3 rows. ds2 = dst + 1 row.
        // Strides are doubled so each pointer of a pair skips a row.
1704*c0909341SAndroid Build Coastguard Worker        sub             \sr2, \src, \s_strd, lsl #1
1705*c0909341SAndroid Build Coastguard Worker        add             \ds2, \dst, \d_strd
1706*c0909341SAndroid Build Coastguard Worker        sub             \src, \sr2, \s_strd
1707*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd, \s_strd, #1
1708*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd, \d_strd, #1
        // Sign-extend the taps from 8 to 16 bit.
1709*c0909341SAndroid Build Coastguard Worker        vmovl.s8        q0,  d0
1710*c0909341SAndroid Build Coastguard Worker
        // Prime the window with seven rows of history.
1711*c0909341SAndroid Build Coastguard Worker        load_32         \src, \sr2, \s_strd, d2,  d4,  d6,  d8,  d16, d18, d20
1712*c0909341SAndroid Build Coastguard Worker        interleave_1_32 d2,  d4,  d6
1713*c0909341SAndroid Build Coastguard Worker        interleave_1_32 d6,  d8,  d16, d18, d20
1714*c0909341SAndroid Build Coastguard Worker        vmovl_u8        q1,  d2,  q2,  d4,  q3,  d6,  q4,  d8,  q8,  d16, q9,  d18
1715*c0909341SAndroid Build Coastguard Worker
        // The loop body is unrolled into stages of 4, 2, 2 and 4 output rows.
        // Register roles rotate from stage to stage, so no register-to-
        // register copies are needed. Every stage decrements h and leaves
        // through 0: once h is no longer positive.
1716*c0909341SAndroid Build Coastguard Worker48:
        // Stage 1: four rows.
1717*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #4
1718*c0909341SAndroid Build Coastguard Worker        load_32         \sr2, \src, \s_strd, d22, d24, d26, d28
1719*c0909341SAndroid Build Coastguard Worker        interleave_1_32 d20, d22, d24, d26, d28
1720*c0909341SAndroid Build Coastguard Worker        vmovl_u8        q10, d20, q11, d22, q12, d24, q13, d26
1721*c0909341SAndroid Build Coastguard Worker        mul_mla_8_2     q1,  q2,  q1,  q2,  q3,  q4,  q8,  q9,  q10, q11, q12, q13
1722*c0909341SAndroid Build Coastguard Worker        shift_store_4   \type, \d_strd, q1,  d2,  d3,  q2,  d4,  d5
1723*c0909341SAndroid Build Coastguard Worker        ble             0f
        // Stage 2: two rows. d2 is free again after the store above and is
        // reused as a load target.
1724*c0909341SAndroid Build Coastguard Worker        load_32         \sr2,  \src, \s_strd, d30, d2
1725*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2
1726*c0909341SAndroid Build Coastguard Worker        interleave_1_32 d28, d30, d2
1727*c0909341SAndroid Build Coastguard Worker        vmovl_u8        q14, d28, q15, d30
1728*c0909341SAndroid Build Coastguard Worker        mul_mla_8_0     q8,  q8,  q9,  q10, q11, q12, q13, q14, q15
1729*c0909341SAndroid Build Coastguard Worker        shift_store_4   \type, \d_strd, q8,  d16, d17
1730*c0909341SAndroid Build Coastguard Worker        ble             0f
        // Stage 3: two rows.
1731*c0909341SAndroid Build Coastguard Worker        load_32         \sr2,  \src, \s_strd, d4,  d6
1732*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2
1733*c0909341SAndroid Build Coastguard Worker        interleave_1_32 d2,  d4,  d6
1734*c0909341SAndroid Build Coastguard Worker        vmovl_u8        q1,  d2,  q2,  d4
1735*c0909341SAndroid Build Coastguard Worker        mul_mla_8_0     q9,  q10, q11, q12, q13, q14, q15, q1,  q2
1736*c0909341SAndroid Build Coastguard Worker        shift_store_4   \type, \d_strd, q9,  d18, d19
1737*c0909341SAndroid Build Coastguard Worker        ble             0f
        // Stage 4: four rows. Afterwards the registers are back in the layout
        // expected at 48:.
1738*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #4
1739*c0909341SAndroid Build Coastguard Worker        load_32         \sr2, \src, \s_strd, d8,  d16, d18, d20
1740*c0909341SAndroid Build Coastguard Worker        interleave_1_32 d6,  d8,  d16, d18, d20
1741*c0909341SAndroid Build Coastguard Worker        vmovl_u8        q3,  d6,  q4,  d8,  q8,  d16, q9, d18
1742*c0909341SAndroid Build Coastguard Worker        mul_mla_8_2     q12, q13, q12, q13, q14, q15, q1,  q2,  q3,  q4,  q8,  q9
1743*c0909341SAndroid Build Coastguard Worker        shift_store_4   \type, \d_strd, q12, d24, d25, q13, d26, d27
1744*c0909341SAndroid Build Coastguard Worker        bgt             48b
        // Exit: undo the vpush from 480:, then return.
1745*c0909341SAndroid Build Coastguard Worker0:
1746*c0909341SAndroid Build Coastguard Worker        vpop            {q4}
1747*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}
1748*c0909341SAndroid Build Coastguard Worker
// 8-pixel-wide vertical filter entry. Heights above 4 branch to the 8-tap
// column loop at 880:. Heights 2 and 4 are handled here with a 4-tap filter.
1749*c0909341SAndroid Build Coastguard Worker80:
        // Flags still come from the cmp h, #4 at the _8tap_v entry.
1750*c0909341SAndroid Build Coastguard Worker        bgt             880f
1751*c0909341SAndroid Build Coastguard Worker
1752*c0909341SAndroid Build Coastguard Worker        // 8x2, 8x4 v
        // This compare is consumed by the ble 0f after the first store.
1753*c0909341SAndroid Build Coastguard Worker        cmp             \h,  #2
        // Load the middle four taps (bytes 2..5 of the 8-byte filter row)
        // into both 32-bit lanes of d0.
1754*c0909341SAndroid Build Coastguard Worker        add             \my, \my, #2
1755*c0909341SAndroid Build Coastguard Worker        vld1.32         {d0[]}, [\my]
        // Start one row above the block. sr2 and ds2 sit one row below src
        // and dst, and the doubled strides make each pointer skip a row.
1756*c0909341SAndroid Build Coastguard Worker        sub             \src, \src, \s_strd
1757*c0909341SAndroid Build Coastguard Worker        add             \ds2, \dst, \d_strd
1758*c0909341SAndroid Build Coastguard Worker        add             \sr2, \src, \s_strd
1759*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd, \s_strd, #1
1760*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd, \d_strd, #1
        // Sign-extend the taps from 8 to 16 bit.
1761*c0909341SAndroid Build Coastguard Worker        vmovl.s8        q0,  d0
1762*c0909341SAndroid Build Coastguard Worker
        // Five input rows, one d register each, are widened to 16 bit. The
        // two mul_mla_4 calls use 4-row windows offset by one row, giving
        // output rows 0 and 1.
1763*c0909341SAndroid Build Coastguard Worker        load_reg        \src, \sr2, \s_strd, d1, d2, d3, d4, d5
1764*c0909341SAndroid Build Coastguard Worker        vmovl_u8        q8,  d1,  q9,  d2,  q10, d3,  q11, d4,  q12, d5
1765*c0909341SAndroid Build Coastguard Worker        mul_mla_4       q1,  q8,  q9,  q10, q11
1766*c0909341SAndroid Build Coastguard Worker        mul_mla_4       q2,  q9,  q10, q11, q12
1767*c0909341SAndroid Build Coastguard Worker        shift_store_8   \type, \d_strd, q1, d2, q2, d4
        // h == 2: done.
1768*c0909341SAndroid Build Coastguard Worker        ble             0f
        // h == 4: two more input rows give output rows 2 and 3.
1769*c0909341SAndroid Build Coastguard Worker        load_reg        \sr2, \src, \s_strd, d6, d7
1770*c0909341SAndroid Build Coastguard Worker        vmovl_u8        q13, d6,  q14, d7
1771*c0909341SAndroid Build Coastguard Worker        mul_mla_4       q1,  q10, q11, q12, q13
1772*c0909341SAndroid Build Coastguard Worker        mul_mla_4       q2,  q11, q12, q13, q14
1773*c0909341SAndroid Build Coastguard Worker        shift_store_8   \type, \d_strd, q1, d2, q2, d4
1774*c0909341SAndroid Build Coastguard Worker0:
        // No NEON registers were pushed on this path, so a plain return is
        // enough.
1775*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}
1776*c0909341SAndroid Build Coastguard Worker
// Generic 8-tap vertical filter. The block is processed in 8-pixel-wide
// column strips and each strip is filtered top to bottom. All labels below
// share this code. 1680 is only reached through the backward branch at 160:.
1777*c0909341SAndroid Build Coastguard Worker880:    // 8x6, 8x8, 8x16, 8x32 v
1778*c0909341SAndroid Build Coastguard Worker1680:   // 16x8, 16x16, ...
1779*c0909341SAndroid Build Coastguard Worker320:    // 32x8, 32x16, ...
1780*c0909341SAndroid Build Coastguard Worker640:
1781*c0909341SAndroid Build Coastguard Worker1280:
        // q4 (d8/d9) is used below and is callee-saved under the AAPCS. It is
        // restored at the final 0:.
1782*c0909341SAndroid Build Coastguard Worker        vpush           {q4}
        // Load the full 8-byte filter row. The :64 qualifier asserts 8-byte
        // alignment.
1783*c0909341SAndroid Build Coastguard Worker        vld1.8          {d0}, [\my, :64]
        // src -= 3 rows, so filtering starts three rows above the block.
1784*c0909341SAndroid Build Coastguard Worker        sub             \src, \src, \s_strd
1785*c0909341SAndroid Build Coastguard Worker        sub             \src, \src, \s_strd, lsl #1
        // Sign-extend the taps from 8 to 16 bit.
1786*c0909341SAndroid Build Coastguard Worker        vmovl.s8        q0,  d0
        // my is free once the taps are loaded. It now holds the full height,
        // which is needed to restart h and rewind the pointers at 9:.
1787*c0909341SAndroid Build Coastguard Worker        mov             \my, \h
        // Per-strip setup: second-row pointers and doubled strides.
1788*c0909341SAndroid Build Coastguard Worker168:
1789*c0909341SAndroid Build Coastguard Worker        add             \ds2, \dst, \d_strd
1790*c0909341SAndroid Build Coastguard Worker        add             \sr2, \src, \s_strd
1791*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd, \s_strd, #1
1792*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd, \d_strd, #1
1793*c0909341SAndroid Build Coastguard Worker
        // Prime the window with seven rows of history, widened to 16 bit.
1794*c0909341SAndroid Build Coastguard Worker        load_reg        \src, \sr2, \s_strd, d2,  d4,  d6,  d8,  d16, d18, d20
1795*c0909341SAndroid Build Coastguard Worker        vmovl_u8        q1,  d2,  q2,  d4,  q3,  d6,  q4,  d8,  q8,  d16, q9,  d18, q10, d20
1796*c0909341SAndroid Build Coastguard Worker
        // Row loop, unrolled into stages of 2, 2, 2, 2 and 4 output rows.
        // Register roles rotate from stage to stage instead of copying the
        // window. Every stage decrements h and leaves through 9: once h is no
        // longer positive.
1797*c0909341SAndroid Build Coastguard Worker88:
1798*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2
1799*c0909341SAndroid Build Coastguard Worker        load_reg        \sr2, \src, \s_strd, d22, d24
1800*c0909341SAndroid Build Coastguard Worker        vmovl_u8        q11, d22, q12, d24
1801*c0909341SAndroid Build Coastguard Worker        mul_mla_8_1     q1,  q2,  q1,  q2,  q3,  q4,  q8,  q9,  q10,  q11, q12
1802*c0909341SAndroid Build Coastguard Worker        shift_store_8   \type, \d_strd, q1,  d2,  q2,  d4
1803*c0909341SAndroid Build Coastguard Worker        ble             9f
1804*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2
1805*c0909341SAndroid Build Coastguard Worker        load_reg        \sr2, \src, \s_strd, d26, d28
1806*c0909341SAndroid Build Coastguard Worker        vmovl_u8        q13, d26, q14, d28
1807*c0909341SAndroid Build Coastguard Worker        mul_mla_8_1     q3,  q4,  q3,  q4,  q8,  q9,  q10, q11, q12, q13, q14
1808*c0909341SAndroid Build Coastguard Worker        shift_store_8   \type, \d_strd, q3,  d6,  q4,  d8
1809*c0909341SAndroid Build Coastguard Worker        ble             9f
1810*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2
1811*c0909341SAndroid Build Coastguard Worker        load_reg        \sr2, \src, \s_strd, d30, d2
1812*c0909341SAndroid Build Coastguard Worker        vmovl_u8        q15, d30, q1,  d2
1813*c0909341SAndroid Build Coastguard Worker        mul_mla_8_1     q8,  q9,  q8,  q9,  q10, q11, q12, q13, q14, q15, q1
1814*c0909341SAndroid Build Coastguard Worker        shift_store_8   \type, \d_strd, q8,  d16, q9,  d18
1815*c0909341SAndroid Build Coastguard Worker        ble             9f
1816*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2
1817*c0909341SAndroid Build Coastguard Worker        load_reg        \sr2, \src, \s_strd, d4,  d6
1818*c0909341SAndroid Build Coastguard Worker        vmovl_u8        q2,  d4,  q3,  d6
1819*c0909341SAndroid Build Coastguard Worker        mul_mla_8_1     q10, q11, q10, q11, q12, q13, q14, q15, q1,  q2,  q3
1820*c0909341SAndroid Build Coastguard Worker        shift_store_8   \type, \d_strd, q10, d20, q11, d22
1821*c0909341SAndroid Build Coastguard Worker        ble             9f
        // Final stage: four rows. Afterwards the registers are back in the
        // layout expected at 88:.
1822*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #4
1823*c0909341SAndroid Build Coastguard Worker        load_reg        \sr2, \src, \s_strd, d8,  d16, d18, d20
1824*c0909341SAndroid Build Coastguard Worker        vmovl_u8        q4,  d8,  q8,  d16, q9,  d18, q10, d20
1825*c0909341SAndroid Build Coastguard Worker        mul_mla_8_1     q12, q13, q12, q13, q14, q15, q1,  q2,  q3,  q4,  q8
1826*c0909341SAndroid Build Coastguard Worker        mul_mla_8_1     q14, q15, q14, q15, q1,  q2,  q3,  q4,  q8,  q9,  q10
1827*c0909341SAndroid Build Coastguard Worker        shift_store_8   \type, \d_strd, q12, d24, q13, d26, q14, d28, q15, d30
1828*c0909341SAndroid Build Coastguard Worker        bgt             88b
        // Strip finished: move on to the next 8-pixel column, if any.
1829*c0909341SAndroid Build Coastguard Worker9:
1830*c0909341SAndroid Build Coastguard Worker        subs            \w,  \w,  #8
1831*c0909341SAndroid Build Coastguard Worker        ble             0f
        // Undo the stride doubling from 168: (it is redone there).
1832*c0909341SAndroid Build Coastguard Worker        asr             \s_strd, \s_strd, #1
1833*c0909341SAndroid Build Coastguard Worker        asr             \d_strd, \d_strd, #1
        // Rewind both pointers by my rows, where my holds the full height.
1834*c0909341SAndroid Build Coastguard Worker        mls             \src, \s_strd, \my, \src
1835*c0909341SAndroid Build Coastguard Worker        mls             \dst, \d_strd, \my, \dst
        // src additionally goes back eight rows.
        // NOTE(review): presumably the seven history rows plus one row of
        // overshoot from the alternating src/sr2 loads - confirm against
        // load_reg.
1836*c0909341SAndroid Build Coastguard Worker        sub             \src, \src, \s_strd, lsl #3
        // Restart the row counter for the new strip.
1837*c0909341SAndroid Build Coastguard Worker        mov             \h,  \my
        // Step 8 input bytes to the right. dst moves 8 bytes for put and 16
        // bytes for every other type.
        // NOTE(review): presumably 8-bit versus 16-bit output elements -
        // confirm.
1838*c0909341SAndroid Build Coastguard Worker        add             \src, \src, #8
1839*c0909341SAndroid Build Coastguard Worker.ifc \type, put
1840*c0909341SAndroid Build Coastguard Worker        add             \dst, \dst, #8
1841*c0909341SAndroid Build Coastguard Worker.else
1842*c0909341SAndroid Build Coastguard Worker        add             \dst, \dst, #16
1843*c0909341SAndroid Build Coastguard Worker.endif
1844*c0909341SAndroid Build Coastguard Worker        b               168b
        // Exit: undo the vpush from the top of this block, then return.
1845*c0909341SAndroid Build Coastguard Worker0:
1846*c0909341SAndroid Build Coastguard Worker        vpop            {q4}
1847*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}
1848*c0909341SAndroid Build Coastguard Worker
// 16-pixel-wide vertical filter entry. Heights above 4 reuse the generic
// column loop at 1680: above. Heights 2 and 4 use a 4-tap filter here.
1849*c0909341SAndroid Build Coastguard Worker160:
        // Flags still come from the cmp h, #4 at the _8tap_v entry.
1850*c0909341SAndroid Build Coastguard Worker        bgt             1680b
1851*c0909341SAndroid Build Coastguard Worker
1852*c0909341SAndroid Build Coastguard Worker        // 16x2, 16x4 v
        // Load the middle four taps (bytes 2..5 of the 8-byte filter row)
        // into both 32-bit lanes of d0.
1853*c0909341SAndroid Build Coastguard Worker        add             \my, \my, #2
1854*c0909341SAndroid Build Coastguard Worker        vld1.32         {d0[]}, [\my]
        // Start one row above the block. sr2 and ds2 sit one row below src
        // and dst, and the doubled strides make each pointer skip a row.
1855*c0909341SAndroid Build Coastguard Worker        sub             \src, \src, \s_strd
1856*c0909341SAndroid Build Coastguard Worker        add             \ds2, \dst, \d_strd
1857*c0909341SAndroid Build Coastguard Worker        add             \sr2, \src, \s_strd
1858*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd, \s_strd, #1
1859*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd, \d_strd, #1
        // Sign-extend the taps from 8 to 16 bit.
1860*c0909341SAndroid Build Coastguard Worker        vmovl.s8        q0,  d0
1861*c0909341SAndroid Build Coastguard Worker
        // This compare is consumed by the ble 0f after the first store.
1862*c0909341SAndroid Build Coastguard Worker        cmp             \h,  #2
        // Five input rows of 16 bytes each are loaded into q11..q15.
1863*c0909341SAndroid Build Coastguard Worker        load_reg        \src, \sr2, \s_strd, q11, q12, q13, q14, q15
        // Widen the low 8 bytes of every row (even d registers) into
        // q1, q2, q3, q8, q9 first. The high 8 bytes (odd d registers) are
        // then widened in place. The order matters, because each in-place
        // widening overwrites the even d register of the same row.
1864*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q1,  d22
1865*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q2,  d24
1866*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q3,  d26
1867*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q8,  d28
1868*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q9,  d30
1869*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q11, d23
1870*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q12, d25
1871*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q13, d27
1872*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q14, d29
1873*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q15, d31
        // Two output rows, each split into a low and a high 8-pixel half. The
        // second and fourth calls use windows shifted down by one row.
        // NOTE(review): assumes the first mul_mla_4 operand is the
        // destination; the macro is defined outside this chunk.
1874*c0909341SAndroid Build Coastguard Worker        mul_mla_4       q1,  q1,  q2,  q3,  q8
1875*c0909341SAndroid Build Coastguard Worker        mul_mla_4       q10, q2,  q3,  q8,  q9
1876*c0909341SAndroid Build Coastguard Worker        mul_mla_4       q2,  q11, q12, q13, q14
1877*c0909341SAndroid Build Coastguard Worker        mul_mla_4       q11, q12, q13, q14, q15
1878*c0909341SAndroid Build Coastguard Worker        shift_store_16  \type, \d_strd, q1, d2, d3, q2, q10, d20, d21, q11
        // h == 2: done.
1879*c0909341SAndroid Build Coastguard Worker        ble             0f
        // h == 4: load two more rows. q1 and q10 receive the low and high
        // half of the first new row, q12 and q11 those of the second. As
        // above, the low half is widened before the in-place high half.
1880*c0909341SAndroid Build Coastguard Worker        load_reg        \sr2, \src, \s_strd, q10, q11
1881*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q1,  d20
1882*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q10, d21
1883*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q12, d22
1884*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q11, d23
1885*c0909341SAndroid Build Coastguard Worker        mul_mla_4       q2,  q3,  q8,  q9,  q1
1886*c0909341SAndroid Build Coastguard Worker        mul_mla_4       q3,  q13, q14, q15, q10
1887*c0909341SAndroid Build Coastguard Worker        mul_mla_4       q13, q8,  q9,  q1,  q12
1888*c0909341SAndroid Build Coastguard Worker        mul_mla_4       q14, q14, q15, q10, q11
1889*c0909341SAndroid Build Coastguard Worker        shift_store_16  \type, \d_strd, q2, d4, d5, q3, q13, d26, d27, q14
1890*c0909341SAndroid Build Coastguard Worker0:
        // This path only uses q0-q3 and q8-q15, so there is no vpop before
        // the return.
1891*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}
1892*c0909341SAndroid Build Coastguard Worker
// Combined horizontal+vertical 8tap path: select the vertical filter row for
// my, then jump to the width-specific code through a table of offsets
// relative to the table label.
1893*c0909341SAndroid Build Coastguard WorkerL(\type\()_8tap_hv):
        // Flags set here stay live across the whole dispatch (nothing below
        // sets flags). They are consumed by movgt and by the bgt near the
        // start of the branch targets, e.g. bgt 280f in the 20: case.
1894*c0909341SAndroid Build Coastguard Worker        cmp             \h,  #4
        // r9 = bits 7..13 of my, my = bits 0..6 of my. The upper field is
        // used when h > 4, the lower one otherwise.
        // NOTE(review): presumably these are the 8-tap and 4-tap filter
        // indices packed by code outside this chunk - confirm.
1895*c0909341SAndroid Build Coastguard Worker        ubfx            r9,  \my, #7, #7
1896*c0909341SAndroid Build Coastguard Worker        and             \my, \my, #0x7f
1897*c0909341SAndroid Build Coastguard Worker        it              gt
1898*c0909341SAndroid Build Coastguard Worker        movgt           \my, r9
        // my = r10 + index * 8, i.e. the address of one 8-byte filter row.
        // NOTE(review): r10 is presumably the filter table base set up
        // before this chunk - confirm.
1899*c0909341SAndroid Build Coastguard Worker        add             \my,  r10, \my, lsl #3
1900*c0909341SAndroid Build Coastguard Worker
        // r8 indexes the word table below. Each entry is (target - table),
        // so adding the table address back gives the branch target.
        // NOTE(review): r8 is computed before this chunk, presumably from
        // the block width - confirm.
1901*c0909341SAndroid Build Coastguard Worker        adr             r9,  L(\type\()_8tap_hv_tbl)
1902*c0909341SAndroid Build Coastguard Worker        ldr             r8,  [r9, r8, lsl #2]
1903*c0909341SAndroid Build Coastguard Worker        add             r9,  r9,  r8
1904*c0909341SAndroid Build Coastguard Worker        bx              r9
1905*c0909341SAndroid Build Coastguard Worker
        // Word-align the table so the ldr above reads aligned entries.
1906*c0909341SAndroid Build Coastguard Worker        .align 2
1907*c0909341SAndroid Build Coastguard WorkerL(\type\()_8tap_hv_tbl):
        // Entries run from label 1280 down to label 20. CONFIG_THUMB is added
        // to every offset (presumably to set the Thumb bit for bx; it is
        // defined elsewhere).
1908*c0909341SAndroid Build Coastguard Worker        .word 1280f - L(\type\()_8tap_hv_tbl) + CONFIG_THUMB
1909*c0909341SAndroid Build Coastguard Worker        .word 640f  - L(\type\()_8tap_hv_tbl) + CONFIG_THUMB
1910*c0909341SAndroid Build Coastguard Worker        .word 320f  - L(\type\()_8tap_hv_tbl) + CONFIG_THUMB
1911*c0909341SAndroid Build Coastguard Worker        .word 160f  - L(\type\()_8tap_hv_tbl) + CONFIG_THUMB
1912*c0909341SAndroid Build Coastguard Worker        .word 80f   - L(\type\()_8tap_hv_tbl) + CONFIG_THUMB
1913*c0909341SAndroid Build Coastguard Worker        .word 40f   - L(\type\()_8tap_hv_tbl) + CONFIG_THUMB
1914*c0909341SAndroid Build Coastguard Worker        .word 20f   - L(\type\()_8tap_hv_tbl) + CONFIG_THUMB
1915*c0909341SAndroid Build Coastguard Worker
// 2-pixel-wide horizontal+vertical filter, assembled only for the put
// variant. The matching .endif lies further down in the file. Heights above 4
// branch to 280:. Heights 2 and 4 use 4 taps in both directions here.
1916*c0909341SAndroid Build Coastguard Worker20:
1917*c0909341SAndroid Build Coastguard Worker.ifc \type, put
        // Load four horizontal taps from mx + 2 into both lanes of d0.
        // NOTE(review): mx is used as an address here; it is presumably
        // turned into a filter row pointer before this chunk - confirm.
1918*c0909341SAndroid Build Coastguard Worker        add             \mx,  \mx,  #2
1919*c0909341SAndroid Build Coastguard Worker        vld1.32         {d0[]},  [\mx]
        // Flags still come from the cmp h, #4 at the _8tap_hv entry. The add
        // and vld1 above do not modify them.
1920*c0909341SAndroid Build Coastguard Worker        bgt             280f
        // Load the middle four vertical taps (bytes 2..5 of the 8-byte row)
        // into both lanes of d2.
1921*c0909341SAndroid Build Coastguard Worker        add             \my,  \my,  #2
1922*c0909341SAndroid Build Coastguard Worker        vld1.32         {d2[]},  [\my]
1923*c0909341SAndroid Build Coastguard Worker
1924*c0909341SAndroid Build Coastguard Worker        // 2x2, 2x4 hv
        // sr2 = src - 1 pixel; src = sr2 - 1 row. Filtering therefore starts
        // one pixel to the left and one row above the block. ds2 = dst + 1
        // row, and both strides are doubled.
1925*c0909341SAndroid Build Coastguard Worker        sub             \sr2, \src, #1
1926*c0909341SAndroid Build Coastguard Worker        sub             \src, \sr2, \s_strd
1927*c0909341SAndroid Build Coastguard Worker        add             \ds2, \dst, \d_strd
1928*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd, \s_strd, #1
1929*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd, \d_strd, #1
        // Sign-extend both tap sets to 16 bit: q0 horizontal, q1 vertical.
1930*c0909341SAndroid Build Coastguard Worker        vmovl.s8        q0,  d0
1931*c0909341SAndroid Build Coastguard Worker        vmovl.s8        q1,  d2
1932*c0909341SAndroid Build Coastguard Worker
1933*c0909341SAndroid Build Coastguard Worker
        // Horizontally filter the first (topmost) row inline. d26 holds
        // pixels 0..3 and d28 pixels 1..4 (vext by one 16-bit element). Both
        // are multiplied by the taps, then two pairwise adds reduce them to
        // the sums for x = 0 and x = 1. The result is rounded and shifted
        // right by 2 into d16.
1934*c0909341SAndroid Build Coastguard Worker        vld1.8          {d26}, [\src], \s_strd
1935*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q13, d26
1936*c0909341SAndroid Build Coastguard Worker        vext.8          q14, q13, q13, #2
1937*c0909341SAndroid Build Coastguard Worker        vmul.s16        d26, d26, d0
1938*c0909341SAndroid Build Coastguard Worker        vmul.s16        d28, d28, d0
1939*c0909341SAndroid Build Coastguard Worker        vpadd.s16       d26, d26, d28
1940*c0909341SAndroid Build Coastguard Worker        vpadd.s16       d26, d26, d26
1941*c0909341SAndroid Build Coastguard Worker        vrshr.s16       d16, d26, #2
        // NOTE(review): the _8tap_filter_2 helper is defined outside this
        // chunk; from its use here it presumably returns the horizontally
        // filtered next two rows in the two halves of d26 - confirm.
1942*c0909341SAndroid Build Coastguard Worker        bl              L(\type\()_8tap_filter_2)
1943*c0909341SAndroid Build Coastguard Worker
        // Build the sliding window of row pairs: d16 = (first row, low half
        // of d26) and d17 = d26.
1944*c0909341SAndroid Build Coastguard Worker        vext.8          d16, d16, d16, #4
1945*c0909341SAndroid Build Coastguard Worker        vmov            d17, d26
1946*c0909341SAndroid Build Coastguard Worker        vext.8          d16, d16, d26, #4
1947*c0909341SAndroid Build Coastguard Worker
        // Loop: each pass filters two more rows horizontally and emits two
        // output rows.
1948*c0909341SAndroid Build Coastguard Worker2:
1949*c0909341SAndroid Build Coastguard Worker        bl              L(\type\()_8tap_filter_2)
1950*c0909341SAndroid Build Coastguard Worker
        // d18 = (high half of d17, low half of d26). The vertical 4-tap sum
        // over d16, d17, d18, d26 uses taps d2[0..3] and accumulates in 32
        // bit.
1951*c0909341SAndroid Build Coastguard Worker        vext.8          d18, d17, d26, #4
1952*c0909341SAndroid Build Coastguard Worker        vmull.s16       q2,  d16, d2[0]
1953*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q2,  d17, d2[1]
1954*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q2,  d18, d2[2]
1955*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q2,  d26, d2[3]
1956*c0909341SAndroid Build Coastguard Worker
        // Round, shift right by shift_hv and narrow to signed 16 bit with
        // saturation, then saturate to unsigned 8 bit. Only the low four
        // bytes of d4 are stored below.
1957*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d4,  q2,  #\shift_hv
1958*c0909341SAndroid Build Coastguard Worker        vqmovun.s16     d4,  q2
        // The flags from this subs are tested by the ble after the stores.
1959*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2
        // Store two pixels (16 bits) to each of the two output rows.
1960*c0909341SAndroid Build Coastguard Worker        vst1.16         {d4[0]}, [\dst, :16], \d_strd
1961*c0909341SAndroid Build Coastguard Worker        vst1.16         {d4[1]}, [\ds2, :16], \d_strd
        // The 0: label targeted here lies beyond the end of this section.
1962*c0909341SAndroid Build Coastguard Worker        ble             0f
        // Slide the window down by two rows.
1963*c0909341SAndroid Build Coastguard Worker        vmov            d16, d18
1964*c0909341SAndroid Build Coastguard Worker        vmov            d17, d26
1965*c0909341SAndroid Build Coastguard Worker        b               2b
1966*c0909341SAndroid Build Coastguard Worker
1967*c0909341SAndroid Build Coastguard Worker280:    // 2x8, 2x16, 2x32 hv
        // 2-pixel-wide path that applies eight vertical coefficients
        // (d2[0..3], d3[0..3]) to rows that were first filtered horizontally
        // with d0[0..3].
        // d0 is not loaded in this block - NOTE(review): presumably the code
        // that branches here has already loaded the horizontal taps into it.
        // Load eight bytes of vertical coefficients.
1968*c0909341SAndroid Build Coastguard Worker        vld1.8          {d2},  [\my, :64]
        // \sr2 = one pixel left and two rows above the incoming \src;
        // \src = one pixel left and three rows above it.
1969*c0909341SAndroid Build Coastguard Worker        sub             \src, \src, #1
1970*c0909341SAndroid Build Coastguard Worker        sub             \sr2, \src, \s_strd, lsl #1
1971*c0909341SAndroid Build Coastguard Worker        sub             \src, \sr2, \s_strd
        // \ds2 addresses the output row below \dst. Both strides are doubled
        // because \src/\sr2 and \dst/\ds2 each step over alternate rows.
1972*c0909341SAndroid Build Coastguard Worker        add             \ds2, \dst, \d_strd
1973*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd, \s_strd, #1
1974*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd, \d_strd, #1
        // Sign-extend both coefficient sets from 8 to 16 bits.
1975*c0909341SAndroid Build Coastguard Worker        vmovl.s8        q0,  d0
1976*c0909341SAndroid Build Coastguard Worker        vmovl.s8        q1,  d2
1977*c0909341SAndroid Build Coastguard Worker
        // Topmost row is filtered inline: load 8 bytes, widen to u16, and make
        // a copy rotated by one element so d26 = px0..3 and d28 = px1..4.
1978*c0909341SAndroid Build Coastguard Worker        vld1.8          {d26}, [\src], \s_strd
1979*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q13, d26
1980*c0909341SAndroid Build Coastguard Worker        vext.8          q14, q13, q13, #2
        // Element-wise products with the four taps, then two pairwise adds
        // collapse them into {out0, out1, out0, out1}.
1981*c0909341SAndroid Build Coastguard Worker        vmul.s16        d26, d26, d0
1982*c0909341SAndroid Build Coastguard Worker        vmul.s16        d28, d28, d0
1983*c0909341SAndroid Build Coastguard Worker        vpadd.s16       d26, d26, d28
1984*c0909341SAndroid Build Coastguard Worker        vpadd.s16       d26, d26, d26
        // Rounding shift by 2; d16 holds the first row in both halves.
1985*c0909341SAndroid Build Coastguard Worker        vrshr.s16       d16, d26, #2
1986*c0909341SAndroid Build Coastguard Worker
        // Prime the window. Each _8tap_filter_2 call returns two new rows in
        // d26; vext #4 joins the upper half of the previous pair with the
        // lower half of the new one, so consecutive registers d16..d21 hold
        // row pairs that each start one row later than the one before.
1987*c0909341SAndroid Build Coastguard Worker        bl              L(\type\()_8tap_filter_2)
1988*c0909341SAndroid Build Coastguard Worker        vext.8          d16, d16, d16, #4
1989*c0909341SAndroid Build Coastguard Worker        vmov            d17, d26
1990*c0909341SAndroid Build Coastguard Worker        vext.8          d16, d16, d26, #4
1991*c0909341SAndroid Build Coastguard Worker        bl              L(\type\()_8tap_filter_2)
1992*c0909341SAndroid Build Coastguard Worker        vext.8          d18, d17, d26, #4
1993*c0909341SAndroid Build Coastguard Worker        vmov            d19, d26
1994*c0909341SAndroid Build Coastguard Worker        bl              L(\type\()_8tap_filter_2)
1995*c0909341SAndroid Build Coastguard Worker        vext.8          d20, d19, d26, #4
1996*c0909341SAndroid Build Coastguard Worker        vmov            d21, d26
1997*c0909341SAndroid Build Coastguard Worker
1998*c0909341SAndroid Build Coastguard Worker28:
        // Main loop: fetch two more filtered rows, build the last in-between
        // pair (d22), then accumulate eight row pairs into q2.
1999*c0909341SAndroid Build Coastguard Worker        bl              L(\type\()_8tap_filter_2)
2000*c0909341SAndroid Build Coastguard Worker        vext.8          d22, d21, d26, #4
2001*c0909341SAndroid Build Coastguard Worker        vmull.s16       q2,  d16, d2[0]
2002*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q2,  d17, d2[1]
2003*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q2,  d18, d2[2]
2004*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q2,  d19, d2[3]
2005*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q2,  d20, d3[0]
2006*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q2,  d21, d3[1]
2007*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q2,  d22, d3[2]
2008*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q2,  d26, d3[3]
2009*c0909341SAndroid Build Coastguard Worker
        // Rounding shift by \shift_hv with saturating narrow to s16, then
        // saturate to u8; only the low four bytes of d4 carry results.
2010*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d4,  q2,  #\shift_hv
2011*c0909341SAndroid Build Coastguard Worker        vqmovun.s16     d4,  q2
        // Flags from this subs drive the ble after the stores.
2012*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2
        // Two pixels (one 16-bit lane) per row; pointers post-increment.
2013*c0909341SAndroid Build Coastguard Worker        vst1.16         {d4[0]}, [\dst, :16], \d_strd
2014*c0909341SAndroid Build Coastguard Worker        vst1.16         {d4[1]}, [\ds2, :16], \d_strd
2015*c0909341SAndroid Build Coastguard Worker        ble             0f
        // Slide the window down two rows.
2016*c0909341SAndroid Build Coastguard Worker        vmov            d16, d18
2017*c0909341SAndroid Build Coastguard Worker        vmov            d17, d19
2018*c0909341SAndroid Build Coastguard Worker        vmov            d18, d20
2019*c0909341SAndroid Build Coastguard Worker        vmov            d19, d21
2020*c0909341SAndroid Build Coastguard Worker        vmov            d20, d22
2021*c0909341SAndroid Build Coastguard Worker        vmov            d21, d26
2022*c0909341SAndroid Build Coastguard Worker        b               28b
2023*c0909341SAndroid Build Coastguard Worker
2024*c0909341SAndroid Build Coastguard Worker0:
        // Common exit for the 2-wide loops: restore r4-r11 and return by
        // popping straight into pc (the matching push is outside this chunk).
2025*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}
2026*c0909341SAndroid Build Coastguard Worker
2027*c0909341SAndroid Build Coastguard WorkerL(\type\()_8tap_filter_2):
        // Local subroutine (called with bl, returns with bx lr).
        // Horizontally filters two source rows with the four taps d0[0..3],
        // producing two outputs per row.
        //   In:   \sr2, \src  row pointers, both post-incremented by \s_strd
        //         d0          four 16-bit taps
        //   Out:  d26 = {\sr2 row out0, out1, \src row out0, out1}, after a
        //               rounding shift right by 2
        //         d27 = d26 with its two 32-bit halves swapped
        //   Clobbers: q14, q15
2028*c0909341SAndroid Build Coastguard Worker        vld1.8          {d28},  [\sr2], \s_strd
2029*c0909341SAndroid Build Coastguard Worker        vld1.8          {d30},  [\src], \s_strd
        // Byte-rotated copies provide the pixels shifted left by one.
2030*c0909341SAndroid Build Coastguard Worker        vext.8          d29, d28, d28, #1
2031*c0909341SAndroid Build Coastguard Worker        vext.8          d31, d30, d30, #1
        // Widen the \sr2 row: d26 = px0..3. The widened shifted copy lands in
        // q14, and its low half (px1..4) is moved to d27 before q14 is
        // overwritten by the \src row.
2032*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q13, d28
2033*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q14, d29
2034*c0909341SAndroid Build Coastguard Worker        vmov            d27, d28
        // Widen the \src row: d28 = px0..3, d30 = px1..4.
2035*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q14, d30
2036*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q15, d31
        // Interleave 32-bit halves so each register carries both rows:
        //   d26 = px0,1 of each row    d28 = px2,3 of each row
        //   d27 = px1,2 of each row    d30 = px3,4 of each row
2037*c0909341SAndroid Build Coastguard Worker        vtrn.32         d26, d28
2038*c0909341SAndroid Build Coastguard Worker        vtrn.32         d27, d30
        // Four-tap sum; every lane is one output pixel.
2039*c0909341SAndroid Build Coastguard Worker        vmul.s16        d26, d26, d0[0]
2040*c0909341SAndroid Build Coastguard Worker        vmla.s16        d26, d27, d0[1]
2041*c0909341SAndroid Build Coastguard Worker        vmla.s16        d26, d28, d0[2]
2042*c0909341SAndroid Build Coastguard Worker        vmla.s16        d26, d30, d0[3]
2043*c0909341SAndroid Build Coastguard Worker        vrshr.s16       d26, d26, #2
2044*c0909341SAndroid Build Coastguard Worker        vext.8          d27, d26, d26, #4
2045*c0909341SAndroid Build Coastguard Worker        bx              lr
2046*c0909341SAndroid Build Coastguard Worker.endif
2047*c0909341SAndroid Build Coastguard Worker
2048*c0909341SAndroid Build Coastguard Worker40:
        // 4-pixel-wide entry. Loads the 4 bytes at \mx+2 into every 32-bit
        // lane of d0 (horizontal taps), then either branches to the 8-row
        // vertical variant at 480 or falls through to the 4-row variant.
        // The flags tested by bgt are set before this chunk.
2049*c0909341SAndroid Build Coastguard Worker        add             \mx, \mx, #2
2050*c0909341SAndroid Build Coastguard Worker        vld1.32         {d0[]},  [\mx]
2051*c0909341SAndroid Build Coastguard Worker        bgt             480f
        // Vertical taps: the 4 bytes at \my+2, replicated likewise.
2052*c0909341SAndroid Build Coastguard Worker        add             \my, \my,  #2
2053*c0909341SAndroid Build Coastguard Worker        vld1.32         {d2[]},  [\my]
        // \sr2 = one pixel left of the incoming \src; \src = the row above it.
2054*c0909341SAndroid Build Coastguard Worker        sub             \sr2, \src, #1
2055*c0909341SAndroid Build Coastguard Worker        sub             \src, \sr2, \s_strd
        // \ds2 is the output row below \dst; strides are doubled because each
        // pointer of a pair handles alternate rows.
2056*c0909341SAndroid Build Coastguard Worker        add             \ds2, \dst, \d_strd
2057*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd, \s_strd, #1
2058*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd, \d_strd, #1
        // Sign-extend the taps to 16 bits.
2059*c0909341SAndroid Build Coastguard Worker        vmovl.s8        q0,  d0
2060*c0909341SAndroid Build Coastguard Worker        vmovl.s8        q1,  d2
2061*c0909341SAndroid Build Coastguard Worker
2062*c0909341SAndroid Build Coastguard Worker        // 4x2, 4x4 hv
        // First row is filtered inline: widen 8 pixels into q14 and form the
        // windows shifted by 1, 2 and 3 pixels with vext.
2063*c0909341SAndroid Build Coastguard Worker        vld1.8          {d30}, [\src], \s_strd
2064*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q14, d30
2065*c0909341SAndroid Build Coastguard Worker        vext.8          d27, d28, d29, #2
2066*c0909341SAndroid Build Coastguard Worker        vext.8          d30, d28, d29, #4
2067*c0909341SAndroid Build Coastguard Worker        vext.8          d31, d28, d29, #6
2068*c0909341SAndroid Build Coastguard Worker        vmul.s16        d26, d28, d0[0]
2069*c0909341SAndroid Build Coastguard Worker        vmla.s16        d26, d27, d0[1]
2070*c0909341SAndroid Build Coastguard Worker        vmla.s16        d26, d30, d0[2]
2071*c0909341SAndroid Build Coastguard Worker        vmla.s16        d26, d31, d0[3]
        // d16 = first filtered row (4 x s16) after a rounding shift by 2.
2072*c0909341SAndroid Build Coastguard Worker        vrshr.s16       d16, d26, #2
2073*c0909341SAndroid Build Coastguard Worker
        // Two more rows from the helper complete the initial window
        // d16, d17, d18.
2074*c0909341SAndroid Build Coastguard Worker        bl              L(\type\()_8tap_filter_4)
2075*c0909341SAndroid Build Coastguard Worker        vmov            d17, d26
2076*c0909341SAndroid Build Coastguard Worker        vmov            d18, d27
2077*c0909341SAndroid Build Coastguard Worker
2078*c0909341SAndroid Build Coastguard Worker4:
        // Each pass fetches two new rows (d26, d27) and produces two output
        // rows: q2 from rows d16,d17,d18,d26 and q3 from d17,d18,d26,d27.
2079*c0909341SAndroid Build Coastguard Worker        bl              L(\type\()_8tap_filter_4)
2080*c0909341SAndroid Build Coastguard Worker        vmull.s16       q2,  d16, d2[0]
2081*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q2,  d17, d2[1]
2082*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q2,  d18, d2[2]
2083*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q2,  d26, d2[3]
2084*c0909341SAndroid Build Coastguard Worker        vmull.s16       q3,  d17, d2[0]
2085*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q3,  d18, d2[1]
2086*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q3,  d26, d2[2]
2087*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q3,  d27, d2[3]
        // Rounding shift by \shift_hv, saturating narrow to s16.
2088*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d4,  q2,  #\shift_hv
2089*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d6,  q3,  #\shift_hv
        // Flags from this subs are consumed by the ble after the stores.
2090*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2
2091*c0909341SAndroid Build Coastguard Worker.ifc \type, put
        // put: saturate to u8 and store four bytes per row.
2092*c0909341SAndroid Build Coastguard Worker        vqmovun.s16     d4,  q2
2093*c0909341SAndroid Build Coastguard Worker        vqmovun.s16     d6,  q3
2094*c0909341SAndroid Build Coastguard Worker        vst1.32         {d4[0]}, [\dst, :32], \d_strd
2095*c0909341SAndroid Build Coastguard Worker        vst1.32         {d6[0]}, [\ds2, :32], \d_strd
2096*c0909341SAndroid Build Coastguard Worker.else
        // any other \type: store the four 16-bit results per row as they are.
2097*c0909341SAndroid Build Coastguard Worker        vst1.16         {d4}, [\dst, :64], \d_strd
2098*c0909341SAndroid Build Coastguard Worker        vst1.16         {d6}, [\ds2, :64], \d_strd
2099*c0909341SAndroid Build Coastguard Worker.endif
2100*c0909341SAndroid Build Coastguard Worker        ble             0f
        // Slide the row window down by two.
2101*c0909341SAndroid Build Coastguard Worker        vmov            d16, d18
2102*c0909341SAndroid Build Coastguard Worker        vmov            d17, d26
2103*c0909341SAndroid Build Coastguard Worker        vmov            d18, d27
2104*c0909341SAndroid Build Coastguard Worker        b               4b
2105*c0909341SAndroid Build Coastguard Worker
2106*c0909341SAndroid Build Coastguard Worker480:    // 4x8, 4x16, 4x32 hv
        // 4-pixel-wide path with eight vertical coefficients. d0 already holds
        // the horizontal taps loaded at label 40 before the branch here.
        // Load eight bytes of vertical coefficients.
2107*c0909341SAndroid Build Coastguard Worker        vld1.8          {d2},  [\my, :64]
        // \sr2 = one pixel left and two rows above the incoming \src;
        // \src = one pixel left and three rows above it.
2108*c0909341SAndroid Build Coastguard Worker        sub             \src, \src, #1
2109*c0909341SAndroid Build Coastguard Worker        sub             \sr2, \src, \s_strd, lsl #1
2110*c0909341SAndroid Build Coastguard Worker        sub             \src, \sr2, \s_strd
        // \ds2 is the output row below \dst; strides doubled because each
        // pointer of a pair handles alternate rows.
2111*c0909341SAndroid Build Coastguard Worker        add             \ds2, \dst, \d_strd
2112*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd, \s_strd, #1
2113*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd, \d_strd, #1
        // Sign-extend both coefficient sets to 16 bits.
2114*c0909341SAndroid Build Coastguard Worker        vmovl.s8        q0,  d0
2115*c0909341SAndroid Build Coastguard Worker        vmovl.s8        q1,  d2
2116*c0909341SAndroid Build Coastguard Worker
        // Topmost row filtered inline (same sequence as the helper uses):
        // widen, build shifted windows, four-tap sum, rounding shift by 2.
2117*c0909341SAndroid Build Coastguard Worker        vld1.8          {d30}, [\src], \s_strd
2118*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q14, d30
2119*c0909341SAndroid Build Coastguard Worker        vext.8          d27, d28, d29, #2
2120*c0909341SAndroid Build Coastguard Worker        vext.8          d30, d28, d29, #4
2121*c0909341SAndroid Build Coastguard Worker        vext.8          d31, d28, d29, #6
2122*c0909341SAndroid Build Coastguard Worker        vmul.s16        d26, d28, d0[0]
2123*c0909341SAndroid Build Coastguard Worker        vmla.s16        d26, d27, d0[1]
2124*c0909341SAndroid Build Coastguard Worker        vmla.s16        d26, d30, d0[2]
2125*c0909341SAndroid Build Coastguard Worker        vmla.s16        d26, d31, d0[3]
2126*c0909341SAndroid Build Coastguard Worker        vrshr.s16       d16, d26, #2
2127*c0909341SAndroid Build Coastguard Worker
        // Three helper calls add six rows, so d16..d22 hold seven consecutive
        // filtered rows before the loop starts.
2128*c0909341SAndroid Build Coastguard Worker        bl              L(\type\()_8tap_filter_4)
2129*c0909341SAndroid Build Coastguard Worker        vmov            d17, d26
2130*c0909341SAndroid Build Coastguard Worker        vmov            d18, d27
2131*c0909341SAndroid Build Coastguard Worker        bl              L(\type\()_8tap_filter_4)
2132*c0909341SAndroid Build Coastguard Worker        vmov            d19, d26
2133*c0909341SAndroid Build Coastguard Worker        vmov            d20, d27
2134*c0909341SAndroid Build Coastguard Worker        bl              L(\type\()_8tap_filter_4)
2135*c0909341SAndroid Build Coastguard Worker        vmov            d21, d26
2136*c0909341SAndroid Build Coastguard Worker        vmov            d22, d27
2137*c0909341SAndroid Build Coastguard Worker
2138*c0909341SAndroid Build Coastguard Worker48:
        // Each pass fetches rows eight and nine of the window (d26, d27).
        // q2 sums rows d16..d22,d26 for the first output row.
2139*c0909341SAndroid Build Coastguard Worker        bl              L(\type\()_8tap_filter_4)
2140*c0909341SAndroid Build Coastguard Worker        vmull.s16       q2,  d16, d2[0]
2141*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q2,  d17, d2[1]
2142*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q2,  d18, d2[2]
2143*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q2,  d19, d2[3]
2144*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q2,  d20, d3[0]
2145*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q2,  d21, d3[1]
2146*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q2,  d22, d3[2]
2147*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q2,  d26, d3[3]
        // q3 sums rows d17..d22,d26,d27 for the second output row.
2148*c0909341SAndroid Build Coastguard Worker        vmull.s16       q3,  d17, d2[0]
2149*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q3,  d18, d2[1]
2150*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q3,  d19, d2[2]
2151*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q3,  d20, d2[3]
2152*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q3,  d21, d3[0]
2153*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q3,  d22, d3[1]
2154*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q3,  d26, d3[2]
2155*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q3,  d27, d3[3]
        // Rounding shift by \shift_hv, saturating narrow to s16.
2156*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d4,  q2,  #\shift_hv
2157*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d6,  q3,  #\shift_hv
        // Flags from this subs are consumed by the ble after the stores.
2158*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2
2159*c0909341SAndroid Build Coastguard Worker.ifc \type, put
        // put: saturate to u8 and store four bytes per row.
2160*c0909341SAndroid Build Coastguard Worker        vqmovun.s16     d4,  q2
2161*c0909341SAndroid Build Coastguard Worker        vqmovun.s16     d6,  q3
2162*c0909341SAndroid Build Coastguard Worker        vst1.32         {d4[0]}, [\dst, :32], \d_strd
2163*c0909341SAndroid Build Coastguard Worker        vst1.32         {d6[0]}, [\ds2, :32], \d_strd
2164*c0909341SAndroid Build Coastguard Worker.else
        // any other \type: store the four 16-bit results per row as they are.
2165*c0909341SAndroid Build Coastguard Worker        vst1.16         {d4}, [\dst, :64], \d_strd
2166*c0909341SAndroid Build Coastguard Worker        vst1.16         {d6}, [\ds2, :64], \d_strd
2167*c0909341SAndroid Build Coastguard Worker.endif
2168*c0909341SAndroid Build Coastguard Worker        ble             0f
        // Slide the seven-row window down by two.
2169*c0909341SAndroid Build Coastguard Worker        vmov            d16, d18
2170*c0909341SAndroid Build Coastguard Worker        vmov            d17, d19
2171*c0909341SAndroid Build Coastguard Worker        vmov            d18, d20
2172*c0909341SAndroid Build Coastguard Worker        vmov            d19, d21
2173*c0909341SAndroid Build Coastguard Worker        vmov            d20, d22
2174*c0909341SAndroid Build Coastguard Worker        vmov            d21, d26
2175*c0909341SAndroid Build Coastguard Worker        vmov            d22, d27
2176*c0909341SAndroid Build Coastguard Worker        b               48b
2177*c0909341SAndroid Build Coastguard Worker0:
        // Exit for the 4-wide loops: restore r4-r11 and return by popping
        // into pc (the matching push is outside this chunk).
2178*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}
2179*c0909341SAndroid Build Coastguard Worker
2180*c0909341SAndroid Build Coastguard WorkerL(\type\()_8tap_filter_4):
        // Local subroutine (called with bl, returns with bx lr).
        // Horizontally filters two source rows with the four taps d0[0..3],
        // producing four outputs per row.
        //   In:   \sr2, \src  row pointers, both post-incremented by \s_strd
        //         d0          four 16-bit taps
        //   Out:  d26 = filtered \sr2 row, d27 = filtered \src row
        //         (4 x s16 each, after a rounding shift right by 2)
        //   Clobbers: d1, q14, q15
2181*c0909341SAndroid Build Coastguard Worker        vld1.8          {d30}, [\sr2], \s_strd
2182*c0909341SAndroid Build Coastguard Worker        vld1.8          {d31}, [\src], \s_strd
        // \sr2 row: widen to u16 (d28 = px0..3, d29 = px4..7) and build the
        // windows shifted by 1, 2 and 3 pixels. d1 serves as scratch here.
2183*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q14, d30
2184*c0909341SAndroid Build Coastguard Worker        vext.8          d27, d28, d29, #2
2185*c0909341SAndroid Build Coastguard Worker        vext.8          d30, d28, d29, #4
2186*c0909341SAndroid Build Coastguard Worker        vext.8          d1,  d28, d29, #6
2187*c0909341SAndroid Build Coastguard Worker        vmul.s16        d26, d28, d0[0]
2188*c0909341SAndroid Build Coastguard Worker        vmla.s16        d26, d27, d0[1]
2189*c0909341SAndroid Build Coastguard Worker        vmla.s16        d26, d30, d0[2]
2190*c0909341SAndroid Build Coastguard Worker        vmla.s16        d26, d1,  d0[3]
2191*c0909341SAndroid Build Coastguard Worker
        // \src row: same sequence. d31 still holds the raw bytes loaded above
        // and is only overwritten after q14 has taken the widened copy.
2192*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q14, d31
2193*c0909341SAndroid Build Coastguard Worker        vext.8          d30, d28, d29, #2
2194*c0909341SAndroid Build Coastguard Worker        vext.8          d31, d28, d29, #4
2195*c0909341SAndroid Build Coastguard Worker        vext.8          d1,  d28, d29, #6
2196*c0909341SAndroid Build Coastguard Worker        vmul.s16        d27, d28, d0[0]
2197*c0909341SAndroid Build Coastguard Worker        vmla.s16        d27, d30, d0[1]
2198*c0909341SAndroid Build Coastguard Worker        vmla.s16        d27, d31, d0[2]
2199*c0909341SAndroid Build Coastguard Worker        vmla.s16        d27, d1,  d0[3]
        // Rounding shift by 2 on both rows.
2200*c0909341SAndroid Build Coastguard Worker        vrshr.s16       d26, d26, #2
2201*c0909341SAndroid Build Coastguard Worker        vrshr.s16       d27, d27, #2
2202*c0909341SAndroid Build Coastguard Worker        bx              lr
2203*c0909341SAndroid Build Coastguard Worker
2204*c0909341SAndroid Build Coastguard Worker80:
2205*c0909341SAndroid Build Coastguard Worker160:
2206*c0909341SAndroid Build Coastguard Worker320:
        // Shared entry for the wider blocks, processed as 8-pixel-wide
        // columns. The flags tested by bgt are set before this chunk; when
        // taken, the 880 path with eight vertical coefficients is used.
2207*c0909341SAndroid Build Coastguard Worker        bgt             880f
        // q4-q7 are used as row storage below, so save them first (AAPCS32
        // makes d8-d15 callee-saved). The matching vpop is not in this chunk.
2208*c0909341SAndroid Build Coastguard Worker        vpush           {q4-q7}
        // d0 = eight horizontal taps from \mx; d2 = the 4 bytes at \my+2
        // replicated into both 32-bit lanes (four vertical taps).
2209*c0909341SAndroid Build Coastguard Worker        add             \my,  \my,  #2
2210*c0909341SAndroid Build Coastguard Worker        vld1.8          {d0},  [\mx, :64]
2211*c0909341SAndroid Build Coastguard Worker        vld1.32         {d2[]},  [\my]
        // Start three pixels left and one row above the incoming \src.
2212*c0909341SAndroid Build Coastguard Worker        sub             \src,  \src,  #3
2213*c0909341SAndroid Build Coastguard Worker        sub             \src,  \src,  \s_strd
        // Sign-extend the taps to 16 bits.
2214*c0909341SAndroid Build Coastguard Worker        vmovl.s8        q0,  d0
2215*c0909341SAndroid Build Coastguard Worker        vmovl.s8        q1,  d2
        // \my is free once the taps are loaded; reuse it to remember the
        // height so label 9 can restart \h and rewind the pointers.
2216*c0909341SAndroid Build Coastguard Worker        mov             \my, \h
2217*c0909341SAndroid Build Coastguard Worker
2218*c0909341SAndroid Build Coastguard Worker164:    // 8x2, 8x4, 16x2, 16x4, 32x2, 32x4 hv
        // Per-column setup: second-row pointers, then double both strides so
        // each pointer of a pair steps over alternate rows.
2219*c0909341SAndroid Build Coastguard Worker        add             \ds2,  \dst,  \d_strd
2220*c0909341SAndroid Build Coastguard Worker        add             \sr2,  \src,  \s_strd
2221*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd, \d_strd, #1
2222*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd, \s_strd, #1
2223*c0909341SAndroid Build Coastguard Worker
        // First row filtered inline: load 16 bytes, widen to q12 (px0..7) and
        // q13 (px8..15), then accumulate eight taps over windows shifted by
        // one pixel each (vext by 2*i bytes).
2224*c0909341SAndroid Build Coastguard Worker        vld1.8          {q14},  [\src], \s_strd
2225*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q12, d28
2226*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q13, d29
2227*c0909341SAndroid Build Coastguard Worker        vmul.s16        q10, q12, d0[0]
2228*c0909341SAndroid Build Coastguard Worker.irpc i, 123
2229*c0909341SAndroid Build Coastguard Worker        vext.8          q14, q12, q13, #(2*\i)
2230*c0909341SAndroid Build Coastguard Worker        vmla.s16        q10, q14, d0[\i]
2231*c0909341SAndroid Build Coastguard Worker.endr
2232*c0909341SAndroid Build Coastguard Worker.irpc i, 4567
2233*c0909341SAndroid Build Coastguard Worker        vext.8          q14, q12, q13, #(2*\i)
2234*c0909341SAndroid Build Coastguard Worker        vmla.s16        q10, q14, d1[\i-4]
2235*c0909341SAndroid Build Coastguard Worker.endr
        // q3 = first filtered row (8 x s16) after a rounding shift by 2.
2236*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q3,  q10, #2
2237*c0909341SAndroid Build Coastguard Worker
        // _8tap_filter_8 is defined outside this chunk; judging by its use
        // here it returns the next two filtered rows in q10 and q11.
        // NOTE(review): confirm against its definition.
2238*c0909341SAndroid Build Coastguard Worker        bl              L(\type\()_8tap_filter_8)
2239*c0909341SAndroid Build Coastguard Worker        vmov            q4,  q10
2240*c0909341SAndroid Build Coastguard Worker        vmov            q5,  q11
2241*c0909341SAndroid Build Coastguard Worker
2242*c0909341SAndroid Build Coastguard Worker8:
        // Row window: q3, q4, q5 plus the two fresh rows q10, q11.
        // q12/q13 (low/high four pixels) accumulate the first output row
        // from q3,q4,q5,q10; q14/q15 the second from q4,q5,q10,q11.
2243*c0909341SAndroid Build Coastguard Worker        bl              L(\type\()_8tap_filter_8)
2244*c0909341SAndroid Build Coastguard Worker        vmull.s16       q12, d6,  d2[0]
2245*c0909341SAndroid Build Coastguard Worker        vmull.s16       q13, d7,  d2[0]
2246*c0909341SAndroid Build Coastguard Worker        vmull.s16       q14, d8,  d2[0]
2247*c0909341SAndroid Build Coastguard Worker        vmull.s16       q15, d9,  d2[0]
2248*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q12, d8,  d2[1]
2249*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q13, d9,  d2[1]
2250*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q14, d10, d2[1]
2251*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q15, d11, d2[1]
2252*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q12, d10, d2[2]
2253*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q13, d11, d2[2]
2254*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q14, d20, d2[2]
2255*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q15, d21, d2[2]
2256*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q12, d20, d2[3]
2257*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q13, d21, d2[3]
2258*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q14, d22, d2[3]
2259*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q15, d23, d2[3]
        // Rounding shift by \shift_hv with saturating narrow: q12 (d24:d25)
        // becomes the first row, q14 (d28:d29) the second, 8 x s16 each.
2260*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d24, q12, #\shift_hv
2261*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d25, q13, #\shift_hv
2262*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d28, q14, #\shift_hv
2263*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d29, q15, #\shift_hv
        // Flags from this subs are consumed by the ble after the stores.
2264*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2
2265*c0909341SAndroid Build Coastguard Worker.ifc \type, put
        // put: saturate to u8 and store eight bytes per row.
2266*c0909341SAndroid Build Coastguard Worker        vqmovun.s16     d24, q12
2267*c0909341SAndroid Build Coastguard Worker        vqmovun.s16     d28, q14
2268*c0909341SAndroid Build Coastguard Worker        vst1.8          {d24}, [\dst, :64], \d_strd
2269*c0909341SAndroid Build Coastguard Worker        vst1.8          {d28}, [\ds2, :64], \d_strd
2270*c0909341SAndroid Build Coastguard Worker.else
        // any other \type: store the eight 16-bit results per row as they are.
2271*c0909341SAndroid Build Coastguard Worker        vst1.16         {q12}, [\dst, :128], \d_strd
2272*c0909341SAndroid Build Coastguard Worker        vst1.16         {q14}, [\ds2, :128], \d_strd
2273*c0909341SAndroid Build Coastguard Worker.endif
2274*c0909341SAndroid Build Coastguard Worker        ble             9f
        // Slide the row window down by two.
2275*c0909341SAndroid Build Coastguard Worker        vmov            q3,  q5
2276*c0909341SAndroid Build Coastguard Worker        vmov            q4,  q10
2277*c0909341SAndroid Build Coastguard Worker        vmov            q5,  q11
2278*c0909341SAndroid Build Coastguard Worker        b               8b
2279*c0909341SAndroid Build Coastguard Worker9:
        // Column finished. Leave through 0f (beyond this block) when no width
        // remains, otherwise set up the next 8-pixel column.
2280*c0909341SAndroid Build Coastguard Worker        subs            \w,  \w,  #8
2281*c0909341SAndroid Build Coastguard Worker        ble             0f
        // Undo the stride doubling done at 164; it is reapplied on re-entry.
2282*c0909341SAndroid Build Coastguard Worker        asr             \s_strd,  \s_strd,  #1
2283*c0909341SAndroid Build Coastguard Worker        asr             \d_strd,  \d_strd,  #1
        // mls computes Ra - Rn*Rm: step \src and \dst back by \my rows, then
        // \src by a further four rows.
2284*c0909341SAndroid Build Coastguard Worker        mls             \src,  \s_strd,  \my,  \src
2285*c0909341SAndroid Build Coastguard Worker        mls             \dst,  \d_strd,  \my,  \dst
2286*c0909341SAndroid Build Coastguard Worker        sub             \src,  \src,  \s_strd,  lsl #2
        // Restore the row counter from the copy kept in \my.
2287*c0909341SAndroid Build Coastguard Worker        mov             \h,  \my
        // Advance eight pixels: 8 source bytes, and 8 output bytes for put or
        // 16 bytes for the 16-bit output of other types.
2288*c0909341SAndroid Build Coastguard Worker        add             \src,  \src,  #8
2289*c0909341SAndroid Build Coastguard Worker.ifc \type, put
2290*c0909341SAndroid Build Coastguard Worker        add             \dst,  \dst,  #8
2291*c0909341SAndroid Build Coastguard Worker.else
2292*c0909341SAndroid Build Coastguard Worker        add             \dst,  \dst,  #16
2293*c0909341SAndroid Build Coastguard Worker.endif
2294*c0909341SAndroid Build Coastguard Worker        b               164b
2295*c0909341SAndroid Build Coastguard Worker
2296*c0909341SAndroid Build Coastguard Worker880:    // 8x8, 8x16, ..., 16x8, ..., 32x8, ... hv
2297*c0909341SAndroid Build Coastguard Worker640:
2298*c0909341SAndroid Build Coastguard Worker1280:
2299*c0909341SAndroid Build Coastguard Worker        vpush           {q4-q7}
2300*c0909341SAndroid Build Coastguard Worker        vld1.8          {d0},  [\mx, :64]
2301*c0909341SAndroid Build Coastguard Worker        vld1.8          {d2},  [\my, :64]
2302*c0909341SAndroid Build Coastguard Worker        sub             \src,  \src,  #3
2303*c0909341SAndroid Build Coastguard Worker        sub             \src,  \src,  \s_strd
2304*c0909341SAndroid Build Coastguard Worker        sub             \src,  \src,  \s_strd, lsl #1
2305*c0909341SAndroid Build Coastguard Worker        vmovl.s8        q0,  d0
2306*c0909341SAndroid Build Coastguard Worker        vmovl.s8        q1,  d2
2307*c0909341SAndroid Build Coastguard Worker        mov             \my, \h
2308*c0909341SAndroid Build Coastguard Worker
2309*c0909341SAndroid Build Coastguard Worker168:
2310*c0909341SAndroid Build Coastguard Worker        add             \ds2,  \dst,  \d_strd
2311*c0909341SAndroid Build Coastguard Worker        add             \sr2,  \src,  \s_strd
2312*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd, \d_strd, #1
2313*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd, \s_strd, #1
2314*c0909341SAndroid Build Coastguard Worker
2315*c0909341SAndroid Build Coastguard Worker        vld1.8          {q14},  [\src], \s_strd
2316*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q12, d28
2317*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q13, d29
2318*c0909341SAndroid Build Coastguard Worker        vmul.s16        q10, q12, d0[0]
2319*c0909341SAndroid Build Coastguard Worker.irpc i, 123
2320*c0909341SAndroid Build Coastguard Worker        vext.8          q14, q12, q13, #(2*\i)
2321*c0909341SAndroid Build Coastguard Worker        vmla.s16        q10, q14, d0[\i]
2322*c0909341SAndroid Build Coastguard Worker.endr
2323*c0909341SAndroid Build Coastguard Worker.irpc i, 4567
2324*c0909341SAndroid Build Coastguard Worker        vext.8          q14, q12, q13, #(2*\i)
2325*c0909341SAndroid Build Coastguard Worker        vmla.s16        q10, q14, d1[\i-4]
2326*c0909341SAndroid Build Coastguard Worker.endr
2327*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q3,  q10, #2
2328*c0909341SAndroid Build Coastguard Worker
2329*c0909341SAndroid Build Coastguard Worker        bl              L(\type\()_8tap_filter_8)
2330*c0909341SAndroid Build Coastguard Worker        vmov            q4,  q10
2331*c0909341SAndroid Build Coastguard Worker        vmov            q5,  q11
2332*c0909341SAndroid Build Coastguard Worker        bl              L(\type\()_8tap_filter_8)
2333*c0909341SAndroid Build Coastguard Worker        vmov            q6,  q10
2334*c0909341SAndroid Build Coastguard Worker        vmov            q7,  q11
2335*c0909341SAndroid Build Coastguard Worker        bl              L(\type\()_8tap_filter_8)
2336*c0909341SAndroid Build Coastguard Worker        vmov            q8,  q10
2337*c0909341SAndroid Build Coastguard Worker        vmov            q9,  q11
2338*c0909341SAndroid Build Coastguard Worker
2339*c0909341SAndroid Build Coastguard Worker88:
2340*c0909341SAndroid Build Coastguard Worker        bl              L(\type\()_8tap_filter_8)
2341*c0909341SAndroid Build Coastguard Worker        vmull.s16       q12, d6,  d2[0]
2342*c0909341SAndroid Build Coastguard Worker        vmull.s16       q13, d7,  d2[0]
2343*c0909341SAndroid Build Coastguard Worker        vmull.s16       q14, d8,  d2[0]
2344*c0909341SAndroid Build Coastguard Worker        vmull.s16       q15, d9,  d2[0]
2345*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q12, d8,  d2[1]
2346*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q13, d9,  d2[1]
2347*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q14, d10, d2[1]
2348*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q15, d11, d2[1]
2349*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q12, d10, d2[2]
2350*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q13, d11, d2[2]
2351*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q14, d12, d2[2]
2352*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q15, d13, d2[2]
2353*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q12, d12, d2[3]
2354*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q13, d13, d2[3]
2355*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q14, d14, d2[3]
2356*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q15, d15, d2[3]
2357*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q12, d14, d3[0]
2358*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q13, d15, d3[0]
2359*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q14, d16, d3[0]
2360*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q15, d17, d3[0]
2361*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q12, d16, d3[1]
2362*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q13, d17, d3[1]
2363*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q14, d18, d3[1]
2364*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q15, d19, d3[1]
2365*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q12, d18, d3[2]
2366*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q13, d19, d3[2]
2367*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q14, d20, d3[2]
2368*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q15, d21, d3[2]
2369*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q12, d20, d3[3]
2370*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q13, d21, d3[3]
2371*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q14, d22, d3[3]
2372*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q15, d23, d3[3]
2373*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d24, q12, #\shift_hv
2374*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d25, q13, #\shift_hv
2375*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d28, q14, #\shift_hv
2376*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d29, q15, #\shift_hv
2377*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2
2378*c0909341SAndroid Build Coastguard Worker.ifc \type, put
2379*c0909341SAndroid Build Coastguard Worker        vqmovun.s16     d24, q12
2380*c0909341SAndroid Build Coastguard Worker        vqmovun.s16     d28, q14
2381*c0909341SAndroid Build Coastguard Worker        vst1.8          {d24}, [\dst, :64], \d_strd
2382*c0909341SAndroid Build Coastguard Worker        vst1.8          {d28}, [\ds2, :64], \d_strd
2383*c0909341SAndroid Build Coastguard Worker.else
2384*c0909341SAndroid Build Coastguard Worker        vst1.16         {q12}, [\dst, :128], \d_strd
2385*c0909341SAndroid Build Coastguard Worker        vst1.16         {q14}, [\ds2, :128], \d_strd
2386*c0909341SAndroid Build Coastguard Worker.endif
2387*c0909341SAndroid Build Coastguard Worker        ble             9f
2388*c0909341SAndroid Build Coastguard Worker        vmov            q3,  q5
2389*c0909341SAndroid Build Coastguard Worker        vmov            q4,  q6
2390*c0909341SAndroid Build Coastguard Worker        vmov            q5,  q7
2391*c0909341SAndroid Build Coastguard Worker        vmov            q6,  q8
2392*c0909341SAndroid Build Coastguard Worker        vmov            q7,  q9
2393*c0909341SAndroid Build Coastguard Worker        vmov            q8,  q10
2394*c0909341SAndroid Build Coastguard Worker        vmov            q9,  q11
2395*c0909341SAndroid Build Coastguard Worker        b               88b
2396*c0909341SAndroid Build Coastguard Worker9:
2397*c0909341SAndroid Build Coastguard Worker        subs            \w,  \w,  #8
2398*c0909341SAndroid Build Coastguard Worker        ble             0f
2399*c0909341SAndroid Build Coastguard Worker        asr             \s_strd,  \s_strd,  #1
2400*c0909341SAndroid Build Coastguard Worker        asr             \d_strd,  \d_strd,  #1
2401*c0909341SAndroid Build Coastguard Worker        mls             \src,  \s_strd,  \my,  \src
2402*c0909341SAndroid Build Coastguard Worker        mls             \dst,  \d_strd,  \my,  \dst
2403*c0909341SAndroid Build Coastguard Worker        sub             \src,  \src,  \s_strd,  lsl #3
2404*c0909341SAndroid Build Coastguard Worker        mov             \h,  \my
2405*c0909341SAndroid Build Coastguard Worker        add             \src,  \src,  #8
2406*c0909341SAndroid Build Coastguard Worker.ifc \type, put
2407*c0909341SAndroid Build Coastguard Worker        add             \dst,  \dst,  #8
2408*c0909341SAndroid Build Coastguard Worker.else
2409*c0909341SAndroid Build Coastguard Worker        add             \dst,  \dst,  #16
2410*c0909341SAndroid Build Coastguard Worker.endif
2411*c0909341SAndroid Build Coastguard Worker        b               168b
2412*c0909341SAndroid Build Coastguard Worker0:
2413*c0909341SAndroid Build Coastguard Worker        vpop            {q4-q7}
2414*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}
2415*c0909341SAndroid Build Coastguard Worker
2416*c0909341SAndroid Build Coastguard WorkerL(\type\()_8tap_filter_8):  // Helper: horizontal 8-tap filter of 8 pixels from one \sr2 row (result in q10) and one \src row (result in q11)
2417*c0909341SAndroid Build Coastguard Worker        vld1.8          {q14},  [\sr2], \s_strd  // 16 bytes of the \sr2 row; pointer post-incremented by \s_strd
2418*c0909341SAndroid Build Coastguard Worker        vld1.8          {q15},  [\src], \s_strd  // 16 bytes of the \src row; pointer post-incremented by \s_strd
2419*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q12, d28  // widen \sr2 row bytes 0-7 to 16 bit
2420*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q13, d29  // widen \sr2 row bytes 8-15 to 16 bit
2421*c0909341SAndroid Build Coastguard Worker        vmul.s16        q10, q12, d0[0]  // tap 0 starts the 16-bit accumulator
2422*c0909341SAndroid Build Coastguard Worker.irpc i, 123
2423*c0909341SAndroid Build Coastguard Worker        vext.8          q14, q12, q13, #(2*\i)  // window of 8 widened pixels starting at pixel i (2 bytes per pixel)
2424*c0909341SAndroid Build Coastguard Worker        vmla.s16        q10, q14, d0[\i]  // taps 1-3 use 16-bit lanes 1-3 of d0
2425*c0909341SAndroid Build Coastguard Worker.endr
2426*c0909341SAndroid Build Coastguard Worker.irpc i, 4567
2427*c0909341SAndroid Build Coastguard Worker        vext.8          q14, q12, q13, #(2*\i)  // same sliding window for pixels 4-7
2428*c0909341SAndroid Build Coastguard Worker        vmla.s16        q10, q14, d1[\i-4]  // taps 4-7 use 16-bit lanes 0-3 of d1
2429*c0909341SAndroid Build Coastguard Worker.endr
2430*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q12, d30  // now the \src row: widen bytes 0-7 (q12/q13 are reused)
2431*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q13, d31  // widen \src row bytes 8-15
2432*c0909341SAndroid Build Coastguard Worker        vmul.s16        q11, q12, d0[0]  // tap 0 starts the second accumulator
2433*c0909341SAndroid Build Coastguard Worker.irpc i, 123
2434*c0909341SAndroid Build Coastguard Worker        vext.8          q14, q12, q13, #(2*\i)  // window starting at pixel i
2435*c0909341SAndroid Build Coastguard Worker        vmla.s16        q11, q14, d0[\i]  // taps 1-3
2436*c0909341SAndroid Build Coastguard Worker.endr
2437*c0909341SAndroid Build Coastguard Worker.irpc i, 4567
2438*c0909341SAndroid Build Coastguard Worker        vext.8          q14, q12, q13, #(2*\i)  // window starting at pixel i
2439*c0909341SAndroid Build Coastguard Worker        vmla.s16        q11, q14, d1[\i-4]  // taps 4-7
2440*c0909341SAndroid Build Coastguard Worker.endr
2441*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q10, q10, #2  // rounding shift right by 2 of the \sr2 row result
2442*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q11, q11, #2  // rounding shift right by 2 of the \src row result
2443*c0909341SAndroid Build Coastguard Worker        bx              lr  // return to the caller; q12-q15 have been overwritten, d0/d1 are only read
2444*c0909341SAndroid Build Coastguard Workerendfunc
2445*c0909341SAndroid Build Coastguard Worker
2446*c0909341SAndroid Build Coastguard Worker
2447*c0909341SAndroid Build Coastguard Workerfunction \type\()_bilin_8bpc_neon, export=1  // Bilinear put/prep entry: builds the weight vectors and dispatches on which of mx/my are non-zero
2448*c0909341SAndroid Build Coastguard Worker        push            {r4-r11,lr}  // 9 registers = 36 bytes, so stack arguments now start at sp+36
2449*c0909341SAndroid Build Coastguard Worker        ldrd            r4,  r5,  [sp, #36]  // first two stack-passed arguments (which macro names they map to is set outside this view)
2450*c0909341SAndroid Build Coastguard Worker        ldrd            r6,  r7,  [sp, #44]  // next two stack-passed arguments
2451*c0909341SAndroid Build Coastguard Worker        vdup.8          d1,  \mx  // d1 = mx in every byte lane (weight of the right-hand pixel)
2452*c0909341SAndroid Build Coastguard Worker        vdup.8          d3,  \my  // d3 = my in every byte lane (weight of the lower pixel)
2453*c0909341SAndroid Build Coastguard Worker        rsb             r8,  \mx, #16  // r8 = 16 - mx
2454*c0909341SAndroid Build Coastguard Worker        rsb             r9,  \my, #16  // r9 = 16 - my
2455*c0909341SAndroid Build Coastguard Worker        vdup.8          d0,  r8  // d0 = 16 - mx in every byte lane (weight of the left-hand pixel)
2456*c0909341SAndroid Build Coastguard Worker        vdup.8          d2,  r9  // d2 = 16 - my in every byte lane (weight of the upper pixel)
2457*c0909341SAndroid Build Coastguard Worker.ifc \type, prep
2458*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd, \w, #1  // prep: output row pitch is w * 2 bytes (the prep paths store 16-bit values)
2459*c0909341SAndroid Build Coastguard Worker.endif
2460*c0909341SAndroid Build Coastguard Worker        clz             r8,  \w  // start of jump-table index computation
2461*c0909341SAndroid Build Coastguard Worker        cmp             \mx, #0  // flags for the bne two lines down (sub without s leaves them intact)
2462*c0909341SAndroid Build Coastguard Worker        sub             r8,  r8,  #24  // r8 = clz(w) - 24: 0 for w=128 ... 6 for w=2, matching the table order (assumes w is a power of two)
2463*c0909341SAndroid Build Coastguard Worker        bne             L(\type\()_bilin_h)  // mx != 0: horizontal filtering needed (h-only or hv)
2464*c0909341SAndroid Build Coastguard Worker        cmp             \my, #0
2465*c0909341SAndroid Build Coastguard Worker        bne             L(\type\()_bilin_v)  // mx == 0, my != 0: vertical-only filtering
2466*c0909341SAndroid Build Coastguard Worker        b               \type\()_neon  // mx == 0 and my == 0: tail-branch with r4-r11,lr still pushed; target is defined outside this view, presumably the unfiltered path - TODO confirm it pops them
2467*c0909341SAndroid Build Coastguard Worker
2468*c0909341SAndroid Build Coastguard WorkerL(\type\()_bilin_h):  // mx != 0: choose between h-only and hv, then dispatch h-only by width
2469*c0909341SAndroid Build Coastguard Worker        cmp             \my, #0
2470*c0909341SAndroid Build Coastguard Worker        bne             L(\type\()_bilin_hv)  // both fractions non-zero: two-dimensional path
2471*c0909341SAndroid Build Coastguard Worker
2472*c0909341SAndroid Build Coastguard Worker        adr             r9,  L(\type\()_bilin_h_tbl)  // r9 = address of the offset table
2473*c0909341SAndroid Build Coastguard Worker        ldr             r8,  [r9, r8, lsl #2]  // r8 = table[clz(w) - 24], an offset relative to the table itself
2474*c0909341SAndroid Build Coastguard Worker        add             r9,  r9,  r8  // absolute target address (entries include CONFIG_THUMB)
2475*c0909341SAndroid Build Coastguard Worker        bx              r9  // jump to the width-specific h loop
2476*c0909341SAndroid Build Coastguard Worker
2477*c0909341SAndroid Build Coastguard Worker        .align 2
2478*c0909341SAndroid Build Coastguard WorkerL(\type\()_bilin_h_tbl):  // table-relative offsets, ordered w=128 (index 0) down to w=2 (index 6)
2479*c0909341SAndroid Build Coastguard Worker        .word 1280f - L(\type\()_bilin_h_tbl) + CONFIG_THUMB
2480*c0909341SAndroid Build Coastguard Worker        .word 640f  - L(\type\()_bilin_h_tbl) + CONFIG_THUMB
2481*c0909341SAndroid Build Coastguard Worker        .word 320f  - L(\type\()_bilin_h_tbl) + CONFIG_THUMB
2482*c0909341SAndroid Build Coastguard Worker        .word 160f  - L(\type\()_bilin_h_tbl) + CONFIG_THUMB
2483*c0909341SAndroid Build Coastguard Worker        .word 80f   - L(\type\()_bilin_h_tbl) + CONFIG_THUMB
2484*c0909341SAndroid Build Coastguard Worker        .word 40f   - L(\type\()_bilin_h_tbl) + CONFIG_THUMB
2485*c0909341SAndroid Build Coastguard Worker        .word 20f   - L(\type\()_bilin_h_tbl) + CONFIG_THUMB
2486*c0909341SAndroid Build Coastguard Worker
2487*c0909341SAndroid Build Coastguard Worker20:     // 2xN h (body assembled for put only; for prep this label lands directly on the 4xN h code that follows)
2488*c0909341SAndroid Build Coastguard Worker.ifc \type, put
2489*c0909341SAndroid Build Coastguard Worker        add             \ds2,  \dst,  \d_strd  // \ds2 = second output row
2490*c0909341SAndroid Build Coastguard Worker        add             \sr2,  \src,  \s_strd  // \sr2 = second source row
2491*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd,  \d_strd,  #1  // both pointer pairs step two rows per iteration
2492*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd,  \s_strd,  #1
2493*c0909341SAndroid Build Coastguard Worker2:
2494*c0909341SAndroid Build Coastguard Worker        vld1.32         {d4[]},  [\src], \s_strd  // 4 bytes of row 0, replicated to both 32-bit lanes
2495*c0909341SAndroid Build Coastguard Worker        vld1.32         {d6[]},  [\sr2], \s_strd  // 4 bytes of row 1, replicated likewise
2496*c0909341SAndroid Build Coastguard Worker        vext.8          d5,  d4,  d4, #1  // row 0 shifted by one pixel (right-hand neighbours)
2497*c0909341SAndroid Build Coastguard Worker        vext.8          d7,  d6,  d6, #1  // row 1 shifted by one pixel
2498*c0909341SAndroid Build Coastguard Worker        vtrn.16         q2,  q3  // interleave so d4 = {row0 px0-1, row1 px0-1, ..}, d5 = the shifted counterparts
2499*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2  // two rows done; flags consumed by bgt below (NEON ops leave them alone)
2500*c0909341SAndroid Build Coastguard Worker        vmull.u8        q3,  d4,  d0  // left pixel * (16 - mx)
2501*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q3,  d5,  d1  // + right pixel * mx
2502*c0909341SAndroid Build Coastguard Worker        vqrshrn.u16     d4,  q3,  #4  // rounding shift by 4 (weights sum to 16) and narrow to 8 bit
2503*c0909341SAndroid Build Coastguard Worker        vst1.16         {d4[0]}, [\dst, :16], \d_strd  // 2 pixels of row 0
2504*c0909341SAndroid Build Coastguard Worker        vst1.16         {d4[1]}, [\ds2, :16], \d_strd  // 2 pixels of row 1
2505*c0909341SAndroid Build Coastguard Worker        bgt             2b
2506*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}  // restore callee-saved registers and return
2507*c0909341SAndroid Build Coastguard Worker.endif
2508*c0909341SAndroid Build Coastguard Worker
2509*c0909341SAndroid Build Coastguard Worker40:     // 4xN h: two rows per iteration, both rows packed into one 8-lane multiply
2510*c0909341SAndroid Build Coastguard Worker        add             \ds2,  \dst,  \d_strd  // \ds2 = second output row
2511*c0909341SAndroid Build Coastguard Worker        add             \sr2,  \src,  \s_strd  // \sr2 = second source row
2512*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd,  \d_strd,  #1  // step two rows per iteration
2513*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd,  \s_strd,  #1
2514*c0909341SAndroid Build Coastguard Worker4:
2515*c0909341SAndroid Build Coastguard Worker        vld1.8          {d4}, [\src], \s_strd  // 8 bytes of row 0 (pixels 0-4 are used)
2516*c0909341SAndroid Build Coastguard Worker        vld1.8          {d6}, [\sr2], \s_strd  // 8 bytes of row 1
2517*c0909341SAndroid Build Coastguard Worker        vext.8          d5,  d4,  d4, #1  // row 0 shifted by one pixel
2518*c0909341SAndroid Build Coastguard Worker        vext.8          d7,  d6,  d6, #1  // row 1 shifted by one pixel
2519*c0909341SAndroid Build Coastguard Worker        vtrn.32         q2,  q3  // d4 = {row0 px0-3, row1 px0-3}, d5 = {row0 px1-4, row1 px1-4}
2520*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2  // flags consumed by bgt below
2521*c0909341SAndroid Build Coastguard Worker        vmull.u8        q3,  d4,  d0  // left pixel * (16 - mx)
2522*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q3,  d5,  d1  // + right pixel * mx; d6 = row 0 sums, d7 = row 1 sums
2523*c0909341SAndroid Build Coastguard Worker.ifc \type, put
2524*c0909341SAndroid Build Coastguard Worker        vqrshrn.u16     d4,  q3,  #4  // put: round, shift by 4 and narrow to 8 bit
2525*c0909341SAndroid Build Coastguard Worker        vst1.32         {d4[0]}, [\dst, :32], \d_strd  // 4 pixels of row 0
2526*c0909341SAndroid Build Coastguard Worker        vst1.32         {d4[1]}, [\ds2, :32], \d_strd  // 4 pixels of row 1
2527*c0909341SAndroid Build Coastguard Worker.else
2528*c0909341SAndroid Build Coastguard Worker        vst1.16         {d6}, [\dst, :64], \d_strd  // prep: store the unshifted 16-bit sums of row 0
2529*c0909341SAndroid Build Coastguard Worker        vst1.16         {d7}, [\ds2, :64], \d_strd  // prep: unshifted 16-bit sums of row 1
2530*c0909341SAndroid Build Coastguard Worker.endif
2531*c0909341SAndroid Build Coastguard Worker        bgt             4b
2532*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}  // restore callee-saved registers and return
2533*c0909341SAndroid Build Coastguard Worker
2534*c0909341SAndroid Build Coastguard Worker80:     // 8xN h: two rows per iteration, one 8-lane multiply per row
2535*c0909341SAndroid Build Coastguard Worker        add             \ds2,  \dst,  \d_strd  // \ds2 = second output row
2536*c0909341SAndroid Build Coastguard Worker        add             \sr2,  \src,  \s_strd  // \sr2 = second source row
2537*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd,  \d_strd,  #1  // step two rows per iteration
2538*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd,  \s_strd,  #1
2539*c0909341SAndroid Build Coastguard Worker8:
2540*c0909341SAndroid Build Coastguard Worker        vld1.8          {q8},  [\src], \s_strd  // 16 bytes of row 0 (pixels 0-8 are used)
2541*c0909341SAndroid Build Coastguard Worker        vld1.8          {q10}, [\sr2], \s_strd  // 16 bytes of row 1
2542*c0909341SAndroid Build Coastguard Worker        vext.8          q9,  q8,  q8,  #1  // d18 = row 0 pixels 1-8
2543*c0909341SAndroid Build Coastguard Worker        vext.8          q11, q10, q10, #1  // d22 = row 1 pixels 1-8
2544*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2  // flags consumed by bgt below
2545*c0909341SAndroid Build Coastguard Worker        vmull.u8        q8,  d16, d0  // row 0: left pixel * (16 - mx); overwrites the loaded q8, d18 already holds the shifted copy
2546*c0909341SAndroid Build Coastguard Worker        vmull.u8        q10, d20, d0  // row 1: left pixel * (16 - mx)
2547*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q8,  d18, d1  // row 0: + right pixel * mx
2548*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q10, d22, d1  // row 1: + right pixel * mx
2549*c0909341SAndroid Build Coastguard Worker.ifc \type, put
2550*c0909341SAndroid Build Coastguard Worker        vqrshrn.u16     d16,  q8,  #4  // put: round, shift by 4 and narrow row 0
2551*c0909341SAndroid Build Coastguard Worker        vqrshrn.u16     d18,  q10, #4  // put: round, shift by 4 and narrow row 1
2552*c0909341SAndroid Build Coastguard Worker        vst1.8          {d16}, [\dst, :64], \d_strd
2553*c0909341SAndroid Build Coastguard Worker        vst1.8          {d18}, [\ds2, :64], \d_strd
2554*c0909341SAndroid Build Coastguard Worker.else
2555*c0909341SAndroid Build Coastguard Worker        vst1.16         {q8},  [\dst, :128], \d_strd  // prep: unshifted 16-bit sums of row 0
2556*c0909341SAndroid Build Coastguard Worker        vst1.16         {q10}, [\ds2, :128], \d_strd  // prep: unshifted 16-bit sums of row 1
2557*c0909341SAndroid Build Coastguard Worker.endif
2558*c0909341SAndroid Build Coastguard Worker        bgt             8b
2559*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}  // restore callee-saved registers and return
2560*c0909341SAndroid Build Coastguard Worker160:
2561*c0909341SAndroid Build Coastguard Worker320:
2562*c0909341SAndroid Build Coastguard Worker640:
2563*c0909341SAndroid Build Coastguard Worker1280:   // 16xN, 32xN, ... h: two rows per outer iteration, 16 pixels per inner iteration
2564*c0909341SAndroid Build Coastguard Worker        add             \ds2,  \dst,  \d_strd  // \ds2 = second output row
2565*c0909341SAndroid Build Coastguard Worker        add             \sr2,  \src,  \s_strd  // \sr2 = second source row
2566*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd,  \s_strd,  #1
2567*c0909341SAndroid Build Coastguard Worker
2568*c0909341SAndroid Build Coastguard Worker        sub             \s_strd,  \s_strd,  \w  // pointers advance w + 8 bytes per row via post-increment loads,
2569*c0909341SAndroid Build Coastguard Worker        sub             \s_strd,  \s_strd,  #8  // so \s_strd becomes the remaining distance to the row two below
2570*c0909341SAndroid Build Coastguard Worker.ifc \type, put
2571*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd,  \d_strd,  #1
2572*c0909341SAndroid Build Coastguard Worker        sub             \d_strd,  \d_strd,  \w  // put: w bytes written per row, so this is the remainder of two rows (prep keeps \d_strd unchanged here)
2573*c0909341SAndroid Build Coastguard Worker.endif
2574*c0909341SAndroid Build Coastguard Worker161:
2575*c0909341SAndroid Build Coastguard Worker        vld1.8          {d16},  [\src]!  // prime row 0 with its first 8 pixels
2576*c0909341SAndroid Build Coastguard Worker        vld1.8          {d22},  [\sr2]!  // prime row 1 with its first 8 pixels
2577*c0909341SAndroid Build Coastguard Worker        mov             \mx, \w  // \mx is reused as the column counter (the mx weight already lives in d1)
2578*c0909341SAndroid Build Coastguard Worker
2579*c0909341SAndroid Build Coastguard Worker16:
2580*c0909341SAndroid Build Coastguard Worker        vld1.8          {d17,d18},  [\src]!  // next 16 pixels of row 0; q8:q9 now hold 24 consecutive pixels
2581*c0909341SAndroid Build Coastguard Worker        vld1.8          {d23,d24},  [\sr2]!  // next 16 pixels of row 1; q11:q12 likewise
2582*c0909341SAndroid Build Coastguard Worker        vext.8          q10, q8,  q9,  #1  // row 0 shifted by one pixel
2583*c0909341SAndroid Build Coastguard Worker        vext.8          q13, q11, q12, #1  // row 1 shifted by one pixel
2584*c0909341SAndroid Build Coastguard Worker        vmull.u8        q2,  d16, d0  // row 0, pixels 0-7: left * (16 - mx)
2585*c0909341SAndroid Build Coastguard Worker        vmull.u8        q3,  d17, d0  // row 0, pixels 8-15
2586*c0909341SAndroid Build Coastguard Worker        vmull.u8        q14, d22, d0  // row 1, pixels 0-7
2587*c0909341SAndroid Build Coastguard Worker        vmull.u8        q15, d23, d0  // row 1, pixels 8-15
2588*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q2,  d20, d1  // + right * mx
2589*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q3,  d21, d1
2590*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q14, d26, d1
2591*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q15, d27, d1
2592*c0909341SAndroid Build Coastguard Worker        subs            \mx, \mx, #16  // 16 columns done; flags consumed by ble 9f below
2593*c0909341SAndroid Build Coastguard Worker.ifc \type, put
2594*c0909341SAndroid Build Coastguard Worker        vqrshrn.u16     d4,  q2,  #4  // put: round, shift by 4 and narrow to 8 bit
2595*c0909341SAndroid Build Coastguard Worker        vqrshrn.u16     d5,  q3,  #4
2596*c0909341SAndroid Build Coastguard Worker        vqrshrn.u16     d28, q14, #4
2597*c0909341SAndroid Build Coastguard Worker        vqrshrn.u16     d29, q15, #4
2598*c0909341SAndroid Build Coastguard Worker        vst1.8          {q2},  [\dst, :128]!  // 16 output pixels of row 0
2599*c0909341SAndroid Build Coastguard Worker        vst1.8          {q14}, [\ds2, :128]!  // 16 output pixels of row 1
2600*c0909341SAndroid Build Coastguard Worker.else
2601*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2,  q3},  [\dst, :128]!  // prep: 16 unshifted 16-bit sums of row 0
2602*c0909341SAndroid Build Coastguard Worker        vst1.16         {q14, q15}, [\ds2, :128]!  // prep: 16 unshifted 16-bit sums of row 1
2603*c0909341SAndroid Build Coastguard Worker.endif
2604*c0909341SAndroid Build Coastguard Worker        ble             9f  // row pair finished
2605*c0909341SAndroid Build Coastguard Worker
2606*c0909341SAndroid Build Coastguard Worker        vmov            d16, d18  // carry the last 8 loaded pixels over as the start of the next 16-pixel group
2607*c0909341SAndroid Build Coastguard Worker        vmov            d22, d24
2608*c0909341SAndroid Build Coastguard Worker        b               16b
2609*c0909341SAndroid Build Coastguard Worker
2610*c0909341SAndroid Build Coastguard Worker9:
2611*c0909341SAndroid Build Coastguard Worker        add             \dst,  \dst,  \d_strd  // move all four pointers to the next row pair
2612*c0909341SAndroid Build Coastguard Worker        add             \ds2,  \ds2,  \d_strd
2613*c0909341SAndroid Build Coastguard Worker        add             \src,  \src,  \s_strd
2614*c0909341SAndroid Build Coastguard Worker        add             \sr2,  \sr2,  \s_strd
2615*c0909341SAndroid Build Coastguard Worker
2616*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2
2617*c0909341SAndroid Build Coastguard Worker        bgt             161b
2618*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}  // restore callee-saved registers and return
2619*c0909341SAndroid Build Coastguard Worker
2620*c0909341SAndroid Build Coastguard WorkerL(\type\()_bilin_v):  // mx == 0, my != 0: dispatch the vertical-only filter by width
2621*c0909341SAndroid Build Coastguard Worker        cmp             \h,  #4  // NOTE(review): every jump target below sets the flags again (cmp/subs) before any conditional branch, so this compare looks unused - confirm before removing
2622*c0909341SAndroid Build Coastguard Worker        adr             r9,  L(\type\()_bilin_v_tbl)  // r9 = address of the offset table
2623*c0909341SAndroid Build Coastguard Worker        ldr             r8,  [r9, r8, lsl #2]  // r8 = table[clz(w) - 24], an offset relative to the table itself
2624*c0909341SAndroid Build Coastguard Worker        add             r9,  r9,  r8  // absolute target address (entries include CONFIG_THUMB)
2625*c0909341SAndroid Build Coastguard Worker        bx              r9  // jump to the width-specific v loop
2626*c0909341SAndroid Build Coastguard Worker
2627*c0909341SAndroid Build Coastguard Worker        .align 2
2628*c0909341SAndroid Build Coastguard WorkerL(\type\()_bilin_v_tbl):  // table-relative offsets, ordered w=128 (index 0) down to w=2 (index 6)
2629*c0909341SAndroid Build Coastguard Worker        .word 1280f - L(\type\()_bilin_v_tbl) + CONFIG_THUMB
2630*c0909341SAndroid Build Coastguard Worker        .word 640f  - L(\type\()_bilin_v_tbl) + CONFIG_THUMB
2631*c0909341SAndroid Build Coastguard Worker        .word 320f  - L(\type\()_bilin_v_tbl) + CONFIG_THUMB
2632*c0909341SAndroid Build Coastguard Worker        .word 160f  - L(\type\()_bilin_v_tbl) + CONFIG_THUMB
2633*c0909341SAndroid Build Coastguard Worker        .word 80f   - L(\type\()_bilin_v_tbl) + CONFIG_THUMB
2634*c0909341SAndroid Build Coastguard Worker        .word 40f   - L(\type\()_bilin_v_tbl) + CONFIG_THUMB
2635*c0909341SAndroid Build Coastguard Worker        .word 20f   - L(\type\()_bilin_v_tbl) + CONFIG_THUMB
2636*c0909341SAndroid Build Coastguard Worker
2637*c0909341SAndroid Build Coastguard Worker20:     // 2xN v (body assembled for put only; for prep this label lands directly on the 4xN v code that follows)
2638*c0909341SAndroid Build Coastguard Worker.ifc \type, put
2639*c0909341SAndroid Build Coastguard Worker        cmp             \h,  #2  // flags consumed by bgt 24f below (add/lsl/vld1 leave them alone)
2640*c0909341SAndroid Build Coastguard Worker        add             \ds2,  \dst,  \d_strd  // \ds2 = second output row
2641*c0909341SAndroid Build Coastguard Worker        add             \sr2,  \src,  \s_strd  // \sr2 = second source row
2642*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd,  \s_strd,  #1  // each pointer steps two rows per access
2643*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd,  \d_strd,  #1
2644*c0909341SAndroid Build Coastguard Worker
2645*c0909341SAndroid Build Coastguard Worker        // 2x2 v
2646*c0909341SAndroid Build Coastguard Worker        vld1.16         {d16[]}, [\src], \s_strd  // row 0 (2 pixels), replicated to all 16-bit lanes
2647*c0909341SAndroid Build Coastguard Worker        bgt             24f  // h > 2: use the four-rows-per-iteration loop
2648*c0909341SAndroid Build Coastguard Worker22:
2649*c0909341SAndroid Build Coastguard Worker        vld1.16         {d17[]}, [\sr2], \s_strd  // row 1
2650*c0909341SAndroid Build Coastguard Worker        vld1.16         {d18[]}, [\src], \s_strd  // row 2
2651*c0909341SAndroid Build Coastguard Worker        vext.8          d16, d16, d17, #6  // d16 16-bit lanes = {row0, row1, ..}: upper rows
2652*c0909341SAndroid Build Coastguard Worker        vext.8          d17, d17, d18, #6  // d17 16-bit lanes = {row1, row2, ..}: lower rows
2653*c0909341SAndroid Build Coastguard Worker        vmull.u8        q2,  d16, d2  // upper pixel * (16 - my)
2654*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q2,  d17, d3  // + lower pixel * my
2655*c0909341SAndroid Build Coastguard Worker        vqrshrn.u16     d4,  q2,  #4  // round, shift by 4 and narrow to 8 bit
2656*c0909341SAndroid Build Coastguard Worker        vst1.16         {d4[0]}, [\dst, :16]  // final two output rows, so no post-increment
2657*c0909341SAndroid Build Coastguard Worker        vst1.16         {d4[1]}, [\ds2, :16]
2658*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}  // restore callee-saved registers and return
2659*c0909341SAndroid Build Coastguard Worker24:     // 2x4, 2x6, 2x8, ... v: four output rows per iteration
2660*c0909341SAndroid Build Coastguard Worker        vld1.16         {d17[]}, [\sr2], \s_strd  // row 1
2661*c0909341SAndroid Build Coastguard Worker        vld1.16         {d18[]}, [\src], \s_strd  // row 2
2662*c0909341SAndroid Build Coastguard Worker        vld1.16         {d19[]}, [\sr2], \s_strd  // row 3
2663*c0909341SAndroid Build Coastguard Worker        vld1.16         {d20[]}, [\src], \s_strd  // row 4
2664*c0909341SAndroid Build Coastguard Worker        sub             \h,  \h,  #4  // no flag update here; the cmp below decides how to continue
2665*c0909341SAndroid Build Coastguard Worker        vext.8          d16, d16, d17, #6  // {row0, row1, ..}
2666*c0909341SAndroid Build Coastguard Worker        vext.8          d17, d17, d18, #6  // {row1, row2, ..}
2667*c0909341SAndroid Build Coastguard Worker        vext.8          d18, d18, d19, #6  // {row2, row3, ..}
2668*c0909341SAndroid Build Coastguard Worker        vext.8          d19, d19, d20, #6  // {row3, row4, ..}
2669*c0909341SAndroid Build Coastguard Worker        vtrn.32         d16, d18  // d16 = {row0, row1, row2, row3}: upper rows
2670*c0909341SAndroid Build Coastguard Worker        vtrn.32         d17, d19  // d17 = {row1, row2, row3, row4}: lower rows
2671*c0909341SAndroid Build Coastguard Worker        vmull.u8        q2,  d16, d2  // upper pixel * (16 - my)
2672*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q2,  d17, d3  // + lower pixel * my
2673*c0909341SAndroid Build Coastguard Worker        cmp             \h,  #2  // remaining rows: <2 done, ==2 finish in 22b, >2 loop again
2674*c0909341SAndroid Build Coastguard Worker        vqrshrn.u16     d4,  q2,  #4  // round, shift by 4 and narrow to 8 bit
2675*c0909341SAndroid Build Coastguard Worker        vst1.16         {d4[0]}, [\dst, :16], \d_strd  // output row 0
2676*c0909341SAndroid Build Coastguard Worker        vst1.16         {d4[1]}, [\ds2, :16], \d_strd  // output row 1
2677*c0909341SAndroid Build Coastguard Worker        vst1.16         {d4[2]}, [\dst, :16], \d_strd  // output row 2
2678*c0909341SAndroid Build Coastguard Worker        vst1.16         {d4[3]}, [\ds2, :16], \d_strd  // output row 3
2679*c0909341SAndroid Build Coastguard Worker        blt             0f
2680*c0909341SAndroid Build Coastguard Worker        vmov            d16, d20  // row 4 becomes the top row of the next group
2681*c0909341SAndroid Build Coastguard Worker        beq             22b  // exactly two rows left: reuse the 2x2 tail
2682*c0909341SAndroid Build Coastguard Worker        b               24b
2683*c0909341SAndroid Build Coastguard Worker0:
2684*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}  // restore callee-saved registers and return
2685*c0909341SAndroid Build Coastguard Worker.endif
2686*c0909341SAndroid Build Coastguard Worker
2687*c0909341SAndroid Build Coastguard Worker40:     // 4xN v: two output rows per iteration, both rows packed into one 8-lane multiply
2688*c0909341SAndroid Build Coastguard Worker        add             \ds2,  \dst,  \d_strd  // \ds2 = second output row
2689*c0909341SAndroid Build Coastguard Worker        add             \sr2,  \src,  \s_strd  // \sr2 = second source row
2690*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd,  \s_strd,  #1  // each pointer steps two rows per access
2691*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd,  \d_strd,  #1
2692*c0909341SAndroid Build Coastguard Worker        vld1.32         {d16[]}, [\src], \s_strd  // row 0 (4 pixels), replicated to both 32-bit lanes
2693*c0909341SAndroid Build Coastguard Worker4:
2694*c0909341SAndroid Build Coastguard Worker        vld1.32         {d17[]}, [\sr2], \s_strd  // row 1
2695*c0909341SAndroid Build Coastguard Worker        vld1.32         {d18[]}, [\src], \s_strd  // row 2
2696*c0909341SAndroid Build Coastguard Worker        vext.8          d16, d16, d17, #4  // d16 = {row0, row1}: upper rows
2697*c0909341SAndroid Build Coastguard Worker        vext.8          d17, d17, d18, #4  // d17 = {row1, row2}: lower rows
2698*c0909341SAndroid Build Coastguard Worker        vmull.u8        q2,  d16, d2  // upper pixel * (16 - my)
2699*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q2,  d17, d3  // + lower pixel * my; d4 = output row 0 sums, d5 = output row 1 sums
2700*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2  // flags consumed by ble 0f below
2701*c0909341SAndroid Build Coastguard Worker.ifc \type, put
2702*c0909341SAndroid Build Coastguard Worker        vqrshrn.u16     d4,  q2,  #4  // put: round, shift by 4 and narrow to 8 bit
2703*c0909341SAndroid Build Coastguard Worker        vst1.32         {d4[0]}, [\dst, :32], \d_strd  // 4 pixels of output row 0
2704*c0909341SAndroid Build Coastguard Worker        vst1.32         {d4[1]}, [\ds2, :32], \d_strd  // 4 pixels of output row 1
2705*c0909341SAndroid Build Coastguard Worker.else
2706*c0909341SAndroid Build Coastguard Worker        vst1.16         {d4}, [\dst, :64], \d_strd  // prep: unshifted 16-bit sums of output row 0
2707*c0909341SAndroid Build Coastguard Worker        vst1.16         {d5}, [\ds2, :64], \d_strd  // prep: unshifted 16-bit sums of output row 1
2708*c0909341SAndroid Build Coastguard Worker.endif
2709*c0909341SAndroid Build Coastguard Worker        ble             0f
2710*c0909341SAndroid Build Coastguard Worker        vmov            d16,  d18  // row 2 becomes the top row of the next pair
2711*c0909341SAndroid Build Coastguard Worker        b               4b
2712*c0909341SAndroid Build Coastguard Worker0:
2713*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}  // restore callee-saved registers and return
2714*c0909341SAndroid Build Coastguard Worker
2715*c0909341SAndroid Build Coastguard Worker80:     // 8xN v: two output rows per iteration, one 8-lane multiply per row
2716*c0909341SAndroid Build Coastguard Worker        add             \ds2,  \dst,  \d_strd  // \ds2 = second output row
2717*c0909341SAndroid Build Coastguard Worker        add             \sr2,  \src,  \s_strd  // \sr2 = second source row
2718*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd,  \s_strd,  #1  // each pointer steps two rows per access
2719*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd,  \d_strd,  #1
2720*c0909341SAndroid Build Coastguard Worker        vld1.8          {d16}, [\src], \s_strd  // row 0 (8 pixels)
2721*c0909341SAndroid Build Coastguard Worker8:
2722*c0909341SAndroid Build Coastguard Worker        vld1.8          {d17}, [\sr2], \s_strd  // row 1
2723*c0909341SAndroid Build Coastguard Worker        vld1.8          {d18}, [\src], \s_strd  // row 2
2724*c0909341SAndroid Build Coastguard Worker        vmull.u8        q2,  d16, d2  // output row 0: row 0 * (16 - my)
2725*c0909341SAndroid Build Coastguard Worker        vmull.u8        q3,  d17, d2  // output row 1: row 1 * (16 - my)
2726*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q2,  d17, d3  // output row 0: + row 1 * my
2727*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q3,  d18, d3  // output row 1: + row 2 * my
2728*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2  // flags consumed by ble 0f below
2729*c0909341SAndroid Build Coastguard Worker.ifc \type, put
2730*c0909341SAndroid Build Coastguard Worker        vqrshrn.u16     d4,  q2,  #4  // put: round, shift by 4 and narrow to 8 bit
2731*c0909341SAndroid Build Coastguard Worker        vqrshrn.u16     d6,  q3,  #4
2732*c0909341SAndroid Build Coastguard Worker        vst1.8          {d4}, [\dst, :64], \d_strd
2733*c0909341SAndroid Build Coastguard Worker        vst1.8          {d6}, [\ds2, :64], \d_strd
2734*c0909341SAndroid Build Coastguard Worker.else
2735*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2}, [\dst, :128], \d_strd  // prep: unshifted 16-bit sums of output row 0
2736*c0909341SAndroid Build Coastguard Worker        vst1.16         {q3}, [\ds2, :128], \d_strd  // prep: unshifted 16-bit sums of output row 1
2737*c0909341SAndroid Build Coastguard Worker.endif
2738*c0909341SAndroid Build Coastguard Worker        ble             0f
2739*c0909341SAndroid Build Coastguard Worker        vmov            d16, d18  // row 2 becomes the top row of the next pair
2740*c0909341SAndroid Build Coastguard Worker        b               8b
2741*c0909341SAndroid Build Coastguard Worker0:
2742*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}  // restore callee-saved registers and return
2743*c0909341SAndroid Build Coastguard Worker
2744*c0909341SAndroid Build Coastguard Worker160:    // 16xN, 32xN, ... v: processes one 16-pixel-wide column strip over the full height, then moves right
2745*c0909341SAndroid Build Coastguard Worker320:
2746*c0909341SAndroid Build Coastguard Worker640:
2747*c0909341SAndroid Build Coastguard Worker1280:
2748*c0909341SAndroid Build Coastguard Worker        mov             \my, \h  // \my is reused to remember the full height (the my weights already live in d2/d3)
2749*c0909341SAndroid Build Coastguard Worker1:
2750*c0909341SAndroid Build Coastguard Worker        add             \ds2, \dst, \d_strd  // \ds2 = second output row of this strip
2751*c0909341SAndroid Build Coastguard Worker        add             \sr2, \src, \s_strd  // \sr2 = second source row of this strip
2752*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd, \s_strd, #1  // each pointer steps two rows per access
2753*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd, \d_strd, #1
2754*c0909341SAndroid Build Coastguard Worker
2755*c0909341SAndroid Build Coastguard Worker        vld1.8          {q8},  [\src], \s_strd  // row 0 (16 pixels)
2756*c0909341SAndroid Build Coastguard Worker2:
2757*c0909341SAndroid Build Coastguard Worker        vld1.8          {q9},  [\sr2], \s_strd  // row 1
2758*c0909341SAndroid Build Coastguard Worker        vld1.8          {q10}, [\src], \s_strd  // row 2
2759*c0909341SAndroid Build Coastguard Worker        vmull.u8        q12, d16, d2  // output row 0, pixels 0-7: row 0 * (16 - my)
2760*c0909341SAndroid Build Coastguard Worker        vmull.u8        q13, d17, d2  // output row 0, pixels 8-15
2761*c0909341SAndroid Build Coastguard Worker        vmull.u8        q14, d18, d2  // output row 1, pixels 0-7: row 1 * (16 - my)
2762*c0909341SAndroid Build Coastguard Worker        vmull.u8        q15, d19, d2  // output row 1, pixels 8-15
2763*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q12, d18, d3  // output row 0: + row 1 * my
2764*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q13, d19, d3
2765*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q14, d20, d3  // output row 1: + row 2 * my
2766*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q15, d21, d3
2767*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2  // flags consumed by ble 9f below
2768*c0909341SAndroid Build Coastguard Worker.ifc \type, put
2769*c0909341SAndroid Build Coastguard Worker        vqrshrn.u16     d24, q12, #4  // put: round, shift by 4 and narrow to 8 bit
2770*c0909341SAndroid Build Coastguard Worker        vqrshrn.u16     d25, q13, #4
2771*c0909341SAndroid Build Coastguard Worker        vqrshrn.u16     d28, q14, #4
2772*c0909341SAndroid Build Coastguard Worker        vqrshrn.u16     d29, q15, #4
2773*c0909341SAndroid Build Coastguard Worker        vst1.8          {q12}, [\dst, :128], \d_strd
2774*c0909341SAndroid Build Coastguard Worker        vst1.8          {q14}, [\ds2, :128], \d_strd
2775*c0909341SAndroid Build Coastguard Worker.else
2776*c0909341SAndroid Build Coastguard Worker        vst1.16         {q12, q13}, [\dst, :128], \d_strd  // prep: 16 unshifted 16-bit sums of output row 0
2777*c0909341SAndroid Build Coastguard Worker        vst1.16         {q14, q15}, [\ds2, :128], \d_strd  // prep: 16 unshifted 16-bit sums of output row 1
2778*c0909341SAndroid Build Coastguard Worker.endif
2779*c0909341SAndroid Build Coastguard Worker        ble             9f  // strip finished
2780*c0909341SAndroid Build Coastguard Worker        vmov            q8,  q10  // row 2 becomes the top row of the next pair
2781*c0909341SAndroid Build Coastguard Worker        b               2b
2782*c0909341SAndroid Build Coastguard Worker9:
2783*c0909341SAndroid Build Coastguard Worker        subs            \w,  \w,  #16  // one 16-pixel strip done
2784*c0909341SAndroid Build Coastguard Worker        ble             0f
2785*c0909341SAndroid Build Coastguard Worker        asr             \s_strd, \s_strd, #1  // undo the stride doubling; label 1 doubles them again
2786*c0909341SAndroid Build Coastguard Worker        asr             \d_strd, \d_strd, #1
2787*c0909341SAndroid Build Coastguard Worker        mls             \src, \s_strd, \my, \src  // \src -= stride * saved height
2788*c0909341SAndroid Build Coastguard Worker        mls             \dst, \d_strd, \my, \dst  // \dst -= stride * saved height: back at the first output row
2789*c0909341SAndroid Build Coastguard Worker        sub             \src, \src, \s_strd, lsl #1  // \src advanced h + 2 rows in total (priming load included), so rewind two more
2790*c0909341SAndroid Build Coastguard Worker        mov             \h,  \my  // restore the row counter for the next strip
2791*c0909341SAndroid Build Coastguard Worker        add             \src, \src, #16  // next strip: 16 source pixels to the right
2792*c0909341SAndroid Build Coastguard Worker.ifc \type, put
2793*c0909341SAndroid Build Coastguard Worker        add             \dst, \dst, #16  // put: 16 output bytes to the right
2794*c0909341SAndroid Build Coastguard Worker.else
2795*c0909341SAndroid Build Coastguard Worker        add             \dst, \dst, #32  // prep: 16 outputs of 2 bytes each
2796*c0909341SAndroid Build Coastguard Worker.endif
2797*c0909341SAndroid Build Coastguard Worker        b               1b
2798*c0909341SAndroid Build Coastguard Worker0:
2799*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}  // restore callee-saved registers and return
2800*c0909341SAndroid Build Coastguard Worker
2801*c0909341SAndroid Build Coastguard WorkerL(\type\()_bilin_hv):
2802*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q2,  d2
2803*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q3,  d3
2804*c0909341SAndroid Build Coastguard Worker        adr             r9,  L(\type\()_bilin_hv_tbl)
2805*c0909341SAndroid Build Coastguard Worker        ldr             r8,  [r9, r8, lsl #2]
2806*c0909341SAndroid Build Coastguard Worker        add             r9,  r9,  r8
2807*c0909341SAndroid Build Coastguard Worker        bx              r9
2808*c0909341SAndroid Build Coastguard Worker
2809*c0909341SAndroid Build Coastguard Worker        .align 2
2810*c0909341SAndroid Build Coastguard WorkerL(\type\()_bilin_hv_tbl):
2811*c0909341SAndroid Build Coastguard Worker        .word 1280f - L(\type\()_bilin_hv_tbl) + CONFIG_THUMB
2812*c0909341SAndroid Build Coastguard Worker        .word 640f  - L(\type\()_bilin_hv_tbl) + CONFIG_THUMB
2813*c0909341SAndroid Build Coastguard Worker        .word 320f  - L(\type\()_bilin_hv_tbl) + CONFIG_THUMB
2814*c0909341SAndroid Build Coastguard Worker        .word 160f  - L(\type\()_bilin_hv_tbl) + CONFIG_THUMB
2815*c0909341SAndroid Build Coastguard Worker        .word 80f   - L(\type\()_bilin_hv_tbl) + CONFIG_THUMB
2816*c0909341SAndroid Build Coastguard Worker        .word 40f   - L(\type\()_bilin_hv_tbl) + CONFIG_THUMB
2817*c0909341SAndroid Build Coastguard Worker        .word 20f   - L(\type\()_bilin_hv_tbl) + CONFIG_THUMB
2818*c0909341SAndroid Build Coastguard Worker
2819*c0909341SAndroid Build Coastguard Worker20:     // 2xN hv
2820*c0909341SAndroid Build Coastguard Worker.ifc \type, put
2821*c0909341SAndroid Build Coastguard Worker        add             \sr2, \src, \s_strd
2822*c0909341SAndroid Build Coastguard Worker        add             \ds2, \dst, \d_strd
2823*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd, \s_strd, #1
2824*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd, \d_strd, #1
2825*c0909341SAndroid Build Coastguard Worker
2826*c0909341SAndroid Build Coastguard Worker        vld1.32         {d28[]},  [\src], \s_strd
2827*c0909341SAndroid Build Coastguard Worker        vext.8          d29, d28, d28, #1
2828*c0909341SAndroid Build Coastguard Worker        vmull.u8        q8,  d28, d0
2829*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q8,  d29, d1
2830*c0909341SAndroid Build Coastguard Worker
2831*c0909341SAndroid Build Coastguard Worker2:
2832*c0909341SAndroid Build Coastguard Worker        vld1.32         {d28[]},  [\sr2], \s_strd
2833*c0909341SAndroid Build Coastguard Worker        vld1.32         {d30[]},  [\src], \s_strd
2834*c0909341SAndroid Build Coastguard Worker        vext.8          d29, d28, d28, #1
2835*c0909341SAndroid Build Coastguard Worker        vext.8          d31, d30, d30, #1
2836*c0909341SAndroid Build Coastguard Worker        vtrn.16         d28, d30
2837*c0909341SAndroid Build Coastguard Worker        vtrn.16         d29, d31
2838*c0909341SAndroid Build Coastguard Worker        vmull.u8        q9,  d28, d0
2839*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q9,  d29, d1
2840*c0909341SAndroid Build Coastguard Worker
2841*c0909341SAndroid Build Coastguard Worker        vtrn.32         d16, d18
2842*c0909341SAndroid Build Coastguard Worker
2843*c0909341SAndroid Build Coastguard Worker        vmul.u16        d20, d16, d4
2844*c0909341SAndroid Build Coastguard Worker        vmla.u16        d20, d19, d6
2845*c0909341SAndroid Build Coastguard Worker        vqrshrn.u16     d20, q10, #8
2846*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2
2847*c0909341SAndroid Build Coastguard Worker        vst1.16         {d20[0]}, [\dst, :16], \d_strd
2848*c0909341SAndroid Build Coastguard Worker        vst1.16         {d20[1]}, [\ds2, :16], \d_strd
2849*c0909341SAndroid Build Coastguard Worker        ble             0f
2850*c0909341SAndroid Build Coastguard Worker        vtrn.32         d19, d16
2851*c0909341SAndroid Build Coastguard Worker        b               2b
2852*c0909341SAndroid Build Coastguard Worker0:
2853*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}
2854*c0909341SAndroid Build Coastguard Worker.endif
2855*c0909341SAndroid Build Coastguard Worker
2856*c0909341SAndroid Build Coastguard Worker40:     // 4xN hv
2857*c0909341SAndroid Build Coastguard Worker        add             \sr2, \src, \s_strd
2858*c0909341SAndroid Build Coastguard Worker        add             \ds2, \dst, \d_strd
2859*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd, \s_strd, #1
2860*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd, \d_strd, #1
2861*c0909341SAndroid Build Coastguard Worker
2862*c0909341SAndroid Build Coastguard Worker        vld1.8          {d28},  [\src], \s_strd
2863*c0909341SAndroid Build Coastguard Worker        vext.8          d29, d28, d28, #1
2864*c0909341SAndroid Build Coastguard Worker        vmull.u8        q8,  d28, d0
2865*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q8,  d29, d1
2866*c0909341SAndroid Build Coastguard Worker
2867*c0909341SAndroid Build Coastguard Worker4:
2868*c0909341SAndroid Build Coastguard Worker        vld1.8          {d28},  [\sr2], \s_strd
2869*c0909341SAndroid Build Coastguard Worker        vld1.8          {d30},  [\src], \s_strd
2870*c0909341SAndroid Build Coastguard Worker        vext.8          d29, d28, d28, #1
2871*c0909341SAndroid Build Coastguard Worker        vext.8          d31, d30, d30, #1
2872*c0909341SAndroid Build Coastguard Worker        vtrn.32         d28, d30
2873*c0909341SAndroid Build Coastguard Worker        vtrn.32         d29, d31
2874*c0909341SAndroid Build Coastguard Worker        vmull.u8        q9,  d28, d0
2875*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q9,  d29, d1
2876*c0909341SAndroid Build Coastguard Worker
2877*c0909341SAndroid Build Coastguard Worker        vmov            d17, d18
2878*c0909341SAndroid Build Coastguard Worker
2879*c0909341SAndroid Build Coastguard Worker        vmul.u16        q10, q8, q2
2880*c0909341SAndroid Build Coastguard Worker        vmla.u16        q10, q9, q3
2881*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2
2882*c0909341SAndroid Build Coastguard Worker.ifc \type, put
2883*c0909341SAndroid Build Coastguard Worker        vqrshrn.u16     d20, q10, #8
2884*c0909341SAndroid Build Coastguard Worker        vst1.32         {d20[0]}, [\dst, :32], \d_strd
2885*c0909341SAndroid Build Coastguard Worker        vst1.32         {d20[1]}, [\ds2, :32], \d_strd
2886*c0909341SAndroid Build Coastguard Worker.else
2887*c0909341SAndroid Build Coastguard Worker        vrshr.u16       q10, q10, #4
2888*c0909341SAndroid Build Coastguard Worker        vst1.16         {d20}, [\dst, :64], \d_strd
2889*c0909341SAndroid Build Coastguard Worker        vst1.16         {d21}, [\ds2, :64], \d_strd
2890*c0909341SAndroid Build Coastguard Worker.endif
2891*c0909341SAndroid Build Coastguard Worker        ble             0f
2892*c0909341SAndroid Build Coastguard Worker        vmov            d16, d19
2893*c0909341SAndroid Build Coastguard Worker        b               4b
2894*c0909341SAndroid Build Coastguard Worker0:
2895*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}
2896*c0909341SAndroid Build Coastguard Worker
2897*c0909341SAndroid Build Coastguard Worker80:     // 8xN, 16xN, ... hv
2898*c0909341SAndroid Build Coastguard Worker160:
2899*c0909341SAndroid Build Coastguard Worker320:
2900*c0909341SAndroid Build Coastguard Worker640:
2901*c0909341SAndroid Build Coastguard Worker1280:
2902*c0909341SAndroid Build Coastguard Worker        mov             \my, \h
2903*c0909341SAndroid Build Coastguard Worker
2904*c0909341SAndroid Build Coastguard Worker1:
2905*c0909341SAndroid Build Coastguard Worker        add             \sr2, \src, \s_strd
2906*c0909341SAndroid Build Coastguard Worker        add             \ds2, \dst, \d_strd
2907*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd, \s_strd, #1
2908*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd, \d_strd, #1
2909*c0909341SAndroid Build Coastguard Worker
2910*c0909341SAndroid Build Coastguard Worker        vld1.8          {q12},  [\src], \s_strd
2911*c0909341SAndroid Build Coastguard Worker        vext.8          q13, q12, q12, #1
2912*c0909341SAndroid Build Coastguard Worker        vmull.u8        q8,  d24, d0
2913*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q8,  d26, d1
2914*c0909341SAndroid Build Coastguard Worker
2915*c0909341SAndroid Build Coastguard Worker2:
2916*c0909341SAndroid Build Coastguard Worker        vld1.8          {q12},  [\sr2], \s_strd
2917*c0909341SAndroid Build Coastguard Worker        vld1.8          {q14},  [\src], \s_strd
2918*c0909341SAndroid Build Coastguard Worker        vext.8          q13, q12, q12, #1
2919*c0909341SAndroid Build Coastguard Worker        vext.8          q15, q14, q14, #1
2920*c0909341SAndroid Build Coastguard Worker        vmull.u8        q9,  d24, d0
2921*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q9,  d26, d1
2922*c0909341SAndroid Build Coastguard Worker        vmull.u8        q10, d28, d0
2923*c0909341SAndroid Build Coastguard Worker        vmlal.u8        q10, d30, d1
2924*c0909341SAndroid Build Coastguard Worker
2925*c0909341SAndroid Build Coastguard Worker        vmul.u16        q8,  q8,  q2
2926*c0909341SAndroid Build Coastguard Worker        vmla.u16        q8,  q9,  q3
2927*c0909341SAndroid Build Coastguard Worker        vmul.u16        q9,  q9,  q2
2928*c0909341SAndroid Build Coastguard Worker        vmla.u16        q9,  q10, q3
2929*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2
2930*c0909341SAndroid Build Coastguard Worker.ifc \type, put
2931*c0909341SAndroid Build Coastguard Worker        vqrshrn.u16     d16, q8,  #8
2932*c0909341SAndroid Build Coastguard Worker        vqrshrn.u16     d18, q9,  #8
2933*c0909341SAndroid Build Coastguard Worker        vst1.8          {d16}, [\dst, :64], \d_strd
2934*c0909341SAndroid Build Coastguard Worker        vst1.8          {d18}, [\ds2, :64], \d_strd
2935*c0909341SAndroid Build Coastguard Worker.else
2936*c0909341SAndroid Build Coastguard Worker        vrshr.u16       q8,  q8,  #4
2937*c0909341SAndroid Build Coastguard Worker        vrshr.u16       q9,  q9,  #4
2938*c0909341SAndroid Build Coastguard Worker        vst1.16         {q8}, [\dst, :128], \d_strd
2939*c0909341SAndroid Build Coastguard Worker        vst1.16         {q9}, [\ds2, :128], \d_strd
2940*c0909341SAndroid Build Coastguard Worker.endif
2941*c0909341SAndroid Build Coastguard Worker        ble             9f
2942*c0909341SAndroid Build Coastguard Worker        vmov            q8,  q10
2943*c0909341SAndroid Build Coastguard Worker        b               2b
2944*c0909341SAndroid Build Coastguard Worker9:
2945*c0909341SAndroid Build Coastguard Worker        subs            \w,  \w,  #8
2946*c0909341SAndroid Build Coastguard Worker        ble             0f
2947*c0909341SAndroid Build Coastguard Worker        asr             \s_strd,  \s_strd,  #1
2948*c0909341SAndroid Build Coastguard Worker        asr             \d_strd,  \d_strd,  #1
2949*c0909341SAndroid Build Coastguard Worker        mls             \src,  \s_strd,  \my,  \src
2950*c0909341SAndroid Build Coastguard Worker        mls             \dst,  \d_strd,  \my,  \dst
2951*c0909341SAndroid Build Coastguard Worker        sub             \src,  \src,  \s_strd,  lsl #1
2952*c0909341SAndroid Build Coastguard Worker        mov             \h,  \my
2953*c0909341SAndroid Build Coastguard Worker        add             \src,  \src,  #8
2954*c0909341SAndroid Build Coastguard Worker.ifc \type, put
2955*c0909341SAndroid Build Coastguard Worker        add             \dst,  \dst,  #8
2956*c0909341SAndroid Build Coastguard Worker.else
2957*c0909341SAndroid Build Coastguard Worker        add             \dst,  \dst,  #16
2958*c0909341SAndroid Build Coastguard Worker.endif
2959*c0909341SAndroid Build Coastguard Worker        b               1b
2960*c0909341SAndroid Build Coastguard Worker0:
2961*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}
2962*c0909341SAndroid Build Coastguard Workerendfunc
2963*c0909341SAndroid Build Coastguard Worker.endm
2964*c0909341SAndroid Build Coastguard Worker
// Instantiate the filter_fn macro twice. The first argument is the type
// selector: in the part of the macro body visible above, the `.ifc type, put`
// branches narrow the results to 8 bit and store bytes, while the `.else`
// branches (used for "prep") store the 16 bit intermediates.
// NOTE(review): the remaining arguments are bound positionally to macro
// parameters whose declaration is outside this part of the file; confirm the
// register roles and the meaning of the trailing constant (10 / 6) against
// the `.macro filter_fn` line.
2965*c0909341SAndroid Build Coastguard Workerfilter_fn put,  r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, 10
2966*c0909341SAndroid Build Coastguard Workerfilter_fn prep, r0, r7, r1, r2, r3, r4, r5, r6, r8, r9, 6
2967*c0909341SAndroid Build Coastguard Worker
// load_filter_ptr src
// Computes r12 = r11 + ((src asr 10) << 3): the address of the 8 byte table
// entry selected by the bits of src above bit 9 (signed index).
//   src  general purpose register holding the position; it is not modified.
//   r11  table base; the warp macro below points it at mc_warp_filter + 64*8.
// Clobbers r12 only.
2968*c0909341SAndroid Build Coastguard Worker.macro load_filter_ptr src
2969*c0909341SAndroid Build Coastguard Worker        asr             r12, \src, #10          // signed index = src >> 10
2970*c0909341SAndroid Build Coastguard Worker        add             r12, r11, r12, lsl #3   // 8 bytes per table entry
2971*c0909341SAndroid Build Coastguard Worker.endm
2972*c0909341SAndroid Build Coastguard Worker
// load_filter_coef dst, src, inc
// Second half of a filter fetch: advances the position register and loads
// the 8 bytes addressed by r12 into a d register.
//   dst  d register receiving the 8 bytes.
//   src  position register; incremented by inc.
//   inc  register holding the per-step increment.
// r12 must already have been set by load_filter_ptr. The add only changes
// src, so the entry loaded is the one selected before the increment.
2973*c0909341SAndroid Build Coastguard Worker.macro load_filter_coef dst, src, inc
2974*c0909341SAndroid Build Coastguard Worker        add             \src, \src, \inc        // step position for the next fetch
2975*c0909341SAndroid Build Coastguard Worker        vld1.8          {\dst}, [r12, :64]      // :64 = address is 8 byte aligned
2976*c0909341SAndroid Build Coastguard Worker.endm
2977*c0909341SAndroid Build Coastguard Worker
// load_filter_row dst, src, inc
// Convenience wrapper: load_filter_ptr followed directly by load_filter_coef.
// Loads into dst the 8 byte table entry selected by the current value of src
// and then advances src by inc. Clobbers r12.
// The two halves exist separately so that callers can schedule other
// instructions between the address computation and the load.
2978*c0909341SAndroid Build Coastguard Worker.macro load_filter_row dst, src, inc
2979*c0909341SAndroid Build Coastguard Worker        load_filter_ptr \src
2980*c0909341SAndroid Build Coastguard Worker        load_filter_coef \dst, \src, \inc
2981*c0909341SAndroid Build Coastguard Worker.endm
2982*c0909341SAndroid Build Coastguard Worker
// warp_filter_horz_neon: horizontal filter pass over one source row.
//
// Loads 16 bytes from [r2] and produces 8 output values. Output pixel i is
// the sum of the 8 source bytes starting at offset i, each multiplied by the
// matching tap of an 8 tap filter fetched individually for that pixel.
//
// In:
//   r2   source row pointer; post-incremented by r3 by the load.
//   r3   amount added to r2 (the caller passes the source stride).
//   r5   filter position for pixel 0; pixel i uses r5 + i*r7.
//   r7   per pixel step of the filter position.
//   r8   amount added to r5 before returning (per row step).
//   r11  filter table base, see load_filter_ptr.
// Out:
//   q2   eight 16 bit sums: d4 = pixels 0-3, d5 = pixels 4-7. No rounding or
//        shifting is applied here.
//   r5   entry value + r8.
// Clobbers: r12, q0-q7. q4-q7 are not saved here; the caller visible in this
// file (the warp macro) pushes them before calling.
//
// The source bytes are XORed with 128 so that they can be used as signed
// operands of vmull.s8 (see the veor comment below); the sums are therefore
// computed on pixel values offset by -128.
// The instruction order interleaves table loads, vext and multiplies and
// reuses d0/d1/d12/d13 and q3/q5 as soon as their previous contents have been
// consumed, so the sequence must not be reordered casually.
2983*c0909341SAndroid Build Coastguard Workerfunction warp_filter_horz_neon
2984*c0909341SAndroid Build Coastguard Worker        load_filter_ptr r5                  // filter 0
2985*c0909341SAndroid Build Coastguard Worker        vld1.16         {q7}, [r2], r3      // 16 source bytes, r2 += r3
2986*c0909341SAndroid Build Coastguard Worker        vmov.i8         q6,  #128           // bias constant for the veor below
2987*c0909341SAndroid Build Coastguard Worker
2988*c0909341SAndroid Build Coastguard Worker        load_filter_coef d0, r5,  r7        // filter 0
2989*c0909341SAndroid Build Coastguard Worker        load_filter_row d1,  r5,  r7        // filter 1
2990*c0909341SAndroid Build Coastguard Worker        load_filter_row d2,  r5,  r7        // filter 2
2991*c0909341SAndroid Build Coastguard Worker        load_filter_ptr r5                  // filter 3
2992*c0909341SAndroid Build Coastguard Worker        veor            q7,  q7,  q6        // subtract by 128 to allow using vmull
2993*c0909341SAndroid Build Coastguard Worker        load_filter_coef d3, r5,  r7        // filter 3
2994*c0909341SAndroid Build Coastguard Worker        vext.8          d12, d14, d15, #1   // filter 1 pixels
2995*c0909341SAndroid Build Coastguard Worker        vext.8          d13, d14, d15, #2   // filter 2 pixels
2996*c0909341SAndroid Build Coastguard Worker        load_filter_ptr r5                  // filter 4
2997*c0909341SAndroid Build Coastguard Worker        vmull.s8        q2,  d14, d0        // filter 0 output
2998*c0909341SAndroid Build Coastguard Worker        vmull.s8        q3,  d12, d1        // filter 1 output
2999*c0909341SAndroid Build Coastguard Worker        load_filter_coef d0, r5,  r7        // filter 4
3000*c0909341SAndroid Build Coastguard Worker        load_filter_ptr r5                  // filter 5
3001*c0909341SAndroid Build Coastguard Worker        vext.8          d12, d14, d15, #3   // filter 3 pixels
3002*c0909341SAndroid Build Coastguard Worker        vmull.s8        q4,  d13, d2        // filter 2 output
3003*c0909341SAndroid Build Coastguard Worker        vext.8          d13, d14, d15, #4   // filter 4 pixels
3004*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d4,  d4,  d5        // pixel 0 (4x16)
3005*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d5,  d6,  d7        // pixel 1 (4x16)
3006*c0909341SAndroid Build Coastguard Worker        load_filter_coef d1, r5,  r7        // filter 5
3007*c0909341SAndroid Build Coastguard Worker        load_filter_ptr r5                  // filter 6
3008*c0909341SAndroid Build Coastguard Worker        vmull.s8        q5,  d12, d3        // filter 3 output
3009*c0909341SAndroid Build Coastguard Worker        vext.8          d12, d14, d15, #5   // filter 5 pixels
3010*c0909341SAndroid Build Coastguard Worker        vmull.s8        q3,  d13, d0        // filter 4 output
3011*c0909341SAndroid Build Coastguard Worker        load_filter_coef d0, r5,  r7        // filter 6
3012*c0909341SAndroid Build Coastguard Worker        vext.8          d13, d14, d15, #6   // filter 6 pixels
3013*c0909341SAndroid Build Coastguard Worker        load_filter_ptr r5                  // filter 7
3014*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d8,  d8,  d9        // pixel 2 (4x16)
3015*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d9,  d10, d11       // pixel 3 (4x16)
3016*c0909341SAndroid Build Coastguard Worker        vmull.s8        q5,  d12, d1        // filter 5 output
3017*c0909341SAndroid Build Coastguard Worker        load_filter_coef d1, r5,  r7        // filter 7
3018*c0909341SAndroid Build Coastguard Worker        vext.8          d14, d14, d15, #7   // filter 7 pixels
3019*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d6,  d6,  d7        // pixel 4 (4x16)
3020*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d10, d10, d11       // pixel 5 (4x16)
3021*c0909341SAndroid Build Coastguard Worker        vmull.s8        q6,  d13, d0        // filter 6 output
3022*c0909341SAndroid Build Coastguard Worker        vmull.s8        q7,  d14, d1        // filter 7 output
3023*c0909341SAndroid Build Coastguard Worker
3024*c0909341SAndroid Build Coastguard Worker        sub             r5,  r5,  r7, lsl #3    // undo the 8 per pixel steps of r5
3025*c0909341SAndroid Build Coastguard Worker
3026*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d4,  d4,  d5        // pixel 0,1 (2x16)
3027*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d5,  d8,  d9        // pixel 2,3 (2x16)
3028*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d12, d12, d13       // pixel 6 (4x16)
3029*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d14, d14, d15       // pixel 7 (4x16)
3030*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d6,  d6,  d10       // pixel 4,5 (2x16)
3031*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d10, d12, d14       // pixel 6,7 (2x16)
3032*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d4,  d4,  d5        // pixel 0-3
3033*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d5,  d6,  d10       // pixel 4-7
3034*c0909341SAndroid Build Coastguard Worker
3035*c0909341SAndroid Build Coastguard Worker        add             r5,  r5,  r8            // advance filter position to the next row
3036*c0909341SAndroid Build Coastguard Worker
3037*c0909341SAndroid Build Coastguard Worker        bx              lr
3038*c0909341SAndroid Build Coastguard Workerendfunc
3039*c0909341SAndroid Build Coastguard Worker
3040*c0909341SAndroid Build Coastguard Worker// void dav1d_warp_affine_8x8_8bpc_neon(
3041*c0909341SAndroid Build Coastguard Worker//         pixel *dst, const ptrdiff_t dst_stride,
3042*c0909341SAndroid Build Coastguard Worker//         const pixel *src, const ptrdiff_t src_stride,
3043*c0909341SAndroid Build Coastguard Worker//         const int16_t *const abcd, int mx, int my)
// .macro warp t, shift
// Emits warp_affine_8x8<t>_8bpc_neon, which writes one 8x8 block.
//   t      name suffix. Blank: 8 bit pixels are stored (8 bytes per row).
//          Non-blank: 16 bit values are stored (16 bytes per row) and r1 is
//          doubled first (presumably the stride is passed in 16 bit units;
//          confirm against the C caller).
//   shift  right shift (rounding, saturating) applied to the 32 bit vertical
//          filter sums.
//
// Register roles inside the function (arguments as in the prototype above):
//   r0  dst, advanced by r1 after every stored row
//   r1  dst stride
//   r2  src, rewound by 3 rows and 3 bytes, then advanced by r3 per row read
//   r3  src stride
//   r4  abcd pointer on entry, then abcd[3] (per row step of my)
//   r5  mx + 512, horizontal filter position (stepped by r7 and r8 in
//       warp_filter_horz_neon)
//   r6  my + 512, vertical filter position
//   r7  abcd[0], r8 abcd[1], r9 abcd[2] (per column step of my)
//   r10 number of output rows left (starts at 8)
//   r11 mc_warp_filter + 64*8
//   q8-q15 sliding window of 8 horizontally filtered rows (16 bit)
//
// 15 source rows are filtered horizontally in total: 7 before the loop and
// one per output row inside it.
3044*c0909341SAndroid Build Coastguard Worker.macro warp t, shift
3045*c0909341SAndroid Build Coastguard Workerfunction warp_affine_8x8\t\()_8bpc_neon, export=1
3046*c0909341SAndroid Build Coastguard Worker        push            {r4-r11,lr}         // 9 words = 36 bytes
3047*c0909341SAndroid Build Coastguard Worker        vpush           {q4-q7}             // 64 bytes; stack args now start at sp + 100
3048*c0909341SAndroid Build Coastguard Worker        ldrd            r4,  r5,  [sp, #100]    // r4 = abcd, r5 = mx
3049*c0909341SAndroid Build Coastguard Worker        ldr             r6,  [sp, #108]         // r6 = my
3050*c0909341SAndroid Build Coastguard Worker        ldrd            r8,  r9,  [r4]          // four int16 values packed in two words
3051*c0909341SAndroid Build Coastguard Worker        sxth            r7,  r8                 // r7 = abcd[0] (low half, sign extended)
3052*c0909341SAndroid Build Coastguard Worker        asr             r8,  r8, #16            // r8 = abcd[1]
3053*c0909341SAndroid Build Coastguard Worker        asr             r4,  r9, #16            // r4 = abcd[3]
3054*c0909341SAndroid Build Coastguard Worker        sxth            r9,  r9                 // r9 = abcd[2]
3055*c0909341SAndroid Build Coastguard Worker        mov             r10, #8                 // 8 output rows
3056*c0909341SAndroid Build Coastguard Worker        sub             r2,  r2,  r3, lsl #1
3057*c0909341SAndroid Build Coastguard Worker        sub             r2,  r2,  r3            // src -= 3 * src_stride
3058*c0909341SAndroid Build Coastguard Worker        sub             r2,  r2,  #3            // src -= 3 bytes
3059*c0909341SAndroid Build Coastguard Worker        movrel          r11, X(mc_warp_filter), 64*8
3060*c0909341SAndroid Build Coastguard Worker.ifnb \t
3061*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1
3062*c0909341SAndroid Build Coastguard Worker.endif
3063*c0909341SAndroid Build Coastguard Worker        add             r5,  r5,  #512          // 512 = half of 1 << 10: rounds the
3064*c0909341SAndroid Build Coastguard Worker        add             r6,  r6,  #512          // asr #10 done in load_filter_ptr
3065*c0909341SAndroid Build Coastguard Worker
        // Prime the window with the first 7 horizontally filtered rows.
3066*c0909341SAndroid Build Coastguard Worker        bl              warp_filter_horz_neon
3067*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q8,  q2,  #3
3068*c0909341SAndroid Build Coastguard Worker        bl              warp_filter_horz_neon
3069*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q9,  q2,  #3
3070*c0909341SAndroid Build Coastguard Worker        bl              warp_filter_horz_neon
3071*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q10, q2,  #3
3072*c0909341SAndroid Build Coastguard Worker        bl              warp_filter_horz_neon
3073*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q11, q2,  #3
3074*c0909341SAndroid Build Coastguard Worker        bl              warp_filter_horz_neon
3075*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q12, q2,  #3
3076*c0909341SAndroid Build Coastguard Worker        bl              warp_filter_horz_neon
3077*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q13, q2,  #3
3078*c0909341SAndroid Build Coastguard Worker        bl              warp_filter_horz_neon
3079*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q14, q2,  #3
3080*c0909341SAndroid Build Coastguard Worker
3081*c0909341SAndroid Build Coastguard Worker1:
        // Per output row: fetch the 8th window row, then filter vertically.
3082*c0909341SAndroid Build Coastguard Worker        bl              warp_filter_horz_neon
3083*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q15, q2,  #3
3084*c0909341SAndroid Build Coastguard Worker
        // One 8 tap filter per output column: column i uses position r6 + i*r9.
3085*c0909341SAndroid Build Coastguard Worker        load_filter_row d8,  r6,  r9
3086*c0909341SAndroid Build Coastguard Worker        load_filter_row d9,  r6,  r9
3087*c0909341SAndroid Build Coastguard Worker        load_filter_row d10, r6,  r9
3088*c0909341SAndroid Build Coastguard Worker        load_filter_row d11, r6,  r9
3089*c0909341SAndroid Build Coastguard Worker        load_filter_row d12, r6,  r9
3090*c0909341SAndroid Build Coastguard Worker        load_filter_row d13, r6,  r9
3091*c0909341SAndroid Build Coastguard Worker        load_filter_row d14, r6,  r9
3092*c0909341SAndroid Build Coastguard Worker        load_filter_row d15, r6,  r9
        // NOTE(review): transpose_8x8b is defined outside this view; presumably
        // it transposes the 8x8 bytes in d8-d15 so that each d register then
        // holds one tap for all 8 columns. Confirm against its definition.
3093*c0909341SAndroid Build Coastguard Worker        transpose_8x8b  q4,  q5,  q6,  q7,  d8,  d9,  d10, d11, d12, d13, d14, d15
        // Sign extend the taps to 16 bit. Each destination overlaps only
        // source d registers that have already been widened (q4 = d8/d9, etc).
3094*c0909341SAndroid Build Coastguard Worker        vmovl.s8        q1,  d8
3095*c0909341SAndroid Build Coastguard Worker        vmovl.s8        q2,  d9
3096*c0909341SAndroid Build Coastguard Worker        vmovl.s8        q3,  d10
3097*c0909341SAndroid Build Coastguard Worker        vmovl.s8        q4,  d11
3098*c0909341SAndroid Build Coastguard Worker        vmovl.s8        q5,  d12
3099*c0909341SAndroid Build Coastguard Worker        vmovl.s8        q6,  d13
3100*c0909341SAndroid Build Coastguard Worker
3101*c0909341SAndroid Build Coastguard Worker        sub             r6,  r6,  r9, lsl #3    // undo the 8 per column steps of r6
3102*c0909341SAndroid Build Coastguard Worker
3103*c0909341SAndroid Build Coastguard Worker        // This ordering of vmull/vmlal is highly beneficial for
3104*c0909341SAndroid Build Coastguard Worker        // Cortex A8/A9/A53 here, but harmful for Cortex A7.
        // q0 accumulates columns 0-3 (low halves), q1 columns 4-7 (high halves).
        // The vmull into q1 overwrites d2/d3, so the q0 product that reads d2
        // has to be issued before it.
3105*c0909341SAndroid Build Coastguard Worker        vmull.s16       q0,  d16,  d2
3106*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q0,  d18,  d4
3107*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q0,  d20,  d6
3108*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q0,  d22,  d8
3109*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q0,  d24,  d10
3110*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q0,  d26,  d12
3111*c0909341SAndroid Build Coastguard Worker        vmull.s16       q1,  d17,  d3
3112*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q1,  d19,  d5
3113*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q1,  d21,  d7
3114*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q1,  d23,  d9
3115*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q1,  d25,  d11
3116*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q1,  d27,  d13
3117*c0909341SAndroid Build Coastguard Worker
        // Taps 6 and 7 are widened only now, reusing q2/q3 after taps 1 and 2
        // have been consumed above.
3118*c0909341SAndroid Build Coastguard Worker        vmovl.s8        q2,  d14
3119*c0909341SAndroid Build Coastguard Worker        vmovl.s8        q3,  d15
3120*c0909341SAndroid Build Coastguard Worker
3121*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q0,  d28,  d4
3122*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q0,  d30,  d6
3123*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q1,  d29,  d5
3124*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q1,  d31,  d7
3125*c0909341SAndroid Build Coastguard Worker
        // Constant added after the narrowing shift: 128 for pixel output,
        // 0x800 (= 128 << 4) for the 16 bit variant.
        // NOTE(review): presumably this compensates for the -128 pixel bias
        // applied in warp_filter_horz_neon; confirm against the coefficient
        // sum of mc_warp_filter.
3126*c0909341SAndroid Build Coastguard Worker.ifb \t
3127*c0909341SAndroid Build Coastguard Worker        vmov.i16        q7,  #128
3128*c0909341SAndroid Build Coastguard Worker.else
3129*c0909341SAndroid Build Coastguard Worker        vmov.i16        q7,  #0x800
3130*c0909341SAndroid Build Coastguard Worker.endif
3131*c0909341SAndroid Build Coastguard Worker
        // Slide the row window up by one (q8 <- q9 ... q14 <- q15), interleaved
        // with the narrowing, bias add and store of the finished row.
3132*c0909341SAndroid Build Coastguard Worker        vmov            q8,  q9
3133*c0909341SAndroid Build Coastguard Worker        vmov            q9,  q10
3134*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d0,  q0,  #\shift
3135*c0909341SAndroid Build Coastguard Worker        vmov            q10, q11
3136*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d1,  q1,  #\shift
3137*c0909341SAndroid Build Coastguard Worker        vmov            q11, q12
3138*c0909341SAndroid Build Coastguard Worker        vadd.i16        q0,  q0,  q7
3139*c0909341SAndroid Build Coastguard Worker        vmov            q12, q13
3140*c0909341SAndroid Build Coastguard Worker.ifb \t
3141*c0909341SAndroid Build Coastguard Worker        vqmovun.s16     d0,  q0             // saturate to unsigned 8 bit
3142*c0909341SAndroid Build Coastguard Worker.endif
3143*c0909341SAndroid Build Coastguard Worker        vmov            q13, q14
3144*c0909341SAndroid Build Coastguard Worker        vmov            q14, q15
3145*c0909341SAndroid Build Coastguard Worker        subs            r10, r10, #1        // flags are consumed by the bgt below
3146*c0909341SAndroid Build Coastguard Worker.ifnb \t
3147*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0}, [r0, :128], r1
3148*c0909341SAndroid Build Coastguard Worker.else
3149*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0}, [r0, :64], r1
3150*c0909341SAndroid Build Coastguard Worker.endif
3151*c0909341SAndroid Build Coastguard Worker
3152*c0909341SAndroid Build Coastguard Worker        add             r6,  r6,  r4        // my += abcd[3] for the next row; no flag update
3153*c0909341SAndroid Build Coastguard Worker        bgt             1b
3154*c0909341SAndroid Build Coastguard Worker
3155*c0909341SAndroid Build Coastguard Worker        vpop            {q4-q7}
3156*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}
3157*c0909341SAndroid Build Coastguard Workerendfunc
3158*c0909341SAndroid Build Coastguard Worker.endm
3159*c0909341SAndroid Build Coastguard Worker
// Instantiate both warp variants:
//   blank suffix, shift 11: warp_affine_8x8_8bpc_neon, stores 8 bit pixels.
//   suffix "t",   shift 7:  warp_affine_8x8t_8bpc_neon, stores 16 bit values.
3160*c0909341SAndroid Build Coastguard Workerwarp  , 11
3161*c0909341SAndroid Build Coastguard Workerwarp t, 7
3162*c0909341SAndroid Build Coastguard Worker
3163*c0909341SAndroid Build Coastguard Worker// void dav1d_emu_edge_8bpc_neon(
3164*c0909341SAndroid Build Coastguard Worker//         const intptr_t bw, const intptr_t bh,
3165*c0909341SAndroid Build Coastguard Worker//         const intptr_t iw, const intptr_t ih,
3166*c0909341SAndroid Build Coastguard Worker//         const intptr_t x, const intptr_t y,
3167*c0909341SAndroid Build Coastguard Worker//         pixel *dst, const ptrdiff_t dst_stride,
3168*c0909341SAndroid Build Coastguard Worker//         const pixel *ref, const ptrdiff_t ref_stride)
//
// Fills the bw x bh block at dst with pixels taken from ref. The source
// position is clamped to the iw x ih picture, and the nearest edge pixel/row
// is replicated into the parts of the block that fall outside of it.
//
// On entry r0 = bw, r1 = bh, r2 = iw, r3 = ih; the remaining six arguments
// are loaded from the stack into r4-r9 right after the push.
//
// Register roles once the setup code has run:
//   r0  = dst of the first center row (kept for the top fill)
//   r1  = center_h          r2  = center_w
//   r4  = left_ext          r11 = right_ext
//   r5  = top_ext           r10 = bottom_ext
//   r6  = dst row pointer   r7  = dst_stride
//   r8  = ref row pointer   r9  = ref_stride
//   r3, r12, lr = scratch (loop counters / running pointers)
//
// All fill and copy loops store whole 16- or 32-byte vectors and only stop
// once their counter is <= 0, so a span that is not a multiple of the vector
// size is written past its nominal end (the center copy also reads past it).
// NOTE(review): this presumably relies on slack in the dst buffer and on
// readable bytes after each ref row - confirm against the callers.
// NOTE(review): the :128 qualifiers assert 16-byte aligned dst rows - confirm
// that every caller guarantees this.
3169*c0909341SAndroid Build Coastguard Workerfunction emu_edge_8bpc_neon, export=1
3170*c0909341SAndroid Build Coastguard Worker        push            {r4-r11,lr}
        // The push stored 9 words (36 bytes), so the first stack argument is
        // at sp + 36: r4 = x, r5 = y, r6 = dst, r7 = dst_stride, r8 = ref,
        // r9 = ref_stride.
3171*c0909341SAndroid Build Coastguard Worker        ldrd            r4,  r5,  [sp, #36]
3172*c0909341SAndroid Build Coastguard Worker        ldrd            r6,  r7,  [sp, #44]
3173*c0909341SAndroid Build Coastguard Worker        ldrd            r8,  r9,  [sp, #52]
3174*c0909341SAndroid Build Coastguard Worker
3175*c0909341SAndroid Build Coastguard Worker        // ref += iclip(y, 0, ih - 1) * PXSTRIDE(ref_stride)
3176*c0909341SAndroid Build Coastguard Worker        // ref += iclip(x, 0, iw - 1)
        // The y and x clamps are interleaved. "bic rX, rX, rX, asr #31" clears
        // rX when it is negative (rX & ~sign_mask) and does not update the
        // flags, so each it/movlt pair still sees the preceding cmp result.
3177*c0909341SAndroid Build Coastguard Worker        sub             r12, r3,  #1           // ih - 1
3178*c0909341SAndroid Build Coastguard Worker        cmp             r5,  r3
3179*c0909341SAndroid Build Coastguard Worker        sub             lr,  r2,  #1           // iw - 1
3180*c0909341SAndroid Build Coastguard Worker        it              lt
3181*c0909341SAndroid Build Coastguard Worker        movlt           r12, r5                // min(y, ih - 1)
3182*c0909341SAndroid Build Coastguard Worker        cmp             r4,  r2
3183*c0909341SAndroid Build Coastguard Worker        bic             r12, r12, r12, asr #31 // max(min(y, ih - 1), 0)
3184*c0909341SAndroid Build Coastguard Worker        it              lt
3185*c0909341SAndroid Build Coastguard Worker        movlt           lr,  r4                // min(x, iw - 1)
3186*c0909341SAndroid Build Coastguard Worker        bic             lr,  lr,  lr,  asr #31 // max(min(x, iw - 1), 0)
3187*c0909341SAndroid Build Coastguard Worker        mla             r8,  r12, r9,  r8      // ref += iclip() * stride
3188*c0909341SAndroid Build Coastguard Worker        add             r8,  r8,  lr           // ref += iclip()
3189*c0909341SAndroid Build Coastguard Worker
3190*c0909341SAndroid Build Coastguard Worker        // bottom_ext = iclip(y + bh - ih, 0, bh - 1)
3191*c0909341SAndroid Build Coastguard Worker        // top_ext = iclip(-y, 0, bh - 1)
        // r10 becomes bottom_ext and r5 becomes top_ext; r12 = bh - 1 is the
        // shared upper bound. For top_ext the max with 0 is applied first and
        // the min with bh - 1 second; for bottom_ext it is the other way round.
3192*c0909341SAndroid Build Coastguard Worker        add             r10, r5,  r1           // y + bh
3193*c0909341SAndroid Build Coastguard Worker        neg             r5,  r5                // -y
3194*c0909341SAndroid Build Coastguard Worker        sub             r10, r10, r3           // y + bh - ih
3195*c0909341SAndroid Build Coastguard Worker        sub             r12, r1,  #1           // bh - 1
3196*c0909341SAndroid Build Coastguard Worker        cmp             r10, r1
3197*c0909341SAndroid Build Coastguard Worker        bic             r5,  r5,  r5,  asr #31 // max(-y, 0)
3198*c0909341SAndroid Build Coastguard Worker        it              ge
3199*c0909341SAndroid Build Coastguard Worker        movge           r10, r12               // min(y + bh - ih, bh-1)
3200*c0909341SAndroid Build Coastguard Worker        cmp             r5,  r1
3201*c0909341SAndroid Build Coastguard Worker        bic             r10, r10, r10, asr #31 // max(min(y + bh - ih, bh-1), 0)
3202*c0909341SAndroid Build Coastguard Worker        it              ge
3203*c0909341SAndroid Build Coastguard Worker        movge           r5,  r12               // min(max(-y, 0), bh-1)
3204*c0909341SAndroid Build Coastguard Worker
3205*c0909341SAndroid Build Coastguard Worker        // right_ext = iclip(x + bw - iw, 0, bw - 1)
3206*c0909341SAndroid Build Coastguard Worker        // left_ext = iclip(-x, 0, bw - 1)
        // Same instruction pattern as the vertical case above: r11 becomes
        // right_ext, r4 becomes left_ext and lr = bw - 1 is the upper bound.
3207*c0909341SAndroid Build Coastguard Worker        add             r11, r4,  r0           // x + bw
3208*c0909341SAndroid Build Coastguard Worker        neg             r4,  r4                // -x
3209*c0909341SAndroid Build Coastguard Worker        sub             r11, r11, r2           // x + bw - iw
3210*c0909341SAndroid Build Coastguard Worker        sub             lr,  r0,  #1           // bw - 1
3211*c0909341SAndroid Build Coastguard Worker        cmp             r11, r0
3212*c0909341SAndroid Build Coastguard Worker        bic             r4,  r4,  r4,  asr #31 // max(-x, 0)
3213*c0909341SAndroid Build Coastguard Worker        it              ge
3214*c0909341SAndroid Build Coastguard Worker        movge           r11, lr                // min(x + bw - iw, bw-1)
3215*c0909341SAndroid Build Coastguard Worker        cmp             r4,  r0
3216*c0909341SAndroid Build Coastguard Worker        bic             r11, r11, r11, asr #31 // max(min(x + bw - iw, bw-1), 0)
3217*c0909341SAndroid Build Coastguard Worker        it              ge
3218*c0909341SAndroid Build Coastguard Worker        movge           r4,  lr                // min(max(-x, 0), bw - 1)
3219*c0909341SAndroid Build Coastguard Worker
3220*c0909341SAndroid Build Coastguard Worker        // center_h = bh - top_ext - bottom_ext
3221*c0909341SAndroid Build Coastguard Worker        // dst += top_ext * PXSTRIDE(dst_stride)
3222*c0909341SAndroid Build Coastguard Worker        // center_w = bw - left_ext - right_ext
        // The mla below performs the dst adjustment; r2 (iw) is reused to
        // hold center_w from here on.
3223*c0909341SAndroid Build Coastguard Worker        sub             r1,  r1,  r5           // bh - top_ext
3224*c0909341SAndroid Build Coastguard Worker        mla             r6,  r5,  r7,  r6
3225*c0909341SAndroid Build Coastguard Worker        sub             r2,  r0,  r4           // bw - left_ext
3226*c0909341SAndroid Build Coastguard Worker        sub             r1,  r1,  r10          // center_h = bh - top_ext - bottom_ext
3227*c0909341SAndroid Build Coastguard Worker        sub             r2,  r2,  r11          // center_w = bw - left_ext - right_ext
3228*c0909341SAndroid Build Coastguard Worker
        // r0 (bw) is overwritten here with the address of the first center
        // row; bw is rebuilt after the row loop from center_w and the two
        // horizontal extents.
3229*c0909341SAndroid Build Coastguard Worker        mov             r0,  r6                // backup of dst
3230*c0909341SAndroid Build Coastguard Worker
        // v_loop emits the loop over the center_h rows. For each row it
        // optionally fills at least left_ext bytes with the first ref pixel,
        // copies at least center_w bytes from ref, and optionally fills at
        // least right_ext bytes with the last center pixel. need_left and
        // need_right are assemble-time flags selecting which fills are
        // emitted. It modifies r1, r3, r6, r8, r12, lr, q0, q1 and the flags.
3231*c0909341SAndroid Build Coastguard Worker.macro v_loop need_left, need_right
3232*c0909341SAndroid Build Coastguard Worker0:
3233*c0909341SAndroid Build Coastguard Worker.if \need_left
        // Broadcast the first ref pixel of this row to all 16 lanes of q0 and
        // store it in 16-byte steps from the start of the dst row. Any
        // overshoot past left_ext is overwritten by the center copy below.
3234*c0909341SAndroid Build Coastguard Worker        vld1.8          {d0[], d1[]}, [r8]
3235*c0909341SAndroid Build Coastguard Worker        mov             r12, r6                // out = dst
3236*c0909341SAndroid Build Coastguard Worker        mov             r3,  r4
3237*c0909341SAndroid Build Coastguard Worker1:
3238*c0909341SAndroid Build Coastguard Worker        subs            r3,  r3,  #16
3239*c0909341SAndroid Build Coastguard Worker        vst1.8          {q0}, [r12, :128]!
3240*c0909341SAndroid Build Coastguard Worker        bgt             1b
3241*c0909341SAndroid Build Coastguard Worker.endif
        // Copy the center part of the row, 32 bytes per iteration; r3 counts
        // the remaining center_w bytes.
3242*c0909341SAndroid Build Coastguard Worker        mov             lr,  r8
3243*c0909341SAndroid Build Coastguard Worker        add             r12, r6,  r4           // out = dst + left_ext
3244*c0909341SAndroid Build Coastguard Worker        mov             r3,  r2
3245*c0909341SAndroid Build Coastguard Worker1:
3246*c0909341SAndroid Build Coastguard Worker        vld1.8          {q0, q1}, [lr]!
3247*c0909341SAndroid Build Coastguard Worker        subs            r3,  r3,  #32
        // With a left edge the store address dst + left_ext has no known
        // alignment. The variants without a left edge are only entered with
        // r4 == 0, so there the address is the dst row itself and the :128
        // qualifier is used.
3248*c0909341SAndroid Build Coastguard Worker.if \need_left
3249*c0909341SAndroid Build Coastguard Worker        vst1.8          {q0, q1}, [r12]!
3250*c0909341SAndroid Build Coastguard Worker.else
3251*c0909341SAndroid Build Coastguard Worker        vst1.8          {q0, q1}, [r12, :128]!
3252*c0909341SAndroid Build Coastguard Worker.endif
3253*c0909341SAndroid Build Coastguard Worker        bgt             1b
3254*c0909341SAndroid Build Coastguard Worker.if \need_right
        // Broadcast the last center pixel, ref[center_w - 1], and store it in
        // 16-byte steps starting right after the center part. This also
        // overwrites whatever the center copy wrote past center_w.
3255*c0909341SAndroid Build Coastguard Worker        add             r3,  r8,  r2           // in + center_w
3256*c0909341SAndroid Build Coastguard Worker        sub             r3,  r3,  #1           // in + center_w - 1
3257*c0909341SAndroid Build Coastguard Worker        add             r12, r6,  r4           // dst + left_ext
3258*c0909341SAndroid Build Coastguard Worker        vld1.8          {d0[], d1[]}, [r3]
3259*c0909341SAndroid Build Coastguard Worker        add             r12, r12, r2           // out = dst + left_ext + center_w
3260*c0909341SAndroid Build Coastguard Worker        mov             r3,  r11
3261*c0909341SAndroid Build Coastguard Worker1:
3262*c0909341SAndroid Build Coastguard Worker        subs            r3,  r3,  #16
3263*c0909341SAndroid Build Coastguard Worker        vst1.8          {q0}, [r12]!
3264*c0909341SAndroid Build Coastguard Worker        bgt             1b
3265*c0909341SAndroid Build Coastguard Worker.endif
3266*c0909341SAndroid Build Coastguard Worker
        // Step dst and ref down one row. The two adds do not update the
        // flags, so bgt tests the result of the subs on center_h.
3267*c0909341SAndroid Build Coastguard Worker        subs            r1,  r1,  #1           // center_h--
3268*c0909341SAndroid Build Coastguard Worker        add             r6,  r6,  r7
3269*c0909341SAndroid Build Coastguard Worker        add             r8,  r8,  r9
3270*c0909341SAndroid Build Coastguard Worker        bgt             0b
3271*c0909341SAndroid Build Coastguard Worker.endm
3272*c0909341SAndroid Build Coastguard Worker
        // Pick one of the four v_loop expansions depending on whether
        // left_ext (r4) and right_ext (r11) are non-zero. Every variant ends
        // up at label 5.
3273*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #0
3274*c0909341SAndroid Build Coastguard Worker        beq             2f
3275*c0909341SAndroid Build Coastguard Worker        // need_left
3276*c0909341SAndroid Build Coastguard Worker        cmp             r11, #0
3277*c0909341SAndroid Build Coastguard Worker        beq             3f
3278*c0909341SAndroid Build Coastguard Worker        // need_left + need_right
3279*c0909341SAndroid Build Coastguard Worker        v_loop          1,   1
3280*c0909341SAndroid Build Coastguard Worker        b               5f
3281*c0909341SAndroid Build Coastguard Worker
3282*c0909341SAndroid Build Coastguard Worker2:
3283*c0909341SAndroid Build Coastguard Worker        // !need_left
3284*c0909341SAndroid Build Coastguard Worker        cmp             r11, #0
3285*c0909341SAndroid Build Coastguard Worker        beq             4f
3286*c0909341SAndroid Build Coastguard Worker        // !need_left + need_right
3287*c0909341SAndroid Build Coastguard Worker        v_loop          0,   1
3288*c0909341SAndroid Build Coastguard Worker        b               5f
3289*c0909341SAndroid Build Coastguard Worker
3290*c0909341SAndroid Build Coastguard Worker3:
3291*c0909341SAndroid Build Coastguard Worker        // need_left + !need_right
3292*c0909341SAndroid Build Coastguard Worker        v_loop          1,   0
3293*c0909341SAndroid Build Coastguard Worker        b               5f
3294*c0909341SAndroid Build Coastguard Worker
3295*c0909341SAndroid Build Coastguard Worker4:
3296*c0909341SAndroid Build Coastguard Worker        // !need_left + !need_right
3297*c0909341SAndroid Build Coastguard Worker        v_loop          0,   0
3298*c0909341SAndroid Build Coastguard Worker
3299*c0909341SAndroid Build Coastguard Worker5:
        // The flags set by this cmp on bottom_ext are consumed by the beq
        // further down; the two adds in between do not update them.
3300*c0909341SAndroid Build Coastguard Worker        cmp             r10, #0
3301*c0909341SAndroid Build Coastguard Worker        // Storing the original dst in r0 overwrote bw, recalculate it here
3302*c0909341SAndroid Build Coastguard Worker        add             r2,  r2,  r4           // center_w + left_ext
3303*c0909341SAndroid Build Coastguard Worker        add             r2,  r2,  r11          // bw = center_w + left_ext + right_ext
3304*c0909341SAndroid Build Coastguard Worker
3305*c0909341SAndroid Build Coastguard Worker        beq             3f
3306*c0909341SAndroid Build Coastguard Worker        // need_bottom
        // After the row loop r6 points at the row following the last center
        // row, so r8 is that last row. The fill works in 32-byte wide columns:
        // load 32 bytes of the last row once, store them bottom_ext times
        // going down, then rewind r6 and move 32 bytes to the right.
        // r4 (left_ext, no longer needed) becomes the remaining-width counter.
3307*c0909341SAndroid Build Coastguard Worker        sub             r8,  r6,  r7           // ref = dst - stride
3308*c0909341SAndroid Build Coastguard Worker        mov             r4,  r2
3309*c0909341SAndroid Build Coastguard Worker1:
3310*c0909341SAndroid Build Coastguard Worker        vld1.8          {q0, q1}, [r8, :128]!
3311*c0909341SAndroid Build Coastguard Worker        mov             r3,  r10
3312*c0909341SAndroid Build Coastguard Worker2:
3313*c0909341SAndroid Build Coastguard Worker        subs            r3,  r3,  #1
3314*c0909341SAndroid Build Coastguard Worker        vst1.8          {q0, q1}, [r6, :128], r7
3315*c0909341SAndroid Build Coastguard Worker        bgt             2b
3316*c0909341SAndroid Build Coastguard Worker        mls             r6,  r7,  r10,  r6     // dst -= bottom_ext * stride
3317*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #32          // bw -= 32
3318*c0909341SAndroid Build Coastguard Worker        add             r6,  r6,  #32          // dst += 32
3319*c0909341SAndroid Build Coastguard Worker        bgt             1b
3320*c0909341SAndroid Build Coastguard Worker
3321*c0909341SAndroid Build Coastguard Worker3:
3322*c0909341SAndroid Build Coastguard Worker        cmp             r5,  #0
3323*c0909341SAndroid Build Coastguard Worker        beq             3f
3324*c0909341SAndroid Build Coastguard Worker        // need_top
        // r0 still points at the first center row. r6 is placed top_ext rows
        // above it, and each 32-byte column of that first row is stored
        // top_ext times going down, using the same column scheme as the
        // bottom fill. r2 (bw) is the remaining-width counter.
3325*c0909341SAndroid Build Coastguard Worker        mls             r6,  r7,  r5,  r0      // dst = stored_dst - top_ext * stride
3326*c0909341SAndroid Build Coastguard Worker1:
3327*c0909341SAndroid Build Coastguard Worker        vld1.8          {q0, q1}, [r0, :128]!
3328*c0909341SAndroid Build Coastguard Worker        mov             r3,  r5
3329*c0909341SAndroid Build Coastguard Worker2:
3330*c0909341SAndroid Build Coastguard Worker        subs            r3,  r3,  #1
3331*c0909341SAndroid Build Coastguard Worker        vst1.8          {q0, q1}, [r6, :128], r7
3332*c0909341SAndroid Build Coastguard Worker        bgt             2b
3333*c0909341SAndroid Build Coastguard Worker        mls             r6,  r7,  r5,  r6      // dst -= top_ext * stride
3334*c0909341SAndroid Build Coastguard Worker        subs            r2,  r2,  #32          // bw -= 32
3335*c0909341SAndroid Build Coastguard Worker        add             r6,  r6,  #32          // dst += 32
3336*c0909341SAndroid Build Coastguard Worker        bgt             1b
3337*c0909341SAndroid Build Coastguard Worker
3338*c0909341SAndroid Build Coastguard Worker3:
        // Restore r4-r11 and return by popping the saved lr into pc.
3339*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}
3340*c0909341SAndroid Build Coastguard Workerendfunc
3341