xref: /aosp_15_r20/external/libdav1d/src/arm/64/mc.S (revision c09093415860a1c2373dacd84c4fde00c507cdfd)
1*c0909341SAndroid Build Coastguard Worker/*
2*c0909341SAndroid Build Coastguard Worker * Copyright © 2018, VideoLAN and dav1d authors
3*c0909341SAndroid Build Coastguard Worker * Copyright © 2018, Janne Grunau
4*c0909341SAndroid Build Coastguard Worker * Copyright © 2018, Martin Storsjo
5*c0909341SAndroid Build Coastguard Worker * All rights reserved.
6*c0909341SAndroid Build Coastguard Worker *
7*c0909341SAndroid Build Coastguard Worker * Redistribution and use in source and binary forms, with or without
8*c0909341SAndroid Build Coastguard Worker * modification, are permitted provided that the following conditions are met:
9*c0909341SAndroid Build Coastguard Worker *
10*c0909341SAndroid Build Coastguard Worker * 1. Redistributions of source code must retain the above copyright notice, this
11*c0909341SAndroid Build Coastguard Worker *    list of conditions and the following disclaimer.
12*c0909341SAndroid Build Coastguard Worker *
13*c0909341SAndroid Build Coastguard Worker * 2. Redistributions in binary form must reproduce the above copyright notice,
14*c0909341SAndroid Build Coastguard Worker *    this list of conditions and the following disclaimer in the documentation
15*c0909341SAndroid Build Coastguard Worker *    and/or other materials provided with the distribution.
16*c0909341SAndroid Build Coastguard Worker *
17*c0909341SAndroid Build Coastguard Worker * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18*c0909341SAndroid Build Coastguard Worker * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19*c0909341SAndroid Build Coastguard Worker * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20*c0909341SAndroid Build Coastguard Worker * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
21*c0909341SAndroid Build Coastguard Worker * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22*c0909341SAndroid Build Coastguard Worker * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23*c0909341SAndroid Build Coastguard Worker * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24*c0909341SAndroid Build Coastguard Worker * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25*c0909341SAndroid Build Coastguard Worker * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26*c0909341SAndroid Build Coastguard Worker * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27*c0909341SAndroid Build Coastguard Worker */
28*c0909341SAndroid Build Coastguard Worker
29*c0909341SAndroid Build Coastguard Worker#include "src/arm/asm.S"
30*c0909341SAndroid Build Coastguard Worker#include "util.S"
31*c0909341SAndroid Build Coastguard Worker
// avg dst, t0, t1, t2, t3
//
// Produces 16 output bytes in dst from 16 pairs of 16-bit inputs.
//   In:     x2, x3 = pointers to the two 16-bit input streams; each is
//           post-incremented by 32 bytes (16 halfwords).
//   Out:    dst.16b = saturate_u8((a + b + 16) >> 5) per lane, where a comes
//           from [x2] and b from [x3].
//   Clobb:  t0, t1, t2, t3 (used as scratch vector registers).
32*c0909341SAndroid Build Coastguard Worker.macro avg dst, t0, t1, t2, t3
33*c0909341SAndroid Build Coastguard Worker        ld1             {\t0\().8h,\t1\().8h},   [x2],  32
34*c0909341SAndroid Build Coastguard Worker        ld1             {\t2\().8h,\t3\().8h},   [x3],  32
35*c0909341SAndroid Build Coastguard Worker        add             \t0\().8h,   \t0\().8h,   \t2\().8h
36*c0909341SAndroid Build Coastguard Worker        add             \t1\().8h,   \t1\().8h,   \t3\().8h
        // Rounding shift right by 5 with unsigned saturation, narrowing to
        // bytes: low 8 lanes first, then the high 8 lanes of the same register.
37*c0909341SAndroid Build Coastguard Worker        sqrshrun        \dst\().8b,  \t0\().8h,   #5
38*c0909341SAndroid Build Coastguard Worker        sqrshrun2       \dst\().16b, \t1\().8h,   #5
39*c0909341SAndroid Build Coastguard Worker.endm
40*c0909341SAndroid Build Coastguard Worker
// w_avg dst, t0, t1, t2, t3
//
// Weighted blend of 16 pairs of 16-bit inputs into 16 output bytes.
//   In:     x2, x3 = pointers to the two 16-bit input streams; each is
//           post-incremented by 32 bytes.
//           v30.8h = per-lane multiplier, preloaded by the caller. bidir_fn
//           sets it to (-w6) << 11.
//   Out:    dst.16b = saturate_u8(round((b + sqdmulh(b - a, v30)) >> 4)),
//           with a from [x2] and b from [x3].
//   Clobb:  t0, t1, t2, t3.
//
// sqdmulh returns (2 * x * y) >> 16, so with v30 = -w << 11 the product term
// equals ((a - b) * w) >> 4 (ignoring saturation).
41*c0909341SAndroid Build Coastguard Worker.macro w_avg dst, t0, t1, t2, t3
42*c0909341SAndroid Build Coastguard Worker        ld1             {\t0\().8h,\t1\().8h},   [x2],  32
43*c0909341SAndroid Build Coastguard Worker        ld1             {\t2\().8h,\t3\().8h},   [x3],  32
        // t0/t1 = b - a
44*c0909341SAndroid Build Coastguard Worker        sub             \t0\().8h,   \t2\().8h,   \t0\().8h
45*c0909341SAndroid Build Coastguard Worker        sub             \t1\().8h,   \t3\().8h,   \t1\().8h
        // Scale the difference by the preloaded multiplier in v30.
46*c0909341SAndroid Build Coastguard Worker        sqdmulh         \t0\().8h,   \t0\().8h,   v30.8h
47*c0909341SAndroid Build Coastguard Worker        sqdmulh         \t1\().8h,   \t1\().8h,   v30.8h
        // Add the scaled difference back onto b.
48*c0909341SAndroid Build Coastguard Worker        add             \t0\().8h,   \t2\().8h,   \t0\().8h
49*c0909341SAndroid Build Coastguard Worker        add             \t1\().8h,   \t3\().8h,   \t1\().8h
        // Rounding shift right by 4, saturate to unsigned bytes.
50*c0909341SAndroid Build Coastguard Worker        sqrshrun        \dst\().8b,  \t0\().8h,   #4
51*c0909341SAndroid Build Coastguard Worker        sqrshrun2       \dst\().16b, \t1\().8h,   #4
52*c0909341SAndroid Build Coastguard Worker.endm
53*c0909341SAndroid Build Coastguard Worker
// mask dst, t0, t1, t2, t3
//
// Per-pixel masked blend of 16 pairs of 16-bit inputs into 16 output bytes.
//   In:     x2, x3 = pointers to the two 16-bit input streams; each is
//           post-incremented by 32 bytes.
//           x6     = pointer to 8-bit mask values; post-incremented by 16.
//           v31.16b = byte multiplier preloaded by the caller. bidir_fn sets
//           every byte to 254 (written as #256-2, i.e. -2 modulo 256).
//   Out:    dst.16b = saturate_u8(round((b + sqdmulh(b - a, m16)) >> 4)),
//           with a from [x2], b from [x3], and m16 = ((m * v31) & 0xff) << 8.
//   Clobb:  t0, t1, t2, t3, v28, v29, v30.
//
// NOTE(review): if every mask byte m is at most 64, then (m * 254) & 0xff read
// as a signed byte is -2*m, so m16 is -m << 9 and the sqdmulh term becomes
// ((a - b) * m) >> 6. The mask range is not visible here - TODO confirm.
54*c0909341SAndroid Build Coastguard Worker.macro mask dst, t0, t1, t2, t3
55*c0909341SAndroid Build Coastguard Worker        ld1             {v30.16b}, [x6],  16
56*c0909341SAndroid Build Coastguard Worker        ld1             {\t0\().8h,\t1\().8h},   [x2],  32
        // Multiply each mask byte by the constant in v31 (low 8 bits kept).
57*c0909341SAndroid Build Coastguard Worker        mul             v30.16b, v30.16b, v31.16b
58*c0909341SAndroid Build Coastguard Worker        ld1             {\t2\().8h,\t3\().8h},   [x3],  32
        // Widen to 16 bits with the byte placed in the top half of each lane.
59*c0909341SAndroid Build Coastguard Worker        shll            v28.8h, v30.8b,  #8
60*c0909341SAndroid Build Coastguard Worker        shll2           v29.8h, v30.16b, #8
        // t0/t1 = b - a
61*c0909341SAndroid Build Coastguard Worker        sub             \t0\().8h,   \t2\().8h,   \t0\().8h
62*c0909341SAndroid Build Coastguard Worker        sub             \t1\().8h,   \t3\().8h,   \t1\().8h
        // Scale the difference by the per-pixel multiplier.
63*c0909341SAndroid Build Coastguard Worker        sqdmulh         \t0\().8h,   \t0\().8h,   v28.8h
64*c0909341SAndroid Build Coastguard Worker        sqdmulh         \t1\().8h,   \t1\().8h,   v29.8h
        // Add the scaled difference back onto b.
65*c0909341SAndroid Build Coastguard Worker        add             \t0\().8h,   \t2\().8h,   \t0\().8h
66*c0909341SAndroid Build Coastguard Worker        add             \t1\().8h,   \t3\().8h,   \t1\().8h
        // Rounding shift right by 4, saturate to unsigned bytes.
67*c0909341SAndroid Build Coastguard Worker        sqrshrun        \dst\().8b,  \t0\().8h,   #4
68*c0909341SAndroid Build Coastguard Worker        sqrshrun2       \dst\().16b, \t1\().8h,   #4
69*c0909341SAndroid Build Coastguard Worker.endm
70*c0909341SAndroid Build Coastguard Worker
// bidir_fn type
//
// Emits the function <type>_8bpc_neon plus its jump table <type>_tbl, where
// type is one of the macros avg, w_avg or mask defined above. Every
// invocation of the type macro yields 16 output bytes and advances the input
// pointers x2/x3 (and x6 for mask).
//
// Register use as visible in the body:
//   x0 = output pointer, x1 = output stride in bytes
//   x2, x3 = the two 16-bit input streams consumed by the type macro
//   w4 = selects the code path: index = clz(w4) - 24 into a six-entry table
//        ordered 1280, 640, 320, 160, 80, 40. Presumably the block width,
//        a power of two from 128 down to 4 - TODO confirm against the caller.
//   w5 = remaining-rows counter (presumably the block height)
//   w6 = weight (w_avg only), x6 = mask pointer (mask only)
//   x7 = table base, then branch target, then second output pointer
//   v4-v7, v16-v19 = output pixels; v0-v3 = scratch for the type macro
//
// AARCH64_VALID_JUMP_TARGET is defined outside this chunk; presumably it emits
// a landing pad for the indirect branch.
71*c0909341SAndroid Build Coastguard Worker.macro bidir_fn type
72*c0909341SAndroid Build Coastguard Workerfunction \type\()_8bpc_neon, export=1
73*c0909341SAndroid Build Coastguard Worker        clz             w4,  w4
74*c0909341SAndroid Build Coastguard Worker.ifc \type, w_avg
        // v30 = (-w6) << 11 in every halfword lane; consumed by the w_avg macro.
75*c0909341SAndroid Build Coastguard Worker        dup             v30.8h, w6
76*c0909341SAndroid Build Coastguard Worker        neg             v30.8h, v30.8h
77*c0909341SAndroid Build Coastguard Worker        shl             v30.8h, v30.8h, #11
78*c0909341SAndroid Build Coastguard Worker.endif
79*c0909341SAndroid Build Coastguard Worker.ifc \type, mask
        // v31 = 254 in every byte (-2 modulo 256); consumed by the mask macro.
80*c0909341SAndroid Build Coastguard Worker        movi            v31.16b, #256-2
81*c0909341SAndroid Build Coastguard Worker.endif
82*c0909341SAndroid Build Coastguard Worker        movrel          x7,  \type\()_tbl
        // Table index = clz(w4) - 24: 0 for 128 up to 5 for 4.
83*c0909341SAndroid Build Coastguard Worker        sub             w4,  w4,  #24
        // Load the signed 32-bit offset of the selected path (relative to the
        // table base in x7).
84*c0909341SAndroid Build Coastguard Worker        ldrsw           x4,  [x7, x4, lsl #2]
        // Compute the first 16 pixels into v4 before dispatching; every path
        // below expects v4 to be ready on entry.
85*c0909341SAndroid Build Coastguard Worker        \type           v4,  v0,  v1,  v2,  v3
86*c0909341SAndroid Build Coastguard Worker        add             x7,  x7,  x4
87*c0909341SAndroid Build Coastguard Worker        br              x7
        // ---- 4 pixels per row: one vector register holds four rows ----
88*c0909341SAndroid Build Coastguard Worker40:
89*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
        // x7 = second row; both pointers then step by two rows.
90*c0909341SAndroid Build Coastguard Worker        add             x7,  x0,  x1
91*c0909341SAndroid Build Coastguard Worker        lsl             x1,  x1,  #1
92*c0909341SAndroid Build Coastguard Worker4:
        // The stores below do not touch the flags, so the compare can be
        // issued ahead of the b.eq that consumes it.
93*c0909341SAndroid Build Coastguard Worker        cmp             w5,  #4
94*c0909341SAndroid Build Coastguard Worker        st1             {v4.s}[0],  [x0], x1
95*c0909341SAndroid Build Coastguard Worker        st1             {v4.s}[1],  [x7], x1
96*c0909341SAndroid Build Coastguard Worker        st1             {v4.s}[2],  [x0], x1
97*c0909341SAndroid Build Coastguard Worker        st1             {v4.s}[3],  [x7], x1
        // Done if w5 == 4.
98*c0909341SAndroid Build Coastguard Worker        b.eq            0f
99*c0909341SAndroid Build Coastguard Worker        \type           v5,  v0,  v1,  v2,  v3
100*c0909341SAndroid Build Coastguard Worker        cmp             w5,  #8
101*c0909341SAndroid Build Coastguard Worker        st1             {v5.s}[0],  [x0], x1
102*c0909341SAndroid Build Coastguard Worker        st1             {v5.s}[1],  [x7], x1
103*c0909341SAndroid Build Coastguard Worker        st1             {v5.s}[2],  [x0], x1
104*c0909341SAndroid Build Coastguard Worker        st1             {v5.s}[3],  [x7], x1
        // Done if w5 == 8.
105*c0909341SAndroid Build Coastguard Worker        b.eq            0f
        // Otherwise write eight more rows (16 in total) and return.
106*c0909341SAndroid Build Coastguard Worker        \type           v4,  v0,  v1,  v2,  v3
107*c0909341SAndroid Build Coastguard Worker        st1             {v4.s}[0],  [x0], x1
108*c0909341SAndroid Build Coastguard Worker        st1             {v4.s}[1],  [x7], x1
109*c0909341SAndroid Build Coastguard Worker        \type           v5,  v0,  v1,  v2,  v3
110*c0909341SAndroid Build Coastguard Worker        st1             {v4.s}[2],  [x0], x1
111*c0909341SAndroid Build Coastguard Worker        st1             {v4.s}[3],  [x7], x1
112*c0909341SAndroid Build Coastguard Worker        st1             {v5.s}[0],  [x0], x1
113*c0909341SAndroid Build Coastguard Worker        st1             {v5.s}[1],  [x7], x1
114*c0909341SAndroid Build Coastguard Worker        st1             {v5.s}[2],  [x0], x1
115*c0909341SAndroid Build Coastguard Worker        st1             {v5.s}[3],  [x7], x1
116*c0909341SAndroid Build Coastguard Worker        ret
        // ---- 8 pixels per row: one vector register holds two rows ----
117*c0909341SAndroid Build Coastguard Worker80:
118*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
        // x7 = second row; both pointers then step by two rows.
119*c0909341SAndroid Build Coastguard Worker        add             x7,  x0,  x1
120*c0909341SAndroid Build Coastguard Worker        lsl             x1,  x1,  #1
        // Each iteration writes four rows (v4 then v5, low half to x0 and
        // high half to x7).
121*c0909341SAndroid Build Coastguard Worker8:
122*c0909341SAndroid Build Coastguard Worker        st1             {v4.8b},    [x0], x1
123*c0909341SAndroid Build Coastguard Worker        \type           v5,  v0,  v1,  v2,  v3
124*c0909341SAndroid Build Coastguard Worker        st1             {v4.d}[1],  [x7], x1
125*c0909341SAndroid Build Coastguard Worker        st1             {v5.8b},    [x0], x1
126*c0909341SAndroid Build Coastguard Worker        subs            w5,  w5,  #4
127*c0909341SAndroid Build Coastguard Worker        st1             {v5.d}[1],  [x7], x1
128*c0909341SAndroid Build Coastguard Worker        b.le            0f
        // Prepare v4 for the next iteration.
129*c0909341SAndroid Build Coastguard Worker        \type           v4,  v0,  v1,  v2,  v3
130*c0909341SAndroid Build Coastguard Worker        b               8b
        // ---- 16 pixels per row: one vector register per row ----
131*c0909341SAndroid Build Coastguard Worker160:
132*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
        // Each iteration writes four rows through x0 only; x1 stays the
        // single-row stride on this path.
133*c0909341SAndroid Build Coastguard Worker16:
134*c0909341SAndroid Build Coastguard Worker        \type           v5,  v0,  v1,  v2,  v3
135*c0909341SAndroid Build Coastguard Worker        st1             {v4.16b}, [x0], x1
136*c0909341SAndroid Build Coastguard Worker        \type           v6,  v0,  v1,  v2,  v3
137*c0909341SAndroid Build Coastguard Worker        st1             {v5.16b}, [x0], x1
138*c0909341SAndroid Build Coastguard Worker        \type           v7,  v0,  v1,  v2,  v3
139*c0909341SAndroid Build Coastguard Worker        st1             {v6.16b}, [x0], x1
140*c0909341SAndroid Build Coastguard Worker        subs            w5,  w5,  #4
141*c0909341SAndroid Build Coastguard Worker        st1             {v7.16b}, [x0], x1
142*c0909341SAndroid Build Coastguard Worker        b.le            0f
143*c0909341SAndroid Build Coastguard Worker        \type           v4,  v0,  v1,  v2,  v3
144*c0909341SAndroid Build Coastguard Worker        b               16b
        // ---- 32 pixels per row: two vector registers per row ----
145*c0909341SAndroid Build Coastguard Worker320:
146*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
        // x7 = second row; both pointers then step by two rows.
147*c0909341SAndroid Build Coastguard Worker        add             x7,  x0,  x1
148*c0909341SAndroid Build Coastguard Worker        lsl             x1,  x1,  #1
        // Each iteration writes two rows.
149*c0909341SAndroid Build Coastguard Worker32:
150*c0909341SAndroid Build Coastguard Worker        \type           v5,  v0,  v1,  v2,  v3
151*c0909341SAndroid Build Coastguard Worker        \type           v6,  v0,  v1,  v2,  v3
152*c0909341SAndroid Build Coastguard Worker        st1             {v4.16b,v5.16b}, [x0], x1
153*c0909341SAndroid Build Coastguard Worker        \type           v7,  v0,  v1,  v2,  v3
154*c0909341SAndroid Build Coastguard Worker        subs            w5,  w5,  #2
155*c0909341SAndroid Build Coastguard Worker        st1             {v6.16b,v7.16b}, [x7], x1
156*c0909341SAndroid Build Coastguard Worker        b.le            0f
157*c0909341SAndroid Build Coastguard Worker        \type           v4,  v0,  v1,  v2,  v3
158*c0909341SAndroid Build Coastguard Worker        b               32b
        // ---- 64 pixels per row: four vector registers per row ----
159*c0909341SAndroid Build Coastguard Worker640:
160*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
        // x7 = second row; both pointers then step by two rows.
161*c0909341SAndroid Build Coastguard Worker        add             x7,  x0,  x1
162*c0909341SAndroid Build Coastguard Worker        lsl             x1,  x1,  #1
        // Each iteration writes two rows: v4-v7 to x0, v16-v19 to x7.
163*c0909341SAndroid Build Coastguard Worker64:
164*c0909341SAndroid Build Coastguard Worker        \type           v5,  v0,  v1,  v2,  v3
165*c0909341SAndroid Build Coastguard Worker        \type           v6,  v0,  v1,  v2,  v3
166*c0909341SAndroid Build Coastguard Worker        \type           v7,  v0,  v1,  v2,  v3
167*c0909341SAndroid Build Coastguard Worker        \type           v16, v0,  v1,  v2,  v3
168*c0909341SAndroid Build Coastguard Worker        \type           v17, v0,  v1,  v2,  v3
169*c0909341SAndroid Build Coastguard Worker        st1             {v4.16b,v5.16b,v6.16b,v7.16b}, [x0], x1
170*c0909341SAndroid Build Coastguard Worker        \type           v18, v0,  v1,  v2,  v3
171*c0909341SAndroid Build Coastguard Worker        \type           v19, v0,  v1,  v2,  v3
172*c0909341SAndroid Build Coastguard Worker        subs            w5,  w5,  #2
173*c0909341SAndroid Build Coastguard Worker        st1             {v16.16b,v17.16b,v18.16b,v19.16b}, [x7], x1
174*c0909341SAndroid Build Coastguard Worker        b.le            0f
175*c0909341SAndroid Build Coastguard Worker        \type           v4, v0,  v1,  v2,  v3
176*c0909341SAndroid Build Coastguard Worker        b               64b
        // ---- 128 pixels per row: eight vector registers per row ----
177*c0909341SAndroid Build Coastguard Worker1280:
178*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
        // x7 = right half of the same row (x0 + 64 bytes); the stride in x1
        // is left unchanged on this path.
179*c0909341SAndroid Build Coastguard Worker        add             x7,  x0,  #64
        // Each iteration writes one row as two 64-byte halves.
180*c0909341SAndroid Build Coastguard Worker128:
181*c0909341SAndroid Build Coastguard Worker        \type           v5,  v0,  v1,  v2,  v3
182*c0909341SAndroid Build Coastguard Worker        \type           v6,  v0,  v1,  v2,  v3
183*c0909341SAndroid Build Coastguard Worker        \type           v7,  v0,  v1,  v2,  v3
184*c0909341SAndroid Build Coastguard Worker        \type           v16, v0,  v1,  v2,  v3
185*c0909341SAndroid Build Coastguard Worker        \type           v17, v0,  v1,  v2,  v3
186*c0909341SAndroid Build Coastguard Worker        st1             {v4.16b,v5.16b,v6.16b,v7.16b}, [x0], x1
187*c0909341SAndroid Build Coastguard Worker        \type           v18, v0,  v1,  v2,  v3
188*c0909341SAndroid Build Coastguard Worker        \type           v19, v0,  v1,  v2,  v3
189*c0909341SAndroid Build Coastguard Worker        subs            w5,  w5,  #1
190*c0909341SAndroid Build Coastguard Worker        st1             {v16.16b,v17.16b,v18.16b,v19.16b}, [x7], x1
191*c0909341SAndroid Build Coastguard Worker        b.le            0f
192*c0909341SAndroid Build Coastguard Worker        \type           v4, v0,  v1,  v2,  v3
193*c0909341SAndroid Build Coastguard Worker        b               128b
        // Common exit for all looping paths.
194*c0909341SAndroid Build Coastguard Worker0:
195*c0909341SAndroid Build Coastguard Worker        ret
196*c0909341SAndroid Build Coastguard Workerendfunc
197*c0909341SAndroid Build Coastguard Worker
        // Offsets of the entry labels relative to the table itself, widest
        // path first so that index clz(w4) - 24 selects the matching entry.
198*c0909341SAndroid Build Coastguard Workerjumptable \type\()_tbl
199*c0909341SAndroid Build Coastguard Worker        .word 1280b - \type\()_tbl
200*c0909341SAndroid Build Coastguard Worker        .word 640b  - \type\()_tbl
201*c0909341SAndroid Build Coastguard Worker        .word 320b  - \type\()_tbl
202*c0909341SAndroid Build Coastguard Worker        .word 160b  - \type\()_tbl
203*c0909341SAndroid Build Coastguard Worker        .word 80b   - \type\()_tbl
204*c0909341SAndroid Build Coastguard Worker        .word 40b   - \type\()_tbl
205*c0909341SAndroid Build Coastguard Workerendjumptable
206*c0909341SAndroid Build Coastguard Worker.endm
207*c0909341SAndroid Build Coastguard Worker
// Instantiate the bidir_fn template once per blend macro. Each expansion
// emits a function named <type>_8bpc_neon and a jump table named <type>_tbl
// (the final symbol spelling depends on the function/jumptable helper macros,
// which are defined outside this chunk).
208*c0909341SAndroid Build Coastguard Workerbidir_fn avg
209*c0909341SAndroid Build Coastguard Workerbidir_fn w_avg
210*c0909341SAndroid Build Coastguard Workerbidir_fn mask
211*c0909341SAndroid Build Coastguard Worker
212*c0909341SAndroid Build Coastguard Worker
213*c0909341SAndroid Build Coastguard Worker.macro w_mask_fn type
214*c0909341SAndroid Build Coastguard Workerfunction w_mask_\type\()_8bpc_neon, export=1
215*c0909341SAndroid Build Coastguard Worker        clz             w8,  w4
216*c0909341SAndroid Build Coastguard Worker        movrel          x9,  w_mask_\type\()_tbl
217*c0909341SAndroid Build Coastguard Worker        sub             w8,  w8,  #24
218*c0909341SAndroid Build Coastguard Worker        ldrsw           x8,  [x9,  x8,  lsl #2]
219*c0909341SAndroid Build Coastguard Worker        add             x9,  x9,  x8
220*c0909341SAndroid Build Coastguard Worker        mov             w10, #6903
221*c0909341SAndroid Build Coastguard Worker        dup             v0.8h,   w10
222*c0909341SAndroid Build Coastguard Worker.if \type == 444
223*c0909341SAndroid Build Coastguard Worker        movi            v1.16b,  #64
224*c0909341SAndroid Build Coastguard Worker.elseif \type == 422
225*c0909341SAndroid Build Coastguard Worker        dup             v2.8b,   w7
226*c0909341SAndroid Build Coastguard Worker        movi            v3.8b,   #129
227*c0909341SAndroid Build Coastguard Worker        sub             v3.8b,   v3.8b,   v2.8b
228*c0909341SAndroid Build Coastguard Worker.elseif \type == 420
229*c0909341SAndroid Build Coastguard Worker        dup             v2.8h,   w7
230*c0909341SAndroid Build Coastguard Worker        movi            v3.8h,   #1, lsl #8
231*c0909341SAndroid Build Coastguard Worker        sub             v3.8h,   v3.8h,   v2.8h
232*c0909341SAndroid Build Coastguard Worker.endif
233*c0909341SAndroid Build Coastguard Worker        add             x12,  x0,  x1
234*c0909341SAndroid Build Coastguard Worker        lsl             x1,   x1,  #1
235*c0909341SAndroid Build Coastguard Worker        br              x9
236*c0909341SAndroid Build Coastguard Worker40:
237*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
238*c0909341SAndroid Build Coastguard Worker4:
239*c0909341SAndroid Build Coastguard Worker        ld1             {v4.8h,   v5.8h},   [x2],  #32  // tmp1 (four rows at once)
240*c0909341SAndroid Build Coastguard Worker        ld1             {v6.8h,   v7.8h},   [x3],  #32  // tmp2 (four rows at once)
241*c0909341SAndroid Build Coastguard Worker        subs            w5,  w5,  #4
242*c0909341SAndroid Build Coastguard Worker        sub             v16.8h,  v6.8h,   v4.8h
243*c0909341SAndroid Build Coastguard Worker        sub             v17.8h,  v7.8h,   v5.8h
244*c0909341SAndroid Build Coastguard Worker        sabd            v18.8h,  v4.8h,   v6.8h
245*c0909341SAndroid Build Coastguard Worker        sabd            v19.8h,  v5.8h,   v7.8h
246*c0909341SAndroid Build Coastguard Worker        uqsub           v18.8h,  v0.8h,   v18.8h
247*c0909341SAndroid Build Coastguard Worker        uqsub           v19.8h,  v0.8h,   v19.8h
248*c0909341SAndroid Build Coastguard Worker        ushr            v18.8h,  v18.8h,  #8
249*c0909341SAndroid Build Coastguard Worker        ushr            v19.8h,  v19.8h,  #8
250*c0909341SAndroid Build Coastguard Worker        shl             v20.8h,  v18.8h,  #9
251*c0909341SAndroid Build Coastguard Worker        shl             v21.8h,  v19.8h,  #9
252*c0909341SAndroid Build Coastguard Worker        sqdmulh         v20.8h,  v20.8h,  v16.8h
253*c0909341SAndroid Build Coastguard Worker        sqdmulh         v21.8h,  v21.8h,  v17.8h
254*c0909341SAndroid Build Coastguard Worker        add             v20.8h,  v20.8h,  v4.8h
255*c0909341SAndroid Build Coastguard Worker        add             v21.8h,  v21.8h,  v5.8h
256*c0909341SAndroid Build Coastguard Worker        sqrshrun        v22.8b,  v20.8h,  #4
257*c0909341SAndroid Build Coastguard Worker        sqrshrun        v23.8b,  v21.8h,  #4
258*c0909341SAndroid Build Coastguard Worker.if \type == 444
259*c0909341SAndroid Build Coastguard Worker        uzp1            v18.16b,  v18.16b, v19.16b      // Same as xtn, xtn2
260*c0909341SAndroid Build Coastguard Worker        sub             v18.16b,  v1.16b,  v18.16b
261*c0909341SAndroid Build Coastguard Worker        st1             {v18.16b}, [x6],  #16
262*c0909341SAndroid Build Coastguard Worker.elseif \type == 422
263*c0909341SAndroid Build Coastguard Worker        addp            v18.8h,   v18.8h,  v19.8h
264*c0909341SAndroid Build Coastguard Worker        xtn             v18.8b,   v18.8h
265*c0909341SAndroid Build Coastguard Worker        uhsub           v18.8b,   v3.8b,   v18.8b
266*c0909341SAndroid Build Coastguard Worker        st1             {v18.8b},  [x6],  #8
267*c0909341SAndroid Build Coastguard Worker.elseif \type == 420
268*c0909341SAndroid Build Coastguard Worker        trn1            v24.2d,   v18.2d,  v19.2d
269*c0909341SAndroid Build Coastguard Worker        trn2            v25.2d,   v18.2d,  v19.2d
270*c0909341SAndroid Build Coastguard Worker        add             v24.8h,   v24.8h,  v25.8h
271*c0909341SAndroid Build Coastguard Worker        addp            v18.8h,   v24.8h,  v24.8h
272*c0909341SAndroid Build Coastguard Worker        sub             v18.4h,   v3.4h,   v18.4h
273*c0909341SAndroid Build Coastguard Worker        rshrn           v18.8b,   v18.8h,  #2
274*c0909341SAndroid Build Coastguard Worker        str             s18,         [x6],  #4
275*c0909341SAndroid Build Coastguard Worker.endif
276*c0909341SAndroid Build Coastguard Worker        st1             {v22.s}[0],  [x0],  x1
277*c0909341SAndroid Build Coastguard Worker        st1             {v22.s}[1],  [x12], x1
278*c0909341SAndroid Build Coastguard Worker        st1             {v23.s}[0],  [x0],  x1
279*c0909341SAndroid Build Coastguard Worker        st1             {v23.s}[1],  [x12], x1
280*c0909341SAndroid Build Coastguard Worker        b.gt            4b
281*c0909341SAndroid Build Coastguard Worker        ret
282*c0909341SAndroid Build Coastguard Worker80:
283*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
284*c0909341SAndroid Build Coastguard Worker8:
285*c0909341SAndroid Build Coastguard Worker        ld1             {v4.8h,   v5.8h},   [x2],  #32
286*c0909341SAndroid Build Coastguard Worker        ld1             {v6.8h,   v7.8h},   [x3],  #32
287*c0909341SAndroid Build Coastguard Worker        subs            w5,  w5,  #2
288*c0909341SAndroid Build Coastguard Worker        sub             v16.8h,  v6.8h,   v4.8h
289*c0909341SAndroid Build Coastguard Worker        sub             v17.8h,  v7.8h,   v5.8h
290*c0909341SAndroid Build Coastguard Worker        sabd            v18.8h,  v4.8h,   v6.8h
291*c0909341SAndroid Build Coastguard Worker        sabd            v19.8h,  v5.8h,   v7.8h
292*c0909341SAndroid Build Coastguard Worker        uqsub           v18.8h,  v0.8h,   v18.8h
293*c0909341SAndroid Build Coastguard Worker        uqsub           v19.8h,  v0.8h,   v19.8h
294*c0909341SAndroid Build Coastguard Worker        ushr            v18.8h,  v18.8h,  #8
295*c0909341SAndroid Build Coastguard Worker        ushr            v19.8h,  v19.8h,  #8
296*c0909341SAndroid Build Coastguard Worker        shl             v20.8h,  v18.8h,  #9
297*c0909341SAndroid Build Coastguard Worker        shl             v21.8h,  v19.8h,  #9
298*c0909341SAndroid Build Coastguard Worker        sqdmulh         v20.8h,  v20.8h,  v16.8h
299*c0909341SAndroid Build Coastguard Worker        sqdmulh         v21.8h,  v21.8h,  v17.8h
300*c0909341SAndroid Build Coastguard Worker        add             v20.8h,  v20.8h,  v4.8h
301*c0909341SAndroid Build Coastguard Worker        add             v21.8h,  v21.8h,  v5.8h
302*c0909341SAndroid Build Coastguard Worker        sqrshrun        v22.8b,  v20.8h,  #4
303*c0909341SAndroid Build Coastguard Worker        sqrshrun        v23.8b,  v21.8h,  #4
304*c0909341SAndroid Build Coastguard Worker.if \type == 444
305*c0909341SAndroid Build Coastguard Worker        uzp1            v18.16b, v18.16b, v19.16b       // Same as xtn, xtn2
306*c0909341SAndroid Build Coastguard Worker        sub             v18.16b, v1.16b,  v18.16b
307*c0909341SAndroid Build Coastguard Worker        st1             {v18.16b}, [x6],  #16
308*c0909341SAndroid Build Coastguard Worker.elseif \type == 422
309*c0909341SAndroid Build Coastguard Worker        addp            v18.8h,  v18.8h,  v19.8h
310*c0909341SAndroid Build Coastguard Worker        xtn             v18.8b,  v18.8h
311*c0909341SAndroid Build Coastguard Worker        uhsub           v18.8b,  v3.8b,   v18.8b
312*c0909341SAndroid Build Coastguard Worker        st1             {v18.8b},  [x6],  #8
313*c0909341SAndroid Build Coastguard Worker.elseif \type == 420
314*c0909341SAndroid Build Coastguard Worker        add             v18.8h,  v18.8h,  v19.8h
315*c0909341SAndroid Build Coastguard Worker        addp            v18.8h,  v18.8h,  v18.8h
316*c0909341SAndroid Build Coastguard Worker        sub             v18.4h,  v3.4h,   v18.4h
317*c0909341SAndroid Build Coastguard Worker        rshrn           v18.8b,  v18.8h,  #2
318*c0909341SAndroid Build Coastguard Worker        str             s18,       [x6],  #4
319*c0909341SAndroid Build Coastguard Worker.endif
320*c0909341SAndroid Build Coastguard Worker        st1             {v22.8b},  [x0],  x1
321*c0909341SAndroid Build Coastguard Worker        st1             {v23.8b},  [x12], x1
322*c0909341SAndroid Build Coastguard Worker        b.gt            8b
323*c0909341SAndroid Build Coastguard Worker        ret
324*c0909341SAndroid Build Coastguard Worker1280:
325*c0909341SAndroid Build Coastguard Worker640:
326*c0909341SAndroid Build Coastguard Worker320:
327*c0909341SAndroid Build Coastguard Worker160:
328*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
329*c0909341SAndroid Build Coastguard Worker        mov             w11, w4
330*c0909341SAndroid Build Coastguard Worker        sub             x1,  x1,  w4,  uxtw
331*c0909341SAndroid Build Coastguard Worker.if \type == 444
332*c0909341SAndroid Build Coastguard Worker        add             x10, x6,  w4,  uxtw
333*c0909341SAndroid Build Coastguard Worker.elseif \type == 422
334*c0909341SAndroid Build Coastguard Worker        add             x10, x6,  x11, lsr #1
335*c0909341SAndroid Build Coastguard Worker.endif
336*c0909341SAndroid Build Coastguard Worker        add             x9,  x3,  w4,  uxtw #1
337*c0909341SAndroid Build Coastguard Worker        add             x7,  x2,  w4,  uxtw #1
338*c0909341SAndroid Build Coastguard Worker161:
339*c0909341SAndroid Build Coastguard Worker        mov             w8,  w4
340*c0909341SAndroid Build Coastguard Worker16:
341*c0909341SAndroid Build Coastguard Worker        ld1             {v4.8h,   v5.8h},   [x2],  #32
342*c0909341SAndroid Build Coastguard Worker        ld1             {v6.8h,   v7.8h},   [x3],  #32
343*c0909341SAndroid Build Coastguard Worker        ld1             {v16.8h,  v17.8h},  [x7],  #32
344*c0909341SAndroid Build Coastguard Worker        ld1             {v18.8h,  v19.8h},  [x9],  #32
345*c0909341SAndroid Build Coastguard Worker        subs            w8,  w8,  #16
346*c0909341SAndroid Build Coastguard Worker        sub             v6.8h,   v6.8h,   v4.8h
347*c0909341SAndroid Build Coastguard Worker        sub             v7.8h,   v7.8h,   v5.8h
348*c0909341SAndroid Build Coastguard Worker        sub             v18.8h,  v18.8h,  v16.8h
349*c0909341SAndroid Build Coastguard Worker        sub             v19.8h,  v19.8h,  v17.8h
350*c0909341SAndroid Build Coastguard Worker        abs             v20.8h,  v6.8h
351*c0909341SAndroid Build Coastguard Worker        abs             v21.8h,  v7.8h
352*c0909341SAndroid Build Coastguard Worker        abs             v22.8h,  v18.8h
353*c0909341SAndroid Build Coastguard Worker        abs             v23.8h,  v19.8h
354*c0909341SAndroid Build Coastguard Worker        uqsub           v20.8h,  v0.8h,   v20.8h
355*c0909341SAndroid Build Coastguard Worker        uqsub           v21.8h,  v0.8h,   v21.8h
356*c0909341SAndroid Build Coastguard Worker        uqsub           v22.8h,  v0.8h,   v22.8h
357*c0909341SAndroid Build Coastguard Worker        uqsub           v23.8h,  v0.8h,   v23.8h
358*c0909341SAndroid Build Coastguard Worker        ushr            v20.8h,  v20.8h,  #8
359*c0909341SAndroid Build Coastguard Worker        ushr            v21.8h,  v21.8h,  #8
360*c0909341SAndroid Build Coastguard Worker        ushr            v22.8h,  v22.8h,  #8
361*c0909341SAndroid Build Coastguard Worker        ushr            v23.8h,  v23.8h,  #8
362*c0909341SAndroid Build Coastguard Worker        shl             v24.8h,  v20.8h,  #9
363*c0909341SAndroid Build Coastguard Worker        shl             v25.8h,  v21.8h,  #9
364*c0909341SAndroid Build Coastguard Worker        shl             v26.8h,  v22.8h,  #9
365*c0909341SAndroid Build Coastguard Worker        shl             v27.8h,  v23.8h,  #9
366*c0909341SAndroid Build Coastguard Worker        sqdmulh         v24.8h,  v24.8h,  v6.8h
367*c0909341SAndroid Build Coastguard Worker        sqdmulh         v25.8h,  v25.8h,  v7.8h
368*c0909341SAndroid Build Coastguard Worker        sqdmulh         v26.8h,  v26.8h,  v18.8h
369*c0909341SAndroid Build Coastguard Worker        sqdmulh         v27.8h,  v27.8h,  v19.8h
370*c0909341SAndroid Build Coastguard Worker        add             v24.8h,  v24.8h,  v4.8h
371*c0909341SAndroid Build Coastguard Worker        add             v25.8h,  v25.8h,  v5.8h
372*c0909341SAndroid Build Coastguard Worker        add             v26.8h,  v26.8h,  v16.8h
373*c0909341SAndroid Build Coastguard Worker        add             v27.8h,  v27.8h,  v17.8h
374*c0909341SAndroid Build Coastguard Worker        sqrshrun        v24.8b,  v24.8h,  #4
375*c0909341SAndroid Build Coastguard Worker        sqrshrun        v25.8b,  v25.8h,  #4
376*c0909341SAndroid Build Coastguard Worker        sqrshrun        v26.8b,  v26.8h,  #4
377*c0909341SAndroid Build Coastguard Worker        sqrshrun        v27.8b,  v27.8h,  #4
378*c0909341SAndroid Build Coastguard Worker.if \type == 444
379*c0909341SAndroid Build Coastguard Worker        uzp1            v20.16b, v20.16b, v21.16b       // Same as xtn, xtn2
380*c0909341SAndroid Build Coastguard Worker        uzp1            v21.16b, v22.16b, v23.16b       // Ditto
381*c0909341SAndroid Build Coastguard Worker        sub             v20.16b, v1.16b,  v20.16b
382*c0909341SAndroid Build Coastguard Worker        sub             v21.16b, v1.16b,  v21.16b
383*c0909341SAndroid Build Coastguard Worker        st1             {v20.16b}, [x6],  #16
384*c0909341SAndroid Build Coastguard Worker        st1             {v21.16b}, [x10], #16
385*c0909341SAndroid Build Coastguard Worker.elseif \type == 422
386*c0909341SAndroid Build Coastguard Worker        addp            v20.8h,  v20.8h,  v21.8h
387*c0909341SAndroid Build Coastguard Worker        addp            v21.8h,  v22.8h,  v23.8h
388*c0909341SAndroid Build Coastguard Worker        xtn             v20.8b,  v20.8h
389*c0909341SAndroid Build Coastguard Worker        xtn             v21.8b,  v21.8h
390*c0909341SAndroid Build Coastguard Worker        uhsub           v20.8b,  v3.8b,   v20.8b
391*c0909341SAndroid Build Coastguard Worker        uhsub           v21.8b,  v3.8b,   v21.8b
392*c0909341SAndroid Build Coastguard Worker        st1             {v20.8b},  [x6],  #8
393*c0909341SAndroid Build Coastguard Worker        st1             {v21.8b},  [x10], #8
394*c0909341SAndroid Build Coastguard Worker.elseif \type == 420
395*c0909341SAndroid Build Coastguard Worker        add             v20.8h,  v20.8h,  v22.8h
396*c0909341SAndroid Build Coastguard Worker        add             v21.8h,  v21.8h,  v23.8h
397*c0909341SAndroid Build Coastguard Worker        addp            v20.8h,  v20.8h,  v21.8h
398*c0909341SAndroid Build Coastguard Worker        sub             v20.8h,  v3.8h,   v20.8h
399*c0909341SAndroid Build Coastguard Worker        rshrn           v20.8b,  v20.8h,  #2
400*c0909341SAndroid Build Coastguard Worker        st1             {v20.8b},  [x6],  #8
401*c0909341SAndroid Build Coastguard Worker.endif
402*c0909341SAndroid Build Coastguard Worker        st1             {v24.8b,  v25.8b},  [x0],  #16
403*c0909341SAndroid Build Coastguard Worker        st1             {v26.8b,  v27.8b},  [x12], #16
404*c0909341SAndroid Build Coastguard Worker        b.gt            16b
405*c0909341SAndroid Build Coastguard Worker        subs            w5,  w5,  #2
406*c0909341SAndroid Build Coastguard Worker        add             x2,  x2,  w4,  uxtw #1
407*c0909341SAndroid Build Coastguard Worker        add             x3,  x3,  w4,  uxtw #1
408*c0909341SAndroid Build Coastguard Worker        add             x7,  x7,  w4,  uxtw #1
409*c0909341SAndroid Build Coastguard Worker        add             x9,  x9,  w4,  uxtw #1
410*c0909341SAndroid Build Coastguard Worker.if \type == 444
411*c0909341SAndroid Build Coastguard Worker        add             x6,  x6,  w4,  uxtw
412*c0909341SAndroid Build Coastguard Worker        add             x10, x10, w4,  uxtw
413*c0909341SAndroid Build Coastguard Worker.elseif \type == 422
414*c0909341SAndroid Build Coastguard Worker        add             x6,  x6,  x11, lsr #1
415*c0909341SAndroid Build Coastguard Worker        add             x10, x10, x11, lsr #1
416*c0909341SAndroid Build Coastguard Worker.endif
417*c0909341SAndroid Build Coastguard Worker        add             x0,  x0,  x1
418*c0909341SAndroid Build Coastguard Worker        add             x12, x12, x1
419*c0909341SAndroid Build Coastguard Worker        b.gt            161b
420*c0909341SAndroid Build Coastguard Worker        ret
421*c0909341SAndroid Build Coastguard Workerendfunc
422*c0909341SAndroid Build Coastguard Worker
423*c0909341SAndroid Build Coastguard Workerjumptable w_mask_\type\()_tbl
424*c0909341SAndroid Build Coastguard Worker        .word 1280b - w_mask_\type\()_tbl
425*c0909341SAndroid Build Coastguard Worker        .word 640b  - w_mask_\type\()_tbl
426*c0909341SAndroid Build Coastguard Worker        .word 320b  - w_mask_\type\()_tbl
427*c0909341SAndroid Build Coastguard Worker        .word 160b  - w_mask_\type\()_tbl
428*c0909341SAndroid Build Coastguard Worker        .word 80b   - w_mask_\type\()_tbl
429*c0909341SAndroid Build Coastguard Worker        .word 40b   - w_mask_\type\()_tbl
430*c0909341SAndroid Build Coastguard Workerendjumptable
431*c0909341SAndroid Build Coastguard Worker.endm
432*c0909341SAndroid Build Coastguard Worker
// Instantiate the w_mask_fn macro once per \type value (444, 422, 420); the
// macro body picks its mask handling with .if/.elseif on \type, and each
// expansion also emits its own w_mask_\type\()_tbl jump table.
433*c0909341SAndroid Build Coastguard Workerw_mask_fn 444
434*c0909341SAndroid Build Coastguard Workerw_mask_fn 422
435*c0909341SAndroid Build Coastguard Workerw_mask_fn 420
436*c0909341SAndroid Build Coastguard Worker
437*c0909341SAndroid Build Coastguard Worker
// blend_8bpc_neon: per-pixel weighted blend of the bytes read from x2 into the
// rows at x0, using the weights m read from x5:
//   [x0] = ([x2] * m + [x0] * (64 - m) + 32) >> 6
// Register use, as seen in the code below:
//   x0  destination row pointer (read and written); x8 = x0 + x1 is the row below
//   x1  byte stride between rows of x0; doubled on entry (two rows per pass)
//   x2  second source, read sequentially (rows packed back to back)
//   w3  selects the code path via index clz(w3) - 26 into blend_tbl
//       (presumably the block width, 4..32 -- TODO confirm against the caller)
//   w4  row counter, decremented by 2 per pass; each loop repeats while > 0
//   x5  weights m, read sequentially with the same layout as x2
// Scratch: x3, x6, x8, v0-v7, v16-v31 (v8-v15 are never written).
438*c0909341SAndroid Build Coastguard Workerfunction blend_8bpc_neon, export=1
439*c0909341SAndroid Build Coastguard Worker        movrel          x6,  blend_tbl
440*c0909341SAndroid Build Coastguard Worker        clz             w3,  w3
441*c0909341SAndroid Build Coastguard Worker        sub             w3,  w3,  #26  // table index: 0 for w3 in 32..63, 3 for w3 in 4..7
442*c0909341SAndroid Build Coastguard Worker        ldrsw           x3,  [x6,  x3,  lsl #2]  // signed 32-bit offset stored in the table
443*c0909341SAndroid Build Coastguard Worker        add             x6,  x6,  x3  // table base + offset = address of the selected path
444*c0909341SAndroid Build Coastguard Worker        movi            v4.16b,  #64  // constant 64 in every byte, used for 64 - m
445*c0909341SAndroid Build Coastguard Worker        add             x8,  x0,  x1  // x8 = second destination row
446*c0909341SAndroid Build Coastguard Worker        lsl             x1,  x1,  #1  // step two rows at a time
447*c0909341SAndroid Build Coastguard Worker        br              x6
// 4 bytes per row: one 8-byte vector holds two rows.
448*c0909341SAndroid Build Coastguard Worker40:
449*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
450*c0909341SAndroid Build Coastguard Worker4:
451*c0909341SAndroid Build Coastguard Worker        ld1             {v2.8b},  [x5],  #8  // m for two rows
452*c0909341SAndroid Build Coastguard Worker        ldr             d1,       [x2],  #8  // x2 data for two rows
453*c0909341SAndroid Build Coastguard Worker        ldr             s0,       [x0]  // destination row 0 into the low 4 bytes
454*c0909341SAndroid Build Coastguard Worker        subs            w4,  w4,  #2  // flags are consumed by the b.gt at the loop end
455*c0909341SAndroid Build Coastguard Worker        ld1             {v0.s}[1],   [x8]  // destination row 1 into bytes 4..7
456*c0909341SAndroid Build Coastguard Worker        sub             v3.8b,   v4.8b,   v2.8b  // 64 - m
457*c0909341SAndroid Build Coastguard Worker        umull           v5.8h,   v1.8b,   v2.8b  // x2 data * m
458*c0909341SAndroid Build Coastguard Worker        umlal           v5.8h,   v0.8b,   v3.8b  // + destination * (64 - m)
459*c0909341SAndroid Build Coastguard Worker        rshrn           v6.8b,   v5.8h,   #6  // rounding shift right by 6, narrow to bytes
460*c0909341SAndroid Build Coastguard Worker        st1             {v6.s}[0],   [x0],  x1
461*c0909341SAndroid Build Coastguard Worker        st1             {v6.s}[1],   [x8],  x1
462*c0909341SAndroid Build Coastguard Worker        b.gt            4b
463*c0909341SAndroid Build Coastguard Worker        ret
// 8 bytes per row: one 16-byte vector holds two rows (low half row 0, high half row 1).
464*c0909341SAndroid Build Coastguard Worker80:
465*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
466*c0909341SAndroid Build Coastguard Worker8:
467*c0909341SAndroid Build Coastguard Worker        ld1             {v2.16b},  [x5],  #16
468*c0909341SAndroid Build Coastguard Worker        ld1             {v1.16b},  [x2],  #16
469*c0909341SAndroid Build Coastguard Worker        ldr             d0,        [x0]
470*c0909341SAndroid Build Coastguard Worker        ld1             {v0.d}[1], [x8]
471*c0909341SAndroid Build Coastguard Worker        sub             v3.16b,  v4.16b,  v2.16b  // 64 - m
472*c0909341SAndroid Build Coastguard Worker        subs            w4,  w4,  #2
473*c0909341SAndroid Build Coastguard Worker        umull           v5.8h,   v1.8b,   v2.8b  // row 0
474*c0909341SAndroid Build Coastguard Worker        umlal           v5.8h,   v0.8b,   v3.8b
475*c0909341SAndroid Build Coastguard Worker        umull2          v6.8h,   v1.16b,  v2.16b  // row 1
476*c0909341SAndroid Build Coastguard Worker        umlal2          v6.8h,   v0.16b,  v3.16b
477*c0909341SAndroid Build Coastguard Worker        rshrn           v7.8b,   v5.8h,   #6
478*c0909341SAndroid Build Coastguard Worker        rshrn           v16.8b,  v6.8h,   #6
479*c0909341SAndroid Build Coastguard Worker        st1             {v7.8b},   [x0],  x1
480*c0909341SAndroid Build Coastguard Worker        st1             {v16.8b},  [x8],  x1
481*c0909341SAndroid Build Coastguard Worker        b.gt            8b
482*c0909341SAndroid Build Coastguard Worker        ret
// 16 bytes per row: v1/v5/v0 carry row 0 (m, x2 data, destination), v2/v6/v3 carry row 1.
483*c0909341SAndroid Build Coastguard Worker160:
484*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
485*c0909341SAndroid Build Coastguard Worker16:
486*c0909341SAndroid Build Coastguard Worker        ld1             {v1.16b,  v2.16b},  [x5],  #32
487*c0909341SAndroid Build Coastguard Worker        ld1             {v5.16b,  v6.16b},  [x2],  #32
488*c0909341SAndroid Build Coastguard Worker        ld1             {v0.16b},  [x0]
489*c0909341SAndroid Build Coastguard Worker        subs            w4,  w4,  #2
490*c0909341SAndroid Build Coastguard Worker        sub             v7.16b,  v4.16b,  v1.16b  // 64 - m, row 0
491*c0909341SAndroid Build Coastguard Worker        sub             v20.16b, v4.16b,  v2.16b  // 64 - m, row 1
492*c0909341SAndroid Build Coastguard Worker        ld1             {v3.16b},  [x8]
493*c0909341SAndroid Build Coastguard Worker        umull           v16.8h,  v5.8b,   v1.8b
494*c0909341SAndroid Build Coastguard Worker        umlal           v16.8h,  v0.8b,   v7.8b
495*c0909341SAndroid Build Coastguard Worker        umull2          v17.8h,  v5.16b,  v1.16b
496*c0909341SAndroid Build Coastguard Worker        umlal2          v17.8h,  v0.16b,  v7.16b
497*c0909341SAndroid Build Coastguard Worker        umull           v21.8h,  v6.8b,   v2.8b
498*c0909341SAndroid Build Coastguard Worker        umlal           v21.8h,  v3.8b,   v20.8b
499*c0909341SAndroid Build Coastguard Worker        umull2          v22.8h,  v6.16b,  v2.16b
500*c0909341SAndroid Build Coastguard Worker        umlal2          v22.8h,  v3.16b,  v20.16b
501*c0909341SAndroid Build Coastguard Worker        rshrn           v18.8b,  v16.8h,  #6
502*c0909341SAndroid Build Coastguard Worker        rshrn2          v18.16b, v17.8h,  #6
503*c0909341SAndroid Build Coastguard Worker        rshrn           v19.8b,  v21.8h,  #6
504*c0909341SAndroid Build Coastguard Worker        rshrn2          v19.16b, v22.8h,  #6
505*c0909341SAndroid Build Coastguard Worker        st1             {v18.16b}, [x0],  x1
506*c0909341SAndroid Build Coastguard Worker        st1             {v19.16b}, [x8],  x1
507*c0909341SAndroid Build Coastguard Worker        b.gt            16b
508*c0909341SAndroid Build Coastguard Worker        ret
// 32 bytes per row: v0-v1 / v2-v3 are m for rows 0 / 1, v16-v17 / v18-v19 the
// x2 data, v20-v21 / v22-v23 the destination rows.
509*c0909341SAndroid Build Coastguard Worker320:
510*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
511*c0909341SAndroid Build Coastguard Worker32:
512*c0909341SAndroid Build Coastguard Worker        ld1             {v0.16b,  v1.16b,  v2.16b,  v3.16b},  [x5],  #64
513*c0909341SAndroid Build Coastguard Worker        ld1             {v16.16b, v17.16b, v18.16b, v19.16b}, [x2],  #64
514*c0909341SAndroid Build Coastguard Worker        ld1             {v20.16b, v21.16b}, [x0]
515*c0909341SAndroid Build Coastguard Worker        subs            w4,  w4,  #2
516*c0909341SAndroid Build Coastguard Worker        ld1             {v22.16b, v23.16b}, [x8]
517*c0909341SAndroid Build Coastguard Worker        sub             v5.16b,  v4.16b,  v0.16b  // 64 - m for each of the four mask vectors
518*c0909341SAndroid Build Coastguard Worker        sub             v6.16b,  v4.16b,  v1.16b
519*c0909341SAndroid Build Coastguard Worker        sub             v30.16b, v4.16b,  v2.16b
520*c0909341SAndroid Build Coastguard Worker        sub             v31.16b, v4.16b,  v3.16b
521*c0909341SAndroid Build Coastguard Worker        umull           v24.8h,  v16.8b,  v0.8b
522*c0909341SAndroid Build Coastguard Worker        umlal           v24.8h,  v20.8b,  v5.8b
523*c0909341SAndroid Build Coastguard Worker        umull2          v26.8h,  v16.16b, v0.16b
524*c0909341SAndroid Build Coastguard Worker        umlal2          v26.8h,  v20.16b, v5.16b
525*c0909341SAndroid Build Coastguard Worker        umull           v28.8h,  v17.8b,  v1.8b
526*c0909341SAndroid Build Coastguard Worker        umlal           v28.8h,  v21.8b,  v6.8b
527*c0909341SAndroid Build Coastguard Worker        umull2          v7.8h,   v17.16b, v1.16b
528*c0909341SAndroid Build Coastguard Worker        umlal2          v7.8h,   v21.16b, v6.16b
529*c0909341SAndroid Build Coastguard Worker        umull           v27.8h,  v18.8b,  v2.8b
530*c0909341SAndroid Build Coastguard Worker        umlal           v27.8h,  v22.8b,  v30.8b
531*c0909341SAndroid Build Coastguard Worker        umull2          v1.8h,   v18.16b, v2.16b  // v1 (mask) is no longer read, reused as accumulator
532*c0909341SAndroid Build Coastguard Worker        umlal2          v1.8h,   v22.16b, v30.16b
533*c0909341SAndroid Build Coastguard Worker        umull           v29.8h,  v19.8b,  v3.8b
534*c0909341SAndroid Build Coastguard Worker        umlal           v29.8h,  v23.8b,  v31.8b
535*c0909341SAndroid Build Coastguard Worker        umull2          v21.8h,  v19.16b, v3.16b  // v21 (destination) is no longer read, reused as accumulator
536*c0909341SAndroid Build Coastguard Worker        umlal2          v21.8h,  v23.16b, v31.16b
537*c0909341SAndroid Build Coastguard Worker        rshrn           v24.8b,  v24.8h,  #6
538*c0909341SAndroid Build Coastguard Worker        rshrn2          v24.16b, v26.8h,  #6
539*c0909341SAndroid Build Coastguard Worker        rshrn           v25.8b,  v28.8h,  #6
540*c0909341SAndroid Build Coastguard Worker        rshrn2          v25.16b, v7.8h,   #6
541*c0909341SAndroid Build Coastguard Worker        rshrn           v27.8b,  v27.8h,  #6
542*c0909341SAndroid Build Coastguard Worker        rshrn2          v27.16b, v1.8h,   #6
543*c0909341SAndroid Build Coastguard Worker        rshrn           v28.8b,  v29.8h,  #6
544*c0909341SAndroid Build Coastguard Worker        rshrn2          v28.16b, v21.8h,  #6
545*c0909341SAndroid Build Coastguard Worker        st1             {v24.16b, v25.16b}, [x0],  x1
546*c0909341SAndroid Build Coastguard Worker        st1             {v27.16b, v28.16b}, [x8],  x1
547*c0909341SAndroid Build Coastguard Worker        b.gt            32b
548*c0909341SAndroid Build Coastguard Worker        ret
549*c0909341SAndroid Build Coastguard Workerendfunc
550*c0909341SAndroid Build Coastguard Worker
// Dispatch table for blend_8bpc_neon: each entry is the 32-bit offset of a
// path label relative to the table itself. The function indexes it with
// clz(w3) - 26, so entry 0 (label 320) is taken for w3 in 32..63 and entry 3
// (label 40) for w3 in 4..7.
551*c0909341SAndroid Build Coastguard Workerjumptable blend_tbl
552*c0909341SAndroid Build Coastguard Worker        .word 320b - blend_tbl
553*c0909341SAndroid Build Coastguard Worker        .word 160b - blend_tbl
554*c0909341SAndroid Build Coastguard Worker        .word 80b  - blend_tbl
555*c0909341SAndroid Build Coastguard Worker        .word 40b  - blend_tbl
556*c0909341SAndroid Build Coastguard Workerendjumptable
557*c0909341SAndroid Build Coastguard Worker
// blend_h_8bpc_neon: weighted blend of the bytes read from x2 into the rows at
// x0 with ONE weight m per row, taken from X(obmc_masks):
//   [x0] = ([x2] * m + [x0] * (64 - m) + 32) >> 6
// Register use, as seen in the code below:
//   x0  destination row pointer (read and written); x8 = x0 + x1 is the row below
//   x1  byte stride between rows of x0; doubled on entry (two rows per pass)
//   x2  second source, read sequentially (rows packed back to back)
//   w3  selects the code path via index clz(w3) - 24 into blend_h_tbl; the
//       320/640/1280 path also uses it as the number of bytes per row
//   w4  on entry: byte offset into X(obmc_masks) for the first weight; it is
//       then replaced by w4 - (w4 >> 2) and used as the row counter
//       (presumably the block height, of which only the first 3/4 of the rows
//       are blended -- TODO confirm against the caller)
// Scratch: x5-x8, v0-v7, v16-v31 (v8-v15 are never written).
558*c0909341SAndroid Build Coastguard Workerfunction blend_h_8bpc_neon, export=1
559*c0909341SAndroid Build Coastguard Worker        movrel          x6,  blend_h_tbl
560*c0909341SAndroid Build Coastguard Worker        movrel          x5,  X(obmc_masks)
561*c0909341SAndroid Build Coastguard Worker        add             x5,  x5,  w4,  uxtw  // x5 = &obmc_masks[w4]
562*c0909341SAndroid Build Coastguard Worker        sub             w4,  w4,  w4,  lsr #2  // row counter = w4 - w4/4
563*c0909341SAndroid Build Coastguard Worker        clz             w7,  w3
564*c0909341SAndroid Build Coastguard Worker        movi            v4.16b,  #64  // constant 64 in every byte, used for 64 - m
565*c0909341SAndroid Build Coastguard Worker        add             x8,  x0,  x1  // x8 = second destination row
566*c0909341SAndroid Build Coastguard Worker        lsl             x1,  x1,  #1  // step two rows at a time
567*c0909341SAndroid Build Coastguard Worker        sub             w7,  w7,  #24  // table index: 0 for w3 in 128..255, 6 for w3 in 2..3
568*c0909341SAndroid Build Coastguard Worker        ldrsw           x7,  [x6,  x7,  lsl #2]  // signed 32-bit offset stored in the table
569*c0909341SAndroid Build Coastguard Worker        add             x6,  x6,  x7  // table base + offset = address of the selected path
570*c0909341SAndroid Build Coastguard Worker        br              x6
// 2 bytes per row.
571*c0909341SAndroid Build Coastguard Worker20:
572*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
573*c0909341SAndroid Build Coastguard Worker2:
574*c0909341SAndroid Build Coastguard Worker        ldr             h0,  [x5],  #2  // two weights: m0 (row 0), m1 (row 1)
575*c0909341SAndroid Build Coastguard Worker        ldr             s1,  [x2],  #4  // x2 data: 2 bytes for each of the two rows
576*c0909341SAndroid Build Coastguard Worker        subs            w4,  w4,  #2  // flags are consumed by the b.gt at the loop end
577*c0909341SAndroid Build Coastguard Worker        ldr             h2,  [x0]
578*c0909341SAndroid Build Coastguard Worker        zip1            v0.8b,   v0.8b,   v0.8b  // duplicate each weight: m0 m0 m1 m1
579*c0909341SAndroid Build Coastguard Worker        sub             v3.8b,   v4.8b,   v0.8b  // 64 - m
580*c0909341SAndroid Build Coastguard Worker        ld1             {v2.h}[1],   [x8]
581*c0909341SAndroid Build Coastguard Worker        umull           v5.8h,   v1.8b,   v0.8b  // x2 data * m
582*c0909341SAndroid Build Coastguard Worker        umlal           v5.8h,   v2.8b,   v3.8b  // + destination * (64 - m)
583*c0909341SAndroid Build Coastguard Worker        rshrn           v5.8b,   v5.8h,   #6  // rounding shift right by 6, narrow to bytes
584*c0909341SAndroid Build Coastguard Worker        st1             {v5.h}[0],   [x0],  x1
585*c0909341SAndroid Build Coastguard Worker        st1             {v5.h}[1],   [x8],  x1
586*c0909341SAndroid Build Coastguard Worker        b.gt            2b
587*c0909341SAndroid Build Coastguard Worker        ret
// 4 bytes per row.
588*c0909341SAndroid Build Coastguard Worker40:
589*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
590*c0909341SAndroid Build Coastguard Worker4:
591*c0909341SAndroid Build Coastguard Worker        ld2r            {v0.8b,   v1.8b},   [x5],  #2  // v0 = m0 in all lanes, v1 = m1 in all lanes
592*c0909341SAndroid Build Coastguard Worker        ld1             {v2.8b},   [x2],  #8
593*c0909341SAndroid Build Coastguard Worker        subs            w4,  w4,  #2
594*c0909341SAndroid Build Coastguard Worker        ext             v0.8b,   v0.8b,   v1.8b,   #4  // v0 = 4 x m0, 4 x m1
595*c0909341SAndroid Build Coastguard Worker        ldr             s3,          [x0]
596*c0909341SAndroid Build Coastguard Worker        sub             v5.8b,   v4.8b,   v0.8b  // 64 - m
597*c0909341SAndroid Build Coastguard Worker        ld1             {v3.s}[1],   [x8]
598*c0909341SAndroid Build Coastguard Worker        umull           v6.8h,   v2.8b,   v0.8b
599*c0909341SAndroid Build Coastguard Worker        umlal           v6.8h,   v3.8b,   v5.8b
600*c0909341SAndroid Build Coastguard Worker        rshrn           v6.8b,   v6.8h,   #6
601*c0909341SAndroid Build Coastguard Worker        st1             {v6.s}[0],   [x0],  x1
602*c0909341SAndroid Build Coastguard Worker        st1             {v6.s}[1],   [x8],  x1
603*c0909341SAndroid Build Coastguard Worker        b.gt            4b
604*c0909341SAndroid Build Coastguard Worker        ret
// 8 bytes per row.
605*c0909341SAndroid Build Coastguard Worker80:
606*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
607*c0909341SAndroid Build Coastguard Worker8:
608*c0909341SAndroid Build Coastguard Worker        ld2r            {v0.16b,  v1.16b},  [x5],  #2
609*c0909341SAndroid Build Coastguard Worker        ld1             {v2.16b},  [x2],  #16
610*c0909341SAndroid Build Coastguard Worker        ldr             d3,        [x0]
611*c0909341SAndroid Build Coastguard Worker        ext             v0.16b,  v0.16b,  v1.16b,  #8  // v0 = 8 x m0, 8 x m1
612*c0909341SAndroid Build Coastguard Worker        sub             v5.16b,  v4.16b,  v0.16b  // 64 - m
613*c0909341SAndroid Build Coastguard Worker        ld1             {v3.d}[1], [x8]
614*c0909341SAndroid Build Coastguard Worker        subs            w4,  w4,  #2
615*c0909341SAndroid Build Coastguard Worker        umull           v6.8h,   v0.8b,   v2.8b  // row 0
616*c0909341SAndroid Build Coastguard Worker        umlal           v6.8h,   v3.8b,   v5.8b
617*c0909341SAndroid Build Coastguard Worker        umull2          v7.8h,   v0.16b,  v2.16b  // row 1
618*c0909341SAndroid Build Coastguard Worker        umlal2          v7.8h,   v3.16b,  v5.16b
619*c0909341SAndroid Build Coastguard Worker        rshrn           v16.8b,  v6.8h,   #6
620*c0909341SAndroid Build Coastguard Worker        rshrn           v17.8b,  v7.8h,   #6
621*c0909341SAndroid Build Coastguard Worker        st1             {v16.8b},  [x0],  x1
622*c0909341SAndroid Build Coastguard Worker        st1             {v17.8b},  [x8],  x1
623*c0909341SAndroid Build Coastguard Worker        b.gt            8b
624*c0909341SAndroid Build Coastguard Worker        ret
// 16 bytes per row: v0 / v1 hold the broadcast weight for row 0 / row 1.
625*c0909341SAndroid Build Coastguard Worker160:
626*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
627*c0909341SAndroid Build Coastguard Worker16:
628*c0909341SAndroid Build Coastguard Worker        ld2r            {v0.16b,  v1.16b},  [x5],  #2
629*c0909341SAndroid Build Coastguard Worker        ld1             {v2.16b,  v3.16b},  [x2],  #32
630*c0909341SAndroid Build Coastguard Worker        ld1             {v5.16b},  [x0]
631*c0909341SAndroid Build Coastguard Worker        sub             v7.16b,  v4.16b,  v0.16b  // 64 - m0
632*c0909341SAndroid Build Coastguard Worker        sub             v16.16b, v4.16b,  v1.16b  // 64 - m1
633*c0909341SAndroid Build Coastguard Worker        ld1             {v6.16b},  [x8]
634*c0909341SAndroid Build Coastguard Worker        subs            w4,  w4,  #2
635*c0909341SAndroid Build Coastguard Worker        umull           v17.8h,  v0.8b,   v2.8b
636*c0909341SAndroid Build Coastguard Worker        umlal           v17.8h,  v5.8b,   v7.8b
637*c0909341SAndroid Build Coastguard Worker        umull2          v18.8h,  v0.16b,  v2.16b
638*c0909341SAndroid Build Coastguard Worker        umlal2          v18.8h,  v5.16b,  v7.16b
639*c0909341SAndroid Build Coastguard Worker        umull           v19.8h,  v1.8b,   v3.8b
640*c0909341SAndroid Build Coastguard Worker        umlal           v19.8h,  v6.8b,   v16.8b
641*c0909341SAndroid Build Coastguard Worker        umull2          v20.8h,  v1.16b,  v3.16b
642*c0909341SAndroid Build Coastguard Worker        umlal2          v20.8h,  v6.16b,  v16.16b
643*c0909341SAndroid Build Coastguard Worker        rshrn           v21.8b,  v17.8h,  #6
644*c0909341SAndroid Build Coastguard Worker        rshrn2          v21.16b, v18.8h,  #6
645*c0909341SAndroid Build Coastguard Worker        rshrn           v22.8b,  v19.8h,  #6
646*c0909341SAndroid Build Coastguard Worker        rshrn2          v22.16b, v20.8h,  #6
647*c0909341SAndroid Build Coastguard Worker        st1             {v21.16b}, [x0],  x1
648*c0909341SAndroid Build Coastguard Worker        st1             {v22.16b}, [x8],  x1
649*c0909341SAndroid Build Coastguard Worker        b.gt            16b
650*c0909341SAndroid Build Coastguard Worker        ret
// Shared path for the three largest table entries: an outer loop over row
// pairs (321) and an inner loop (32) that walks each row pair in 32-byte steps.
651*c0909341SAndroid Build Coastguard Worker1280:
652*c0909341SAndroid Build Coastguard Worker640:
653*c0909341SAndroid Build Coastguard Worker320:
654*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
655*c0909341SAndroid Build Coastguard Worker        sub             x1,  x1,  w3,  uxtw  // the inner loop already advances x0/x8 by w3 bytes
656*c0909341SAndroid Build Coastguard Worker        add             x7,  x2,  w3,  uxtw  // x7 = x2 data for the second row
657*c0909341SAndroid Build Coastguard Worker321:
658*c0909341SAndroid Build Coastguard Worker        ld2r            {v0.16b,  v1.16b},  [x5],  #2  // v0 = m0 (row 0), v1 = m1 (row 1), broadcast
659*c0909341SAndroid Build Coastguard Worker        mov             w6,  w3  // w6 = bytes left in this row pair
660*c0909341SAndroid Build Coastguard Worker        sub             v20.16b, v4.16b,  v0.16b  // 64 - m0
661*c0909341SAndroid Build Coastguard Worker        sub             v21.16b, v4.16b,  v1.16b  // 64 - m1
662*c0909341SAndroid Build Coastguard Worker32:
663*c0909341SAndroid Build Coastguard Worker        ld1             {v16.16b, v17.16b}, [x2],  #32
664*c0909341SAndroid Build Coastguard Worker        ld1             {v2.16b,  v3.16b},  [x0]
665*c0909341SAndroid Build Coastguard Worker        subs            w6,  w6,  #32  // flags are consumed by the b.gt 32b below
666*c0909341SAndroid Build Coastguard Worker        umull           v23.8h,  v0.8b,   v16.8b
667*c0909341SAndroid Build Coastguard Worker        umlal           v23.8h,  v2.8b,   v20.8b
668*c0909341SAndroid Build Coastguard Worker        ld1             {v18.16b, v19.16b}, [x7],  #32
669*c0909341SAndroid Build Coastguard Worker        umull2          v27.8h,  v0.16b,  v16.16b
670*c0909341SAndroid Build Coastguard Worker        umlal2          v27.8h,  v2.16b,  v20.16b
671*c0909341SAndroid Build Coastguard Worker        ld1             {v6.16b,  v7.16b},  [x8]
672*c0909341SAndroid Build Coastguard Worker        umull           v24.8h,  v0.8b,   v17.8b
673*c0909341SAndroid Build Coastguard Worker        umlal           v24.8h,  v3.8b,   v20.8b
674*c0909341SAndroid Build Coastguard Worker        umull2          v28.8h,  v0.16b,  v17.16b
675*c0909341SAndroid Build Coastguard Worker        umlal2          v28.8h,  v3.16b,  v20.16b
676*c0909341SAndroid Build Coastguard Worker        umull           v25.8h,  v1.8b,   v18.8b
677*c0909341SAndroid Build Coastguard Worker        umlal           v25.8h,  v6.8b,   v21.8b
678*c0909341SAndroid Build Coastguard Worker        umull2          v5.8h,   v1.16b,  v18.16b
679*c0909341SAndroid Build Coastguard Worker        umlal2          v5.8h,   v6.16b,  v21.16b
680*c0909341SAndroid Build Coastguard Worker        rshrn           v29.8b,  v23.8h,  #6  // last read of v23 as accumulator; reused as output below
681*c0909341SAndroid Build Coastguard Worker        rshrn2          v29.16b, v27.8h,  #6
682*c0909341SAndroid Build Coastguard Worker        umull           v26.8h,  v1.8b,   v19.8b
683*c0909341SAndroid Build Coastguard Worker        umlal           v26.8h,  v7.8b,   v21.8b
684*c0909341SAndroid Build Coastguard Worker        umull2          v31.8h,  v1.16b,  v19.16b
685*c0909341SAndroid Build Coastguard Worker        umlal2          v31.8h,  v7.16b,  v21.16b
686*c0909341SAndroid Build Coastguard Worker        rshrn           v30.8b,  v24.8h,  #6  // last read of v24 as accumulator; reused as output below
687*c0909341SAndroid Build Coastguard Worker        rshrn2          v30.16b, v28.8h,  #6
688*c0909341SAndroid Build Coastguard Worker        rshrn           v23.8b,  v25.8h,  #6
689*c0909341SAndroid Build Coastguard Worker        rshrn2          v23.16b, v5.8h,   #6
690*c0909341SAndroid Build Coastguard Worker        rshrn           v24.8b,  v26.8h,  #6
691*c0909341SAndroid Build Coastguard Worker        st1             {v29.16b, v30.16b}, [x0],  #32
692*c0909341SAndroid Build Coastguard Worker        rshrn2          v24.16b, v31.8h,  #6
693*c0909341SAndroid Build Coastguard Worker        st1             {v23.16b, v24.16b}, [x8],  #32
694*c0909341SAndroid Build Coastguard Worker        b.gt            32b
695*c0909341SAndroid Build Coastguard Worker        subs            w4,  w4,  #2  // the adds below leave these flags intact for b.gt 321b
696*c0909341SAndroid Build Coastguard Worker        add             x0,  x0,  x1  // x1 = 2 * stride - w3: move both pointers down two rows
697*c0909341SAndroid Build Coastguard Worker        add             x8,  x8,  x1
698*c0909341SAndroid Build Coastguard Worker        add             x2,  x2,  w3,  uxtw  // skip the row that was consumed through x7
699*c0909341SAndroid Build Coastguard Worker        add             x7,  x7,  w3,  uxtw  // skip the row that was consumed through x2
700*c0909341SAndroid Build Coastguard Worker        b.gt            321b
701*c0909341SAndroid Build Coastguard Worker        ret
702*c0909341SAndroid Build Coastguard Workerendfunc
703*c0909341SAndroid Build Coastguard Worker
// Dispatch table for blend_h_8bpc_neon: each entry is the 32-bit offset of a
// path label relative to the table itself. The function indexes it with
// clz(w3) - 24, so entry 0 is taken for w3 in 128..255 and entry 6 (label 20)
// for w3 in 2..3. Labels 1280, 640 and 320 all mark the same code location.
704*c0909341SAndroid Build Coastguard Workerjumptable blend_h_tbl
705*c0909341SAndroid Build Coastguard Worker        .word 1280b - blend_h_tbl
706*c0909341SAndroid Build Coastguard Worker        .word 640b  - blend_h_tbl
707*c0909341SAndroid Build Coastguard Worker        .word 320b  - blend_h_tbl
708*c0909341SAndroid Build Coastguard Worker        .word 160b  - blend_h_tbl
709*c0909341SAndroid Build Coastguard Worker        .word 80b   - blend_h_tbl
710*c0909341SAndroid Build Coastguard Worker        .word 40b   - blend_h_tbl
711*c0909341SAndroid Build Coastguard Worker        .word 20b   - blend_h_tbl
712*c0909341SAndroid Build Coastguard Workerendjumptable
713*c0909341SAndroid Build Coastguard Worker
714*c0909341SAndroid Build Coastguard Workerfunction blend_v_8bpc_neon, export=1
715*c0909341SAndroid Build Coastguard Worker        movrel          x6,  blend_v_tbl
716*c0909341SAndroid Build Coastguard Worker        movrel          x5,  X(obmc_masks)
717*c0909341SAndroid Build Coastguard Worker        add             x5,  x5,  w3,  uxtw
718*c0909341SAndroid Build Coastguard Worker        clz             w3,  w3
719*c0909341SAndroid Build Coastguard Worker        movi            v4.16b,  #64
720*c0909341SAndroid Build Coastguard Worker        add             x8,  x0,  x1
721*c0909341SAndroid Build Coastguard Worker        lsl             x1,  x1,  #1
722*c0909341SAndroid Build Coastguard Worker        sub             w3,  w3,  #26
723*c0909341SAndroid Build Coastguard Worker        ldrsw           x3,  [x6,  x3,  lsl #2]
724*c0909341SAndroid Build Coastguard Worker        add             x6,  x6,  x3
725*c0909341SAndroid Build Coastguard Worker        br              x6
726*c0909341SAndroid Build Coastguard Worker20:
727*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
728*c0909341SAndroid Build Coastguard Worker        ld1r            {v0.8b},   [x5]
729*c0909341SAndroid Build Coastguard Worker        sub             v1.8b,   v4.8b,   v0.8b
730*c0909341SAndroid Build Coastguard Worker2:
731*c0909341SAndroid Build Coastguard Worker        ldr             h2,          [x2],  #2
732*c0909341SAndroid Build Coastguard Worker        ldr             b3,          [x0]
733*c0909341SAndroid Build Coastguard Worker        subs            w4,  w4,  #2
734*c0909341SAndroid Build Coastguard Worker        ld1             {v2.b}[1],   [x2]
735*c0909341SAndroid Build Coastguard Worker        ld1             {v3.b}[1],   [x8]
736*c0909341SAndroid Build Coastguard Worker        umull           v5.8h,   v2.8b,   v0.8b
737*c0909341SAndroid Build Coastguard Worker        umlal           v5.8h,   v3.8b,   v1.8b
738*c0909341SAndroid Build Coastguard Worker        rshrn           v5.8b,   v5.8h,   #6
739*c0909341SAndroid Build Coastguard Worker        add             x2,  x2,  #2
740*c0909341SAndroid Build Coastguard Worker        st1             {v5.b}[0],   [x0],  x1
741*c0909341SAndroid Build Coastguard Worker        st1             {v5.b}[1],   [x8],  x1
742*c0909341SAndroid Build Coastguard Worker        b.gt            2b
743*c0909341SAndroid Build Coastguard Worker        ret
744*c0909341SAndroid Build Coastguard Worker40:
745*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
746*c0909341SAndroid Build Coastguard Worker        ld1r            {v0.2s},   [x5]
747*c0909341SAndroid Build Coastguard Worker        sub             x1,  x1,  #2
748*c0909341SAndroid Build Coastguard Worker        sub             v1.8b,   v4.8b,   v0.8b
749*c0909341SAndroid Build Coastguard Worker4:
750*c0909341SAndroid Build Coastguard Worker        ld1             {v2.8b},   [x2],  #8
751*c0909341SAndroid Build Coastguard Worker        ldr             s3,          [x0]
752*c0909341SAndroid Build Coastguard Worker        ld1             {v3.s}[1],   [x8]
753*c0909341SAndroid Build Coastguard Worker        subs            w4,  w4,  #2
754*c0909341SAndroid Build Coastguard Worker        umull           v5.8h,   v2.8b,   v0.8b
755*c0909341SAndroid Build Coastguard Worker        umlal           v5.8h,   v3.8b,   v1.8b
756*c0909341SAndroid Build Coastguard Worker        rshrn           v5.8b,   v5.8h,   #6
757*c0909341SAndroid Build Coastguard Worker        str             h5,          [x0],  #2
758*c0909341SAndroid Build Coastguard Worker        st1             {v5.h}[2],   [x8],  #2
759*c0909341SAndroid Build Coastguard Worker        st1             {v5.b}[2],   [x0],  x1
760*c0909341SAndroid Build Coastguard Worker        st1             {v5.b}[6],   [x8],  x1
761*c0909341SAndroid Build Coastguard Worker        b.gt            4b
762*c0909341SAndroid Build Coastguard Worker        ret
763*c0909341SAndroid Build Coastguard Worker80:
764*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
765*c0909341SAndroid Build Coastguard Worker        ld1r            {v0.2d},   [x5]
766*c0909341SAndroid Build Coastguard Worker        sub             x1,  x1,  #4
767*c0909341SAndroid Build Coastguard Worker        sub             v1.16b,  v4.16b,  v0.16b
768*c0909341SAndroid Build Coastguard Worker        zip2            v16.2d,  v1.2d,   v1.2d
769*c0909341SAndroid Build Coastguard Worker8:
770*c0909341SAndroid Build Coastguard Worker        ld1             {v2.16b},  [x2],  #16
771*c0909341SAndroid Build Coastguard Worker        ldr             d3,          [x0]
772*c0909341SAndroid Build Coastguard Worker        ldr             d4,          [x8]
773*c0909341SAndroid Build Coastguard Worker        subs            w4,  w4,  #2
774*c0909341SAndroid Build Coastguard Worker        umull           v5.8h,  v0.8b,  v2.8b
775*c0909341SAndroid Build Coastguard Worker        umlal           v5.8h,  v3.8b,  v1.8b
776*c0909341SAndroid Build Coastguard Worker        umull2          v6.8h,  v0.16b, v2.16b
777*c0909341SAndroid Build Coastguard Worker        umlal           v6.8h,  v4.8b,  v16.8b
778*c0909341SAndroid Build Coastguard Worker        rshrn           v7.8b,  v5.8h,  #6
779*c0909341SAndroid Build Coastguard Worker        rshrn           v17.8b, v6.8h,  #6
780*c0909341SAndroid Build Coastguard Worker        str             s7,          [x0],  #4
781*c0909341SAndroid Build Coastguard Worker        str             s17,         [x8],  #4
782*c0909341SAndroid Build Coastguard Worker        st1             {v7.h}[2],   [x0],  x1
783*c0909341SAndroid Build Coastguard Worker        st1             {v17.h}[2],  [x8],  x1
784*c0909341SAndroid Build Coastguard Worker        b.gt            8b
785*c0909341SAndroid Build Coastguard Worker        ret
786*c0909341SAndroid Build Coastguard Worker160:
787*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
788*c0909341SAndroid Build Coastguard Worker        ld1             {v0.16b},  [x5]
789*c0909341SAndroid Build Coastguard Worker        sub             x1,  x1,  #8
790*c0909341SAndroid Build Coastguard Worker        sub             v2.16b,  v4.16b,  v0.16b
791*c0909341SAndroid Build Coastguard Worker16:
792*c0909341SAndroid Build Coastguard Worker        ld1             {v5.16b,  v6.16b},  [x2],  #32
793*c0909341SAndroid Build Coastguard Worker        ld1             {v7.16b},  [x0]
794*c0909341SAndroid Build Coastguard Worker        subs            w4,  w4,  #2
795*c0909341SAndroid Build Coastguard Worker        ld1             {v16.16b}, [x8]
796*c0909341SAndroid Build Coastguard Worker        umull           v17.8h,  v5.8b,   v0.8b
797*c0909341SAndroid Build Coastguard Worker        umlal           v17.8h,  v7.8b,   v2.8b
798*c0909341SAndroid Build Coastguard Worker        umull2          v18.8h,  v5.16b,  v0.16b
799*c0909341SAndroid Build Coastguard Worker        umlal2          v18.8h,  v7.16b,  v2.16b
800*c0909341SAndroid Build Coastguard Worker        umull           v20.8h,  v6.8b,   v0.8b
801*c0909341SAndroid Build Coastguard Worker        umlal           v20.8h,  v16.8b,  v2.8b
802*c0909341SAndroid Build Coastguard Worker        umull2          v21.8h,  v6.16b,  v0.16b
803*c0909341SAndroid Build Coastguard Worker        umlal2          v21.8h,  v16.16b, v2.16b
804*c0909341SAndroid Build Coastguard Worker        rshrn           v19.8b,  v17.8h,  #6
805*c0909341SAndroid Build Coastguard Worker        rshrn2          v19.16b, v18.8h,  #6
806*c0909341SAndroid Build Coastguard Worker        rshrn           v22.8b,  v20.8h,  #6
807*c0909341SAndroid Build Coastguard Worker        rshrn2          v22.16b, v21.8h,  #6
808*c0909341SAndroid Build Coastguard Worker        st1             {v19.8b},  [x0],  #8
809*c0909341SAndroid Build Coastguard Worker        st1             {v22.8b},  [x8],  #8
810*c0909341SAndroid Build Coastguard Worker        st1             {v19.s}[2],  [x0],  x1
811*c0909341SAndroid Build Coastguard Worker        st1             {v22.s}[2],  [x8],  x1
812*c0909341SAndroid Build Coastguard Worker        b.gt            16b
813*c0909341SAndroid Build Coastguard Worker        ret
814*c0909341SAndroid Build Coastguard Worker320:
815*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
816*c0909341SAndroid Build Coastguard Worker        ld1             {v0.16b,  v1.16b},  [x5]
817*c0909341SAndroid Build Coastguard Worker        sub             x1,  x1,  #16
818*c0909341SAndroid Build Coastguard Worker        sub             v2.16b,  v4.16b,  v0.16b
819*c0909341SAndroid Build Coastguard Worker        sub             v3.8b,   v4.8b,   v1.8b
820*c0909341SAndroid Build Coastguard Worker32:
821*c0909341SAndroid Build Coastguard Worker        ld1             {v16.16b, v17.16b, v18.16b, v19.16b}, [x2],  #64
822*c0909341SAndroid Build Coastguard Worker        ld1             {v5.16b,  v6.16b},  [x0]
823*c0909341SAndroid Build Coastguard Worker        subs            w4,  w4,  #2
824*c0909341SAndroid Build Coastguard Worker        ld1             {v20.16b, v21.16b}, [x8]
825*c0909341SAndroid Build Coastguard Worker        umull           v22.8h,  v16.8b,  v0.8b
826*c0909341SAndroid Build Coastguard Worker        umlal           v22.8h,  v5.8b,   v2.8b
827*c0909341SAndroid Build Coastguard Worker        umull2          v23.8h,  v16.16b, v0.16b
828*c0909341SAndroid Build Coastguard Worker        umlal2          v23.8h,  v5.16b,  v2.16b
829*c0909341SAndroid Build Coastguard Worker        umull           v28.8h,  v17.8b,  v1.8b
830*c0909341SAndroid Build Coastguard Worker        umlal           v28.8h,  v6.8b,   v3.8b
831*c0909341SAndroid Build Coastguard Worker        umull           v30.8h,  v18.8b,  v0.8b
832*c0909341SAndroid Build Coastguard Worker        umlal           v30.8h,  v20.8b,  v2.8b
833*c0909341SAndroid Build Coastguard Worker        umull2          v31.8h,  v18.16b, v0.16b
834*c0909341SAndroid Build Coastguard Worker        umlal2          v31.8h,  v20.16b, v2.16b
835*c0909341SAndroid Build Coastguard Worker        umull           v25.8h,  v19.8b,  v1.8b
836*c0909341SAndroid Build Coastguard Worker        umlal           v25.8h,  v21.8b,  v3.8b
837*c0909341SAndroid Build Coastguard Worker        rshrn           v24.8b,  v22.8h,  #6
838*c0909341SAndroid Build Coastguard Worker        rshrn2          v24.16b, v23.8h,  #6
839*c0909341SAndroid Build Coastguard Worker        rshrn           v28.8b,  v28.8h,  #6
840*c0909341SAndroid Build Coastguard Worker        rshrn           v30.8b,  v30.8h,  #6
841*c0909341SAndroid Build Coastguard Worker        rshrn2          v30.16b, v31.8h,  #6
842*c0909341SAndroid Build Coastguard Worker        rshrn           v27.8b,  v25.8h,  #6
843*c0909341SAndroid Build Coastguard Worker        st1             {v24.16b}, [x0],  #16
844*c0909341SAndroid Build Coastguard Worker        st1             {v30.16b}, [x8],  #16
845*c0909341SAndroid Build Coastguard Worker        st1             {v28.8b},  [x0],  x1
846*c0909341SAndroid Build Coastguard Worker        st1             {v27.8b},  [x8],  x1
847*c0909341SAndroid Build Coastguard Worker        b.gt            32b
848*c0909341SAndroid Build Coastguard Worker        ret
849*c0909341SAndroid Build Coastguard Workerendfunc
850*c0909341SAndroid Build Coastguard Worker
// Jump table for the width-specific entry points 320, 160, 80, 40 and 20,
// widest first. Each entry is a 32-bit offset from the start of this table.
// The "b" suffix is a backward local-label reference, so every entry binds
// to the nearest earlier definition of that numeric label.
// NOTE(review): the code that indexes this table is not visible in this part
// of the file; presumably it uses a clz(w)-based index - confirm before
// reordering or adding entries.
851*c0909341SAndroid Build Coastguard Workerjumptable blend_v_tbl
852*c0909341SAndroid Build Coastguard Worker        .word 320b - blend_v_tbl
853*c0909341SAndroid Build Coastguard Worker        .word 160b - blend_v_tbl
854*c0909341SAndroid Build Coastguard Worker        .word 80b  - blend_v_tbl
855*c0909341SAndroid Build Coastguard Worker        .word 40b  - blend_v_tbl
856*c0909341SAndroid Build Coastguard Worker        .word 20b  - blend_v_tbl
857*c0909341SAndroid Build Coastguard Workerendjumptable
858*c0909341SAndroid Build Coastguard Worker
859*c0909341SAndroid Build Coastguard Worker
860*c0909341SAndroid Build Coastguard Worker// This has got the same signature as the put_8tap functions,
861*c0909341SAndroid Build Coastguard Worker// and assumes that x8 is set to (clz(w)-24).
//
// Plain block copy, one byte per unit of w, dispatched on width via put_tbl.
// Register use, as read off the body below:
//   x0 - store pointer, advanced by x1 for every row written
//   x1 - byte stride between rows at x0
//   x2 - load pointer, advanced by x3 for every row read
//   x3 - byte stride between rows at x2
//   w5 - rows left to copy; each loop repeats while this stays above zero
//   x8 - put_tbl index on entry, then overwritten with the table entry
// Clobbers x8-x10, v0-v7 and the condition flags; x0, x2 and w5 are consumed.
// The loops for 2..32 byte rows copy two rows per pass and the wider ones a
// single row. NOTE(review): presumably callers only pass an even row count
// for the two-row loops - confirm.
862*c0909341SAndroid Build Coastguard Workerfunction put_neon, export=1
        // x9 = base of put_tbl (movrel is a project macro defined elsewhere).
863*c0909341SAndroid Build Coastguard Worker        movrel          x9,  put_tbl
        // x8 = sign-extended 32-bit table entry number x8.
864*c0909341SAndroid Build Coastguard Worker        ldrsw           x8,  [x9, x8, lsl #2]
        // Entries are offsets from the table base, so add the base back.
865*c0909341SAndroid Build Coastguard Worker        add             x9,  x9,  x8
866*c0909341SAndroid Build Coastguard Worker        br              x9
867*c0909341SAndroid Build Coastguard Worker
        // 2-byte rows, two rows per pass, through general-purpose registers.
868*c0909341SAndroid Build Coastguard Worker20:
        // Project macro marking an indirect-branch target (defined elsewhere;
        // presumably a BTI landing pad - confirm).
869*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
870*c0909341SAndroid Build Coastguard Worker2:
871*c0909341SAndroid Build Coastguard Worker        ldrh            w9, [x2]
872*c0909341SAndroid Build Coastguard Worker        ldrh            w10, [x2, x3]
873*c0909341SAndroid Build Coastguard Worker        add             x2, x2, x3, lsl #1
874*c0909341SAndroid Build Coastguard Worker        subs            w5, w5, #2
875*c0909341SAndroid Build Coastguard Worker        strh            w9, [x0]
876*c0909341SAndroid Build Coastguard Worker        strh            w10, [x0, x1]
877*c0909341SAndroid Build Coastguard Worker        add             x0, x0, x1, lsl #1
878*c0909341SAndroid Build Coastguard Worker        b.gt            2b
879*c0909341SAndroid Build Coastguard Worker        ret
        // 4-byte rows, two rows per pass.
880*c0909341SAndroid Build Coastguard Worker40:
881*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
882*c0909341SAndroid Build Coastguard Worker4:
883*c0909341SAndroid Build Coastguard Worker        ldr             w9, [x2]
884*c0909341SAndroid Build Coastguard Worker        ldr             w10, [x2, x3]
885*c0909341SAndroid Build Coastguard Worker        add             x2, x2, x3, lsl #1
886*c0909341SAndroid Build Coastguard Worker        subs            w5, w5, #2
887*c0909341SAndroid Build Coastguard Worker        str             w9, [x0]
888*c0909341SAndroid Build Coastguard Worker        str             w10, [x0, x1]
889*c0909341SAndroid Build Coastguard Worker        add             x0, x0, x1, lsl #1
890*c0909341SAndroid Build Coastguard Worker        b.gt            4b
891*c0909341SAndroid Build Coastguard Worker        ret
        // 8-byte rows, two rows per pass.
892*c0909341SAndroid Build Coastguard Worker80:
893*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
894*c0909341SAndroid Build Coastguard Worker8:
895*c0909341SAndroid Build Coastguard Worker        ldr             x9, [x2]
896*c0909341SAndroid Build Coastguard Worker        ldr             x10, [x2, x3]
897*c0909341SAndroid Build Coastguard Worker        add             x2, x2, x3, lsl #1
898*c0909341SAndroid Build Coastguard Worker        subs            w5, w5, #2
899*c0909341SAndroid Build Coastguard Worker        str             x9, [x0]
900*c0909341SAndroid Build Coastguard Worker        str             x10, [x0, x1]
901*c0909341SAndroid Build Coastguard Worker        add             x0, x0, x1, lsl #1
902*c0909341SAndroid Build Coastguard Worker        b.gt            8b
903*c0909341SAndroid Build Coastguard Worker        ret
        // 16-byte rows, two rows per pass, one q register per row.
904*c0909341SAndroid Build Coastguard Worker160:
905*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
906*c0909341SAndroid Build Coastguard Worker16:
907*c0909341SAndroid Build Coastguard Worker        ldr             q0, [x2]
908*c0909341SAndroid Build Coastguard Worker        ldr             q1, [x2, x3]
909*c0909341SAndroid Build Coastguard Worker        add             x2, x2, x3, lsl #1
910*c0909341SAndroid Build Coastguard Worker        subs            w5, w5, #2
911*c0909341SAndroid Build Coastguard Worker        str             q0, [x0]
912*c0909341SAndroid Build Coastguard Worker        str             q1, [x0, x1]
913*c0909341SAndroid Build Coastguard Worker        add             x0, x0, x1, lsl #1
914*c0909341SAndroid Build Coastguard Worker        b.gt            16b
915*c0909341SAndroid Build Coastguard Worker        ret
        // 32-byte rows, two rows per pass; pointers step one row at a time.
916*c0909341SAndroid Build Coastguard Worker320:
917*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
918*c0909341SAndroid Build Coastguard Worker32:
919*c0909341SAndroid Build Coastguard Worker        ldp             q0, q1, [x2]
920*c0909341SAndroid Build Coastguard Worker        add             x2, x2, x3
921*c0909341SAndroid Build Coastguard Worker        stp             q0, q1, [x0]
922*c0909341SAndroid Build Coastguard Worker        add             x0, x0, x1
923*c0909341SAndroid Build Coastguard Worker        ldp             q2, q3, [x2]
924*c0909341SAndroid Build Coastguard Worker        add             x2, x2, x3
925*c0909341SAndroid Build Coastguard Worker        stp             q2, q3, [x0]
926*c0909341SAndroid Build Coastguard Worker        subs            w5, w5, #2
927*c0909341SAndroid Build Coastguard Worker        add             x0, x0, x1
928*c0909341SAndroid Build Coastguard Worker        b.gt            32b
929*c0909341SAndroid Build Coastguard Worker        ret
        // 64-byte rows, one row per pass.
930*c0909341SAndroid Build Coastguard Worker640:
931*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
932*c0909341SAndroid Build Coastguard Worker64:
933*c0909341SAndroid Build Coastguard Worker        ldp             q0, q1, [x2]
934*c0909341SAndroid Build Coastguard Worker        stp             q0, q1, [x0]
935*c0909341SAndroid Build Coastguard Worker        ldp             q2, q3, [x2, #32]
936*c0909341SAndroid Build Coastguard Worker        add             x2, x2, x3
937*c0909341SAndroid Build Coastguard Worker        stp             q2, q3, [x0, #32]
938*c0909341SAndroid Build Coastguard Worker        subs            w5, w5, #1
939*c0909341SAndroid Build Coastguard Worker        add             x0, x0, x1
940*c0909341SAndroid Build Coastguard Worker        b.gt            64b
941*c0909341SAndroid Build Coastguard Worker        ret
        // 128-byte rows, one row per pass, copied as four 32-byte pairs.
942*c0909341SAndroid Build Coastguard Worker1280:
943*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
944*c0909341SAndroid Build Coastguard Worker128:
945*c0909341SAndroid Build Coastguard Worker        ldp             q0, q1, [x2]
946*c0909341SAndroid Build Coastguard Worker        stp             q0, q1, [x0]
947*c0909341SAndroid Build Coastguard Worker        ldp             q2, q3, [x2, #32]
948*c0909341SAndroid Build Coastguard Worker        stp             q2, q3, [x0, #32]
949*c0909341SAndroid Build Coastguard Worker        ldp             q4, q5, [x2, #64]
950*c0909341SAndroid Build Coastguard Worker        stp             q4, q5, [x0, #64]
951*c0909341SAndroid Build Coastguard Worker        ldp             q6, q7, [x2, #96]
952*c0909341SAndroid Build Coastguard Worker        add             x2, x2, x3
953*c0909341SAndroid Build Coastguard Worker        stp             q6, q7, [x0, #96]
954*c0909341SAndroid Build Coastguard Worker        subs            w5, w5, #1
955*c0909341SAndroid Build Coastguard Worker        add             x0, x0, x1
956*c0909341SAndroid Build Coastguard Worker        b.gt            128b
957*c0909341SAndroid Build Coastguard Worker        ret
958*c0909341SAndroid Build Coastguard Workerendfunc
959*c0909341SAndroid Build Coastguard Worker
// Jump table for put_neon: 32-bit offsets, relative to put_tbl, of the
// width-specific entry points, widest first. put_neon loads entry number x8
// (documented there as clz(w)-24) and adds the table base back, so entry 0
// is the 128-byte-row path and entry 6 the 2-byte-row path. The order of the
// entries must keep matching that index.
960*c0909341SAndroid Build Coastguard Workerjumptable put_tbl
961*c0909341SAndroid Build Coastguard Worker        .word 1280b - put_tbl
962*c0909341SAndroid Build Coastguard Worker        .word 640b  - put_tbl
963*c0909341SAndroid Build Coastguard Worker        .word 320b  - put_tbl
964*c0909341SAndroid Build Coastguard Worker        .word 160b  - put_tbl
965*c0909341SAndroid Build Coastguard Worker        .word 80b   - put_tbl
966*c0909341SAndroid Build Coastguard Worker        .word 40b   - put_tbl
967*c0909341SAndroid Build Coastguard Worker        .word 20b   - put_tbl
968*c0909341SAndroid Build Coastguard Workerendjumptable
968*c0909341SAndroid Build Coastguard Workerendjumptable
969*c0909341SAndroid Build Coastguard Worker
970*c0909341SAndroid Build Coastguard Worker
971*c0909341SAndroid Build Coastguard Worker// This has got the same signature as the prep_8tap functions,
972*c0909341SAndroid Build Coastguard Worker// and assumes that x8 is set to (clz(w)-24), and x7 to w*2.
//
// Widens 8-bit input to 16 bits scaled by 16 (value << 4) and stores the
// result as one contiguous run at x0; dispatched on width via prep_tbl.
// Register use, as read off the body below:
//   x0  - store pointer; only ever advanced linearly (no row stride)
//   x1  - load pointer, advanced by x2 for every row read
//   x2  - byte stride between input rows
//   w4  - rows left; each loop repeats while this stays above zero
//   x8  - prep_tbl index on entry, then overwritten with the table entry
//   v24 - every byte lane set to 16, the multiplier for the umull form
// Clobbers x8, x9, v0-v7, v16-v24, v28-v31 and the condition flags.
// Some vectors are widened with ushll #4 and others with umull by v24; both
// produce input*16. NOTE(review): the mix presumably spreads the work over
// different execution pipes - confirm before unifying the two forms.
// NOTE(review): x7 is never read anywhere in this function body, so the
// x7 precondition stated above looks stale here - confirm against callers.
973*c0909341SAndroid Build Coastguard Workerfunction prep_neon, export=1
        // x9 = base of prep_tbl (movrel is a project macro defined elsewhere).
974*c0909341SAndroid Build Coastguard Worker        movrel          x9,  prep_tbl
        // x8 = sign-extended 32-bit table entry number x8.
975*c0909341SAndroid Build Coastguard Worker        ldrsw           x8,  [x9, x8, lsl #2]
976*c0909341SAndroid Build Coastguard Worker        movi            v24.16b, #16
        // Entries are offsets from the table base, so add the base back.
977*c0909341SAndroid Build Coastguard Worker        add             x9,  x9,  x8
978*c0909341SAndroid Build Coastguard Worker        br              x9
979*c0909341SAndroid Build Coastguard Worker
        // 4-byte rows, four rows per pass: rows 0,1 are packed into v0 and
        // rows 2,3 into v1 before widening; 32 bytes are written per pass.
980*c0909341SAndroid Build Coastguard Worker40:
        // Project macro marking an indirect-branch target (defined elsewhere;
        // presumably a BTI landing pad - confirm).
981*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
982*c0909341SAndroid Build Coastguard Worker4:
983*c0909341SAndroid Build Coastguard Worker        ldr             s0, [x1]
984*c0909341SAndroid Build Coastguard Worker        ldr             s2, [x1, x2]
985*c0909341SAndroid Build Coastguard Worker        add             x1, x1, x2, lsl #1
986*c0909341SAndroid Build Coastguard Worker        ldr             s1, [x1]
987*c0909341SAndroid Build Coastguard Worker        ldr             s3, [x1, x2]
988*c0909341SAndroid Build Coastguard Worker        add             x1, x1, x2, lsl #1
989*c0909341SAndroid Build Coastguard Worker        mov             v0.s[1], v2.s[0]
990*c0909341SAndroid Build Coastguard Worker        mov             v1.s[1], v3.s[0]
991*c0909341SAndroid Build Coastguard Worker        ushll           v0.8h, v0.8b, #4
992*c0909341SAndroid Build Coastguard Worker        ushll           v1.8h, v1.8b, #4
993*c0909341SAndroid Build Coastguard Worker        subs            w4, w4, #4
994*c0909341SAndroid Build Coastguard Worker        stp             q0, q1, [x0], #32
995*c0909341SAndroid Build Coastguard Worker        b.gt            4b
996*c0909341SAndroid Build Coastguard Worker        ret
        // 8-byte rows, four rows per pass; 64 bytes are written per pass.
997*c0909341SAndroid Build Coastguard Worker80:
998*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
999*c0909341SAndroid Build Coastguard Worker8:
1000*c0909341SAndroid Build Coastguard Worker        ldr             d0, [x1]
1001*c0909341SAndroid Build Coastguard Worker        ldr             d1, [x1, x2]
1002*c0909341SAndroid Build Coastguard Worker        add             x1, x1, x2, lsl #1
1003*c0909341SAndroid Build Coastguard Worker        ldr             d2, [x1]
1004*c0909341SAndroid Build Coastguard Worker        ldr             d3, [x1, x2]
1005*c0909341SAndroid Build Coastguard Worker        add             x1, x1, x2, lsl #1
1006*c0909341SAndroid Build Coastguard Worker        ushll           v0.8h, v0.8b, #4
1007*c0909341SAndroid Build Coastguard Worker        ushll           v1.8h, v1.8b, #4
1008*c0909341SAndroid Build Coastguard Worker        umull           v2.8h, v2.8b, v24.8b
1009*c0909341SAndroid Build Coastguard Worker        umull           v3.8h, v3.8b, v24.8b
1010*c0909341SAndroid Build Coastguard Worker        subs            w4, w4, #4
1011*c0909341SAndroid Build Coastguard Worker        stp             q0, q1, [x0]
1012*c0909341SAndroid Build Coastguard Worker        stp             q2, q3, [x0, #32]
1013*c0909341SAndroid Build Coastguard Worker        add             x0, x0, #64
1014*c0909341SAndroid Build Coastguard Worker        b.gt            8b
1015*c0909341SAndroid Build Coastguard Worker        ret
        // 16-byte rows, four rows per pass; each input register is widened
        // in place into a low/high pair, 128 bytes are written per pass.
1016*c0909341SAndroid Build Coastguard Worker160:
1017*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
1018*c0909341SAndroid Build Coastguard Worker16:
1019*c0909341SAndroid Build Coastguard Worker        ldr             q1, [x1]
1020*c0909341SAndroid Build Coastguard Worker        ldr             q3, [x1, x2]
1021*c0909341SAndroid Build Coastguard Worker        add             x1, x1, x2, lsl #1
1022*c0909341SAndroid Build Coastguard Worker        ushll           v0.8h, v1.8b, #4
1023*c0909341SAndroid Build Coastguard Worker        ushll2          v1.8h, v1.16b, #4
1024*c0909341SAndroid Build Coastguard Worker        ldr             q5, [x1]
1025*c0909341SAndroid Build Coastguard Worker        ldr             q7, [x1, x2]
1026*c0909341SAndroid Build Coastguard Worker        add             x1, x1, x2, lsl #1
1027*c0909341SAndroid Build Coastguard Worker        umull           v2.8h, v3.8b, v24.8b
1028*c0909341SAndroid Build Coastguard Worker        umull2          v3.8h, v3.16b, v24.16b
1029*c0909341SAndroid Build Coastguard Worker        ushll           v4.8h, v5.8b, #4
1030*c0909341SAndroid Build Coastguard Worker        ushll2          v5.8h, v5.16b, #4
1031*c0909341SAndroid Build Coastguard Worker        umull           v6.8h, v7.8b, v24.8b
1032*c0909341SAndroid Build Coastguard Worker        umull2          v7.8h, v7.16b, v24.16b
1033*c0909341SAndroid Build Coastguard Worker        subs            w4, w4, #4
1034*c0909341SAndroid Build Coastguard Worker        stp             q0, q1, [x0]
1035*c0909341SAndroid Build Coastguard Worker        stp             q2, q3, [x0, #32]
1036*c0909341SAndroid Build Coastguard Worker        stp             q4, q5, [x0, #64]
1037*c0909341SAndroid Build Coastguard Worker        stp             q6, q7, [x0, #96]
1038*c0909341SAndroid Build Coastguard Worker        add             x0, x0, #128
1039*c0909341SAndroid Build Coastguard Worker        b.gt            16b
1040*c0909341SAndroid Build Coastguard Worker        ret
        // 32-byte rows, two rows per pass; 128 bytes are written per pass.
1041*c0909341SAndroid Build Coastguard Worker320:
1042*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
1043*c0909341SAndroid Build Coastguard Worker32:
1044*c0909341SAndroid Build Coastguard Worker        ldp             q4, q5, [x1]
1045*c0909341SAndroid Build Coastguard Worker        add             x1, x1, x2
1046*c0909341SAndroid Build Coastguard Worker        ldp             q6, q7, [x1]
1047*c0909341SAndroid Build Coastguard Worker        add             x1, x1, x2
1048*c0909341SAndroid Build Coastguard Worker        ushll           v0.8h, v4.8b, #4
1049*c0909341SAndroid Build Coastguard Worker        ushll2          v1.8h, v4.16b, #4
1050*c0909341SAndroid Build Coastguard Worker        umull           v2.8h, v5.8b, v24.8b
1051*c0909341SAndroid Build Coastguard Worker        umull2          v3.8h, v5.16b, v24.16b
1052*c0909341SAndroid Build Coastguard Worker        ushll           v4.8h, v6.8b, #4
1053*c0909341SAndroid Build Coastguard Worker        ushll2          v5.8h, v6.16b, #4
1054*c0909341SAndroid Build Coastguard Worker        umull           v6.8h, v7.8b, v24.8b
1055*c0909341SAndroid Build Coastguard Worker        umull2          v7.8h, v7.16b, v24.16b
1056*c0909341SAndroid Build Coastguard Worker        subs            w4, w4, #2
1057*c0909341SAndroid Build Coastguard Worker        stp             q0, q1, [x0]
1058*c0909341SAndroid Build Coastguard Worker        stp             q2, q3, [x0, #32]
1059*c0909341SAndroid Build Coastguard Worker        stp             q4, q5, [x0, #64]
1060*c0909341SAndroid Build Coastguard Worker        stp             q6, q7, [x0, #96]
1061*c0909341SAndroid Build Coastguard Worker        add             x0, x0, #128
1062*c0909341SAndroid Build Coastguard Worker        b.gt            32b
1063*c0909341SAndroid Build Coastguard Worker        ret
        // 64-byte rows, one row per pass; 128 bytes are written per pass.
1064*c0909341SAndroid Build Coastguard Worker640:
1065*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
1066*c0909341SAndroid Build Coastguard Worker64:
1067*c0909341SAndroid Build Coastguard Worker        ldp             q4, q5, [x1]
1068*c0909341SAndroid Build Coastguard Worker        ldp             q6, q7, [x1, #32]
1069*c0909341SAndroid Build Coastguard Worker        add             x1, x1, x2
1070*c0909341SAndroid Build Coastguard Worker        ushll           v0.8h, v4.8b, #4
1071*c0909341SAndroid Build Coastguard Worker        ushll2          v1.8h, v4.16b, #4
1072*c0909341SAndroid Build Coastguard Worker        umull           v2.8h, v5.8b, v24.8b
1073*c0909341SAndroid Build Coastguard Worker        umull2          v3.8h, v5.16b, v24.16b
1074*c0909341SAndroid Build Coastguard Worker        ushll           v4.8h, v6.8b, #4
1075*c0909341SAndroid Build Coastguard Worker        ushll2          v5.8h, v6.16b, #4
1076*c0909341SAndroid Build Coastguard Worker        umull           v6.8h, v7.8b, v24.8b
1077*c0909341SAndroid Build Coastguard Worker        umull2          v7.8h, v7.16b, v24.16b
1078*c0909341SAndroid Build Coastguard Worker        subs            w4, w4, #1
1079*c0909341SAndroid Build Coastguard Worker        stp             q0, q1, [x0]
1080*c0909341SAndroid Build Coastguard Worker        stp             q2, q3, [x0, #32]
1081*c0909341SAndroid Build Coastguard Worker        stp             q4, q5, [x0, #64]
1082*c0909341SAndroid Build Coastguard Worker        stp             q6, q7, [x0, #96]
1083*c0909341SAndroid Build Coastguard Worker        add             x0, x0, #128
1084*c0909341SAndroid Build Coastguard Worker        b.gt            64b
1085*c0909341SAndroid Build Coastguard Worker        ret
        // 128-byte rows, one row per pass, handled as two 64-byte halves
        // that reuse v16-v23 and v28-v31; 256 bytes are written per pass.
1086*c0909341SAndroid Build Coastguard Worker1280:
1087*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
1088*c0909341SAndroid Build Coastguard Worker128:
1089*c0909341SAndroid Build Coastguard Worker        ldp             q28, q29, [x1]
1090*c0909341SAndroid Build Coastguard Worker        ldp             q30, q31, [x1, #32]
1091*c0909341SAndroid Build Coastguard Worker        ushll           v16.8h, v28.8b, #4
1092*c0909341SAndroid Build Coastguard Worker        ushll2          v17.8h, v28.16b, #4
1093*c0909341SAndroid Build Coastguard Worker        umull           v18.8h, v29.8b, v24.8b
1094*c0909341SAndroid Build Coastguard Worker        umull2          v19.8h, v29.16b, v24.16b
1095*c0909341SAndroid Build Coastguard Worker        ushll           v20.8h, v30.8b, #4
1096*c0909341SAndroid Build Coastguard Worker        ushll2          v21.8h, v30.16b, #4
1097*c0909341SAndroid Build Coastguard Worker        umull           v22.8h, v31.8b, v24.8b
1098*c0909341SAndroid Build Coastguard Worker        umull2          v23.8h, v31.16b, v24.16b
        // Second half of the row is loaded before x1 moves to the next row.
1099*c0909341SAndroid Build Coastguard Worker        ldp             q28, q29, [x1, #64]
1100*c0909341SAndroid Build Coastguard Worker        ldp             q30, q31, [x1, #96]
1101*c0909341SAndroid Build Coastguard Worker        add             x1, x1, x2
1102*c0909341SAndroid Build Coastguard Worker        stp             q16, q17, [x0]
1103*c0909341SAndroid Build Coastguard Worker        stp             q18, q19, [x0, #32]
1104*c0909341SAndroid Build Coastguard Worker        stp             q20, q21, [x0, #64]
1105*c0909341SAndroid Build Coastguard Worker        stp             q22, q23, [x0, #96]
1106*c0909341SAndroid Build Coastguard Worker        ushll           v16.8h, v28.8b, #4
1107*c0909341SAndroid Build Coastguard Worker        ushll2          v17.8h, v28.16b, #4
1108*c0909341SAndroid Build Coastguard Worker        umull           v18.8h, v29.8b, v24.8b
1109*c0909341SAndroid Build Coastguard Worker        umull2          v19.8h, v29.16b, v24.16b
1110*c0909341SAndroid Build Coastguard Worker        ushll           v20.8h, v30.8b, #4
1111*c0909341SAndroid Build Coastguard Worker        ushll2          v21.8h, v30.16b, #4
1112*c0909341SAndroid Build Coastguard Worker        umull           v22.8h, v31.8b, v24.8b
1113*c0909341SAndroid Build Coastguard Worker        umull2          v23.8h, v31.16b, v24.16b
1114*c0909341SAndroid Build Coastguard Worker        subs            w4, w4, #1
1115*c0909341SAndroid Build Coastguard Worker        stp             q16, q17, [x0, #128]
1116*c0909341SAndroid Build Coastguard Worker        stp             q18, q19, [x0, #160]
1117*c0909341SAndroid Build Coastguard Worker        stp             q20, q21, [x0, #192]
1118*c0909341SAndroid Build Coastguard Worker        stp             q22, q23, [x0, #224]
1119*c0909341SAndroid Build Coastguard Worker        add             x0, x0, #256
1120*c0909341SAndroid Build Coastguard Worker        b.gt            128b
1121*c0909341SAndroid Build Coastguard Worker        ret
1122*c0909341SAndroid Build Coastguard Workerendfunc
1123*c0909341SAndroid Build Coastguard Worker
// Jump table for prep_neon: 32-bit offsets, relative to prep_tbl, of the
// width-specific entry points, widest first. prep_neon loads entry number x8
// (documented there as clz(w)-24) and adds the table base back, so entry 0
// is the 128-byte-row path and entry 5 the 4-byte-row path. There is no
// entry for 2-byte rows, so an index of 6 would read past the table.
1124*c0909341SAndroid Build Coastguard Workerjumptable prep_tbl
1125*c0909341SAndroid Build Coastguard Worker        .word 1280b - prep_tbl
1126*c0909341SAndroid Build Coastguard Worker        .word 640b  - prep_tbl
1127*c0909341SAndroid Build Coastguard Worker        .word 320b  - prep_tbl
1128*c0909341SAndroid Build Coastguard Worker        .word 160b  - prep_tbl
1129*c0909341SAndroid Build Coastguard Worker        .word 80b   - prep_tbl
1130*c0909341SAndroid Build Coastguard Worker        .word 40b   - prep_tbl
1131*c0909341SAndroid Build Coastguard Workerendjumptable
1132*c0909341SAndroid Build Coastguard Worker
1133*c0909341SAndroid Build Coastguard Worker
// load_slice: load one element of size wd (for example .h or .s) into lane 0
// of up to seven vector registers, alternating between two pointers.
//   s0, s1 - pointer registers; each is post-incremented by strd per load
//   strd   - post-increment applied after every load
//   wd     - element-size suffix appended to each destination name
//   d0..d6 - destinations, filled in the order d0<-s0, d1<-s1, d2<-s0,
//            d3<-s1, d4<-s0, d5<-s1, d6<-s0
// d0 and d1 are mandatory. d2 and d3 are loaded together and only d2 is
// tested, so passing d2 without d3 expands to an invalid instruction.
// d4, d5 and d6 are each optional. The other lanes of every destination keep
// their previous contents (lane-wise ld1).
// NOTE(review): presumably s0 and s1 point at adjacent rows and strd is
// twice the row stride, so d0..d6 receive consecutive rows - confirm at the
// call sites.
1134*c0909341SAndroid Build Coastguard Worker.macro load_slice s0, s1, strd, wd, d0, d1, d2, d3, d4, d5, d6
1135*c0909341SAndroid Build Coastguard Worker        ld1             {\d0\wd}[0], [\s0], \strd
1136*c0909341SAndroid Build Coastguard Worker        ld1             {\d1\wd}[0], [\s1], \strd
1137*c0909341SAndroid Build Coastguard Worker.ifnb \d2
1138*c0909341SAndroid Build Coastguard Worker        ld1             {\d2\wd}[0], [\s0], \strd
1139*c0909341SAndroid Build Coastguard Worker        ld1             {\d3\wd}[0], [\s1], \strd
1140*c0909341SAndroid Build Coastguard Worker.endif
1141*c0909341SAndroid Build Coastguard Worker.ifnb \d4
1142*c0909341SAndroid Build Coastguard Worker        ld1             {\d4\wd}[0], [\s0], \strd
1143*c0909341SAndroid Build Coastguard Worker.endif
1144*c0909341SAndroid Build Coastguard Worker.ifnb \d5
1145*c0909341SAndroid Build Coastguard Worker        ld1             {\d5\wd}[0], [\s1], \strd
1146*c0909341SAndroid Build Coastguard Worker.endif
1147*c0909341SAndroid Build Coastguard Worker.ifnb \d6
1148*c0909341SAndroid Build Coastguard Worker        ld1             {\d6\wd}[0], [\s0], \strd
1149*c0909341SAndroid Build Coastguard Worker.endif
1150*c0909341SAndroid Build Coastguard Worker.endm
// load_reg: load up to seven whole vectors of arrangement wd (for example
// .8b or .16b), alternating between two pointers.
//   s0, s1 - pointer registers; each is post-incremented by strd per load
//   strd   - post-increment applied after every load
//   wd     - arrangement suffix appended to each destination name
//   d0..d6 - destinations, filled in the order d0<-s0, d1<-s1, d2<-s0,
//            d3<-s1, d4<-s0, d5<-s1, d6<-s0
// d0 and d1 are mandatory. d2 and d3 are loaded together and only d2 is
// tested, so passing d2 without d3 expands to an invalid instruction.
// d4, d5 and d6 are each optional.
// NOTE(review): presumably s0 and s1 point at adjacent rows and strd is
// twice the row stride, so d0..d6 receive consecutive rows - confirm at the
// call sites.
1151*c0909341SAndroid Build Coastguard Worker.macro load_reg s0, s1, strd, wd, d0, d1, d2, d3, d4, d5, d6
1152*c0909341SAndroid Build Coastguard Worker        ld1             {\d0\wd}, [\s0], \strd
1153*c0909341SAndroid Build Coastguard Worker        ld1             {\d1\wd}, [\s1], \strd
1154*c0909341SAndroid Build Coastguard Worker.ifnb \d2
1155*c0909341SAndroid Build Coastguard Worker        ld1             {\d2\wd}, [\s0], \strd
1156*c0909341SAndroid Build Coastguard Worker        ld1             {\d3\wd}, [\s1], \strd
1157*c0909341SAndroid Build Coastguard Worker.endif
1158*c0909341SAndroid Build Coastguard Worker.ifnb \d4
1159*c0909341SAndroid Build Coastguard Worker        ld1             {\d4\wd}, [\s0], \strd
1160*c0909341SAndroid Build Coastguard Worker.endif
1161*c0909341SAndroid Build Coastguard Worker.ifnb \d5
1162*c0909341SAndroid Build Coastguard Worker        ld1             {\d5\wd}, [\s1], \strd
1163*c0909341SAndroid Build Coastguard Worker.endif
1164*c0909341SAndroid Build Coastguard Worker.ifnb \d6
1165*c0909341SAndroid Build Coastguard Worker        ld1             {\d6\wd}, [\s0], \strd
1166*c0909341SAndroid Build Coastguard Worker.endif
1167*c0909341SAndroid Build Coastguard Worker.endm
// load_h: load_slice with 16-bit elements - one halfword into lane 0 of each
// of d0..d6, alternating between pointers s0 and s1 with post-increment strd.
1168*c0909341SAndroid Build Coastguard Worker.macro load_h s0, s1, strd, d0, d1, d2, d3, d4, d5, d6
1169*c0909341SAndroid Build Coastguard Worker        load_slice      \s0, \s1, \strd, .h, \d0, \d1, \d2, \d3, \d4, \d5, \d6
1170*c0909341SAndroid Build Coastguard Worker.endm
// load_s: load_slice with 32-bit elements - one word into lane 0 of each of
// d0..d6, alternating between pointers s0 and s1 with post-increment strd.
1171*c0909341SAndroid Build Coastguard Worker.macro load_s s0, s1, strd, d0, d1, d2, d3, d4, d5, d6
1172*c0909341SAndroid Build Coastguard Worker        load_slice      \s0, \s1, \strd, .s, \d0, \d1, \d2, \d3, \d4, \d5, \d6
1173*c0909341SAndroid Build Coastguard Worker.endm
// load_8b: load_reg with the .8b arrangement - 8 bytes into each of d0..d6,
// alternating between pointers s0 and s1 with post-increment strd.
1174*c0909341SAndroid Build Coastguard Worker.macro load_8b s0, s1, strd, d0, d1, d2, d3, d4, d5, d6
1175*c0909341SAndroid Build Coastguard Worker        load_reg        \s0, \s1, \strd, .8b, \d0, \d1, \d2, \d3, \d4, \d5, \d6
1176*c0909341SAndroid Build Coastguard Worker.endm
// load_16b: load_reg with the .16b arrangement - 16 bytes into each of
// d0..d6, alternating between pointers s0 and s1 with post-increment strd.
1177*c0909341SAndroid Build Coastguard Worker.macro load_16b s0, s1, strd, d0, d1, d2, d3, d4, d5, d6
1178*c0909341SAndroid Build Coastguard Worker        load_reg        \s0, \s1, \strd, .16b, \d0, \d1, \d2, \d3, \d4, \d5, \d6
1179*c0909341SAndroid Build Coastguard Worker.endm
// interleave_1: in place, combine each register with its successor using
// trn1 on the arrangement wd: rN = trn1(rN, rN+1), which interleaves the
// even-numbered elements of rN with those of rN+1.
//   wd     - arrangement suffix appended to every register name
//   r0..r2 - mandatory; r0 and r1 are rewritten
//   r3, r4 - optional pair; when r3 is given, r2 and r3 are rewritten too
//            and r4 must be supplied as the last input
// The final register in the chain is only read. The instruction order
// matters: each rN+1 is consumed before it is overwritten by its own trn1.
1180*c0909341SAndroid Build Coastguard Worker.macro interleave_1 wd, r0, r1, r2, r3, r4
1181*c0909341SAndroid Build Coastguard Worker        trn1            \r0\wd, \r0\wd, \r1\wd
1182*c0909341SAndroid Build Coastguard Worker        trn1            \r1\wd, \r1\wd, \r2\wd
1183*c0909341SAndroid Build Coastguard Worker.ifnb \r3
1184*c0909341SAndroid Build Coastguard Worker        trn1            \r2\wd, \r2\wd, \r3\wd
1185*c0909341SAndroid Build Coastguard Worker        trn1            \r3\wd, \r3\wd, \r4\wd
1186*c0909341SAndroid Build Coastguard Worker.endif
1187*c0909341SAndroid Build Coastguard Worker.endm
// interleave_1_h: interleave_1 on four 16-bit lanes (.4h) per register.
1188*c0909341SAndroid Build Coastguard Worker.macro interleave_1_h r0, r1, r2, r3, r4
1189*c0909341SAndroid Build Coastguard Worker        interleave_1    .4h, \r0, \r1, \r2, \r3, \r4
1190*c0909341SAndroid Build Coastguard Worker.endm
// interleave_1_s: interleave_1 on two 32-bit lanes (.2s) per register.
1191*c0909341SAndroid Build Coastguard Worker.macro interleave_1_s r0, r1, r2, r3, r4
1192*c0909341SAndroid Build Coastguard Worker        interleave_1    .2s, \r0, \r1, \r2, \r3, \r4
1193*c0909341SAndroid Build Coastguard Worker.endm
// interleave_2: merge each register with the one two positions later.
//   wd       - arrangement suffix applied to every operand
//   r0..r5   - vector registers, all six required
// After expansion rN holds trn1(rN, rN+2) for N = 0..3. r4 and r5 are only
// read. r2 and r3 are consumed as second operands before being overwritten.
1194*c0909341SAndroid Build Coastguard Worker.macro interleave_2 wd, r0, r1, r2, r3, r4, r5
1195*c0909341SAndroid Build Coastguard Worker        trn1            \r0\wd,  \r0\wd, \r2\wd
1196*c0909341SAndroid Build Coastguard Worker        trn1            \r1\wd,  \r1\wd, \r3\wd
1197*c0909341SAndroid Build Coastguard Worker        trn1            \r2\wd,  \r2\wd, \r4\wd
1198*c0909341SAndroid Build Coastguard Worker        trn1            \r3\wd,  \r3\wd, \r5\wd
1199*c0909341SAndroid Build Coastguard Worker.endm
// interleave_2_s: interleave_2 on two 32-bit lanes (.2s) per register.
1200*c0909341SAndroid Build Coastguard Worker.macro interleave_2_s r0, r1, r2, r3, r4, r5
1201*c0909341SAndroid Build Coastguard Worker        interleave_2    .2s, \r0, \r1, \r2, \r3, \r4, \r5
1202*c0909341SAndroid Build Coastguard Worker.endm
// uxtl_b: zero-extend, in place, the low eight bytes of each register to
// eight 16-bit lanes (uxtl Vn.8h, Vn.8b).
//   r0, r1   - always widened
//   r2, r3   - optional, but must be supplied as a pair (only r2 is tested)
//   r4,r5,r6 - each optional and tested individually
1203*c0909341SAndroid Build Coastguard Worker.macro uxtl_b r0, r1, r2, r3, r4, r5, r6
1204*c0909341SAndroid Build Coastguard Worker        uxtl            \r0\().8h, \r0\().8b
1205*c0909341SAndroid Build Coastguard Worker        uxtl            \r1\().8h, \r1\().8b
1206*c0909341SAndroid Build Coastguard Worker.ifnb \r2
1207*c0909341SAndroid Build Coastguard Worker        uxtl            \r2\().8h, \r2\().8b
1208*c0909341SAndroid Build Coastguard Worker        uxtl            \r3\().8h, \r3\().8b
1209*c0909341SAndroid Build Coastguard Worker.endif
1210*c0909341SAndroid Build Coastguard Worker.ifnb \r4
1211*c0909341SAndroid Build Coastguard Worker        uxtl            \r4\().8h, \r4\().8b
1212*c0909341SAndroid Build Coastguard Worker.endif
1213*c0909341SAndroid Build Coastguard Worker.ifnb \r5
1214*c0909341SAndroid Build Coastguard Worker        uxtl            \r5\().8h, \r5\().8b
1215*c0909341SAndroid Build Coastguard Worker.endif
1216*c0909341SAndroid Build Coastguard Worker.ifnb \r6
1217*c0909341SAndroid Build Coastguard Worker        uxtl            \r6\().8h, \r6\().8b
1218*c0909341SAndroid Build Coastguard Worker.endif
1219*c0909341SAndroid Build Coastguard Worker.endm
// mul_mla_4tap: 4-tap multiply-accumulate.
//   d        - destination vector register
//   s0..s3   - source vector registers
//   wd       - arrangement suffix applied to d and s0..s3; the coefficients
//              are taken by element from v0.h[], so this is a 16-bit
//              arrangement
// Computes d = s0*v0.h[0] + s1*v0.h[1] + s2*v0.h[2] + s3*v0.h[3] per lane
// using non-saturating mul/mla. v0 must already hold the coefficients as
// 16-bit lanes. d is written by the first instruction, so it may alias s0
// but must not alias s1..s3.
1220*c0909341SAndroid Build Coastguard Worker.macro mul_mla_4tap d, s0, s1, s2, s3, wd
1221*c0909341SAndroid Build Coastguard Worker        mul             \d\wd,  \s0\wd,  v0.h[0]
1222*c0909341SAndroid Build Coastguard Worker        mla             \d\wd,  \s1\wd,  v0.h[1]
1223*c0909341SAndroid Build Coastguard Worker        mla             \d\wd,  \s2\wd,  v0.h[2]
1224*c0909341SAndroid Build Coastguard Worker        mla             \d\wd,  \s3\wd,  v0.h[3]
1225*c0909341SAndroid Build Coastguard Worker.endm
1226*c0909341SAndroid Build Coastguard Worker// Interleaving the mul/mla chains actually hurts performance
1227*c0909341SAndroid Build Coastguard Worker// significantly on Cortex A53, thus keeping mul/mla tightly
1228*c0909341SAndroid Build Coastguard Worker// chained like this.
// mul_mla_6tap_0_4h: 6-tap multiply-accumulate on four 16-bit lanes.
//   d0       - destination vector register
//   s0..s7   - source registers; only s1..s6 are used. s0 and s7 are
//              accepted but ignored, which keeps the argument list identical
//              to mul_mla_8tap_0_4h.
// Computes d0.4h = sum over i = 1..6 of s<i>.4h * v0.h[i].
// Coefficients v0.h[0] and v0.h[7] are not applied.
1229*c0909341SAndroid Build Coastguard Worker.macro mul_mla_6tap_0_4h d0, s0, s1, s2, s3, s4, s5, s6, s7
1230*c0909341SAndroid Build Coastguard Worker        mul             \d0\().4h, \s1\().4h, v0.h[1]
1231*c0909341SAndroid Build Coastguard Worker        mla             \d0\().4h, \s2\().4h, v0.h[2]
1232*c0909341SAndroid Build Coastguard Worker        mla             \d0\().4h, \s3\().4h, v0.h[3]
1233*c0909341SAndroid Build Coastguard Worker        mla             \d0\().4h, \s4\().4h, v0.h[4]
1234*c0909341SAndroid Build Coastguard Worker        mla             \d0\().4h, \s5\().4h, v0.h[5]
1235*c0909341SAndroid Build Coastguard Worker        mla             \d0\().4h, \s6\().4h, v0.h[6]
1236*c0909341SAndroid Build Coastguard Worker.endm
// mul_mla_6tap_0: 6-tap multiply-accumulate on eight 16-bit lanes.
//   d0       - destination vector register
//   s0..s7   - source registers; only s1..s6 are used (s0 and s7 ignored),
//              matching the argument list of mul_mla_8tap_0.
// Computes d0.8h = sum over i = 1..6 of s<i>.8h * v0.h[i].
1237*c0909341SAndroid Build Coastguard Worker.macro mul_mla_6tap_0 d0, s0, s1, s2, s3, s4, s5, s6, s7
1238*c0909341SAndroid Build Coastguard Worker        mul             \d0\().8h, \s1\().8h, v0.h[1]
1239*c0909341SAndroid Build Coastguard Worker        mla             \d0\().8h, \s2\().8h, v0.h[2]
1240*c0909341SAndroid Build Coastguard Worker        mla             \d0\().8h, \s3\().8h, v0.h[3]
1241*c0909341SAndroid Build Coastguard Worker        mla             \d0\().8h, \s4\().8h, v0.h[4]
1242*c0909341SAndroid Build Coastguard Worker        mla             \d0\().8h, \s5\().8h, v0.h[5]
1243*c0909341SAndroid Build Coastguard Worker        mla             \d0\().8h, \s6\().8h, v0.h[6]
1244*c0909341SAndroid Build Coastguard Worker.endm
// mul_mla_6tap_1: two 6-tap sums whose input windows are offset by one
// source register.
//   d0, d1   - destination vector registers
//   s0..s8   - source registers; s0 and s8 are accepted but unused, matching
//              the argument list of mul_mla_8tap_1.
// d0.8h = sum over i = 1..6 of s<i>.8h   * v0.h[i]
// d1.8h = sum over i = 1..6 of s<i+1>.8h * v0.h[i]
// The d0 chain finishes before the d1 chain starts; the two chains are
// deliberately not interleaved.
1245*c0909341SAndroid Build Coastguard Worker.macro mul_mla_6tap_1 d0, d1, s0, s1, s2, s3, s4, s5, s6, s7, s8
1246*c0909341SAndroid Build Coastguard Worker        mul             \d0\().8h, \s1\().8h, v0.h[1]
1247*c0909341SAndroid Build Coastguard Worker        mla             \d0\().8h, \s2\().8h, v0.h[2]
1248*c0909341SAndroid Build Coastguard Worker        mla             \d0\().8h, \s3\().8h, v0.h[3]
1249*c0909341SAndroid Build Coastguard Worker        mla             \d0\().8h, \s4\().8h, v0.h[4]
1250*c0909341SAndroid Build Coastguard Worker        mla             \d0\().8h, \s5\().8h, v0.h[5]
1251*c0909341SAndroid Build Coastguard Worker        mla             \d0\().8h, \s6\().8h, v0.h[6]
1252*c0909341SAndroid Build Coastguard Worker        mul             \d1\().8h, \s2\().8h, v0.h[1]
1253*c0909341SAndroid Build Coastguard Worker        mla             \d1\().8h, \s3\().8h, v0.h[2]
1254*c0909341SAndroid Build Coastguard Worker        mla             \d1\().8h, \s4\().8h, v0.h[3]
1255*c0909341SAndroid Build Coastguard Worker        mla             \d1\().8h, \s5\().8h, v0.h[4]
1256*c0909341SAndroid Build Coastguard Worker        mla             \d1\().8h, \s6\().8h, v0.h[5]
1257*c0909341SAndroid Build Coastguard Worker        mla             \d1\().8h, \s7\().8h, v0.h[6]
1258*c0909341SAndroid Build Coastguard Worker.endm
// mul_mla_6tap_2: two 6-tap sums whose input windows are offset by two
// source registers.
//   d0, d1   - destination vector registers
//   s0..s9   - source registers; s0 and s9 are accepted but unused, matching
//              the argument list of mul_mla_8tap_2.
// d0.8h = sum over i = 1..6 of s<i>.8h   * v0.h[i]
// d1.8h = sum over i = 1..6 of s<i+2>.8h * v0.h[i]
// The d0 chain finishes before the d1 chain starts; the two chains are
// deliberately not interleaved.
1259*c0909341SAndroid Build Coastguard Worker.macro mul_mla_6tap_2 d0, d1, s0, s1, s2, s3, s4, s5, s6, s7, s8, s9
1260*c0909341SAndroid Build Coastguard Worker        mul             \d0\().8h, \s1\().8h, v0.h[1]
1261*c0909341SAndroid Build Coastguard Worker        mla             \d0\().8h, \s2\().8h, v0.h[2]
1262*c0909341SAndroid Build Coastguard Worker        mla             \d0\().8h, \s3\().8h, v0.h[3]
1263*c0909341SAndroid Build Coastguard Worker        mla             \d0\().8h, \s4\().8h, v0.h[4]
1264*c0909341SAndroid Build Coastguard Worker        mla             \d0\().8h, \s5\().8h, v0.h[5]
1265*c0909341SAndroid Build Coastguard Worker        mla             \d0\().8h, \s6\().8h, v0.h[6]
1266*c0909341SAndroid Build Coastguard Worker        mul             \d1\().8h, \s3\().8h, v0.h[1]
1267*c0909341SAndroid Build Coastguard Worker        mla             \d1\().8h, \s4\().8h, v0.h[2]
1268*c0909341SAndroid Build Coastguard Worker        mla             \d1\().8h, \s5\().8h, v0.h[3]
1269*c0909341SAndroid Build Coastguard Worker        mla             \d1\().8h, \s6\().8h, v0.h[4]
1270*c0909341SAndroid Build Coastguard Worker        mla             \d1\().8h, \s7\().8h, v0.h[5]
1271*c0909341SAndroid Build Coastguard Worker        mla             \d1\().8h, \s8\().8h, v0.h[6]
1272*c0909341SAndroid Build Coastguard Worker.endm
// mul_mla_8tap_0_4h: 8-tap multiply-accumulate on four 16-bit lanes.
//   d0       - destination vector register
//   s0..s7   - source vector registers, all used
// Computes d0.4h = sum over i = 0..7 of s<i>.4h * v0.h[i], with the eight
// coefficients read by element from v0.
1273*c0909341SAndroid Build Coastguard Worker.macro mul_mla_8tap_0_4h d0, s0, s1, s2, s3, s4, s5, s6, s7
1274*c0909341SAndroid Build Coastguard Worker        mul             \d0\().4h, \s0\().4h, v0.h[0]
1275*c0909341SAndroid Build Coastguard Worker        mla             \d0\().4h, \s1\().4h, v0.h[1]
1276*c0909341SAndroid Build Coastguard Worker        mla             \d0\().4h, \s2\().4h, v0.h[2]
1277*c0909341SAndroid Build Coastguard Worker        mla             \d0\().4h, \s3\().4h, v0.h[3]
1278*c0909341SAndroid Build Coastguard Worker        mla             \d0\().4h, \s4\().4h, v0.h[4]
1279*c0909341SAndroid Build Coastguard Worker        mla             \d0\().4h, \s5\().4h, v0.h[5]
1280*c0909341SAndroid Build Coastguard Worker        mla             \d0\().4h, \s6\().4h, v0.h[6]
1281*c0909341SAndroid Build Coastguard Worker        mla             \d0\().4h, \s7\().4h, v0.h[7]
1282*c0909341SAndroid Build Coastguard Worker.endm
// mul_mla_8tap_0: 8-tap multiply-accumulate on eight 16-bit lanes.
//   d0       - destination vector register
//   s0..s7   - source vector registers, all used
// Computes d0.8h = sum over i = 0..7 of s<i>.8h * v0.h[i].
1283*c0909341SAndroid Build Coastguard Worker.macro mul_mla_8tap_0 d0, s0, s1, s2, s3, s4, s5, s6, s7
1284*c0909341SAndroid Build Coastguard Worker        mul             \d0\().8h, \s0\().8h, v0.h[0]
1285*c0909341SAndroid Build Coastguard Worker        mla             \d0\().8h, \s1\().8h, v0.h[1]
1286*c0909341SAndroid Build Coastguard Worker        mla             \d0\().8h, \s2\().8h, v0.h[2]
1287*c0909341SAndroid Build Coastguard Worker        mla             \d0\().8h, \s3\().8h, v0.h[3]
1288*c0909341SAndroid Build Coastguard Worker        mla             \d0\().8h, \s4\().8h, v0.h[4]
1289*c0909341SAndroid Build Coastguard Worker        mla             \d0\().8h, \s5\().8h, v0.h[5]
1290*c0909341SAndroid Build Coastguard Worker        mla             \d0\().8h, \s6\().8h, v0.h[6]
1291*c0909341SAndroid Build Coastguard Worker        mla             \d0\().8h, \s7\().8h, v0.h[7]
1292*c0909341SAndroid Build Coastguard Worker.endm
// mul_mla_8tap_1: two 8-tap sums whose input windows are offset by one
// source register.
//   d0, d1   - destination vector registers
//   s0..s8   - source vector registers
// d0.8h = sum over i = 0..7 of s<i>.8h   * v0.h[i]
// d1.8h = sum over i = 0..7 of s<i+1>.8h * v0.h[i]
// The d0 chain finishes before the d1 chain starts; the two chains are
// deliberately not interleaved.
1293*c0909341SAndroid Build Coastguard Worker.macro mul_mla_8tap_1 d0, d1, s0, s1, s2, s3, s4, s5, s6, s7, s8
1294*c0909341SAndroid Build Coastguard Worker        mul             \d0\().8h, \s0\().8h, v0.h[0]
1295*c0909341SAndroid Build Coastguard Worker        mla             \d0\().8h, \s1\().8h, v0.h[1]
1296*c0909341SAndroid Build Coastguard Worker        mla             \d0\().8h, \s2\().8h, v0.h[2]
1297*c0909341SAndroid Build Coastguard Worker        mla             \d0\().8h, \s3\().8h, v0.h[3]
1298*c0909341SAndroid Build Coastguard Worker        mla             \d0\().8h, \s4\().8h, v0.h[4]
1299*c0909341SAndroid Build Coastguard Worker        mla             \d0\().8h, \s5\().8h, v0.h[5]
1300*c0909341SAndroid Build Coastguard Worker        mla             \d0\().8h, \s6\().8h, v0.h[6]
1301*c0909341SAndroid Build Coastguard Worker        mla             \d0\().8h, \s7\().8h, v0.h[7]
1302*c0909341SAndroid Build Coastguard Worker        mul             \d1\().8h, \s1\().8h, v0.h[0]
1303*c0909341SAndroid Build Coastguard Worker        mla             \d1\().8h, \s2\().8h, v0.h[1]
1304*c0909341SAndroid Build Coastguard Worker        mla             \d1\().8h, \s3\().8h, v0.h[2]
1305*c0909341SAndroid Build Coastguard Worker        mla             \d1\().8h, \s4\().8h, v0.h[3]
1306*c0909341SAndroid Build Coastguard Worker        mla             \d1\().8h, \s5\().8h, v0.h[4]
1307*c0909341SAndroid Build Coastguard Worker        mla             \d1\().8h, \s6\().8h, v0.h[5]
1308*c0909341SAndroid Build Coastguard Worker        mla             \d1\().8h, \s7\().8h, v0.h[6]
1309*c0909341SAndroid Build Coastguard Worker        mla             \d1\().8h, \s8\().8h, v0.h[7]
1310*c0909341SAndroid Build Coastguard Worker.endm
// mul_mla_8tap_2: two 8-tap sums whose input windows are offset by two
// source registers.
//   d0, d1   - destination vector registers
//   s0..s9   - source vector registers
// d0.8h = sum over i = 0..7 of s<i>.8h   * v0.h[i]
// d1.8h = sum over i = 0..7 of s<i+2>.8h * v0.h[i]
// The d0 chain finishes before the d1 chain starts; the two chains are
// deliberately not interleaved.
1311*c0909341SAndroid Build Coastguard Worker.macro mul_mla_8tap_2 d0, d1, s0, s1, s2, s3, s4, s5, s6, s7, s8, s9
1312*c0909341SAndroid Build Coastguard Worker        mul             \d0\().8h, \s0\().8h, v0.h[0]
1313*c0909341SAndroid Build Coastguard Worker        mla             \d0\().8h, \s1\().8h, v0.h[1]
1314*c0909341SAndroid Build Coastguard Worker        mla             \d0\().8h, \s2\().8h, v0.h[2]
1315*c0909341SAndroid Build Coastguard Worker        mla             \d0\().8h, \s3\().8h, v0.h[3]
1316*c0909341SAndroid Build Coastguard Worker        mla             \d0\().8h, \s4\().8h, v0.h[4]
1317*c0909341SAndroid Build Coastguard Worker        mla             \d0\().8h, \s5\().8h, v0.h[5]
1318*c0909341SAndroid Build Coastguard Worker        mla             \d0\().8h, \s6\().8h, v0.h[6]
1319*c0909341SAndroid Build Coastguard Worker        mla             \d0\().8h, \s7\().8h, v0.h[7]
1320*c0909341SAndroid Build Coastguard Worker        mul             \d1\().8h, \s2\().8h, v0.h[0]
1321*c0909341SAndroid Build Coastguard Worker        mla             \d1\().8h, \s3\().8h, v0.h[1]
1322*c0909341SAndroid Build Coastguard Worker        mla             \d1\().8h, \s4\().8h, v0.h[2]
1323*c0909341SAndroid Build Coastguard Worker        mla             \d1\().8h, \s5\().8h, v0.h[3]
1324*c0909341SAndroid Build Coastguard Worker        mla             \d1\().8h, \s6\().8h, v0.h[4]
1325*c0909341SAndroid Build Coastguard Worker        mla             \d1\().8h, \s7\().8h, v0.h[5]
1326*c0909341SAndroid Build Coastguard Worker        mla             \d1\().8h, \s8\().8h, v0.h[6]
1327*c0909341SAndroid Build Coastguard Worker        mla             \d1\().8h, \s9\().8h, v0.h[7]
1328*c0909341SAndroid Build Coastguard Worker.endm
// sqrshrun_b: narrow signed 16-bit lanes to unsigned 8-bit in place, with
// rounding and saturation (sqrshrun Vn.8b, Vn.8h, #shift).
//   shift    - immediate right-shift amount (written without the '#')
//   r0       - always processed
//   r1       - optional
//   r2, r3   - optional, but must be supplied as a pair (only r2 is tested)
// Each result is written to the low 64 bits of the register it came from.
1329*c0909341SAndroid Build Coastguard Worker.macro sqrshrun_b shift, r0, r1, r2, r3
1330*c0909341SAndroid Build Coastguard Worker        sqrshrun        \r0\().8b, \r0\().8h,  #\shift
1331*c0909341SAndroid Build Coastguard Worker.ifnb \r1
1332*c0909341SAndroid Build Coastguard Worker        sqrshrun        \r1\().8b, \r1\().8h,  #\shift
1333*c0909341SAndroid Build Coastguard Worker.endif
1334*c0909341SAndroid Build Coastguard Worker.ifnb \r2
1335*c0909341SAndroid Build Coastguard Worker        sqrshrun        \r2\().8b, \r2\().8h,  #\shift
1336*c0909341SAndroid Build Coastguard Worker        sqrshrun        \r3\().8b, \r3\().8h,  #\shift
1337*c0909341SAndroid Build Coastguard Worker.endif
1338*c0909341SAndroid Build Coastguard Worker.endm
// srshr_h: signed rounding shift right of eight 16-bit lanes, in place
// (srshr Vn.8h, Vn.8h, #shift).
//   shift    - immediate right-shift amount (written without the '#')
//   r0       - always processed
//   r1       - optional
//   r2, r3   - optional, but must be supplied as a pair (only r2 is tested)
1339*c0909341SAndroid Build Coastguard Worker.macro srshr_h shift, r0, r1, r2, r3
1340*c0909341SAndroid Build Coastguard Worker        srshr           \r0\().8h, \r0\().8h,  #\shift
1341*c0909341SAndroid Build Coastguard Worker.ifnb \r1
1342*c0909341SAndroid Build Coastguard Worker        srshr           \r1\().8h, \r1\().8h,  #\shift
1343*c0909341SAndroid Build Coastguard Worker.endif
1344*c0909341SAndroid Build Coastguard Worker.ifnb \r2
1345*c0909341SAndroid Build Coastguard Worker        srshr           \r2\().8h, \r2\().8h,  #\shift
1346*c0909341SAndroid Build Coastguard Worker        srshr           \r3\().8h, \r3\().8h,  #\shift
1347*c0909341SAndroid Build Coastguard Worker.endif
1348*c0909341SAndroid Build Coastguard Worker.endm
// st_h: store 16-bit lanes of one register, alternating between the two
// hard-coded pointers x0 and x8, each post-incremented by strd.
//   strd     - post-increment applied after every store
//   reg      - source vector register
//   lanes    - number of lanes to store; values above 2 store lanes 2 and 3
//              as well
// Lane 0 -> [x0], lane 1 -> [x8], then (if lanes > 2) lane 2 -> [x0],
// lane 3 -> [x8].
// NOTE(review): x0/x8 are presumably the even-row and odd-row destination
// pointers set up by the caller - confirm at the call sites.
1349*c0909341SAndroid Build Coastguard Worker.macro st_h strd, reg, lanes
1350*c0909341SAndroid Build Coastguard Worker        st1             {\reg\().h}[0], [x0], \strd
1351*c0909341SAndroid Build Coastguard Worker        st1             {\reg\().h}[1], [x8], \strd
1352*c0909341SAndroid Build Coastguard Worker.if \lanes > 2
1353*c0909341SAndroid Build Coastguard Worker        st1             {\reg\().h}[2], [x0], \strd
1354*c0909341SAndroid Build Coastguard Worker        st1             {\reg\().h}[3], [x8], \strd
1355*c0909341SAndroid Build Coastguard Worker.endif
1356*c0909341SAndroid Build Coastguard Worker.endm
// st_s: store the two low 32-bit lanes of each register, lane 0 to [x0] and
// lane 1 to [x8], post-incrementing each pointer by strd.
//   strd     - post-increment applied after every store
//   r0       - always stored
//   r1       - optional second register, stored the same way after r0
1357*c0909341SAndroid Build Coastguard Worker.macro st_s strd, r0, r1
1358*c0909341SAndroid Build Coastguard Worker        st1             {\r0\().s}[0], [x0], \strd
1359*c0909341SAndroid Build Coastguard Worker        st1             {\r0\().s}[1], [x8], \strd
1360*c0909341SAndroid Build Coastguard Worker.ifnb \r1
1361*c0909341SAndroid Build Coastguard Worker        st1             {\r1\().s}[0], [x0], \strd
1362*c0909341SAndroid Build Coastguard Worker        st1             {\r1\().s}[1], [x8], \strd
1363*c0909341SAndroid Build Coastguard Worker.endif
1364*c0909341SAndroid Build Coastguard Worker.endm
// st_d: store the two 64-bit halves of each register, low half to [x0] and
// high half to [x8], post-incrementing each pointer by strd.
//   strd     - post-increment applied after every store
//   r0       - always stored
//   r1       - optional second register, stored the same way after r0
// The low half is written with a whole-register .8b store and the high half
// with a .d[1] lane store.
1365*c0909341SAndroid Build Coastguard Worker.macro st_d strd, r0, r1
1366*c0909341SAndroid Build Coastguard Worker        st1             {\r0\().8b},   [x0], \strd
1367*c0909341SAndroid Build Coastguard Worker        st1             {\r0\().d}[1], [x8], \strd
1368*c0909341SAndroid Build Coastguard Worker.ifnb \r1
1369*c0909341SAndroid Build Coastguard Worker        st1             {\r1\().8b},   [x0], \strd
1370*c0909341SAndroid Build Coastguard Worker        st1             {\r1\().d}[1], [x8], \strd
1371*c0909341SAndroid Build Coastguard Worker.endif
1372*c0909341SAndroid Build Coastguard Worker.endm
// shift_store_4: final scaling and store for results packed two rows per
// register.
//   type     - "put" selects the 8-bit output path; any other value takes
//              the 16-bit path
//   strd     - post-increment stride for the x0/x8 stores
//   r0, r1   - result registers (r1 optional, as in the helper macros)
// put:   sqrshrun by 6 to unsigned bytes, then st_s (32 bits per row).
// other: srshr by 2 on the 16-bit lanes, then st_d (64 bits per row).
1373*c0909341SAndroid Build Coastguard Worker.macro shift_store_4 type, strd, r0, r1
1374*c0909341SAndroid Build Coastguard Worker.ifc \type, put
1375*c0909341SAndroid Build Coastguard Worker        sqrshrun_b      6,     \r0, \r1
1376*c0909341SAndroid Build Coastguard Worker        st_s            \strd, \r0, \r1
1377*c0909341SAndroid Build Coastguard Worker.else
1378*c0909341SAndroid Build Coastguard Worker        srshr_h         2,     \r0, \r1
1379*c0909341SAndroid Build Coastguard Worker        st_d            \strd, \r0, \r1
1380*c0909341SAndroid Build Coastguard Worker.endif
1381*c0909341SAndroid Build Coastguard Worker.endm
// st_reg: store whole vector registers, alternating between [x0] and [x8],
// each post-incremented by strd.
//   strd     - post-increment applied after every store
//   wd       - arrangement suffix for the stored registers (e.g. .8b, .16b)
//   r0, r1   - always stored (r0 -> [x0], r1 -> [x8])
//   r2, r3   - optional pair (only r2 is tested)
//   r4..r7   - optional group of four (only r4 is tested)
// Even-numbered arguments go to x0, odd-numbered arguments to x8.
1382*c0909341SAndroid Build Coastguard Worker.macro st_reg strd, wd, r0, r1, r2, r3, r4, r5, r6, r7
1383*c0909341SAndroid Build Coastguard Worker        st1             {\r0\wd}, [x0], \strd
1384*c0909341SAndroid Build Coastguard Worker        st1             {\r1\wd}, [x8], \strd
1385*c0909341SAndroid Build Coastguard Worker.ifnb \r2
1386*c0909341SAndroid Build Coastguard Worker        st1             {\r2\wd}, [x0], \strd
1387*c0909341SAndroid Build Coastguard Worker        st1             {\r3\wd}, [x8], \strd
1388*c0909341SAndroid Build Coastguard Worker.endif
1389*c0909341SAndroid Build Coastguard Worker.ifnb \r4
1390*c0909341SAndroid Build Coastguard Worker        st1             {\r4\wd}, [x0], \strd
1391*c0909341SAndroid Build Coastguard Worker        st1             {\r5\wd}, [x8], \strd
1392*c0909341SAndroid Build Coastguard Worker        st1             {\r6\wd}, [x0], \strd
1393*c0909341SAndroid Build Coastguard Worker        st1             {\r7\wd}, [x8], \strd
1394*c0909341SAndroid Build Coastguard Worker.endif
1395*c0909341SAndroid Build Coastguard Worker.endm
// st_8b: st_reg with the .8b arrangement (64-bit stores alternating
// between [x0] and [x8]).
1396*c0909341SAndroid Build Coastguard Worker.macro st_8b strd, r0, r1, r2, r3, r4, r5, r6, r7
1397*c0909341SAndroid Build Coastguard Worker        st_reg          \strd, .8b,  \r0, \r1, \r2, \r3, \r4, \r5, \r6, \r7
1398*c0909341SAndroid Build Coastguard Worker.endm
// st_16b: st_reg with the .16b arrangement (128-bit stores alternating
// between [x0] and [x8]).
1399*c0909341SAndroid Build Coastguard Worker.macro st_16b strd, r0, r1, r2, r3, r4, r5, r6, r7
1400*c0909341SAndroid Build Coastguard Worker        st_reg          \strd, .16b, \r0, \r1, \r2, \r3, \r4, \r5, \r6, \r7
1401*c0909341SAndroid Build Coastguard Worker.endm
// shift_store_8: final scaling and store for results held one row of eight
// 16-bit lanes per register.
//   type     - "put" selects the 8-bit output path; any other value takes
//              the 16-bit path
//   strd     - post-increment stride for the x0/x8 stores
//   r0..r3   - result registers (optionality follows the helper macros)
// put:   sqrshrun by 6 to unsigned bytes, then st_8b (8 bytes per row).
// other: srshr by 2 on the 16-bit lanes, then st_16b (16 bytes per row).
1402*c0909341SAndroid Build Coastguard Worker.macro shift_store_8 type, strd, r0, r1, r2, r3
1403*c0909341SAndroid Build Coastguard Worker.ifc \type, put
1404*c0909341SAndroid Build Coastguard Worker        sqrshrun_b      6,     \r0, \r1, \r2, \r3
1405*c0909341SAndroid Build Coastguard Worker        st_8b           \strd, \r0, \r1, \r2, \r3
1406*c0909341SAndroid Build Coastguard Worker.else
1407*c0909341SAndroid Build Coastguard Worker        srshr_h         2,     \r0, \r1, \r2, \r3
1408*c0909341SAndroid Build Coastguard Worker        st_16b          \strd, \r0, \r1, \r2, \r3
1409*c0909341SAndroid Build Coastguard Worker.endif
1410*c0909341SAndroid Build Coastguard Worker.endm
// shift_store_16: final scaling and store for two rows of sixteen results,
// each row held as a pair of 8h registers (r0:r1 and r2:r3).
//   type     - "put" selects the 8-bit output path; any other value takes
//              the 16-bit path
//   strd     - post-increment stride for the x0/x8 stores
//   r0..r3   - result registers, all four required
// put:   sqrshrun/sqrshrun2 by 6 pack r0:r1 into r0.16b and r2:r3 into
//        r2.16b; st_16b then stores r0 to [x0] and r2 to [x8].
// other: srshr by 2 on all four registers; the pair {r0, r1} is stored to
//        [x0] and {r2, r3} to [x8] with two-register st1 (32 bytes each).
1411*c0909341SAndroid Build Coastguard Worker.macro shift_store_16 type, strd, r0, r1, r2, r3
1412*c0909341SAndroid Build Coastguard Worker.ifc \type, put
1413*c0909341SAndroid Build Coastguard Worker        sqrshrun        \r0\().8b,  \r0\().8h, #6
1414*c0909341SAndroid Build Coastguard Worker        sqrshrun2       \r0\().16b, \r1\().8h, #6
1415*c0909341SAndroid Build Coastguard Worker        sqrshrun        \r2\().8b,  \r2\().8h, #6
1416*c0909341SAndroid Build Coastguard Worker        sqrshrun2       \r2\().16b, \r3\().8h, #6
1417*c0909341SAndroid Build Coastguard Worker        st_16b          \strd, \r0, \r2
1418*c0909341SAndroid Build Coastguard Worker.else
1419*c0909341SAndroid Build Coastguard Worker        srshr_h         2,     \r0, \r1, \r2, \r3
1420*c0909341SAndroid Build Coastguard Worker        st1             {\r0\().8h, \r1\().8h}, [x0], \strd
1421*c0909341SAndroid Build Coastguard Worker        st1             {\r2\().8h, \r3\().8h}, [x8], \strd
1422*c0909341SAndroid Build Coastguard Worker.endif
1423*c0909341SAndroid Build Coastguard Worker.endm
1424*c0909341SAndroid Build Coastguard Worker
// make_8tap_fn: emit one exported entry point named
// <op>_8tap_<type>_8bpc_neon.
//   op       - name prefix, also used to form the branch target
//   type     - filter-combination name embedded in the symbol
//   type_h   - value loaded into x8 (horizontal filter selector)
//   type_v   - value loaded into x9 (vertical filter selector)
//   taps     - tap-count name used to form the branch target <op>_<taps>_neon
// The stub only sets x8/x9 and tail-branches (b, not bl) to the shared
// implementation, so that implementation returns directly to the original
// caller. function/endfunc are macros defined outside this section.
1425*c0909341SAndroid Build Coastguard Worker.macro make_8tap_fn op, type, type_h, type_v, taps
1426*c0909341SAndroid Build Coastguard Workerfunction \op\()_8tap_\type\()_8bpc_neon, export=1
1427*c0909341SAndroid Build Coastguard Worker        mov             x8,  \type_h
1428*c0909341SAndroid Build Coastguard Worker        mov             x9,  \type_v
1429*c0909341SAndroid Build Coastguard Worker        b               \op\()_\taps\()_neon
1430*c0909341SAndroid Build Coastguard Workerendfunc
1431*c0909341SAndroid Build Coastguard Worker.endm
1432*c0909341SAndroid Build Coastguard Worker
// Packed filter-type selectors, passed to filter_fn in x8/x9 by the stubs
// that make_8tap_fn generates. Each constant holds two 7-bit fields, one in
// bits 7-13 and one in bits 0-6. filter_fn multiplies mx/my by 0x4081 and
// adds the selector; its horizontal path then uses the low field when
// w <= 4 and the bits 7-13 field otherwise as the index into
// mc_subpel_filters.
// NOTE(review): the factor 15 is presumably the number of table rows per
// filter set - confirm against the mc_subpel_filters definition.
1433*c0909341SAndroid Build Coastguard Worker// No spaces in these expressions, due to gas-preprocessor.
1434*c0909341SAndroid Build Coastguard Worker#define REGULAR ((0*15<<7)|3*15)
1435*c0909341SAndroid Build Coastguard Worker#define SMOOTH  ((1*15<<7)|4*15)
1436*c0909341SAndroid Build Coastguard Worker#define SHARP   ((2*15<<7)|3*15)
1437*c0909341SAndroid Build Coastguard Worker
1438*c0909341SAndroid Build Coastguard Worker.macro filter_fn type, dst, d_strd, src, s_strd, w, h, mx, xmx, my, xmy, ds2, sr2, shift_hv, taps
1439*c0909341SAndroid Build Coastguard Workerfunction \type\()_\taps\()_neon
1440*c0909341SAndroid Build Coastguard Worker        mov             w10,  #0x4081  // (1 << 14) | (1 << 7) | (1 << 0)
1441*c0909341SAndroid Build Coastguard Worker        mul             \mx,  \mx, w10
1442*c0909341SAndroid Build Coastguard Worker        mul             \my,  \my, w10
1443*c0909341SAndroid Build Coastguard Worker        add             \mx,  \mx, w8 // mx, 8tap_h, 4tap_h
1444*c0909341SAndroid Build Coastguard Worker        add             \my,  \my, w9 // my, 8tap_v, 4tap_v
1445*c0909341SAndroid Build Coastguard Worker.ifc \type, prep
1446*c0909341SAndroid Build Coastguard Worker        uxtw            \d_strd, \w
1447*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd, \d_strd, #1
1448*c0909341SAndroid Build Coastguard Worker.endif
1449*c0909341SAndroid Build Coastguard Worker
1450*c0909341SAndroid Build Coastguard Worker        clz             w8,  \w
1451*c0909341SAndroid Build Coastguard Worker        tst             \mx, #(0x7f << 14)
1452*c0909341SAndroid Build Coastguard Worker        sub             w8,  w8,  #24
1453*c0909341SAndroid Build Coastguard Worker        movrel          x10, X(mc_subpel_filters), -8
1454*c0909341SAndroid Build Coastguard Worker        b.ne            L(\type\()_\taps\()_h)
1455*c0909341SAndroid Build Coastguard Worker        tst             \my, #(0x7f << 14)
1456*c0909341SAndroid Build Coastguard Worker        b.ne            L(\type\()_\taps\()_v)
1457*c0909341SAndroid Build Coastguard Worker        b               \type\()_neon
1458*c0909341SAndroid Build Coastguard Worker
1459*c0909341SAndroid Build Coastguard WorkerL(\type\()_\taps\()_h):
1460*c0909341SAndroid Build Coastguard Worker        cmp             \w,  #4
1461*c0909341SAndroid Build Coastguard Worker        ubfx            w9,  \mx, #7, #7
1462*c0909341SAndroid Build Coastguard Worker        and             \mx, \mx, #0x7f
1463*c0909341SAndroid Build Coastguard Worker        b.le            4f
1464*c0909341SAndroid Build Coastguard Worker        mov             \mx,  w9
1465*c0909341SAndroid Build Coastguard Worker4:
1466*c0909341SAndroid Build Coastguard Worker        tst             \my,  #(0x7f << 14)
1467*c0909341SAndroid Build Coastguard Worker        add             \xmx, x10, \mx, uxtw #3
1468*c0909341SAndroid Build Coastguard Worker        b.ne            L(\type\()_\taps\()_hv)
1469*c0909341SAndroid Build Coastguard Worker
1470*c0909341SAndroid Build Coastguard Worker        movrel          x9,  \type\()_\taps\()_h_tbl
1471*c0909341SAndroid Build Coastguard Worker        ldrsw           x8,  [x9, x8, lsl #2]
1472*c0909341SAndroid Build Coastguard Worker        add             x9,  x9,  x8
1473*c0909341SAndroid Build Coastguard Worker        br              x9
1474*c0909341SAndroid Build Coastguard Worker
1475*c0909341SAndroid Build Coastguard Worker20:     // 2xN h
1476*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
1477*c0909341SAndroid Build Coastguard Worker.ifc \type, put
1478*c0909341SAndroid Build Coastguard Worker        ldur            s0,  [\xmx, #2]
1479*c0909341SAndroid Build Coastguard Worker        sub             \src,  \src,  #1
1480*c0909341SAndroid Build Coastguard Worker        add             \ds2,  \dst,  \d_strd
1481*c0909341SAndroid Build Coastguard Worker        add             \sr2,  \src,  \s_strd
1482*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd,  \d_strd,  #1
1483*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd,  \s_strd,  #1
1484*c0909341SAndroid Build Coastguard Worker        sxtl            v0.8h,  v0.8b
1485*c0909341SAndroid Build Coastguard Worker2:
1486*c0909341SAndroid Build Coastguard Worker        ld1             {v4.8b},  [\src], \s_strd
1487*c0909341SAndroid Build Coastguard Worker        ld1             {v6.8b},  [\sr2], \s_strd
1488*c0909341SAndroid Build Coastguard Worker        uxtl            v4.8h,  v4.8b
1489*c0909341SAndroid Build Coastguard Worker        uxtl            v6.8h,  v6.8b
1490*c0909341SAndroid Build Coastguard Worker        ext             v5.16b, v4.16b, v4.16b, #2
1491*c0909341SAndroid Build Coastguard Worker        ext             v7.16b, v6.16b, v6.16b, #2
1492*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2
1493*c0909341SAndroid Build Coastguard Worker        trn1            v3.2s,  v4.2s,  v6.2s
1494*c0909341SAndroid Build Coastguard Worker        trn2            v6.2s,  v4.2s,  v6.2s
1495*c0909341SAndroid Build Coastguard Worker        trn1            v4.2s,  v5.2s,  v7.2s
1496*c0909341SAndroid Build Coastguard Worker        trn2            v7.2s,  v5.2s,  v7.2s
1497*c0909341SAndroid Build Coastguard Worker        mul             v3.4h,  v3.4h,  v0.h[0]
1498*c0909341SAndroid Build Coastguard Worker        mla             v3.4h,  v4.4h,  v0.h[1]
1499*c0909341SAndroid Build Coastguard Worker        mla             v3.4h,  v6.4h,  v0.h[2]
1500*c0909341SAndroid Build Coastguard Worker        mla             v3.4h,  v7.4h,  v0.h[3]
1501*c0909341SAndroid Build Coastguard Worker        srshr           v3.4h,  v3.4h,  #2
1502*c0909341SAndroid Build Coastguard Worker        sqrshrun        v3.8b,  v3.8h,  #4
1503*c0909341SAndroid Build Coastguard Worker        st1             {v3.h}[0], [\dst], \d_strd
1504*c0909341SAndroid Build Coastguard Worker        st1             {v3.h}[1], [\ds2], \d_strd
1505*c0909341SAndroid Build Coastguard Worker        b.gt            2b
1506*c0909341SAndroid Build Coastguard Worker        ret
1507*c0909341SAndroid Build Coastguard Worker.endif
1508*c0909341SAndroid Build Coastguard Worker
1509*c0909341SAndroid Build Coastguard Worker40:     // 4xN h
1510*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
1511*c0909341SAndroid Build Coastguard Worker        ldur            s0,  [\xmx, #2]
1512*c0909341SAndroid Build Coastguard Worker        sub             \src,  \src,  #1
1513*c0909341SAndroid Build Coastguard Worker        add             \ds2,  \dst,  \d_strd
1514*c0909341SAndroid Build Coastguard Worker        add             \sr2,  \src,  \s_strd
1515*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd,  \d_strd,  #1
1516*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd,  \s_strd,  #1
1517*c0909341SAndroid Build Coastguard Worker        sxtl            v0.8h,  v0.8b
1518*c0909341SAndroid Build Coastguard Worker4:
1519*c0909341SAndroid Build Coastguard Worker        ld1             {v16.8b}, [\src], \s_strd
1520*c0909341SAndroid Build Coastguard Worker        ld1             {v20.8b}, [\sr2], \s_strd
1521*c0909341SAndroid Build Coastguard Worker        uxtl            v16.8h,  v16.8b
1522*c0909341SAndroid Build Coastguard Worker        uxtl            v20.8h,  v20.8b
1523*c0909341SAndroid Build Coastguard Worker        ext             v17.16b, v16.16b, v16.16b, #2
1524*c0909341SAndroid Build Coastguard Worker        ext             v18.16b, v16.16b, v16.16b, #4
1525*c0909341SAndroid Build Coastguard Worker        ext             v19.16b, v16.16b, v16.16b, #6
1526*c0909341SAndroid Build Coastguard Worker        ext             v21.16b, v20.16b, v20.16b, #2
1527*c0909341SAndroid Build Coastguard Worker        ext             v22.16b, v20.16b, v20.16b, #4
1528*c0909341SAndroid Build Coastguard Worker        ext             v23.16b, v20.16b, v20.16b, #6
1529*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2
1530*c0909341SAndroid Build Coastguard Worker        mul             v16.4h,  v16.4h,  v0.h[0]
1531*c0909341SAndroid Build Coastguard Worker        mla             v16.4h,  v17.4h,  v0.h[1]
1532*c0909341SAndroid Build Coastguard Worker        mla             v16.4h,  v18.4h,  v0.h[2]
1533*c0909341SAndroid Build Coastguard Worker        mla             v16.4h,  v19.4h,  v0.h[3]
1534*c0909341SAndroid Build Coastguard Worker        mul             v20.4h,  v20.4h,  v0.h[0]
1535*c0909341SAndroid Build Coastguard Worker        mla             v20.4h,  v21.4h,  v0.h[1]
1536*c0909341SAndroid Build Coastguard Worker        mla             v20.4h,  v22.4h,  v0.h[2]
1537*c0909341SAndroid Build Coastguard Worker        mla             v20.4h,  v23.4h,  v0.h[3]
1538*c0909341SAndroid Build Coastguard Worker        srshr           v16.4h,  v16.4h,  #2
1539*c0909341SAndroid Build Coastguard Worker        srshr           v20.4h,  v20.4h,  #2
1540*c0909341SAndroid Build Coastguard Worker.ifc \type, put
1541*c0909341SAndroid Build Coastguard Worker        sqrshrun        v16.8b,  v16.8h,  #4
1542*c0909341SAndroid Build Coastguard Worker        sqrshrun        v20.8b,  v20.8h,  #4
1543*c0909341SAndroid Build Coastguard Worker        str             s16,  [\dst]
1544*c0909341SAndroid Build Coastguard Worker        str             s20,  [\ds2]
1545*c0909341SAndroid Build Coastguard Worker        add             \dst, \dst, \d_strd
1546*c0909341SAndroid Build Coastguard Worker        add             \ds2, \ds2, \d_strd
1547*c0909341SAndroid Build Coastguard Worker.else
1548*c0909341SAndroid Build Coastguard Worker        st1             {v16.4h}, [\dst], \d_strd
1549*c0909341SAndroid Build Coastguard Worker        st1             {v20.4h}, [\ds2], \d_strd
1550*c0909341SAndroid Build Coastguard Worker.endif
1551*c0909341SAndroid Build Coastguard Worker        b.gt            4b
1552*c0909341SAndroid Build Coastguard Worker        ret
1553*c0909341SAndroid Build Coastguard Worker
1554*c0909341SAndroid Build Coastguard Worker80:     // 8xN h
1555*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
1556*c0909341SAndroid Build Coastguard Worker        ld1             {v0.8b}, [\xmx]
1557*c0909341SAndroid Build Coastguard Worker.ifc \taps, 6tap
1558*c0909341SAndroid Build Coastguard Worker        sub             \src,  \src,  #2
1559*c0909341SAndroid Build Coastguard Worker.else
1560*c0909341SAndroid Build Coastguard Worker        sub             \src,  \src,  #3
1561*c0909341SAndroid Build Coastguard Worker.endif
1562*c0909341SAndroid Build Coastguard Worker        add             \ds2,  \dst,  \d_strd
1563*c0909341SAndroid Build Coastguard Worker        add             \sr2,  \src,  \s_strd
1564*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd,  \d_strd,  #1
1565*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd,  \s_strd,  #1
1566*c0909341SAndroid Build Coastguard Worker        sxtl            v0.8h, v0.8b
1567*c0909341SAndroid Build Coastguard Worker8:
1568*c0909341SAndroid Build Coastguard Worker        ld1             {v16.8b, v17.8b},  [\src], \s_strd
1569*c0909341SAndroid Build Coastguard Worker        ld1             {v20.8b, v21.8b},  [\sr2], \s_strd
1570*c0909341SAndroid Build Coastguard Worker        uxtl            v16.8h,  v16.8b
1571*c0909341SAndroid Build Coastguard Worker        uxtl            v17.8h,  v17.8b
1572*c0909341SAndroid Build Coastguard Worker        uxtl            v20.8h,  v20.8b
1573*c0909341SAndroid Build Coastguard Worker        uxtl            v21.8h,  v21.8b
1574*c0909341SAndroid Build Coastguard Worker
1575*c0909341SAndroid Build Coastguard Worker.ifc \taps, 6tap
1576*c0909341SAndroid Build Coastguard Worker        mul             v18.8h,  v16.8h,  v0.h[1]
1577*c0909341SAndroid Build Coastguard Worker        mul             v22.8h,  v20.8h,  v0.h[1]
1578*c0909341SAndroid Build Coastguard Worker    .irpc i, 23456
1579*c0909341SAndroid Build Coastguard Worker        ext             v19.16b, v16.16b, v17.16b, #(2*\i-2)
1580*c0909341SAndroid Build Coastguard Worker        ext             v23.16b, v20.16b, v21.16b, #(2*\i-2)
1581*c0909341SAndroid Build Coastguard Worker        mla             v18.8h,  v19.8h,  v0.h[\i]
1582*c0909341SAndroid Build Coastguard Worker        mla             v22.8h,  v23.8h,  v0.h[\i]
1583*c0909341SAndroid Build Coastguard Worker    .endr
1584*c0909341SAndroid Build Coastguard Worker.else   // 8tap
1585*c0909341SAndroid Build Coastguard Worker        mul             v18.8h,  v16.8h,  v0.h[0]
1586*c0909341SAndroid Build Coastguard Worker        mul             v22.8h,  v20.8h,  v0.h[0]
1587*c0909341SAndroid Build Coastguard Worker    .irpc i, 1234567
1588*c0909341SAndroid Build Coastguard Worker        ext             v19.16b, v16.16b, v17.16b, #(2*\i)
1589*c0909341SAndroid Build Coastguard Worker        ext             v23.16b, v20.16b, v21.16b, #(2*\i)
1590*c0909341SAndroid Build Coastguard Worker        mla             v18.8h,  v19.8h,  v0.h[\i]
1591*c0909341SAndroid Build Coastguard Worker        mla             v22.8h,  v23.8h,  v0.h[\i]
1592*c0909341SAndroid Build Coastguard Worker    .endr
1593*c0909341SAndroid Build Coastguard Worker.endif
1594*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2
1595*c0909341SAndroid Build Coastguard Worker        srshr           v18.8h,  v18.8h, #2
1596*c0909341SAndroid Build Coastguard Worker        srshr           v22.8h,  v22.8h, #2
1597*c0909341SAndroid Build Coastguard Worker.ifc \type, put
1598*c0909341SAndroid Build Coastguard Worker        sqrshrun        v18.8b,  v18.8h, #4
1599*c0909341SAndroid Build Coastguard Worker        sqrshrun        v22.8b,  v22.8h, #4
1600*c0909341SAndroid Build Coastguard Worker        st1             {v18.8b}, [\dst], \d_strd
1601*c0909341SAndroid Build Coastguard Worker        st1             {v22.8b}, [\ds2], \d_strd
1602*c0909341SAndroid Build Coastguard Worker.else
1603*c0909341SAndroid Build Coastguard Worker        st1             {v18.8h}, [\dst], \d_strd
1604*c0909341SAndroid Build Coastguard Worker        st1             {v22.8h}, [\ds2], \d_strd
1605*c0909341SAndroid Build Coastguard Worker.endif
1606*c0909341SAndroid Build Coastguard Worker        b.gt            8b
1607*c0909341SAndroid Build Coastguard Worker        ret
1608*c0909341SAndroid Build Coastguard Worker160:
1609*c0909341SAndroid Build Coastguard Worker320:
1610*c0909341SAndroid Build Coastguard Worker640:
1611*c0909341SAndroid Build Coastguard Worker1280:   // 16xN, 32xN, ... h
1612*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
1613*c0909341SAndroid Build Coastguard Worker        ld1             {v0.8b}, [\xmx]
1614*c0909341SAndroid Build Coastguard Worker.ifc \taps, 6tap
1615*c0909341SAndroid Build Coastguard Worker        sub             \src,  \src,  #2
1616*c0909341SAndroid Build Coastguard Worker.else
1617*c0909341SAndroid Build Coastguard Worker        sub             \src,  \src,  #3
1618*c0909341SAndroid Build Coastguard Worker.endif
1619*c0909341SAndroid Build Coastguard Worker        add             \ds2,  \dst,  \d_strd
1620*c0909341SAndroid Build Coastguard Worker        add             \sr2,  \src,  \s_strd
1621*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd,  \s_strd,  #1
1622*c0909341SAndroid Build Coastguard Worker        sxtl            v0.8h, v0.8b
1623*c0909341SAndroid Build Coastguard Worker
1624*c0909341SAndroid Build Coastguard Worker        sub             \s_strd,  \s_strd,  \w, uxtw
1625*c0909341SAndroid Build Coastguard Worker        sub             \s_strd,  \s_strd,  #8
1626*c0909341SAndroid Build Coastguard Worker.ifc \type, put
1627*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd,  \d_strd,  #1
1628*c0909341SAndroid Build Coastguard Worker        sub             \d_strd,  \d_strd,  \w, uxtw
1629*c0909341SAndroid Build Coastguard Worker.endif
1630*c0909341SAndroid Build Coastguard Worker161:
1631*c0909341SAndroid Build Coastguard Worker        ld1             {v16.8b, v17.8b, v18.8b},  [\src], #24
1632*c0909341SAndroid Build Coastguard Worker        ld1             {v20.8b, v21.8b, v22.8b},  [\sr2], #24
1633*c0909341SAndroid Build Coastguard Worker        mov             \mx, \w
1634*c0909341SAndroid Build Coastguard Worker        uxtl            v16.8h,  v16.8b
1635*c0909341SAndroid Build Coastguard Worker        uxtl            v17.8h,  v17.8b
1636*c0909341SAndroid Build Coastguard Worker        uxtl            v18.8h,  v18.8b
1637*c0909341SAndroid Build Coastguard Worker        uxtl            v20.8h,  v20.8b
1638*c0909341SAndroid Build Coastguard Worker        uxtl            v21.8h,  v21.8b
1639*c0909341SAndroid Build Coastguard Worker        uxtl            v22.8h,  v22.8b
1640*c0909341SAndroid Build Coastguard Worker
1641*c0909341SAndroid Build Coastguard Worker16:
1642*c0909341SAndroid Build Coastguard Worker.ifc \taps, 6tap
1643*c0909341SAndroid Build Coastguard Worker        mul             v24.8h,  v16.8h,  v0.h[1]
1644*c0909341SAndroid Build Coastguard Worker        mul             v25.8h,  v17.8h,  v0.h[1]
1645*c0909341SAndroid Build Coastguard Worker        mul             v26.8h,  v20.8h,  v0.h[1]
1646*c0909341SAndroid Build Coastguard Worker        mul             v27.8h,  v21.8h,  v0.h[1]
1647*c0909341SAndroid Build Coastguard Worker    .irpc i, 23456
1648*c0909341SAndroid Build Coastguard Worker        ext             v28.16b, v16.16b, v17.16b, #(2*\i-2)
1649*c0909341SAndroid Build Coastguard Worker        ext             v29.16b, v17.16b, v18.16b, #(2*\i-2)
1650*c0909341SAndroid Build Coastguard Worker        ext             v30.16b, v20.16b, v21.16b, #(2*\i-2)
1651*c0909341SAndroid Build Coastguard Worker        ext             v31.16b, v21.16b, v22.16b, #(2*\i-2)
1652*c0909341SAndroid Build Coastguard Worker        mla             v24.8h,  v28.8h,  v0.h[\i]
1653*c0909341SAndroid Build Coastguard Worker        mla             v25.8h,  v29.8h,  v0.h[\i]
1654*c0909341SAndroid Build Coastguard Worker        mla             v26.8h,  v30.8h,  v0.h[\i]
1655*c0909341SAndroid Build Coastguard Worker        mla             v27.8h,  v31.8h,  v0.h[\i]
1656*c0909341SAndroid Build Coastguard Worker    .endr
1657*c0909341SAndroid Build Coastguard Worker.else   // 8tap
1658*c0909341SAndroid Build Coastguard Worker        mul             v24.8h,  v16.8h,  v0.h[0]
1659*c0909341SAndroid Build Coastguard Worker        mul             v25.8h,  v17.8h,  v0.h[0]
1660*c0909341SAndroid Build Coastguard Worker        mul             v26.8h,  v20.8h,  v0.h[0]
1661*c0909341SAndroid Build Coastguard Worker        mul             v27.8h,  v21.8h,  v0.h[0]
1662*c0909341SAndroid Build Coastguard Worker    .irpc i, 1234567
1663*c0909341SAndroid Build Coastguard Worker        ext             v28.16b, v16.16b, v17.16b, #(2*\i)
1664*c0909341SAndroid Build Coastguard Worker        ext             v29.16b, v17.16b, v18.16b, #(2*\i)
1665*c0909341SAndroid Build Coastguard Worker        ext             v30.16b, v20.16b, v21.16b, #(2*\i)
1666*c0909341SAndroid Build Coastguard Worker        ext             v31.16b, v21.16b, v22.16b, #(2*\i)
1667*c0909341SAndroid Build Coastguard Worker        mla             v24.8h,  v28.8h,  v0.h[\i]
1668*c0909341SAndroid Build Coastguard Worker        mla             v25.8h,  v29.8h,  v0.h[\i]
1669*c0909341SAndroid Build Coastguard Worker        mla             v26.8h,  v30.8h,  v0.h[\i]
1670*c0909341SAndroid Build Coastguard Worker        mla             v27.8h,  v31.8h,  v0.h[\i]
1671*c0909341SAndroid Build Coastguard Worker    .endr
1672*c0909341SAndroid Build Coastguard Worker.endif
1673*c0909341SAndroid Build Coastguard Worker        srshr           v24.8h,  v24.8h, #2
1674*c0909341SAndroid Build Coastguard Worker        srshr           v25.8h,  v25.8h, #2
1675*c0909341SAndroid Build Coastguard Worker        srshr           v26.8h,  v26.8h, #2
1676*c0909341SAndroid Build Coastguard Worker        srshr           v27.8h,  v27.8h, #2
1677*c0909341SAndroid Build Coastguard Worker        subs            \mx, \mx, #16
1678*c0909341SAndroid Build Coastguard Worker.ifc \type, put
1679*c0909341SAndroid Build Coastguard Worker        sqrshrun        v24.8b,  v24.8h, #4
1680*c0909341SAndroid Build Coastguard Worker        sqrshrun2       v24.16b, v25.8h, #4
1681*c0909341SAndroid Build Coastguard Worker        sqrshrun        v26.8b,  v26.8h, #4
1682*c0909341SAndroid Build Coastguard Worker        sqrshrun2       v26.16b, v27.8h, #4
1683*c0909341SAndroid Build Coastguard Worker        st1             {v24.16b}, [\dst], #16
1684*c0909341SAndroid Build Coastguard Worker        st1             {v26.16b}, [\ds2], #16
1685*c0909341SAndroid Build Coastguard Worker.else
1686*c0909341SAndroid Build Coastguard Worker        st1             {v24.8h, v25.8h}, [\dst], #32
1687*c0909341SAndroid Build Coastguard Worker        st1             {v26.8h, v27.8h}, [\ds2], #32
1688*c0909341SAndroid Build Coastguard Worker.endif
1689*c0909341SAndroid Build Coastguard Worker        b.le            9f
1690*c0909341SAndroid Build Coastguard Worker
1691*c0909341SAndroid Build Coastguard Worker        mov             v16.16b, v18.16b
1692*c0909341SAndroid Build Coastguard Worker        mov             v20.16b, v22.16b
1693*c0909341SAndroid Build Coastguard Worker        ld1             {v17.8b, v18.8b}, [\src], #16
1694*c0909341SAndroid Build Coastguard Worker        ld1             {v21.8b, v22.8b}, [\sr2], #16
1695*c0909341SAndroid Build Coastguard Worker        uxtl            v17.8h,  v17.8b
1696*c0909341SAndroid Build Coastguard Worker        uxtl            v18.8h,  v18.8b
1697*c0909341SAndroid Build Coastguard Worker        uxtl            v21.8h,  v21.8b
1698*c0909341SAndroid Build Coastguard Worker        uxtl            v22.8h,  v22.8b
1699*c0909341SAndroid Build Coastguard Worker        b               16b
1700*c0909341SAndroid Build Coastguard Worker
1701*c0909341SAndroid Build Coastguard Worker9:
1702*c0909341SAndroid Build Coastguard Worker        add             \dst,  \dst,  \d_strd
1703*c0909341SAndroid Build Coastguard Worker        add             \ds2,  \ds2,  \d_strd
1704*c0909341SAndroid Build Coastguard Worker        add             \src,  \src,  \s_strd
1705*c0909341SAndroid Build Coastguard Worker        add             \sr2,  \sr2,  \s_strd
1706*c0909341SAndroid Build Coastguard Worker
1707*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2
1708*c0909341SAndroid Build Coastguard Worker        b.gt            161b
1709*c0909341SAndroid Build Coastguard Worker        ret
1710*c0909341SAndroid Build Coastguard Workerendfunc
1711*c0909341SAndroid Build Coastguard Worker
// Jump table for the horizontal-only (_h) filter paths of this macro expansion.
// Each entry is one .word holding the distance from the start of the table to
// a width-specific numeric label; the "b" suffix binds each reference to the
// nearest preceding definition of that label, i.e. the ones in the function
// that ends just above.
// The 160/320/640/1280 labels are stacked on a single entry point, so the
// first four entries all resolve to the same address.
// NOTE(review): the code that indexes this table is outside this view;
// presumably it mirrors the _v dispatcher (ldrsw from table base + index*4,
// add to the base, br) - confirm, and keep the entry order in sync with it.
1712*c0909341SAndroid Build Coastguard Workerjumptable \type\()_\taps\()_h_tbl
1713*c0909341SAndroid Build Coastguard Worker        .word 1280b - \type\()_\taps\()_h_tbl   // shares the "16xN, 32xN, ... h" entry point
1714*c0909341SAndroid Build Coastguard Worker        .word 640b  - \type\()_\taps\()_h_tbl   // same target as 1280b
1715*c0909341SAndroid Build Coastguard Worker        .word 320b  - \type\()_\taps\()_h_tbl   // same target as 1280b
1716*c0909341SAndroid Build Coastguard Worker        .word 160b  - \type\()_\taps\()_h_tbl   // same target as 1280b
1717*c0909341SAndroid Build Coastguard Worker        .word 80b   - \type\()_\taps\()_h_tbl   // "8xN h" path
1718*c0909341SAndroid Build Coastguard Worker        .word 40b   - \type\()_\taps\()_h_tbl   // presumably the 4xN h path (label defined above this view)
1719*c0909341SAndroid Build Coastguard Worker        .word 20b   - \type\()_\taps\()_h_tbl   // presumably the 2xN h path (label defined above this view)
1720*c0909341SAndroid Build Coastguard Workerendjumptable
1721*c0909341SAndroid Build Coastguard Worker
1722*c0909341SAndroid Build Coastguard Workerfunction L(\type\()_\taps\()_v)
1723*c0909341SAndroid Build Coastguard Worker        cmp             \h,  #4
1724*c0909341SAndroid Build Coastguard Worker        ubfx            w9,  \my, #7, #7
1725*c0909341SAndroid Build Coastguard Worker        and             \my, \my, #0x7f
1726*c0909341SAndroid Build Coastguard Worker        b.le            4f
1727*c0909341SAndroid Build Coastguard Worker        mov             \my, w9
1728*c0909341SAndroid Build Coastguard Worker4:
1729*c0909341SAndroid Build Coastguard Worker        add             \xmy, x10, \my, uxtw #3
1730*c0909341SAndroid Build Coastguard Worker
1731*c0909341SAndroid Build Coastguard Worker        movrel          x9,  \type\()_\taps\()_v_tbl
1732*c0909341SAndroid Build Coastguard Worker        ldrsw           x8,  [x9, x8, lsl #2]
1733*c0909341SAndroid Build Coastguard Worker        add             x9,  x9,  x8
1734*c0909341SAndroid Build Coastguard Worker        br              x9
1735*c0909341SAndroid Build Coastguard Worker
1736*c0909341SAndroid Build Coastguard Worker20:     // 2xN v
1737*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
1738*c0909341SAndroid Build Coastguard Worker.ifc \type, put
1739*c0909341SAndroid Build Coastguard Worker        b.gt            28f
1740*c0909341SAndroid Build Coastguard Worker
1741*c0909341SAndroid Build Coastguard Worker        cmp             \h,  #2
1742*c0909341SAndroid Build Coastguard Worker        ldur            s0,  [\xmy, #2]
1743*c0909341SAndroid Build Coastguard Worker        sub             \src,  \src,  \s_strd
1744*c0909341SAndroid Build Coastguard Worker        add             \ds2,  \dst,  \d_strd
1745*c0909341SAndroid Build Coastguard Worker        add             \sr2,  \src,  \s_strd
1746*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd,  \s_strd,  #1
1747*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd,  \d_strd,  #1
1748*c0909341SAndroid Build Coastguard Worker        sxtl            v0.8h, v0.8b
1749*c0909341SAndroid Build Coastguard Worker
1750*c0909341SAndroid Build Coastguard Worker        // 2x2 v
1751*c0909341SAndroid Build Coastguard Worker        load_h          \src, \sr2, \s_strd, v1, v2, v3, v4, v5
1752*c0909341SAndroid Build Coastguard Worker        interleave_1_h  v1, v2, v3, v4, v5
1753*c0909341SAndroid Build Coastguard Worker        b.gt            24f
1754*c0909341SAndroid Build Coastguard Worker        uxtl_b          v1, v2, v3, v4
1755*c0909341SAndroid Build Coastguard Worker        mul_mla_4tap    v6, v1, v2, v3, v4, .4h
1756*c0909341SAndroid Build Coastguard Worker        sqrshrun_b      6,  v6
1757*c0909341SAndroid Build Coastguard Worker        st_h            \d_strd, v6, 2
1758*c0909341SAndroid Build Coastguard Worker        ret
1759*c0909341SAndroid Build Coastguard Worker
1760*c0909341SAndroid Build Coastguard Worker24:     // 2x4 v
1761*c0909341SAndroid Build Coastguard Worker        load_h          \sr2, \src, \s_strd, v6, v7
1762*c0909341SAndroid Build Coastguard Worker        interleave_1_h  v5, v6, v7
1763*c0909341SAndroid Build Coastguard Worker        interleave_2_s  v1, v2, v3, v4, v5, v6
1764*c0909341SAndroid Build Coastguard Worker        uxtl_b          v1, v2, v3, v4
1765*c0909341SAndroid Build Coastguard Worker        mul_mla_4tap    v6, v1, v2, v3, v4, .8h
1766*c0909341SAndroid Build Coastguard Worker        sqrshrun_b      6,  v6
1767*c0909341SAndroid Build Coastguard Worker        st_h            \d_strd, v6, 4
1768*c0909341SAndroid Build Coastguard Worker        ret
1769*c0909341SAndroid Build Coastguard Worker
1770*c0909341SAndroid Build Coastguard Worker28:     // 2x6, 2x8, 2x12, 2x16 v
1771*c0909341SAndroid Build Coastguard Worker        ld1             {v0.8b}, [\xmy]
1772*c0909341SAndroid Build Coastguard Worker        sub             \sr2,  \src,  \s_strd, lsl #1
1773*c0909341SAndroid Build Coastguard Worker        add             \ds2,  \dst,  \d_strd
1774*c0909341SAndroid Build Coastguard Worker        sub             \src,  \sr2,  \s_strd
1775*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd,  \d_strd,  #1
1776*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd,  \s_strd,  #1
1777*c0909341SAndroid Build Coastguard Worker        sxtl            v0.8h, v0.8b
1778*c0909341SAndroid Build Coastguard Worker
1779*c0909341SAndroid Build Coastguard Worker        load_h          \src, \sr2, \s_strd, v1,  v2,  v3,  v4, v5, v6, v7
1780*c0909341SAndroid Build Coastguard Worker        interleave_1_h  v1,  v2,  v3,  v4,  v5
1781*c0909341SAndroid Build Coastguard Worker        interleave_1_h  v5,  v6,  v7
1782*c0909341SAndroid Build Coastguard Worker        interleave_2_s  v1,  v2,  v3,  v4,  v5,  v6
1783*c0909341SAndroid Build Coastguard Worker        uxtl_b          v1,  v2,  v3,  v4
1784*c0909341SAndroid Build Coastguard Worker216:
1785*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #4
1786*c0909341SAndroid Build Coastguard Worker        load_h          \sr2, \src, \s_strd, v16, v17, v18, v19
1787*c0909341SAndroid Build Coastguard Worker        interleave_1_h  v7,  v16, v17, v18, v19
1788*c0909341SAndroid Build Coastguard Worker        interleave_2_s  v5,  v6,  v7,  v16, v17, v18
1789*c0909341SAndroid Build Coastguard Worker        uxtl_b          v5,  v6,  v7,  v16
1790*c0909341SAndroid Build Coastguard Worker        mul_mla_\taps\()_0 v30, v1, v2, v3, v4, v5, v6, v7, v16
1791*c0909341SAndroid Build Coastguard Worker        sqrshrun_b      6,   v30
1792*c0909341SAndroid Build Coastguard Worker        st_h            \d_strd, v30, 4
1793*c0909341SAndroid Build Coastguard Worker        b.le            0f
1794*c0909341SAndroid Build Coastguard Worker        cmp             \h,  #2
1795*c0909341SAndroid Build Coastguard Worker        mov             v1.16b,  v5.16b
1796*c0909341SAndroid Build Coastguard Worker        mov             v2.16b,  v6.16b
1797*c0909341SAndroid Build Coastguard Worker        mov             v3.16b,  v7.16b
1798*c0909341SAndroid Build Coastguard Worker        mov             v4.16b,  v16.16b
1799*c0909341SAndroid Build Coastguard Worker        mov             v5.16b,  v17.16b
1800*c0909341SAndroid Build Coastguard Worker        mov             v6.16b,  v18.16b
1801*c0909341SAndroid Build Coastguard Worker        mov             v7.16b,  v19.16b
1802*c0909341SAndroid Build Coastguard Worker        b.eq            26f
1803*c0909341SAndroid Build Coastguard Worker        b               216b
1804*c0909341SAndroid Build Coastguard Worker26:
1805*c0909341SAndroid Build Coastguard Worker        load_h          \sr2, \src, \s_strd, v16, v17
1806*c0909341SAndroid Build Coastguard Worker        interleave_1_h  v7,  v16, v17
1807*c0909341SAndroid Build Coastguard Worker        uxtl_b          v5,  v6,  v7,  v16
1808*c0909341SAndroid Build Coastguard Worker        mul_mla_\taps\()_0_4h v30, v1, v2, v3, v4, v5, v6, v7, v16
1809*c0909341SAndroid Build Coastguard Worker        sqrshrun_b      6,   v30
1810*c0909341SAndroid Build Coastguard Worker        st_h            \d_strd, v30, 2
1811*c0909341SAndroid Build Coastguard Worker0:
1812*c0909341SAndroid Build Coastguard Worker        ret
1813*c0909341SAndroid Build Coastguard Worker.endif
1814*c0909341SAndroid Build Coastguard Worker
1815*c0909341SAndroid Build Coastguard Worker40:
1816*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
1817*c0909341SAndroid Build Coastguard Worker        b.gt            480f
1818*c0909341SAndroid Build Coastguard Worker
1819*c0909341SAndroid Build Coastguard Worker        // 4x2, 4x4 v
1820*c0909341SAndroid Build Coastguard Worker        cmp             \h,  #2
1821*c0909341SAndroid Build Coastguard Worker        ldur            s0,  [\xmy, #2]
1822*c0909341SAndroid Build Coastguard Worker        sub             \src, \src, \s_strd
1823*c0909341SAndroid Build Coastguard Worker        add             \ds2, \dst, \d_strd
1824*c0909341SAndroid Build Coastguard Worker        add             \sr2, \src, \s_strd
1825*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd, \s_strd, #1
1826*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd, \d_strd, #1
1827*c0909341SAndroid Build Coastguard Worker        sxtl            v0.8h, v0.8b
1828*c0909341SAndroid Build Coastguard Worker
1829*c0909341SAndroid Build Coastguard Worker        load_s          \src, \sr2, \s_strd, v1, v2, v3, v4, v5
1830*c0909341SAndroid Build Coastguard Worker        interleave_1_s  v1, v2, v3, v4, v5
1831*c0909341SAndroid Build Coastguard Worker        uxtl_b          v1, v2, v3, v4
1832*c0909341SAndroid Build Coastguard Worker        mul_mla_4tap    v6, v1, v2, v3, v4, .8h
1833*c0909341SAndroid Build Coastguard Worker        shift_store_4   \type, \d_strd, v6
1834*c0909341SAndroid Build Coastguard Worker        b.le            0f
1835*c0909341SAndroid Build Coastguard Worker        load_s          \sr2, \src, \s_strd, v6, v7
1836*c0909341SAndroid Build Coastguard Worker        interleave_1_s  v5, v6, v7
1837*c0909341SAndroid Build Coastguard Worker        uxtl_b          v5, v6
1838*c0909341SAndroid Build Coastguard Worker        mul_mla_4tap    v7, v3, v4, v5, v6, .8h
1839*c0909341SAndroid Build Coastguard Worker        shift_store_4   \type, \d_strd, v7
1840*c0909341SAndroid Build Coastguard Worker0:
1841*c0909341SAndroid Build Coastguard Worker        ret
1842*c0909341SAndroid Build Coastguard Worker
1843*c0909341SAndroid Build Coastguard Worker480:    // 4x6, 4x8, 4x12, 4x16 v
1844*c0909341SAndroid Build Coastguard Worker        ld1             {v0.8b}, [\xmy]
1845*c0909341SAndroid Build Coastguard Worker        sub             \sr2, \src, \s_strd, lsl #1
1846*c0909341SAndroid Build Coastguard Worker        add             \ds2, \dst, \d_strd
1847*c0909341SAndroid Build Coastguard Worker        sub             \src, \sr2, \s_strd
1848*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd, \s_strd, #1
1849*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd, \d_strd, #1
1850*c0909341SAndroid Build Coastguard Worker        sxtl            v0.8h, v0.8b
1851*c0909341SAndroid Build Coastguard Worker
1852*c0909341SAndroid Build Coastguard Worker        load_s          \src, \sr2, \s_strd, v16, v17, v18, v19, v20, v21, v22
1853*c0909341SAndroid Build Coastguard Worker        interleave_1_s  v16, v17, v18
1854*c0909341SAndroid Build Coastguard Worker        interleave_1_s  v18, v19, v20, v21, v22
1855*c0909341SAndroid Build Coastguard Worker        uxtl_b          v16, v17
1856*c0909341SAndroid Build Coastguard Worker        uxtl_b          v18, v19, v20, v21
1857*c0909341SAndroid Build Coastguard Worker
1858*c0909341SAndroid Build Coastguard Worker48:
1859*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #4
1860*c0909341SAndroid Build Coastguard Worker        load_s          \sr2, \src, \s_strd, v23, v24, v25, v26
1861*c0909341SAndroid Build Coastguard Worker        interleave_1_s  v22, v23, v24, v25, v26
1862*c0909341SAndroid Build Coastguard Worker        uxtl_b          v22, v23, v24, v25
1863*c0909341SAndroid Build Coastguard Worker        mul_mla_\taps\()_2 v1, v2, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25
1864*c0909341SAndroid Build Coastguard Worker        shift_store_4   \type, \d_strd, v1, v2
1865*c0909341SAndroid Build Coastguard Worker        b.le            0f
1866*c0909341SAndroid Build Coastguard Worker        load_s          \sr2,  \src, \s_strd, v27, v16
1867*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2
1868*c0909341SAndroid Build Coastguard Worker        interleave_1_s  v26, v27, v16
1869*c0909341SAndroid Build Coastguard Worker        uxtl_b          v26, v27
1870*c0909341SAndroid Build Coastguard Worker        mul_mla_\taps\()_0 v1, v20, v21, v22, v23, v24, v25, v26, v27
1871*c0909341SAndroid Build Coastguard Worker        shift_store_4   \type, \d_strd, v1
1872*c0909341SAndroid Build Coastguard Worker        b.le            0f
1873*c0909341SAndroid Build Coastguard Worker        load_s          \sr2,  \src, \s_strd, v17, v18
1874*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2
1875*c0909341SAndroid Build Coastguard Worker        interleave_1_s  v16, v17, v18
1876*c0909341SAndroid Build Coastguard Worker        uxtl_b          v16, v17
1877*c0909341SAndroid Build Coastguard Worker        mul_mla_\taps\()_0 v2, v22, v23, v24, v25, v26, v27, v16, v17
1878*c0909341SAndroid Build Coastguard Worker        shift_store_4   \type, \d_strd, v2
1879*c0909341SAndroid Build Coastguard Worker        b.le            0f
1880*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #4
1881*c0909341SAndroid Build Coastguard Worker        load_s          \sr2, \src, \s_strd, v19, v20, v21, v22
1882*c0909341SAndroid Build Coastguard Worker        interleave_1_s  v18, v19, v20, v21, v22
1883*c0909341SAndroid Build Coastguard Worker        uxtl_b          v18, v19, v20, v21
1884*c0909341SAndroid Build Coastguard Worker        mul_mla_\taps\()_2 v1, v2, v24, v25, v26, v27, v16, v17, v18, v19, v20, v21
1885*c0909341SAndroid Build Coastguard Worker        shift_store_4   \type, \d_strd, v1, v2
1886*c0909341SAndroid Build Coastguard Worker        b.gt            48b
1887*c0909341SAndroid Build Coastguard Worker0:
1888*c0909341SAndroid Build Coastguard Worker        ret
1889*c0909341SAndroid Build Coastguard Worker
1890*c0909341SAndroid Build Coastguard Worker80:
1891*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
1892*c0909341SAndroid Build Coastguard Worker        b.gt            880f
1893*c0909341SAndroid Build Coastguard Worker
1894*c0909341SAndroid Build Coastguard Worker        // 8x2, 8x4 v
1895*c0909341SAndroid Build Coastguard Worker        cmp             \h,  #2
1896*c0909341SAndroid Build Coastguard Worker        ldur            s0,  [\xmy, #2]
1897*c0909341SAndroid Build Coastguard Worker        sub             \src, \src, \s_strd
1898*c0909341SAndroid Build Coastguard Worker        add             \ds2, \dst, \d_strd
1899*c0909341SAndroid Build Coastguard Worker        add             \sr2, \src, \s_strd
1900*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd, \s_strd, #1
1901*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd, \d_strd, #1
1902*c0909341SAndroid Build Coastguard Worker        sxtl            v0.8h, v0.8b
1903*c0909341SAndroid Build Coastguard Worker
1904*c0909341SAndroid Build Coastguard Worker        load_8b         \src, \sr2, \s_strd, v1, v2, v3, v4, v5
1905*c0909341SAndroid Build Coastguard Worker        uxtl_b          v1, v2, v3, v4, v5
1906*c0909341SAndroid Build Coastguard Worker        mul_mla_4tap    v6, v1, v2, v3, v4, .8h
1907*c0909341SAndroid Build Coastguard Worker        mul_mla_4tap    v7, v2, v3, v4, v5, .8h
1908*c0909341SAndroid Build Coastguard Worker        shift_store_8   \type, \d_strd, v6, v7
1909*c0909341SAndroid Build Coastguard Worker        b.le            0f
1910*c0909341SAndroid Build Coastguard Worker        load_8b         \sr2, \src, \s_strd, v6, v7
1911*c0909341SAndroid Build Coastguard Worker        uxtl_b          v6, v7
1912*c0909341SAndroid Build Coastguard Worker        mul_mla_4tap    v1, v3, v4, v5, v6, .8h
1913*c0909341SAndroid Build Coastguard Worker        mul_mla_4tap    v2, v4, v5, v6, v7, .8h
1914*c0909341SAndroid Build Coastguard Worker        shift_store_8   \type, \d_strd, v1, v2
1915*c0909341SAndroid Build Coastguard Worker0:
1916*c0909341SAndroid Build Coastguard Worker        ret
1917*c0909341SAndroid Build Coastguard Worker
1918*c0909341SAndroid Build Coastguard Worker880:    // 8x6, 8x8, 8x16, 8x32 v
1919*c0909341SAndroid Build Coastguard Worker1680:   // 16x8, 16x16, ...
1920*c0909341SAndroid Build Coastguard Worker320:    // 32x8, 32x16, ...
1921*c0909341SAndroid Build Coastguard Worker640:
1922*c0909341SAndroid Build Coastguard Worker1280:
1923*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
1924*c0909341SAndroid Build Coastguard Worker        ld1             {v0.8b}, [\xmy]
1925*c0909341SAndroid Build Coastguard Worker        sub             \src, \src, \s_strd
1926*c0909341SAndroid Build Coastguard Worker        sub             \src, \src, \s_strd, lsl #1
1927*c0909341SAndroid Build Coastguard Worker        sxtl            v0.8h, v0.8b
1928*c0909341SAndroid Build Coastguard Worker        mov             \my,  \h
1929*c0909341SAndroid Build Coastguard Worker168:
1930*c0909341SAndroid Build Coastguard Worker        add             \ds2, \dst, \d_strd
1931*c0909341SAndroid Build Coastguard Worker        add             \sr2, \src, \s_strd
1932*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd, \s_strd, #1
1933*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd, \d_strd, #1
1934*c0909341SAndroid Build Coastguard Worker
1935*c0909341SAndroid Build Coastguard Worker        load_8b         \src, \sr2, \s_strd, v16, v17, v18, v19, v20, v21, v22
1936*c0909341SAndroid Build Coastguard Worker        uxtl_b          v16, v17, v18, v19, v20, v21, v22
1937*c0909341SAndroid Build Coastguard Worker
1938*c0909341SAndroid Build Coastguard Worker88:
1939*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2
1940*c0909341SAndroid Build Coastguard Worker        load_8b         \sr2, \src, \s_strd, v23, v24
1941*c0909341SAndroid Build Coastguard Worker        uxtl_b          v23, v24
1942*c0909341SAndroid Build Coastguard Worker        mul_mla_\taps\()_1 v1, v2, v16, v17, v18, v19, v20, v21, v22, v23, v24
1943*c0909341SAndroid Build Coastguard Worker        shift_store_8   \type, \d_strd, v1, v2
1944*c0909341SAndroid Build Coastguard Worker        b.le            9f
1945*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2
1946*c0909341SAndroid Build Coastguard Worker        load_8b         \sr2, \src, \s_strd, v25, v26
1947*c0909341SAndroid Build Coastguard Worker        uxtl_b          v25, v26
1948*c0909341SAndroid Build Coastguard Worker        mul_mla_\taps\()_1 v3, v4, v18, v19, v20, v21, v22, v23, v24, v25, v26
1949*c0909341SAndroid Build Coastguard Worker        shift_store_8   \type, \d_strd, v3, v4
1950*c0909341SAndroid Build Coastguard Worker        b.le            9f
1951*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2
1952*c0909341SAndroid Build Coastguard Worker        load_8b         \sr2, \src, \s_strd, v27, v16
1953*c0909341SAndroid Build Coastguard Worker        uxtl_b          v27, v16
1954*c0909341SAndroid Build Coastguard Worker        mul_mla_\taps\()_1 v1, v2, v20, v21, v22, v23, v24, v25, v26, v27, v16
1955*c0909341SAndroid Build Coastguard Worker        shift_store_8   \type, \d_strd, v1, v2
1956*c0909341SAndroid Build Coastguard Worker        b.le            9f
1957*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2
1958*c0909341SAndroid Build Coastguard Worker        load_8b         \sr2, \src, \s_strd, v17, v18
1959*c0909341SAndroid Build Coastguard Worker        uxtl_b          v17, v18
1960*c0909341SAndroid Build Coastguard Worker        mul_mla_\taps\()_1 v3, v4, v22, v23, v24, v25, v26, v27, v16, v17, v18
1961*c0909341SAndroid Build Coastguard Worker        shift_store_8   \type, \d_strd, v3, v4
1962*c0909341SAndroid Build Coastguard Worker        b.le            9f
1963*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #4
1964*c0909341SAndroid Build Coastguard Worker        load_8b         \sr2, \src, \s_strd, v19, v20, v21, v22
1965*c0909341SAndroid Build Coastguard Worker        uxtl_b          v19, v20, v21, v22
1966*c0909341SAndroid Build Coastguard Worker        mul_mla_\taps\()_1 v1, v2, v24, v25, v26, v27, v16, v17, v18, v19, v20
1967*c0909341SAndroid Build Coastguard Worker        mul_mla_\taps\()_1 v3, v4, v26, v27, v16, v17, v18, v19, v20, v21, v22
1968*c0909341SAndroid Build Coastguard Worker        shift_store_8   \type, \d_strd, v1, v2, v3, v4
1969*c0909341SAndroid Build Coastguard Worker        b.gt            88b
// Column-advance tail of the 8-pixel-wide vertical loop: after one 8-wide
// column has been filtered, either return or rewind the row pointers and
// restart the loop 8 pixels further right.
// NOTE(review): \my / \xmy are presumably the row count saved at 168:
// (outside this view) -- confirm there.
1970*c0909341SAndroid Build Coastguard Worker9:
// w -= 8; when no columns remain, return through 0: below.
1971*c0909341SAndroid Build Coastguard Worker        subs            \w,  \w,  #8
1972*c0909341SAndroid Build Coastguard Worker        b.le            0f
// Halve both strides (presumably undoing a doubling done on entry to this
// path, as the 16-wide path below does with lsl #1 -- confirm).
1973*c0909341SAndroid Build Coastguard Worker        asr             \s_strd, \s_strd, #1
1974*c0909341SAndroid Build Coastguard Worker        asr             \d_strd, \d_strd, #1
// Rewind: src -= s_strd * xmy, dst -= d_strd * xmy, then src -= 8 * s_strd.
1975*c0909341SAndroid Build Coastguard Worker        msub            \src, \s_strd, \xmy, \src
1976*c0909341SAndroid Build Coastguard Worker        msub            \dst, \d_strd, \xmy, \dst
1977*c0909341SAndroid Build Coastguard Worker        sub             \src, \src, \s_strd, lsl #3
// Reload the row counter from \my.
1978*c0909341SAndroid Build Coastguard Worker        mov             \h,  \my
// Step the source 8 bytes to the right.
1979*c0909341SAndroid Build Coastguard Worker        add             \src, \src, #8
// Step the destination: 8 bytes for put, 16 bytes for any other \type
// (presumably 8 outputs of 1 vs. 2 bytes each -- confirm against shift_store_8).
1980*c0909341SAndroid Build Coastguard Worker.ifc \type, put
1981*c0909341SAndroid Build Coastguard Worker        add             \dst, \dst, #8
1982*c0909341SAndroid Build Coastguard Worker.else
1983*c0909341SAndroid Build Coastguard Worker        add             \dst, \dst, #16
1984*c0909341SAndroid Build Coastguard Worker.endif
1985*c0909341SAndroid Build Coastguard Worker        b               168b
1986*c0909341SAndroid Build Coastguard Worker0:
1987*c0909341SAndroid Build Coastguard Worker        ret
1988*c0909341SAndroid Build Coastguard Worker
// 16-pixel-wide vertical filter entry; reached through the 160b entry of the
// \type\()_\taps\()_v_tbl jump table. Heights above the small case branch
// back to 1680:, the fall-through handles 16x2 and 16x4 with a 4-tap filter.
// NOTE(review): the b.gt below consumes flags set before the table dispatch
// (outside this view) -- presumably a compare of \h against 4; confirm.
1989*c0909341SAndroid Build Coastguard Worker160:
// Macro defined elsewhere; presumably emits an indirect-branch landing pad.
1990*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
1991*c0909341SAndroid Build Coastguard Worker        b.gt            1680b
1992*c0909341SAndroid Build Coastguard Worker
1993*c0909341SAndroid Build Coastguard Worker        // 16x2, 16x4 v
// Load 4 coefficient bytes from offset 2 of the entry at \xmy; they are
// sign-extended to 16 bit further down.
1994*c0909341SAndroid Build Coastguard Worker        ldur            s0,  [\xmy, #2]
// Start one row above the block; \sr2/\ds2 address the row after \src/\dst,
// and both strides are doubled so each pointer steps two rows at a time.
1995*c0909341SAndroid Build Coastguard Worker        sub             \src, \src, \s_strd
1996*c0909341SAndroid Build Coastguard Worker        add             \ds2, \dst, \d_strd
1997*c0909341SAndroid Build Coastguard Worker        add             \sr2, \src, \s_strd
1998*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd, \s_strd, #1
1999*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd, \d_strd, #1
2000*c0909341SAndroid Build Coastguard Worker        sxtl            v0.8h, v0.8b
2001*c0909341SAndroid Build Coastguard Worker
// Flags from this compare are consumed by the b.le after the first store
// (assumes the macros in between leave the flags alone -- confirm).
2002*c0909341SAndroid Build Coastguard Worker        cmp             \h,  #2
// Fetch five 16-byte rows into v1-v5, then widen to 16 bit: low 8 pixels
// go to v16-v20, high 8 pixels to v23-v27.
2003*c0909341SAndroid Build Coastguard Worker        load_16b        \src, \sr2, \s_strd, v1,  v2,  v3,  v4,  v5
2004*c0909341SAndroid Build Coastguard Worker        uxtl            v16.8h, v1.8b
2005*c0909341SAndroid Build Coastguard Worker        uxtl            v17.8h, v2.8b
2006*c0909341SAndroid Build Coastguard Worker        uxtl            v18.8h, v3.8b
2007*c0909341SAndroid Build Coastguard Worker        uxtl            v19.8h, v4.8b
2008*c0909341SAndroid Build Coastguard Worker        uxtl            v20.8h, v5.8b
2009*c0909341SAndroid Build Coastguard Worker        uxtl2           v23.8h, v1.16b
2010*c0909341SAndroid Build Coastguard Worker        uxtl2           v24.8h, v2.16b
2011*c0909341SAndroid Build Coastguard Worker        uxtl2           v25.8h, v3.16b
2012*c0909341SAndroid Build Coastguard Worker        uxtl2           v26.8h, v4.16b
2013*c0909341SAndroid Build Coastguard Worker        uxtl2           v27.8h, v5.16b
// Two output rows, each as a low half and a high half. The widened inputs
// v16/v17 are reused as destinations once they are no longer needed.
2014*c0909341SAndroid Build Coastguard Worker        mul_mla_4tap    v1,  v16, v17, v18, v19, .8h
2015*c0909341SAndroid Build Coastguard Worker        mul_mla_4tap    v16, v17, v18, v19, v20, .8h
2016*c0909341SAndroid Build Coastguard Worker        mul_mla_4tap    v2,  v23, v24, v25, v26, .8h
2017*c0909341SAndroid Build Coastguard Worker        mul_mla_4tap    v17, v24, v25, v26, v27, .8h
2018*c0909341SAndroid Build Coastguard Worker        shift_store_16  \type, \d_strd, v1, v2, v16, v17
// h <= 2: done after the first row pair.
2019*c0909341SAndroid Build Coastguard Worker        b.le            0f
// Otherwise fetch two more rows (v6, v7), widen them the same way and
// produce the second row pair from the sliding 4-row window.
2020*c0909341SAndroid Build Coastguard Worker        load_16b        \sr2, \src, \s_strd, v6,  v7
2021*c0909341SAndroid Build Coastguard Worker        uxtl            v21.8h, v6.8b
2022*c0909341SAndroid Build Coastguard Worker        uxtl            v22.8h, v7.8b
2023*c0909341SAndroid Build Coastguard Worker        uxtl2           v28.8h, v6.16b
2024*c0909341SAndroid Build Coastguard Worker        uxtl2           v29.8h, v7.16b
2025*c0909341SAndroid Build Coastguard Worker        mul_mla_4tap    v1,  v18, v19, v20, v21, .8h
2026*c0909341SAndroid Build Coastguard Worker        mul_mla_4tap    v3,  v19, v20, v21, v22, .8h
2027*c0909341SAndroid Build Coastguard Worker        mul_mla_4tap    v2,  v25, v26, v27, v28, .8h
2028*c0909341SAndroid Build Coastguard Worker        mul_mla_4tap    v4,  v26, v27, v28, v29, .8h
2029*c0909341SAndroid Build Coastguard Worker        shift_store_16  \type, \d_strd, v1, v2, v3, v4
2030*c0909341SAndroid Build Coastguard Worker0:
2031*c0909341SAndroid Build Coastguard Worker        ret
2032*c0909341SAndroid Build Coastguard Workerendfunc
2033*c0909341SAndroid Build Coastguard Worker
// Jump table for the vertical-only filter. Each .word is the offset of a
// local label of the preceding function relative to the start of this table.
// NOTE(review): the code that indexes this table is outside this view;
// presumably it uses the same ldrsw/add/br sequence as the _hv dispatch.
// The labels appear to be named after block widths, widest first (the 160:
// entry handles 16-wide blocks) -- confirm for the others.
2034*c0909341SAndroid Build Coastguard Workerjumptable \type\()_\taps\()_v_tbl
2035*c0909341SAndroid Build Coastguard Worker        .word 1280b - \type\()_\taps\()_v_tbl
2036*c0909341SAndroid Build Coastguard Worker        .word 640b  - \type\()_\taps\()_v_tbl
2037*c0909341SAndroid Build Coastguard Worker        .word 320b  - \type\()_\taps\()_v_tbl
2038*c0909341SAndroid Build Coastguard Worker        .word 160b  - \type\()_\taps\()_v_tbl
2039*c0909341SAndroid Build Coastguard Worker        .word 80b   - \type\()_\taps\()_v_tbl
2040*c0909341SAndroid Build Coastguard Worker        .word 40b   - \type\()_\taps\()_v_tbl
2041*c0909341SAndroid Build Coastguard Worker        .word 20b   - \type\()_\taps\()_v_tbl
2042*c0909341SAndroid Build Coastguard Workerendjumptable
2043*c0909341SAndroid Build Coastguard Worker
// Entry of the combined horizontal+vertical filter: choose the vertical
// filter entry, then dispatch through the _hv jump table.
// Inputs used here: \h, \my (two packed 7-bit fields), x10 (base of a table
// with 8-byte entries) and x8 (jump-table index, set by the caller outside
// this view). NOTE(review): x10 is presumably the subpel filter table.
2044*c0909341SAndroid Build Coastguard Workerfunction L(\type\()_\taps\()_hv)
// The flags from this compare stay live across the dispatch: no visible
// instruction up to the b.gt at each jump target sets flags.
2045*c0909341SAndroid Build Coastguard Worker        cmp             \h,  #4
// w9 = bits [13:7] of \my, \my = bits [6:0].
2046*c0909341SAndroid Build Coastguard Worker        ubfx            w9,  \my, #7, #7
2047*c0909341SAndroid Build Coastguard Worker        and             \my, \my, #0x7f
// h <= 4 keeps the low field, taller blocks switch to the upper field.
2048*c0909341SAndroid Build Coastguard Worker        b.le            4f
2049*c0909341SAndroid Build Coastguard Worker        mov             \my,  w9
2050*c0909341SAndroid Build Coastguard Worker4:
// \xmy = x10 + \my * 8: address of the selected 8-byte entry.
2051*c0909341SAndroid Build Coastguard Worker        add             \xmy,  x10, \my, uxtw #3
2052*c0909341SAndroid Build Coastguard Worker
// Load the signed 32-bit offset at table[x8], add the table base, branch.
2053*c0909341SAndroid Build Coastguard Worker        movrel          x9,  \type\()_\taps\()_hv_tbl
2054*c0909341SAndroid Build Coastguard Worker        ldrsw           x8,  [x9, x8, lsl #2]
2055*c0909341SAndroid Build Coastguard Worker        add             x9,  x9,  x8
2056*c0909341SAndroid Build Coastguard Worker        br              x9
2057*c0909341SAndroid Build Coastguard Worker
// 2-pixel-wide horizontal+vertical path; only assembled when \type is put.
// The b.gt below still sees the flags of `cmp \h, #4` from the function
// entry, so blocks taller than 4 rows go to 280: and the fall-through
// handles 2x2 and 2x4 with 4 vertical taps.
2058*c0909341SAndroid Build Coastguard Worker20:
2059*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
2060*c0909341SAndroid Build Coastguard Worker.ifc \type, put
// s0 = 4 horizontal coefficient bytes from offset 2 of the entry at \xmx.
2061*c0909341SAndroid Build Coastguard Worker        ldur            s0,  [\xmx, #2]
2062*c0909341SAndroid Build Coastguard Worker        b.gt            280f
// s1 = 4 vertical coefficient bytes from offset 2 of the entry at \xmy.
2063*c0909341SAndroid Build Coastguard Worker        ldur            s1,  [\xmy, #2]
2064*c0909341SAndroid Build Coastguard Worker
2065*c0909341SAndroid Build Coastguard Worker        // 2x2, 2x4 hv
// \sr2 = one pixel left of the block row, \src = the row above that.
// Strides are doubled so each pointer walks every second row.
2066*c0909341SAndroid Build Coastguard Worker        sub             \sr2, \src, #1
2067*c0909341SAndroid Build Coastguard Worker        sub             \src, \sr2, \s_strd
2068*c0909341SAndroid Build Coastguard Worker        add             \ds2, \dst, \d_strd
2069*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd, \s_strd, #1
2070*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd, \d_strd, #1
2071*c0909341SAndroid Build Coastguard Worker        sxtl            v0.8h,  v0.8b
2072*c0909341SAndroid Build Coastguard Worker        sxtl            v1.8h,  v1.8b
// Keep the return address in x15: the bl calls below overwrite x30.
2073*c0909341SAndroid Build Coastguard Worker        mov             x15, x30
2074*c0909341SAndroid Build Coastguard Worker
// Horizontally filter the single topmost row inline: multiply pixels 0..3
// and 1..4 element-wise by the 4 coefficients, then reduce with two pairwise
// adds so v16 lanes 0/1 (repeated in lanes 2/3) hold output pixels 0 and 1.
2075*c0909341SAndroid Build Coastguard Worker        ld1             {v28.8b}, [\src], \s_strd
2076*c0909341SAndroid Build Coastguard Worker        uxtl            v28.8h,  v28.8b
2077*c0909341SAndroid Build Coastguard Worker        ext             v29.16b, v28.16b, v28.16b, #2
2078*c0909341SAndroid Build Coastguard Worker        mul             v28.4h,  v28.4h,  v0.4h
2079*c0909341SAndroid Build Coastguard Worker        mul             v29.4h,  v29.4h,  v0.4h
2080*c0909341SAndroid Build Coastguard Worker        addp            v28.4h,  v28.4h,  v29.4h
2081*c0909341SAndroid Build Coastguard Worker        addp            v16.4h,  v28.4h,  v28.4h
2082*c0909341SAndroid Build Coastguard Worker        srshr           v16.4h,  v16.4h,  #2
// filter_2 returns the next two filtered rows packed in v28 (2 pixels each).
2083*c0909341SAndroid Build Coastguard Worker        bl              L(\type\()_\taps\()_filter_2)
2084*c0909341SAndroid Build Coastguard Worker
// v16 = {first row, second row}, v17 = {second row, third row}.
2085*c0909341SAndroid Build Coastguard Worker        trn1            v16.2s, v16.2s, v28.2s
2086*c0909341SAndroid Build Coastguard Worker        mov             v17.8b, v28.8b
2087*c0909341SAndroid Build Coastguard Worker
// Main loop: two output rows per iteration.
2088*c0909341SAndroid Build Coastguard Worker2:
2089*c0909341SAndroid Build Coastguard Worker        bl              L(\type\()_\taps\()_filter_2)
2090*c0909341SAndroid Build Coastguard Worker
// v18 = {upper half of v17, lower half of v28}: the row pair that sits
// between v17 and v28. Each accumulator half then sums 4 consecutive rows.
2091*c0909341SAndroid Build Coastguard Worker        ext             v18.8b, v17.8b, v28.8b, #4
2092*c0909341SAndroid Build Coastguard Worker        smull           v2.4s,  v16.4h, v1.h[0]
2093*c0909341SAndroid Build Coastguard Worker        smlal           v2.4s,  v17.4h, v1.h[1]
2094*c0909341SAndroid Build Coastguard Worker        smlal           v2.4s,  v18.4h, v1.h[2]
2095*c0909341SAndroid Build Coastguard Worker        smlal           v2.4s,  v28.4h, v1.h[3]
2096*c0909341SAndroid Build Coastguard Worker
// Rounding, saturating narrow by \shift_hv, then saturate to unsigned bytes.
2097*c0909341SAndroid Build Coastguard Worker        sqrshrn         v2.4h,  v2.4s,  #\shift_hv
2098*c0909341SAndroid Build Coastguard Worker        sqxtun          v2.8b,  v2.8h
2099*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2
// Halfword 0 (2 pixels) goes to the \dst row, halfword 1 to the \ds2 row.
2100*c0909341SAndroid Build Coastguard Worker        st1             {v2.h}[0], [\dst], \d_strd
2101*c0909341SAndroid Build Coastguard Worker        st1             {v2.h}[1], [\ds2], \d_strd
2102*c0909341SAndroid Build Coastguard Worker        b.le            0f
// Slide the row window down by two rows.
2103*c0909341SAndroid Build Coastguard Worker        mov             v16.8b, v18.8b
2104*c0909341SAndroid Build Coastguard Worker        mov             v17.8b, v28.8b
2105*c0909341SAndroid Build Coastguard Worker        b               2b
2106*c0909341SAndroid Build Coastguard Worker
// 2-pixel-wide hv path for blocks taller than 4 rows. It loads all 8 vertical
// coefficient bytes and keeps a window of overlapping row pairs in v16-v22.
// The horizontal filter stays 4-tap (v0 was loaded at 20: before branching).
2107*c0909341SAndroid Build Coastguard Worker280:    // 2x8, 2x16, 2x32 hv
2108*c0909341SAndroid Build Coastguard Worker        ld1             {v1.8b},  [\xmy]
// \src moves one pixel left; \sr2 = two rows above it, and \src is then
// rewound to three rows above. Strides are doubled as in the short path.
2109*c0909341SAndroid Build Coastguard Worker        sub             \src, \src, #1
2110*c0909341SAndroid Build Coastguard Worker        sub             \sr2, \src, \s_strd, lsl #1
2111*c0909341SAndroid Build Coastguard Worker        sub             \src, \sr2, \s_strd
2112*c0909341SAndroid Build Coastguard Worker        add             \ds2, \dst, \d_strd
2113*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd, \s_strd, #1
2114*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd, \d_strd, #1
2115*c0909341SAndroid Build Coastguard Worker        sxtl            v0.8h,  v0.8b
2116*c0909341SAndroid Build Coastguard Worker        sxtl            v1.8h,  v1.8b
// Keep the return address in x15: the bl calls below overwrite x30.
2117*c0909341SAndroid Build Coastguard Worker        mov             x15, x30
2118*c0909341SAndroid Build Coastguard Worker
// Topmost row is filtered inline (same multiply + pairwise-add reduction as
// the short path), leaving its 2 output pixels in v16.
2119*c0909341SAndroid Build Coastguard Worker        ld1             {v28.8b}, [\src], \s_strd
2120*c0909341SAndroid Build Coastguard Worker        uxtl            v28.8h,  v28.8b
2121*c0909341SAndroid Build Coastguard Worker        ext             v29.16b, v28.16b, v28.16b, #2
2122*c0909341SAndroid Build Coastguard Worker        mul             v28.4h,  v28.4h,  v0.4h
2123*c0909341SAndroid Build Coastguard Worker        mul             v29.4h,  v29.4h,  v0.4h
2124*c0909341SAndroid Build Coastguard Worker        addp            v28.4h,  v28.4h,  v29.4h
2125*c0909341SAndroid Build Coastguard Worker        addp            v16.4h,  v28.4h,  v28.4h
2126*c0909341SAndroid Build Coastguard Worker        srshr           v16.4h,  v16.4h,  #2
2127*c0909341SAndroid Build Coastguard Worker
// Prime the window: each filter_2 call yields two more rows in v28. The
// even registers are built with ext so register k holds rows {k, k+1}
// counted from the topmost row (v16 = rows 0/1 ... v21 = rows 5/6).
2128*c0909341SAndroid Build Coastguard Worker        bl              L(\type\()_\taps\()_filter_2)
2129*c0909341SAndroid Build Coastguard Worker        trn1            v16.2s, v16.2s, v28.2s
2130*c0909341SAndroid Build Coastguard Worker        mov             v17.8b, v28.8b
2131*c0909341SAndroid Build Coastguard Worker        bl              L(\type\()_\taps\()_filter_2)
2132*c0909341SAndroid Build Coastguard Worker        ext             v18.8b, v17.8b, v28.8b, #4
2133*c0909341SAndroid Build Coastguard Worker        mov             v19.8b, v28.8b
2134*c0909341SAndroid Build Coastguard Worker        bl              L(\type\()_\taps\()_filter_2)
2135*c0909341SAndroid Build Coastguard Worker        ext             v20.8b, v19.8b, v28.8b, #4
2136*c0909341SAndroid Build Coastguard Worker        mov             v21.8b, v28.8b
2137*c0909341SAndroid Build Coastguard Worker
// Main loop: two output rows per iteration.
2138*c0909341SAndroid Build Coastguard Worker28:
2139*c0909341SAndroid Build Coastguard Worker        bl              L(\type\()_\taps\()_filter_2)
2140*c0909341SAndroid Build Coastguard Worker        ext             v22.8b, v21.8b, v28.8b, #4
// 6tap skips the outermost coefficients (h[0], h[7]) and their row pairs;
// 8tap accumulates all eight.
2141*c0909341SAndroid Build Coastguard Worker.ifc \taps, 6tap
2142*c0909341SAndroid Build Coastguard Worker        smull           v2.4s,  v17.4h, v1.h[1]
2143*c0909341SAndroid Build Coastguard Worker        smlal           v2.4s,  v18.4h, v1.h[2]
2144*c0909341SAndroid Build Coastguard Worker        smlal           v2.4s,  v19.4h, v1.h[3]
2145*c0909341SAndroid Build Coastguard Worker        smlal           v2.4s,  v20.4h, v1.h[4]
2146*c0909341SAndroid Build Coastguard Worker        smlal           v2.4s,  v21.4h, v1.h[5]
2147*c0909341SAndroid Build Coastguard Worker        smlal           v2.4s,  v22.4h, v1.h[6]
2148*c0909341SAndroid Build Coastguard Worker.else   // 8tap
2149*c0909341SAndroid Build Coastguard Worker        smull           v2.4s,  v16.4h, v1.h[0]
2150*c0909341SAndroid Build Coastguard Worker        smlal           v2.4s,  v17.4h, v1.h[1]
2151*c0909341SAndroid Build Coastguard Worker        smlal           v2.4s,  v18.4h, v1.h[2]
2152*c0909341SAndroid Build Coastguard Worker        smlal           v2.4s,  v19.4h, v1.h[3]
2153*c0909341SAndroid Build Coastguard Worker        smlal           v2.4s,  v20.4h, v1.h[4]
2154*c0909341SAndroid Build Coastguard Worker        smlal           v2.4s,  v21.4h, v1.h[5]
2155*c0909341SAndroid Build Coastguard Worker        smlal           v2.4s,  v22.4h, v1.h[6]
2156*c0909341SAndroid Build Coastguard Worker        smlal           v2.4s,  v28.4h, v1.h[7]
2157*c0909341SAndroid Build Coastguard Worker.endif
2158*c0909341SAndroid Build Coastguard Worker
// Rounding, saturating narrow by \shift_hv, then saturate to unsigned bytes
// and store 2 pixels to each of the two destination rows.
2159*c0909341SAndroid Build Coastguard Worker        sqrshrn         v2.4h,  v2.4s,  #\shift_hv
2160*c0909341SAndroid Build Coastguard Worker        sqxtun          v2.8b,  v2.8h
2161*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2
2162*c0909341SAndroid Build Coastguard Worker        st1             {v2.h}[0], [\dst], \d_strd
2163*c0909341SAndroid Build Coastguard Worker        st1             {v2.h}[1], [\ds2], \d_strd
2164*c0909341SAndroid Build Coastguard Worker        b.le            0f
// Slide the window down by two rows.
2165*c0909341SAndroid Build Coastguard Worker        mov             v16.8b, v18.8b
2166*c0909341SAndroid Build Coastguard Worker        mov             v17.8b, v19.8b
2167*c0909341SAndroid Build Coastguard Worker        mov             v18.8b, v20.8b
2168*c0909341SAndroid Build Coastguard Worker        mov             v19.8b, v21.8b
2169*c0909341SAndroid Build Coastguard Worker        mov             v20.8b, v22.8b
2170*c0909341SAndroid Build Coastguard Worker        mov             v21.8b, v28.8b
2171*c0909341SAndroid Build Coastguard Worker        b               28b
2172*c0909341SAndroid Build Coastguard Worker
// Shared exit of both 2-wide paths: return through the address saved in x15.
2173*c0909341SAndroid Build Coastguard Worker0:
2174*c0909341SAndroid Build Coastguard Worker        ret             x15
2175*c0909341SAndroid Build Coastguard Worker
// Local helper, called with bl: horizontally filters 2 output pixels for each
// of two source rows with the 4 coefficients in v0.h[0..3].
// In:    \sr2, \src = row pointers (both post-incremented by \s_strd),
//        v0.4h = 16-bit coefficients.
// Out:   v28.4h = rounded (>> 2) results: lanes 0-1 from the \sr2 row,
//        lanes 2-3 from the \src row.
// Clobb: v27, v29, v30, v31.
2176*c0909341SAndroid Build Coastguard WorkerL(\type\()_\taps\()_filter_2):
2177*c0909341SAndroid Build Coastguard Worker        ld1             {v28.8b},  [\sr2], \s_strd
2178*c0909341SAndroid Build Coastguard Worker        ld1             {v30.8b},  [\src], \s_strd
2179*c0909341SAndroid Build Coastguard Worker        uxtl            v28.8h,  v28.8b
2180*c0909341SAndroid Build Coastguard Worker        uxtl            v30.8h,  v30.8b
// v29/v31 = the same rows shifted left by one pixel.
2181*c0909341SAndroid Build Coastguard Worker        ext             v29.16b, v28.16b, v28.16b, #2
2182*c0909341SAndroid Build Coastguard Worker        ext             v31.16b, v30.16b, v30.16b, #2
// Interleave 32-bit pixel pairs of both rows so each register holds the
// tap-k input for all four outputs:
// v27 = pixels 0,1; v28 = pixels 1,2; v30 = pixels 2,3; v31 = pixels 3,4.
2183*c0909341SAndroid Build Coastguard Worker        trn1            v27.2s,  v28.2s,  v30.2s
2184*c0909341SAndroid Build Coastguard Worker        trn2            v30.2s,  v28.2s,  v30.2s
2185*c0909341SAndroid Build Coastguard Worker        trn1            v28.2s,  v29.2s,  v31.2s
2186*c0909341SAndroid Build Coastguard Worker        trn2            v31.2s,  v29.2s,  v31.2s
// 4-tap multiply-accumulate in 16 bit, then rounding shift right by 2.
2187*c0909341SAndroid Build Coastguard Worker        mul             v27.4h,  v27.4h,  v0.h[0]
2188*c0909341SAndroid Build Coastguard Worker        mla             v27.4h,  v28.4h,  v0.h[1]
2189*c0909341SAndroid Build Coastguard Worker        mla             v27.4h,  v30.4h,  v0.h[2]
2190*c0909341SAndroid Build Coastguard Worker        mla             v27.4h,  v31.4h,  v0.h[3]
2191*c0909341SAndroid Build Coastguard Worker        srshr           v28.4h,  v27.4h,  #2
2192*c0909341SAndroid Build Coastguard Worker        ret
2193*c0909341SAndroid Build Coastguard Worker.endif
2194*c0909341SAndroid Build Coastguard Worker
// 4-pixel-wide horizontal+vertical path. The b.gt below still sees the flags
// of `cmp \h, #4` from the function entry: taller blocks go to 480:, the
// fall-through handles 4x2 and 4x4 with 4 vertical taps.
// NOTE(review): filter_4's tail is outside this view; from how its results
// are consumed here it appears to return two newly filtered rows in v28 and
// v29 -- confirm.
2195*c0909341SAndroid Build Coastguard Worker40:
2196*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
// s0 = 4 horizontal coefficient bytes, s1 = 4 vertical coefficient bytes,
// both taken from offset 2 of their 8-byte entries.
2197*c0909341SAndroid Build Coastguard Worker        ldur            s0,  [\xmx, #2]
2198*c0909341SAndroid Build Coastguard Worker        b.gt            480f
2199*c0909341SAndroid Build Coastguard Worker        ldur            s1,  [\xmy, #2]
// \sr2 = one pixel left of the block row, \src = the row above that.
// Strides are doubled so each pointer walks every second row.
2200*c0909341SAndroid Build Coastguard Worker        sub             \sr2, \src, #1
2201*c0909341SAndroid Build Coastguard Worker        sub             \src, \sr2, \s_strd
2202*c0909341SAndroid Build Coastguard Worker        add             \ds2, \dst, \d_strd
2203*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd, \s_strd, #1
2204*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd, \d_strd, #1
2205*c0909341SAndroid Build Coastguard Worker        sxtl            v0.8h,  v0.8b
2206*c0909341SAndroid Build Coastguard Worker        sxtl            v1.8h,  v1.8b
// Keep the return address in x15: the bl calls below overwrite x30.
2207*c0909341SAndroid Build Coastguard Worker        mov             x15, x30
2208*c0909341SAndroid Build Coastguard Worker
2209*c0909341SAndroid Build Coastguard Worker        // 4x2, 4x4 hv
// Horizontally filter the topmost row inline: v28/v29/v30 are the row shifted
// by 1/2/3 pixels, combined with the 4 coefficients; result row in v16.
2210*c0909341SAndroid Build Coastguard Worker        ld1             {v26.8b}, [\src], \s_strd
2211*c0909341SAndroid Build Coastguard Worker        uxtl            v26.8h,  v26.8b
2212*c0909341SAndroid Build Coastguard Worker        ext             v28.16b, v26.16b, v26.16b, #2
2213*c0909341SAndroid Build Coastguard Worker        ext             v29.16b, v26.16b, v26.16b, #4
2214*c0909341SAndroid Build Coastguard Worker        ext             v30.16b, v26.16b, v26.16b, #6
2215*c0909341SAndroid Build Coastguard Worker        mul             v31.4h,  v26.4h,  v0.h[0]
2216*c0909341SAndroid Build Coastguard Worker        mla             v31.4h,  v28.4h,  v0.h[1]
2217*c0909341SAndroid Build Coastguard Worker        mla             v31.4h,  v29.4h,  v0.h[2]
2218*c0909341SAndroid Build Coastguard Worker        mla             v31.4h,  v30.4h,  v0.h[3]
2219*c0909341SAndroid Build Coastguard Worker        srshr           v16.4h,  v31.4h,  #2
2220*c0909341SAndroid Build Coastguard Worker
// Prime the window with the next two rows: v16, v17, v18 = three oldest rows.
2221*c0909341SAndroid Build Coastguard Worker        bl              L(\type\()_\taps\()_filter_4)
2222*c0909341SAndroid Build Coastguard Worker        mov             v17.8b, v28.8b
2223*c0909341SAndroid Build Coastguard Worker        mov             v18.8b, v29.8b
2224*c0909341SAndroid Build Coastguard Worker
// Main loop: two output rows per iteration. v2 is built from rows
// v16,v17,v18,v28 and v3 from the window one row lower (v17,v18,v28,v29).
2225*c0909341SAndroid Build Coastguard Worker4:
2226*c0909341SAndroid Build Coastguard Worker        bl              L(\type\()_\taps\()_filter_4)
2227*c0909341SAndroid Build Coastguard Worker        // Interleaving the mul/mla chains actually hurts performance
2228*c0909341SAndroid Build Coastguard Worker        // significantly on Cortex A53, thus keeping mul/mla tightly
2229*c0909341SAndroid Build Coastguard Worker        // chained like this.
2230*c0909341SAndroid Build Coastguard Worker        smull           v2.4s,  v16.4h, v1.h[0]
2231*c0909341SAndroid Build Coastguard Worker        smlal           v2.4s,  v17.4h, v1.h[1]
2232*c0909341SAndroid Build Coastguard Worker        smlal           v2.4s,  v18.4h, v1.h[2]
2233*c0909341SAndroid Build Coastguard Worker        smlal           v2.4s,  v28.4h, v1.h[3]
2234*c0909341SAndroid Build Coastguard Worker        smull           v3.4s,  v17.4h, v1.h[0]
2235*c0909341SAndroid Build Coastguard Worker        smlal           v3.4s,  v18.4h, v1.h[1]
2236*c0909341SAndroid Build Coastguard Worker        smlal           v3.4s,  v28.4h, v1.h[2]
2237*c0909341SAndroid Build Coastguard Worker        smlal           v3.4s,  v29.4h, v1.h[3]
// Rounding, saturating narrow to 16 bit by \shift_hv.
2238*c0909341SAndroid Build Coastguard Worker        sqrshrn         v2.4h,  v2.4s,  #\shift_hv
2239*c0909341SAndroid Build Coastguard Worker        sqrshrn         v3.4h,  v3.4s,  #\shift_hv
2240*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2
// put: saturate to unsigned bytes and store 4 bytes per row, advancing the
// pointers explicitly. Other types store the four 16-bit values per row.
2241*c0909341SAndroid Build Coastguard Worker.ifc \type, put
2242*c0909341SAndroid Build Coastguard Worker        sqxtun          v2.8b,  v2.8h
2243*c0909341SAndroid Build Coastguard Worker        sqxtun          v3.8b,  v3.8h
2244*c0909341SAndroid Build Coastguard Worker        str             s2,  [\dst]
2245*c0909341SAndroid Build Coastguard Worker        str             s3,  [\ds2]
2246*c0909341SAndroid Build Coastguard Worker        add             \dst, \dst, \d_strd
2247*c0909341SAndroid Build Coastguard Worker        add             \ds2, \ds2, \d_strd
2248*c0909341SAndroid Build Coastguard Worker.else
2249*c0909341SAndroid Build Coastguard Worker        st1             {v2.4h}, [\dst], \d_strd
2250*c0909341SAndroid Build Coastguard Worker        st1             {v3.4h}, [\ds2], \d_strd
2251*c0909341SAndroid Build Coastguard Worker.endif
2252*c0909341SAndroid Build Coastguard Worker        b.le            0f
// Slide the row window down by two rows.
2253*c0909341SAndroid Build Coastguard Worker        mov             v16.8b,  v18.8b
2254*c0909341SAndroid Build Coastguard Worker        mov             v17.8b,  v28.8b
2255*c0909341SAndroid Build Coastguard Worker        mov             v18.8b,  v29.8b
2256*c0909341SAndroid Build Coastguard Worker        b               4b
2257*c0909341SAndroid Build Coastguard Worker
// 4-pixel-wide hv path for blocks taller than 4 rows. It loads all 8 vertical
// coefficient bytes; the horizontal filter stays 4-tap (v0 was loaded at 40:).
// Row window: v16-v22 for 8tap, v18-v22 for 6tap, one filtered row each.
// NOTE(review): filter_4's tail is outside this view; from how its results
// are consumed here it appears to return two newly filtered rows in v28 and
// v29 -- confirm.
2258*c0909341SAndroid Build Coastguard Worker480:    // 4x8, 4x16, 4x32 hv
2259*c0909341SAndroid Build Coastguard Worker        ld1             {v1.8b},  [\xmy]
2260*c0909341SAndroid Build Coastguard Worker        sub             \src, \src, #1
// 6tap starts two rows above the block (\sr2 one row above); 8tap starts
// three rows above (\sr2 two rows above).
2261*c0909341SAndroid Build Coastguard Worker.ifc \taps, 6tap
2262*c0909341SAndroid Build Coastguard Worker        sub             \sr2, \src, \s_strd
2263*c0909341SAndroid Build Coastguard Worker        sub             \src, \src, \s_strd, lsl #1
2264*c0909341SAndroid Build Coastguard Worker.else
2265*c0909341SAndroid Build Coastguard Worker        sub             \sr2, \src, \s_strd, lsl #1
2266*c0909341SAndroid Build Coastguard Worker        sub             \src, \sr2, \s_strd
2267*c0909341SAndroid Build Coastguard Worker.endif
2268*c0909341SAndroid Build Coastguard Worker        add             \ds2, \dst, \d_strd
2269*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd, \s_strd, #1
2270*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd, \d_strd, #1
2271*c0909341SAndroid Build Coastguard Worker        sxtl            v0.8h,  v0.8b
2272*c0909341SAndroid Build Coastguard Worker        sxtl            v1.8h,  v1.8b
// Keep the return address in x15: the bl calls below overwrite x30.
2273*c0909341SAndroid Build Coastguard Worker        mov             x15, x30
2274*c0909341SAndroid Build Coastguard Worker
// Horizontally filter the topmost row inline (row shifted by 1/2/3 pixels,
// combined with the 4 coefficients).
2275*c0909341SAndroid Build Coastguard Worker        ld1             {v26.8b}, [\src], \s_strd
2276*c0909341SAndroid Build Coastguard Worker        uxtl            v26.8h,  v26.8b
2277*c0909341SAndroid Build Coastguard Worker        ext             v28.16b, v26.16b, v26.16b, #2
2278*c0909341SAndroid Build Coastguard Worker        ext             v29.16b, v26.16b, v26.16b, #4
2279*c0909341SAndroid Build Coastguard Worker        ext             v30.16b, v26.16b, v26.16b, #6
2280*c0909341SAndroid Build Coastguard Worker        mul             v31.4h,  v26.4h,  v0.h[0]
2281*c0909341SAndroid Build Coastguard Worker        mla             v31.4h,  v28.4h,  v0.h[1]
2282*c0909341SAndroid Build Coastguard Worker        mla             v31.4h,  v29.4h,  v0.h[2]
2283*c0909341SAndroid Build Coastguard Worker        mla             v31.4h,  v30.4h,  v0.h[3]
// 6tap: the first row lands directly in v18. 8tap: it lands in v16 and one
// extra filter_4 call fills v17/v18.
2284*c0909341SAndroid Build Coastguard Worker.ifc \taps, 6tap
2285*c0909341SAndroid Build Coastguard Worker        srshr           v18.4h,  v31.4h,  #2
2286*c0909341SAndroid Build Coastguard Worker.else
2287*c0909341SAndroid Build Coastguard Worker        srshr           v16.4h,  v31.4h,  #2
2288*c0909341SAndroid Build Coastguard Worker
2289*c0909341SAndroid Build Coastguard Worker        bl              L(\type\()_\taps\()_filter_4)
2290*c0909341SAndroid Build Coastguard Worker        mov             v17.8b, v28.8b
2291*c0909341SAndroid Build Coastguard Worker        mov             v18.8b, v29.8b
2292*c0909341SAndroid Build Coastguard Worker.endif
// Fill the rest of the window: v19/v20 and v21/v22.
2293*c0909341SAndroid Build Coastguard Worker        bl              L(\type\()_\taps\()_filter_4)
2294*c0909341SAndroid Build Coastguard Worker        mov             v19.8b, v28.8b
2295*c0909341SAndroid Build Coastguard Worker        mov             v20.8b, v29.8b
2296*c0909341SAndroid Build Coastguard Worker        bl              L(\type\()_\taps\()_filter_4)
2297*c0909341SAndroid Build Coastguard Worker        mov             v21.8b, v28.8b
2298*c0909341SAndroid Build Coastguard Worker        mov             v22.8b, v29.8b
2299*c0909341SAndroid Build Coastguard Worker
// Main loop: two output rows per iteration. v2 uses the window ending in
// v28, v3 the window one row lower, ending in v29.
2300*c0909341SAndroid Build Coastguard Worker48:
2301*c0909341SAndroid Build Coastguard Worker        bl              L(\type\()_\taps\()_filter_4)
// 6tap uses coefficients h[1]..h[6] on six rows; 8tap uses h[0]..h[7] on
// eight rows.
2302*c0909341SAndroid Build Coastguard Worker.ifc \taps, 6tap
2303*c0909341SAndroid Build Coastguard Worker        smull           v2.4s,  v18.4h, v1.h[1]
2304*c0909341SAndroid Build Coastguard Worker        smlal           v2.4s,  v19.4h, v1.h[2]
2305*c0909341SAndroid Build Coastguard Worker        smlal           v2.4s,  v20.4h, v1.h[3]
2306*c0909341SAndroid Build Coastguard Worker        smlal           v2.4s,  v21.4h, v1.h[4]
2307*c0909341SAndroid Build Coastguard Worker        smlal           v2.4s,  v22.4h, v1.h[5]
2308*c0909341SAndroid Build Coastguard Worker        smlal           v2.4s,  v28.4h, v1.h[6]
2309*c0909341SAndroid Build Coastguard Worker        smull           v3.4s,  v19.4h, v1.h[1]
2310*c0909341SAndroid Build Coastguard Worker        smlal           v3.4s,  v20.4h, v1.h[2]
2311*c0909341SAndroid Build Coastguard Worker        smlal           v3.4s,  v21.4h, v1.h[3]
2312*c0909341SAndroid Build Coastguard Worker        smlal           v3.4s,  v22.4h, v1.h[4]
2313*c0909341SAndroid Build Coastguard Worker        smlal           v3.4s,  v28.4h, v1.h[5]
2314*c0909341SAndroid Build Coastguard Worker        smlal           v3.4s,  v29.4h, v1.h[6]
2315*c0909341SAndroid Build Coastguard Worker.else   // 8tap
2316*c0909341SAndroid Build Coastguard Worker        smull           v2.4s,  v16.4h, v1.h[0]
2317*c0909341SAndroid Build Coastguard Worker        smlal           v2.4s,  v17.4h, v1.h[1]
2318*c0909341SAndroid Build Coastguard Worker        smlal           v2.4s,  v18.4h, v1.h[2]
2319*c0909341SAndroid Build Coastguard Worker        smlal           v2.4s,  v19.4h, v1.h[3]
2320*c0909341SAndroid Build Coastguard Worker        smlal           v2.4s,  v20.4h, v1.h[4]
2321*c0909341SAndroid Build Coastguard Worker        smlal           v2.4s,  v21.4h, v1.h[5]
2322*c0909341SAndroid Build Coastguard Worker        smlal           v2.4s,  v22.4h, v1.h[6]
2323*c0909341SAndroid Build Coastguard Worker        smlal           v2.4s,  v28.4h, v1.h[7]
2324*c0909341SAndroid Build Coastguard Worker        smull           v3.4s,  v17.4h, v1.h[0]
2325*c0909341SAndroid Build Coastguard Worker        smlal           v3.4s,  v18.4h, v1.h[1]
2326*c0909341SAndroid Build Coastguard Worker        smlal           v3.4s,  v19.4h, v1.h[2]
2327*c0909341SAndroid Build Coastguard Worker        smlal           v3.4s,  v20.4h, v1.h[3]
2328*c0909341SAndroid Build Coastguard Worker        smlal           v3.4s,  v21.4h, v1.h[4]
2329*c0909341SAndroid Build Coastguard Worker        smlal           v3.4s,  v22.4h, v1.h[5]
2330*c0909341SAndroid Build Coastguard Worker        smlal           v3.4s,  v28.4h, v1.h[6]
2331*c0909341SAndroid Build Coastguard Worker        smlal           v3.4s,  v29.4h, v1.h[7]
2332*c0909341SAndroid Build Coastguard Worker.endif
// Rounding, saturating narrow to 16 bit by \shift_hv.
2333*c0909341SAndroid Build Coastguard Worker        sqrshrn         v2.4h,  v2.4s,  #\shift_hv
2334*c0909341SAndroid Build Coastguard Worker        sqrshrn         v3.4h,  v3.4s,  #\shift_hv
2335*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2
// put: saturate to unsigned bytes and store 4 bytes per row, advancing the
// pointers explicitly. Other types store the four 16-bit values per row.
2336*c0909341SAndroid Build Coastguard Worker.ifc \type, put
2337*c0909341SAndroid Build Coastguard Worker        sqxtun          v2.8b,  v2.8h
2338*c0909341SAndroid Build Coastguard Worker        sqxtun          v3.8b,  v3.8h
2339*c0909341SAndroid Build Coastguard Worker        str             s2,  [\dst]
2340*c0909341SAndroid Build Coastguard Worker        str             s3,  [\ds2]
2341*c0909341SAndroid Build Coastguard Worker        add             \dst, \dst, \d_strd
2342*c0909341SAndroid Build Coastguard Worker        add             \ds2, \ds2, \d_strd
2343*c0909341SAndroid Build Coastguard Worker.else
2344*c0909341SAndroid Build Coastguard Worker        st1             {v2.4h}, [\dst], \d_strd
2345*c0909341SAndroid Build Coastguard Worker        st1             {v3.4h}, [\ds2], \d_strd
2346*c0909341SAndroid Build Coastguard Worker.endif
2347*c0909341SAndroid Build Coastguard Worker        b.le            0f
// Slide the window down by two rows; v16/v17 only exist in the 8tap window.
2348*c0909341SAndroid Build Coastguard Worker.ifc \taps, 8tap
2349*c0909341SAndroid Build Coastguard Worker        mov             v16.8b,  v18.8b
2350*c0909341SAndroid Build Coastguard Worker        mov             v17.8b,  v19.8b
2351*c0909341SAndroid Build Coastguard Worker.endif
2352*c0909341SAndroid Build Coastguard Worker        mov             v18.8b,  v20.8b
2353*c0909341SAndroid Build Coastguard Worker        mov             v19.8b,  v21.8b
2354*c0909341SAndroid Build Coastguard Worker        mov             v20.8b,  v22.8b
2355*c0909341SAndroid Build Coastguard Worker        mov             v21.8b,  v28.8b
2356*c0909341SAndroid Build Coastguard Worker        mov             v22.8b,  v29.8b
2357*c0909341SAndroid Build Coastguard Worker        b               48b
// Shared exit of both 4-wide paths: return through the address saved in x15.
2358*c0909341SAndroid Build Coastguard Worker0:
2359*c0909341SAndroid Build Coastguard Worker        ret             x15
2360*c0909341SAndroid Build Coastguard Worker
// Horizontal filter helper for the 4-pixel-wide loop above, which consumes
// v28/v29 as its two newest rows.
// Loads one 8-byte row from sr2 and one from src (each pointer is
// post-incremented by s_strd), widens the pixels to 16 bit and applies the
// four coefficients v0.h[0..3] to the first four pixel positions of each row.
// Out:      v28.4h = filtered sr2 row, v29.4h = filtered src row, both after
//           a rounding right shift by 2.
// Clobbers: v26, v27, v30, v31 (in addition to the outputs).
// Returns with a plain ret, i.e. through x30.
2361*c0909341SAndroid Build Coastguard WorkerL(\type\()_\taps\()_filter_4):
2362*c0909341SAndroid Build Coastguard Worker        ld1             {v26.8b}, [\sr2], \s_strd
2363*c0909341SAndroid Build Coastguard Worker        ld1             {v27.8b}, [\src], \s_strd
2364*c0909341SAndroid Build Coastguard Worker        uxtl            v26.8h,  v26.8b // zero-extend pixels to 16 bit
2365*c0909341SAndroid Build Coastguard Worker        uxtl            v27.8h,  v27.8b
// ext with the same register as both sources rotates it; only the low four
// lanes are used below, so lane i of v28/v29/v30 is pixel i+1/i+2/i+3.
2366*c0909341SAndroid Build Coastguard Worker        ext             v28.16b, v26.16b, v26.16b, #2
2367*c0909341SAndroid Build Coastguard Worker        ext             v29.16b, v26.16b, v26.16b, #4
2368*c0909341SAndroid Build Coastguard Worker        ext             v30.16b, v26.16b, v26.16b, #6
2369*c0909341SAndroid Build Coastguard Worker        mul             v31.4h,  v26.4h,  v0.h[0]
2370*c0909341SAndroid Build Coastguard Worker        mla             v31.4h,  v28.4h,  v0.h[1]
2371*c0909341SAndroid Build Coastguard Worker        mla             v31.4h,  v29.4h,  v0.h[2]
2372*c0909341SAndroid Build Coastguard Worker        mla             v31.4h,  v30.4h,  v0.h[3]
// Same four taps for the row loaded from src; the sum is built in place in v27.
2373*c0909341SAndroid Build Coastguard Worker        ext             v28.16b, v27.16b, v27.16b, #2
2374*c0909341SAndroid Build Coastguard Worker        ext             v29.16b, v27.16b, v27.16b, #4
2375*c0909341SAndroid Build Coastguard Worker        ext             v30.16b, v27.16b, v27.16b, #6
2376*c0909341SAndroid Build Coastguard Worker        mul             v27.4h,  v27.4h,  v0.h[0]
2377*c0909341SAndroid Build Coastguard Worker        mla             v27.4h,  v28.4h,  v0.h[1]
2378*c0909341SAndroid Build Coastguard Worker        mla             v27.4h,  v29.4h,  v0.h[2]
2379*c0909341SAndroid Build Coastguard Worker        mla             v27.4h,  v30.4h,  v0.h[3]
2380*c0909341SAndroid Build Coastguard Worker        srshr           v28.4h,  v31.4h,  #2 // result for the sr2 row
2381*c0909341SAndroid Build Coastguard Worker        srshr           v29.4h,  v27.4h,  #2 // result for the src row
2382*c0909341SAndroid Build Coastguard Worker        ret
2383*c0909341SAndroid Build Coastguard Worker
// hv filtering for blocks that are 8, 16 or 32 pixels wide; labels 80/160/320
// are referenced from the hv jump table. The flags tested by b.gt are set
// outside this view; going by the comments at 164 and 880 they presumably
// compare the height against 4 -- TODO confirm against the dispatch code.
// This short variant filters vertically with four coefficients, v1.h[0..3].
// The block is processed in strips of 8 columns, two output rows at a time.
2384*c0909341SAndroid Build Coastguard Worker80:
2385*c0909341SAndroid Build Coastguard Worker160:
2386*c0909341SAndroid Build Coastguard Worker320:
2387*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
2388*c0909341SAndroid Build Coastguard Worker        b.gt            880f
2389*c0909341SAndroid Build Coastguard Worker        ld1             {v0.8b},  [\xmx] // 8 bytes of horizontal coefficients
2390*c0909341SAndroid Build Coastguard Worker        ldur            s1,  [\xmy, #2] // 4 bytes at offset 2: vertical coefficients
2391*c0909341SAndroid Build Coastguard Worker.ifc \taps, 6tap
2392*c0909341SAndroid Build Coastguard Worker        sub             \src,  \src,  #2
2393*c0909341SAndroid Build Coastguard Worker.else
2394*c0909341SAndroid Build Coastguard Worker        sub             \src,  \src,  #3
2395*c0909341SAndroid Build Coastguard Worker.endif
2396*c0909341SAndroid Build Coastguard Worker        sub             \src,  \src,  \s_strd // start one row above the block
2397*c0909341SAndroid Build Coastguard Worker        sxtl            v0.8h,  v0.8b // sign-extend coefficients to 16 bit
2398*c0909341SAndroid Build Coastguard Worker        sxtl            v1.8h,  v1.8b
2399*c0909341SAndroid Build Coastguard Worker        mov             x15, x30 // the bl calls below overwrite x30; exit is ret x15
2400*c0909341SAndroid Build Coastguard Worker        mov             \my,  \h // keep the height for the next 8-column strip
2401*c0909341SAndroid Build Coastguard Worker
2402*c0909341SAndroid Build Coastguard Worker164:    // 8x2, 8x4, 16x2, 16x4, 32x2, 32x4 hv
// Set up second-row pointers and double both strides, so that each pointer
// steps over two rows per access.
2403*c0909341SAndroid Build Coastguard Worker        add             \ds2,  \dst,  \d_strd
2404*c0909341SAndroid Build Coastguard Worker        add             \sr2,  \src,  \s_strd
2405*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd, \d_strd, #1
2406*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd, \s_strd, #1
2407*c0909341SAndroid Build Coastguard Worker
// Prime the vertical window with three horizontally filtered rows:
// v16 (from filter_8_first), then v17/v18 (copied from the v24/v25 outputs).
2408*c0909341SAndroid Build Coastguard Worker        bl              L(\type\()_\taps\()_filter_8_first)
2409*c0909341SAndroid Build Coastguard Worker        bl              L(\type\()_\taps\()_filter_8)
2410*c0909341SAndroid Build Coastguard Worker        mov             v17.16b, v24.16b
2411*c0909341SAndroid Build Coastguard Worker        mov             v18.16b, v25.16b
2412*c0909341SAndroid Build Coastguard Worker
// Loop body: two output rows per iteration.
//   v2:v3 = v16*c0 + v17*c1 + v18*c2 + v24*c3   (stored through dst)
//   v4:v5 = v17*c0 + v18*c1 + v24*c2 + v25*c3   (stored through ds2)
// with c0..c3 = v1.h[0..3]. filter_8 is called after the v16 products have
// been started; it only writes v24-v31, so v2, v3 and v16-v18 survive the call.
2413*c0909341SAndroid Build Coastguard Worker8:
2414*c0909341SAndroid Build Coastguard Worker        smull           v2.4s,  v16.4h, v1.h[0]
2415*c0909341SAndroid Build Coastguard Worker        smull2          v3.4s,  v16.8h, v1.h[0]
2416*c0909341SAndroid Build Coastguard Worker        bl              L(\type\()_\taps\()_filter_8)
2417*c0909341SAndroid Build Coastguard Worker        smull           v4.4s,  v17.4h, v1.h[0]
2418*c0909341SAndroid Build Coastguard Worker        smull2          v5.4s,  v17.8h, v1.h[0]
2419*c0909341SAndroid Build Coastguard Worker        smlal           v2.4s,  v17.4h, v1.h[1]
2420*c0909341SAndroid Build Coastguard Worker        smlal2          v3.4s,  v17.8h, v1.h[1]
2421*c0909341SAndroid Build Coastguard Worker        smlal           v4.4s,  v18.4h, v1.h[1]
2422*c0909341SAndroid Build Coastguard Worker        smlal2          v5.4s,  v18.8h, v1.h[1]
2423*c0909341SAndroid Build Coastguard Worker        smlal           v2.4s,  v18.4h, v1.h[2]
2424*c0909341SAndroid Build Coastguard Worker        smlal2          v3.4s,  v18.8h, v1.h[2]
2425*c0909341SAndroid Build Coastguard Worker        smlal           v4.4s,  v24.4h, v1.h[2]
2426*c0909341SAndroid Build Coastguard Worker        smlal2          v5.4s,  v24.8h, v1.h[2]
2427*c0909341SAndroid Build Coastguard Worker        smlal           v2.4s,  v24.4h, v1.h[3]
2428*c0909341SAndroid Build Coastguard Worker        smlal2          v3.4s,  v24.8h, v1.h[3]
2429*c0909341SAndroid Build Coastguard Worker        smlal           v4.4s,  v25.4h, v1.h[3]
2430*c0909341SAndroid Build Coastguard Worker        smlal2          v5.4s,  v25.8h, v1.h[3]
// Narrow the 32-bit sums to 16 bit with rounding and saturation.
2431*c0909341SAndroid Build Coastguard Worker        sqrshrn         v2.4h,  v2.4s,  #\shift_hv
2432*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v2.8h,  v3.4s,  #\shift_hv
2433*c0909341SAndroid Build Coastguard Worker        sqrshrn         v4.4h,  v4.4s,  #\shift_hv
2434*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v4.8h,  v5.4s,  #\shift_hv
2435*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2 // flags are consumed by b.le 9f below
2436*c0909341SAndroid Build Coastguard Worker.ifc \type, put
// put: saturate to unsigned 8 bit and store 8 pixels per row.
2437*c0909341SAndroid Build Coastguard Worker        sqxtun          v2.8b,  v2.8h
2438*c0909341SAndroid Build Coastguard Worker        sqxtun          v4.8b,  v4.8h
2439*c0909341SAndroid Build Coastguard Worker        st1             {v2.8b}, [\dst], \d_strd
2440*c0909341SAndroid Build Coastguard Worker        st1             {v4.8b}, [\ds2], \d_strd
2441*c0909341SAndroid Build Coastguard Worker.else
// otherwise: store the 16-bit values as they are, 8 per row.
2442*c0909341SAndroid Build Coastguard Worker        st1             {v2.8h}, [\dst], \d_strd
2443*c0909341SAndroid Build Coastguard Worker        st1             {v4.8h}, [\ds2], \d_strd
2444*c0909341SAndroid Build Coastguard Worker.endif
2445*c0909341SAndroid Build Coastguard Worker        b.le            9f
// Slide the vertical window down by two rows.
2446*c0909341SAndroid Build Coastguard Worker        mov             v16.16b, v18.16b
2447*c0909341SAndroid Build Coastguard Worker        mov             v17.16b, v24.16b
2448*c0909341SAndroid Build Coastguard Worker        mov             v18.16b, v25.16b
2449*c0909341SAndroid Build Coastguard Worker        b               8b
// Strip finished: stop if no columns remain, otherwise rewind both pointers
// to the top of the block and move 8 pixels to the right.
2450*c0909341SAndroid Build Coastguard Worker9:
2451*c0909341SAndroid Build Coastguard Worker        subs            \w,  \w,  #8
2452*c0909341SAndroid Build Coastguard Worker        b.le            0f
2453*c0909341SAndroid Build Coastguard Worker        asr             \s_strd,  \s_strd,  #1 // back to single-row strides
2454*c0909341SAndroid Build Coastguard Worker        asr             \d_strd,  \d_strd,  #1
// xmy is presumably the 64-bit view of my, which holds the saved height (the
// register binding is in the macro invocation, outside this view).
2455*c0909341SAndroid Build Coastguard Worker        msub            \src,  \s_strd,  \xmy,  \src
2456*c0909341SAndroid Build Coastguard Worker        msub            \dst,  \d_strd,  \xmy,  \dst
2457*c0909341SAndroid Build Coastguard Worker        sub             \src,  \src,  \s_strd,  lsl #2 // src also advanced 4 rows during priming
2458*c0909341SAndroid Build Coastguard Worker        mov             \h,  \my
2459*c0909341SAndroid Build Coastguard Worker        add             \src,  \src,  #8
2460*c0909341SAndroid Build Coastguard Worker.ifc \type, put
2461*c0909341SAndroid Build Coastguard Worker        add             \dst,  \dst,  #8
2462*c0909341SAndroid Build Coastguard Worker.else
2463*c0909341SAndroid Build Coastguard Worker        add             \dst,  \dst,  #16
2464*c0909341SAndroid Build Coastguard Worker.endif
2465*c0909341SAndroid Build Coastguard Worker        b               164b
2466*c0909341SAndroid Build Coastguard Worker
// hv filtering with the full-length vertical filter. Reached from the b.gt at
// label 80/160/320, and through labels 640/1280, which the hv jump table
// references. The block is processed in strips of 8 columns, two output rows
// at a time.
//   6tap: vertical coefficients v1.h[1..6], window rows v18-v22 plus v24/v25
//   8tap: vertical coefficients v1.h[0..7], window rows v16-v22 plus v24/v25
2467*c0909341SAndroid Build Coastguard Worker880:    // 8x8, 8x16, ..., 16x8, ..., 32x8, ... hv
2468*c0909341SAndroid Build Coastguard Worker640:
2469*c0909341SAndroid Build Coastguard Worker1280:
2470*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
2471*c0909341SAndroid Build Coastguard Worker        ld1             {v0.8b},  [\xmx] // 8 bytes of horizontal coefficients
2472*c0909341SAndroid Build Coastguard Worker        ld1             {v1.8b},  [\xmy] // 8 bytes of vertical coefficients
// Move src to the first tap position: 2 columns left and 2 rows up for 6tap,
// 3 columns left and 3 rows up for 8tap.
2473*c0909341SAndroid Build Coastguard Worker.ifc \taps, 6tap
2474*c0909341SAndroid Build Coastguard Worker        sub             \src,  \src,  #2
2475*c0909341SAndroid Build Coastguard Worker.else
2476*c0909341SAndroid Build Coastguard Worker        sub             \src,  \src,  #3
2477*c0909341SAndroid Build Coastguard Worker        sub             \src,  \src,  \s_strd
2478*c0909341SAndroid Build Coastguard Worker.endif
2479*c0909341SAndroid Build Coastguard Worker        sub             \src,  \src,  \s_strd, lsl #1
2480*c0909341SAndroid Build Coastguard Worker        sxtl            v0.8h,  v0.8b // sign-extend coefficients to 16 bit
2481*c0909341SAndroid Build Coastguard Worker        sxtl            v1.8h,  v1.8b
2482*c0909341SAndroid Build Coastguard Worker        mov             x15, x30 // the bl calls below overwrite x30; exit is ret x15
2483*c0909341SAndroid Build Coastguard Worker        mov             \my,  \h // keep the height for the next 8-column strip
2484*c0909341SAndroid Build Coastguard Worker
2485*c0909341SAndroid Build Coastguard Worker168:
// Set up second-row pointers and double both strides, so that each pointer
// steps over two rows per access.
2486*c0909341SAndroid Build Coastguard Worker        add             \ds2,  \dst,  \d_strd
2487*c0909341SAndroid Build Coastguard Worker        add             \sr2,  \src,  \s_strd
2488*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd, \d_strd, #1
2489*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd, \s_strd, #1
2490*c0909341SAndroid Build Coastguard Worker
// Prime the vertical window with horizontally filtered rows. filter_8_first
// yields one row in v16; each filter_8 call yields two rows in v24/v25.
2491*c0909341SAndroid Build Coastguard Worker        bl              L(\type\()_\taps\()_filter_8_first)
2492*c0909341SAndroid Build Coastguard Worker.ifc \taps, 6tap
2493*c0909341SAndroid Build Coastguard Worker        mov             v18.16b, v16.16b // 6tap: the window starts at v18
2494*c0909341SAndroid Build Coastguard Worker.else
2495*c0909341SAndroid Build Coastguard Worker        bl              L(\type\()_\taps\()_filter_8)
2496*c0909341SAndroid Build Coastguard Worker        mov             v17.16b, v24.16b
2497*c0909341SAndroid Build Coastguard Worker        mov             v18.16b, v25.16b
2498*c0909341SAndroid Build Coastguard Worker.endif
2499*c0909341SAndroid Build Coastguard Worker        bl              L(\type\()_\taps\()_filter_8)
2500*c0909341SAndroid Build Coastguard Worker        mov             v19.16b, v24.16b
2501*c0909341SAndroid Build Coastguard Worker        mov             v20.16b, v25.16b
2502*c0909341SAndroid Build Coastguard Worker        bl              L(\type\()_\taps\()_filter_8)
2503*c0909341SAndroid Build Coastguard Worker        mov             v21.16b, v24.16b
2504*c0909341SAndroid Build Coastguard Worker        mov             v22.16b, v25.16b
2505*c0909341SAndroid Build Coastguard Worker
// Loop body: two output rows per iteration. v2:v3 accumulate the row stored
// through dst, v4:v5 the row stored through ds2, whose window is shifted down
// by one row. filter_8 is called after the first pair of products has been
// started; it only writes v24-v31, so v2, v3 and v16-v22 survive the call.
2506*c0909341SAndroid Build Coastguard Worker88:
2507*c0909341SAndroid Build Coastguard Worker.ifc \taps, 6tap
2508*c0909341SAndroid Build Coastguard Worker        smull           v2.4s,  v18.4h, v1.h[1]
2509*c0909341SAndroid Build Coastguard Worker        smull2          v3.4s,  v18.8h, v1.h[1]
2510*c0909341SAndroid Build Coastguard Worker        bl              L(\type\()_\taps\()_filter_8)
2511*c0909341SAndroid Build Coastguard Worker        smull           v4.4s,  v19.4h, v1.h[1]
2512*c0909341SAndroid Build Coastguard Worker        smull2          v5.4s,  v19.8h, v1.h[1]
2513*c0909341SAndroid Build Coastguard Worker        smlal           v2.4s,  v19.4h, v1.h[2]
2514*c0909341SAndroid Build Coastguard Worker        smlal2          v3.4s,  v19.8h, v1.h[2]
2515*c0909341SAndroid Build Coastguard Worker        smlal           v4.4s,  v20.4h, v1.h[2]
2516*c0909341SAndroid Build Coastguard Worker        smlal2          v5.4s,  v20.8h, v1.h[2]
2517*c0909341SAndroid Build Coastguard Worker        smlal           v2.4s,  v20.4h, v1.h[3]
2518*c0909341SAndroid Build Coastguard Worker        smlal2          v3.4s,  v20.8h, v1.h[3]
2519*c0909341SAndroid Build Coastguard Worker        smlal           v4.4s,  v21.4h, v1.h[3]
2520*c0909341SAndroid Build Coastguard Worker        smlal2          v5.4s,  v21.8h, v1.h[3]
2521*c0909341SAndroid Build Coastguard Worker        smlal           v2.4s,  v21.4h, v1.h[4]
2522*c0909341SAndroid Build Coastguard Worker        smlal2          v3.4s,  v21.8h, v1.h[4]
2523*c0909341SAndroid Build Coastguard Worker        smlal           v4.4s,  v22.4h, v1.h[4]
2524*c0909341SAndroid Build Coastguard Worker        smlal2          v5.4s,  v22.8h, v1.h[4]
2525*c0909341SAndroid Build Coastguard Worker        smlal           v2.4s,  v22.4h, v1.h[5]
2526*c0909341SAndroid Build Coastguard Worker        smlal2          v3.4s,  v22.8h, v1.h[5]
2527*c0909341SAndroid Build Coastguard Worker        smlal           v4.4s,  v24.4h, v1.h[5]
2528*c0909341SAndroid Build Coastguard Worker        smlal2          v5.4s,  v24.8h, v1.h[5]
2529*c0909341SAndroid Build Coastguard Worker        smlal           v2.4s,  v24.4h, v1.h[6]
2530*c0909341SAndroid Build Coastguard Worker        smlal2          v3.4s,  v24.8h, v1.h[6]
2531*c0909341SAndroid Build Coastguard Worker        smlal           v4.4s,  v25.4h, v1.h[6]
2532*c0909341SAndroid Build Coastguard Worker        smlal2          v5.4s,  v25.8h, v1.h[6]
2533*c0909341SAndroid Build Coastguard Worker.else   // 8tap
2534*c0909341SAndroid Build Coastguard Worker        smull           v2.4s,  v16.4h, v1.h[0]
2535*c0909341SAndroid Build Coastguard Worker        smull2          v3.4s,  v16.8h, v1.h[0]
2536*c0909341SAndroid Build Coastguard Worker        bl              L(\type\()_\taps\()_filter_8)
2537*c0909341SAndroid Build Coastguard Worker        smull           v4.4s,  v17.4h, v1.h[0]
2538*c0909341SAndroid Build Coastguard Worker        smull2          v5.4s,  v17.8h, v1.h[0]
2539*c0909341SAndroid Build Coastguard Worker        smlal           v2.4s,  v17.4h, v1.h[1]
2540*c0909341SAndroid Build Coastguard Worker        smlal2          v3.4s,  v17.8h, v1.h[1]
2541*c0909341SAndroid Build Coastguard Worker        smlal           v4.4s,  v18.4h, v1.h[1]
2542*c0909341SAndroid Build Coastguard Worker        smlal2          v5.4s,  v18.8h, v1.h[1]
2543*c0909341SAndroid Build Coastguard Worker        smlal           v2.4s,  v18.4h, v1.h[2]
2544*c0909341SAndroid Build Coastguard Worker        smlal2          v3.4s,  v18.8h, v1.h[2]
2545*c0909341SAndroid Build Coastguard Worker        smlal           v4.4s,  v19.4h, v1.h[2]
2546*c0909341SAndroid Build Coastguard Worker        smlal2          v5.4s,  v19.8h, v1.h[2]
2547*c0909341SAndroid Build Coastguard Worker        smlal           v2.4s,  v19.4h, v1.h[3]
2548*c0909341SAndroid Build Coastguard Worker        smlal2          v3.4s,  v19.8h, v1.h[3]
2549*c0909341SAndroid Build Coastguard Worker        smlal           v4.4s,  v20.4h, v1.h[3]
2550*c0909341SAndroid Build Coastguard Worker        smlal2          v5.4s,  v20.8h, v1.h[3]
2551*c0909341SAndroid Build Coastguard Worker        smlal           v2.4s,  v20.4h, v1.h[4]
2552*c0909341SAndroid Build Coastguard Worker        smlal2          v3.4s,  v20.8h, v1.h[4]
2553*c0909341SAndroid Build Coastguard Worker        smlal           v4.4s,  v21.4h, v1.h[4]
2554*c0909341SAndroid Build Coastguard Worker        smlal2          v5.4s,  v21.8h, v1.h[4]
2555*c0909341SAndroid Build Coastguard Worker        smlal           v2.4s,  v21.4h, v1.h[5]
2556*c0909341SAndroid Build Coastguard Worker        smlal2          v3.4s,  v21.8h, v1.h[5]
2557*c0909341SAndroid Build Coastguard Worker        smlal           v4.4s,  v22.4h, v1.h[5]
2558*c0909341SAndroid Build Coastguard Worker        smlal2          v5.4s,  v22.8h, v1.h[5]
2559*c0909341SAndroid Build Coastguard Worker        smlal           v2.4s,  v22.4h, v1.h[6]
2560*c0909341SAndroid Build Coastguard Worker        smlal2          v3.4s,  v22.8h, v1.h[6]
2561*c0909341SAndroid Build Coastguard Worker        smlal           v4.4s,  v24.4h, v1.h[6]
2562*c0909341SAndroid Build Coastguard Worker        smlal2          v5.4s,  v24.8h, v1.h[6]
2563*c0909341SAndroid Build Coastguard Worker        smlal           v2.4s,  v24.4h, v1.h[7]
2564*c0909341SAndroid Build Coastguard Worker        smlal2          v3.4s,  v24.8h, v1.h[7]
2565*c0909341SAndroid Build Coastguard Worker        smlal           v4.4s,  v25.4h, v1.h[7]
2566*c0909341SAndroid Build Coastguard Worker        smlal2          v5.4s,  v25.8h, v1.h[7]
2567*c0909341SAndroid Build Coastguard Worker.endif
// Narrow the 32-bit sums to 16 bit with rounding and saturation.
2568*c0909341SAndroid Build Coastguard Worker        sqrshrn         v2.4h,  v2.4s,  #\shift_hv
2569*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v2.8h,  v3.4s,  #\shift_hv
2570*c0909341SAndroid Build Coastguard Worker        sqrshrn         v4.4h,  v4.4s,  #\shift_hv
2571*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v4.8h,  v5.4s,  #\shift_hv
2572*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2 // flags are consumed by b.le 9f below
2573*c0909341SAndroid Build Coastguard Worker.ifc \type, put
// put: saturate to unsigned 8 bit and store 8 pixels per row.
2574*c0909341SAndroid Build Coastguard Worker        sqxtun          v2.8b,  v2.8h
2575*c0909341SAndroid Build Coastguard Worker        sqxtun          v4.8b,  v4.8h
2576*c0909341SAndroid Build Coastguard Worker        st1             {v2.8b}, [\dst], \d_strd
2577*c0909341SAndroid Build Coastguard Worker        st1             {v4.8b}, [\ds2], \d_strd
2578*c0909341SAndroid Build Coastguard Worker.else
// otherwise: store the 16-bit values as they are, 8 per row.
2579*c0909341SAndroid Build Coastguard Worker        st1             {v2.8h}, [\dst], \d_strd
2580*c0909341SAndroid Build Coastguard Worker        st1             {v4.8h}, [\ds2], \d_strd
2581*c0909341SAndroid Build Coastguard Worker.endif
2582*c0909341SAndroid Build Coastguard Worker        b.le            9f
// Slide the vertical window down by two rows.
2583*c0909341SAndroid Build Coastguard Worker.ifc \taps, 8tap
2584*c0909341SAndroid Build Coastguard Worker        mov             v16.16b, v18.16b
2585*c0909341SAndroid Build Coastguard Worker        mov             v17.16b, v19.16b
2586*c0909341SAndroid Build Coastguard Worker.endif
2587*c0909341SAndroid Build Coastguard Worker        mov             v18.16b, v20.16b
2588*c0909341SAndroid Build Coastguard Worker        mov             v19.16b, v21.16b
2589*c0909341SAndroid Build Coastguard Worker        mov             v20.16b, v22.16b
2590*c0909341SAndroid Build Coastguard Worker        mov             v21.16b, v24.16b
2591*c0909341SAndroid Build Coastguard Worker        mov             v22.16b, v25.16b
2592*c0909341SAndroid Build Coastguard Worker        b               88b
// Strip finished: stop if no columns remain, otherwise rewind both pointers
// to the top of the block and move 8 pixels to the right.
2593*c0909341SAndroid Build Coastguard Worker9:
2594*c0909341SAndroid Build Coastguard Worker        subs            \w,  \w,  #8
2595*c0909341SAndroid Build Coastguard Worker        b.le            0f
2596*c0909341SAndroid Build Coastguard Worker        asr             \s_strd,  \s_strd,  #1 // back to single-row strides
2597*c0909341SAndroid Build Coastguard Worker        asr             \d_strd,  \d_strd,  #1
// xmy is presumably the 64-bit view of my, which holds the saved height (the
// register binding is in the macro invocation, outside this view).
2598*c0909341SAndroid Build Coastguard Worker        msub            \src,  \s_strd,  \xmy,  \src
2599*c0909341SAndroid Build Coastguard Worker        msub            \dst,  \d_strd,  \xmy,  \dst
2600*c0909341SAndroid Build Coastguard Worker        sub             \src,  \src,  \s_strd,  lsl #3 // 8tap priming advanced src by 8 rows
2601*c0909341SAndroid Build Coastguard Worker        mov             \h,  \my
2602*c0909341SAndroid Build Coastguard Worker        add             \src,  \src,  #8
2603*c0909341SAndroid Build Coastguard Worker.ifc \type, put
2604*c0909341SAndroid Build Coastguard Worker        add             \dst,  \dst,  #8
2605*c0909341SAndroid Build Coastguard Worker.else
2606*c0909341SAndroid Build Coastguard Worker        add             \dst,  \dst,  #16
2607*c0909341SAndroid Build Coastguard Worker.endif
2608*c0909341SAndroid Build Coastguard Worker.ifc \taps, 6tap
2609*c0909341SAndroid Build Coastguard Worker        add             \src,  \src,  \s_strd,  lsl #1 // 6tap priming advanced only 6 rows: give 2 back
2610*c0909341SAndroid Build Coastguard Worker.endif
2611*c0909341SAndroid Build Coastguard Worker        b               168b
// Common exit, also the target of the b.le 0f in the short-height path above.
2612*c0909341SAndroid Build Coastguard Worker0:
2613*c0909341SAndroid Build Coastguard Worker        ret             x15
2614*c0909341SAndroid Build Coastguard Worker
// Horizontal filter for a single 8-pixel row; the 8-wide hv paths call it once
// per strip to produce the first row of the vertical window.
// Loads 16 bytes from src (post-incremented by s_strd), widens them to 16 bit
// and accumulates the products in 16-bit lanes:
//   6tap: coefficients v0.h[1..6] applied to pixel offsets 0..5
//   8tap: coefficients v0.h[0..7] applied to pixel offsets 0..7
// Out:      v16.8h = filtered row after a rounding right shift by 2.
// Clobbers: v24-v29. sr2 is not accessed. Returns through x30.
2615*c0909341SAndroid Build Coastguard WorkerL(\type\()_\taps\()_filter_8_first):
2616*c0909341SAndroid Build Coastguard Worker        ld1             {v28.8b, v29.8b},  [\src], \s_strd
2617*c0909341SAndroid Build Coastguard Worker        uxtl            v28.8h,  v28.8b // pixels 0..7 as 16 bit
2618*c0909341SAndroid Build Coastguard Worker        uxtl            v29.8h,  v29.8b // pixels 8..15 as 16 bit
// Each ext below extracts 8 consecutive pixels starting (2*k)/2 = k pixels
// into the v28:v29 pair.
2619*c0909341SAndroid Build Coastguard Worker.ifc \taps, 6tap
2620*c0909341SAndroid Build Coastguard Worker        mul             v16.8h,  v28.8h,  v0.h[1]
2621*c0909341SAndroid Build Coastguard Worker        ext             v25.16b, v28.16b, v29.16b, #(2*1)
2622*c0909341SAndroid Build Coastguard Worker        ext             v26.16b, v28.16b, v29.16b, #(2*2)
2623*c0909341SAndroid Build Coastguard Worker        ext             v27.16b, v28.16b, v29.16b, #(2*3)
2624*c0909341SAndroid Build Coastguard Worker        mla             v16.8h,  v25.8h,  v0.h[2]
2625*c0909341SAndroid Build Coastguard Worker        mla             v16.8h,  v26.8h,  v0.h[3]
2626*c0909341SAndroid Build Coastguard Worker        mla             v16.8h,  v27.8h,  v0.h[4]
2627*c0909341SAndroid Build Coastguard Worker        ext             v24.16b, v28.16b, v29.16b, #(2*4)
2628*c0909341SAndroid Build Coastguard Worker        ext             v25.16b, v28.16b, v29.16b, #(2*5)
2629*c0909341SAndroid Build Coastguard Worker        mla             v16.8h,  v24.8h,  v0.h[5]
2630*c0909341SAndroid Build Coastguard Worker        mla             v16.8h,  v25.8h,  v0.h[6]
2631*c0909341SAndroid Build Coastguard Worker.else   // 8tap
2632*c0909341SAndroid Build Coastguard Worker        mul             v16.8h,  v28.8h,  v0.h[0]
2633*c0909341SAndroid Build Coastguard Worker        ext             v24.16b, v28.16b, v29.16b, #(2*1)
2634*c0909341SAndroid Build Coastguard Worker        ext             v25.16b, v28.16b, v29.16b, #(2*2)
2635*c0909341SAndroid Build Coastguard Worker        ext             v26.16b, v28.16b, v29.16b, #(2*3)
2636*c0909341SAndroid Build Coastguard Worker        ext             v27.16b, v28.16b, v29.16b, #(2*4)
2637*c0909341SAndroid Build Coastguard Worker        mla             v16.8h,  v24.8h,  v0.h[1]
2638*c0909341SAndroid Build Coastguard Worker        mla             v16.8h,  v25.8h,  v0.h[2]
2639*c0909341SAndroid Build Coastguard Worker        mla             v16.8h,  v26.8h,  v0.h[3]
2640*c0909341SAndroid Build Coastguard Worker        mla             v16.8h,  v27.8h,  v0.h[4]
2641*c0909341SAndroid Build Coastguard Worker        ext             v24.16b, v28.16b, v29.16b, #(2*5)
2642*c0909341SAndroid Build Coastguard Worker        ext             v25.16b, v28.16b, v29.16b, #(2*6)
2643*c0909341SAndroid Build Coastguard Worker        ext             v26.16b, v28.16b, v29.16b, #(2*7)
2644*c0909341SAndroid Build Coastguard Worker        mla             v16.8h,  v24.8h,  v0.h[5]
2645*c0909341SAndroid Build Coastguard Worker        mla             v16.8h,  v25.8h,  v0.h[6]
2646*c0909341SAndroid Build Coastguard Worker        mla             v16.8h,  v26.8h,  v0.h[7]
2647*c0909341SAndroid Build Coastguard Worker.endif
2648*c0909341SAndroid Build Coastguard Worker        srshr           v16.8h,  v16.8h,  #2
2649*c0909341SAndroid Build Coastguard Worker        ret
2650*c0909341SAndroid Build Coastguard Worker
// Horizontal filter for two 8-pixel rows in one call.
// Loads 16 bytes from sr2 and 16 bytes from src (both post-incremented by
// s_strd), widens them to 16 bit and accumulates the products in 16-bit lanes:
//   6tap: coefficients v0.h[1..6] applied to pixel offsets 0..5
//   8tap: coefficients v0.h[0..7] applied to pixel offsets 0..7
// Out:      v24.8h = filtered sr2 row, v25.8h = filtered src row, each after a
//           rounding right shift by 2.
// Clobbers: v26-v31. Returns through x30.
2651*c0909341SAndroid Build Coastguard WorkerL(\type\()_\taps\()_filter_8):
2652*c0909341SAndroid Build Coastguard Worker        ld1             {v28.8b, v29.8b},  [\sr2], \s_strd
2653*c0909341SAndroid Build Coastguard Worker        ld1             {v30.8b, v31.8b},  [\src], \s_strd
2654*c0909341SAndroid Build Coastguard Worker        uxtl            v28.8h,  v28.8b
2655*c0909341SAndroid Build Coastguard Worker        uxtl            v29.8h,  v29.8b
2656*c0909341SAndroid Build Coastguard Worker        uxtl            v30.8h,  v30.8b
2657*c0909341SAndroid Build Coastguard Worker        uxtl            v31.8h,  v31.8b
2658*c0909341SAndroid Build Coastguard Worker.ifc \taps, 6tap
2659*c0909341SAndroid Build Coastguard Worker        mul             v24.8h,  v28.8h,  v0.h[1]
2660*c0909341SAndroid Build Coastguard Worker        mul             v25.8h,  v30.8h,  v0.h[1]
// .irpc repeats the body once per character of 23456; coefficient i is paired
// with the window that starts i-1 pixels into the row.
2661*c0909341SAndroid Build Coastguard Worker    .irpc i, 23456
2662*c0909341SAndroid Build Coastguard Worker        ext             v26.16b, v28.16b, v29.16b, #(2*\i-2)
2663*c0909341SAndroid Build Coastguard Worker        ext             v27.16b, v30.16b, v31.16b, #(2*\i-2)
2664*c0909341SAndroid Build Coastguard Worker        mla             v24.8h,  v26.8h,  v0.h[\i]
2665*c0909341SAndroid Build Coastguard Worker        mla             v25.8h,  v27.8h,  v0.h[\i]
2666*c0909341SAndroid Build Coastguard Worker    .endr
2667*c0909341SAndroid Build Coastguard Worker.else   // 8tap
2668*c0909341SAndroid Build Coastguard Worker        mul             v24.8h,  v28.8h,  v0.h[0]
2669*c0909341SAndroid Build Coastguard Worker        mul             v25.8h,  v30.8h,  v0.h[0]
// Here coefficient i is paired with the window that starts i pixels into the row.
2670*c0909341SAndroid Build Coastguard Worker    .irpc i, 1234567
2671*c0909341SAndroid Build Coastguard Worker        ext             v26.16b, v28.16b, v29.16b, #(2*\i)
2672*c0909341SAndroid Build Coastguard Worker        ext             v27.16b, v30.16b, v31.16b, #(2*\i)
2673*c0909341SAndroid Build Coastguard Worker        mla             v24.8h,  v26.8h,  v0.h[\i]
2674*c0909341SAndroid Build Coastguard Worker        mla             v25.8h,  v27.8h,  v0.h[\i]
2675*c0909341SAndroid Build Coastguard Worker    .endr
2676*c0909341SAndroid Build Coastguard Worker.endif
2677*c0909341SAndroid Build Coastguard Worker        srshr           v24.8h,  v24.8h, #2
2678*c0909341SAndroid Build Coastguard Worker        srshr           v25.8h,  v25.8h, #2
2679*c0909341SAndroid Build Coastguard Worker        ret
2680*c0909341SAndroid Build Coastguard Workerendfunc
2681*c0909341SAndroid Build Coastguard Worker
// Jump table for the hv entry points. Each entry is a 32-bit offset from the
// start of the table to one of the numeric labels, listed from 1280 down to 20.
// Labels 40 and 20 are defined earlier in this macro, outside this view.
// NOTE(review): the code that indexes this table is also outside this view;
// the bilinear dispatcher further down uses clz(w) - 24 as the index and adds
// the loaded offset to the table address -- presumably the same scheme applies.
2682*c0909341SAndroid Build Coastguard Workerjumptable \type\()_\taps\()_hv_tbl
2683*c0909341SAndroid Build Coastguard Worker        .word 1280b - \type\()_\taps\()_hv_tbl
2684*c0909341SAndroid Build Coastguard Worker        .word 640b  - \type\()_\taps\()_hv_tbl
2685*c0909341SAndroid Build Coastguard Worker        .word 320b  - \type\()_\taps\()_hv_tbl
2686*c0909341SAndroid Build Coastguard Worker        .word 160b  - \type\()_\taps\()_hv_tbl
2687*c0909341SAndroid Build Coastguard Worker        .word 80b   - \type\()_\taps\()_hv_tbl
2688*c0909341SAndroid Build Coastguard Worker        .word 40b   - \type\()_\taps\()_hv_tbl
2689*c0909341SAndroid Build Coastguard Worker        .word 20b   - \type\()_\taps\()_hv_tbl
2690*c0909341SAndroid Build Coastguard Workerendjumptable
2691*c0909341SAndroid Build Coastguard Worker.endm
2692*c0909341SAndroid Build Coastguard Worker
2693*c0909341SAndroid Build Coastguard Worker
2694*c0909341SAndroid Build Coastguard Worker.macro filter_bilin_fn type, dst, d_strd, src, s_strd, w, h, mx, xmx, my, xmy, ds2, sr2, shift_hv
// Entry point of the exported bilinear function for this type.
// Builds the weight vectors, each value replicated into all 16 byte lanes:
//   v0 = 16 - mx, v1 = mx, v2 = 16 - my, v3 = my
// then computes the jump table index w8 = clz(w) - 24 and dispatches on which
// of mx and my are non-zero. Uses w8 and w9 as scratch.
2695*c0909341SAndroid Build Coastguard Workerfunction \type\()_bilin_8bpc_neon, export=1
2696*c0909341SAndroid Build Coastguard Worker        dup             v1.16b, \mx
2697*c0909341SAndroid Build Coastguard Worker        dup             v3.16b, \my
2698*c0909341SAndroid Build Coastguard Worker        mov             w9,  #16
2699*c0909341SAndroid Build Coastguard Worker        sub             w8, w9, \mx
2700*c0909341SAndroid Build Coastguard Worker        sub             w9, w9, \my
2701*c0909341SAndroid Build Coastguard Worker        dup             v0.16b, w8
2702*c0909341SAndroid Build Coastguard Worker        dup             v2.16b, w9
2703*c0909341SAndroid Build Coastguard Worker.ifc \type, prep
// prep only: d_strd = 2 * w (zero-extended), presumably the byte length of one
// packed row of 16-bit output -- TODO confirm against the prep callers.
2704*c0909341SAndroid Build Coastguard Worker        uxtw            \d_strd, \w
2705*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd, \d_strd, #1
2706*c0909341SAndroid Build Coastguard Worker.endif
2707*c0909341SAndroid Build Coastguard Worker
2708*c0909341SAndroid Build Coastguard Worker        clz             w8,  \w
2709*c0909341SAndroid Build Coastguard Worker        sub             w8,  w8,  #24 // e.g. w = 128 gives index 0, w = 2 gives index 6
2710*c0909341SAndroid Build Coastguard Worker        cbnz            \mx, L(\type\()_bilin_h)
2711*c0909341SAndroid Build Coastguard Worker        cbnz            \my, L(\type\()_bilin_v)
// mx == 0 and my == 0: tail-branch to the non-bilinear function of the same
// type, which is defined elsewhere in the file.
2712*c0909341SAndroid Build Coastguard Worker        b               \type\()_neon
2713*c0909341SAndroid Build Coastguard Worker
// Reached when mx != 0. If my is non-zero as well, the combined hv code takes
// over; otherwise dispatch to the horizontal-only routine for this width.
// x8 holds the table index computed at function entry; x9 is scratch.
2714*c0909341SAndroid Build Coastguard WorkerL(\type\()_bilin_h):
2715*c0909341SAndroid Build Coastguard Worker        cbnz            \my, L(\type\()_bilin_hv)
2716*c0909341SAndroid Build Coastguard Worker
2717*c0909341SAndroid Build Coastguard Worker        movrel          x9,  \type\()_bilin_h_tbl
2718*c0909341SAndroid Build Coastguard Worker        ldrsw           x8,  [x9, x8, lsl #2] // signed 32-bit table entry number x8
2719*c0909341SAndroid Build Coastguard Worker        add             x9,  x9,  x8 // table address + entry = branch target
2720*c0909341SAndroid Build Coastguard Worker        br              x9
2721*c0909341SAndroid Build Coastguard Worker
// Horizontal-only bilinear filter for 2-pixel-wide blocks, two rows per
// iteration. The body is assembled only for type put; for any other type just
// the label and the jump-target marker are emitted and execution would fall
// through into the code that follows.
// Each output pixel is (p[x] * (16 - mx) + p[x+1] * mx), rounded and shifted
// right by 4 with unsigned saturation to 8 bit.
2722*c0909341SAndroid Build Coastguard Worker20:     // 2xN h
2723*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
2724*c0909341SAndroid Build Coastguard Worker.ifc \type, put
2725*c0909341SAndroid Build Coastguard Worker        add             \ds2,  \dst,  \d_strd
2726*c0909341SAndroid Build Coastguard Worker        add             \sr2,  \src,  \s_strd
2727*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd,  \d_strd,  #1
2728*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd,  \s_strd,  #1
2729*c0909341SAndroid Build Coastguard Worker2:
2730*c0909341SAndroid Build Coastguard Worker        ld1r            {v4.4s},  [\src], \s_strd // 4 bytes of the src row, replicated to every lane
2731*c0909341SAndroid Build Coastguard Worker        ld1r            {v6.4s},  [\sr2], \s_strd // 4 bytes of the sr2 row, replicated to every lane
2732*c0909341SAndroid Build Coastguard Worker        ext             v5.8b,  v4.8b,  v4.8b, #1 // same rows moved along by one pixel
2733*c0909341SAndroid Build Coastguard Worker        ext             v7.8b,  v6.8b,  v6.8b, #1
// Pack both rows into one register: halfword 0 comes from the src row and
// halfword 1 from the sr2 row (two pixels each).
2734*c0909341SAndroid Build Coastguard Worker        trn1            v4.4h,  v4.4h,  v6.4h
2735*c0909341SAndroid Build Coastguard Worker        trn1            v5.4h,  v5.4h,  v7.4h
2736*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2 // flags are consumed by b.gt 2b below
2737*c0909341SAndroid Build Coastguard Worker        umull           v4.8h,  v4.8b,  v0.8b
2738*c0909341SAndroid Build Coastguard Worker        umlal           v4.8h,  v5.8b,  v1.8b
2739*c0909341SAndroid Build Coastguard Worker        uqrshrn         v4.8b,  v4.8h,  #4
2740*c0909341SAndroid Build Coastguard Worker        st1             {v4.h}[0], [\dst], \d_strd // two pixels for the dst row
2741*c0909341SAndroid Build Coastguard Worker        st1             {v4.h}[1], [\ds2], \d_strd // two pixels for the ds2 row
2742*c0909341SAndroid Build Coastguard Worker        b.gt            2b
2743*c0909341SAndroid Build Coastguard Worker        ret
2744*c0909341SAndroid Build Coastguard Worker.endif
2745*c0909341SAndroid Build Coastguard Worker
2746*c0909341SAndroid Build Coastguard Worker40:     // 4xN h
2747*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
2748*c0909341SAndroid Build Coastguard Worker        add             \ds2,  \dst,  \d_strd
2749*c0909341SAndroid Build Coastguard Worker        add             \sr2,  \src,  \s_strd
2750*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd,  \d_strd,  #1
2751*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd,  \s_strd,  #1
2752*c0909341SAndroid Build Coastguard Worker4:
2753*c0909341SAndroid Build Coastguard Worker        ld1             {v4.8b}, [\src], \s_strd
2754*c0909341SAndroid Build Coastguard Worker        ld1             {v6.8b}, [\sr2], \s_strd
2755*c0909341SAndroid Build Coastguard Worker        ext             v5.8b,  v4.8b,  v4.8b, #1
2756*c0909341SAndroid Build Coastguard Worker        ext             v7.8b,  v6.8b,  v6.8b, #1
2757*c0909341SAndroid Build Coastguard Worker        trn1            v4.2s,  v4.2s,  v6.2s
2758*c0909341SAndroid Build Coastguard Worker        trn1            v5.2s,  v5.2s,  v7.2s
2759*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2
2760*c0909341SAndroid Build Coastguard Worker        umull           v4.8h,  v4.8b,  v0.8b
2761*c0909341SAndroid Build Coastguard Worker        umlal           v4.8h,  v5.8b,  v1.8b
2762*c0909341SAndroid Build Coastguard Worker.ifc \type, put
2763*c0909341SAndroid Build Coastguard Worker        uqrshrn         v4.8b,  v4.8h,  #4
2764*c0909341SAndroid Build Coastguard Worker        st1             {v4.s}[0], [\dst], \d_strd
2765*c0909341SAndroid Build Coastguard Worker        st1             {v4.s}[1], [\ds2], \d_strd
2766*c0909341SAndroid Build Coastguard Worker.else
2767*c0909341SAndroid Build Coastguard Worker        st1             {v4.8b},   [\dst], \d_strd
2768*c0909341SAndroid Build Coastguard Worker        st1             {v4.d}[1], [\ds2], \d_strd
2769*c0909341SAndroid Build Coastguard Worker.endif
2770*c0909341SAndroid Build Coastguard Worker        b.gt            4b
2771*c0909341SAndroid Build Coastguard Worker        ret
2772*c0909341SAndroid Build Coastguard Worker
2773*c0909341SAndroid Build Coastguard Worker80:     // 8xN h
2774*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
2775*c0909341SAndroid Build Coastguard Worker        add             \ds2,  \dst,  \d_strd
2776*c0909341SAndroid Build Coastguard Worker        add             \sr2,  \src,  \s_strd
2777*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd,  \d_strd,  #1
2778*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd,  \s_strd,  #1
2779*c0909341SAndroid Build Coastguard Worker8:
2780*c0909341SAndroid Build Coastguard Worker        ld1             {v4.16b}, [\src], \s_strd
2781*c0909341SAndroid Build Coastguard Worker        ld1             {v6.16b}, [\sr2], \s_strd
2782*c0909341SAndroid Build Coastguard Worker        ext             v5.16b, v4.16b, v4.16b, #1
2783*c0909341SAndroid Build Coastguard Worker        ext             v7.16b, v6.16b, v6.16b, #1
2784*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2
2785*c0909341SAndroid Build Coastguard Worker        umull           v4.8h,  v4.8b,  v0.8b
2786*c0909341SAndroid Build Coastguard Worker        umull           v6.8h,  v6.8b,  v0.8b
2787*c0909341SAndroid Build Coastguard Worker        umlal           v4.8h,  v5.8b,  v1.8b
2788*c0909341SAndroid Build Coastguard Worker        umlal           v6.8h,  v7.8b,  v1.8b
2789*c0909341SAndroid Build Coastguard Worker.ifc \type, put
2790*c0909341SAndroid Build Coastguard Worker        uqrshrn         v4.8b,  v4.8h,  #4
2791*c0909341SAndroid Build Coastguard Worker        uqrshrn         v6.8b,  v6.8h,  #4
2792*c0909341SAndroid Build Coastguard Worker        st1             {v4.8b}, [\dst], \d_strd
2793*c0909341SAndroid Build Coastguard Worker        st1             {v6.8b}, [\ds2], \d_strd
2794*c0909341SAndroid Build Coastguard Worker.else
2795*c0909341SAndroid Build Coastguard Worker        st1             {v4.8h}, [\dst], \d_strd
2796*c0909341SAndroid Build Coastguard Worker        st1             {v6.8h}, [\ds2], \d_strd
2797*c0909341SAndroid Build Coastguard Worker.endif
2798*c0909341SAndroid Build Coastguard Worker        b.gt            8b
2799*c0909341SAndroid Build Coastguard Worker        ret
2800*c0909341SAndroid Build Coastguard Worker160:
2801*c0909341SAndroid Build Coastguard Worker320:
2802*c0909341SAndroid Build Coastguard Worker640:
2803*c0909341SAndroid Build Coastguard Worker1280:   // 16xN, 32xN, ... h
2804*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
2805*c0909341SAndroid Build Coastguard Worker        add             \ds2,  \dst,  \d_strd
2806*c0909341SAndroid Build Coastguard Worker        add             \sr2,  \src,  \s_strd
2807*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd,  \s_strd,  #1
2808*c0909341SAndroid Build Coastguard Worker
2809*c0909341SAndroid Build Coastguard Worker        sub             \s_strd,  \s_strd,  \w, uxtw
2810*c0909341SAndroid Build Coastguard Worker        sub             \s_strd,  \s_strd,  #8
2811*c0909341SAndroid Build Coastguard Worker.ifc \type, put
2812*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd,  \d_strd,  #1
2813*c0909341SAndroid Build Coastguard Worker        sub             \d_strd,  \d_strd,  \w, uxtw
2814*c0909341SAndroid Build Coastguard Worker.endif
2815*c0909341SAndroid Build Coastguard Worker161:
2816*c0909341SAndroid Build Coastguard Worker        ld1             {v16.d}[1],  [\src], #8
2817*c0909341SAndroid Build Coastguard Worker        ld1             {v20.d}[1],  [\sr2], #8
2818*c0909341SAndroid Build Coastguard Worker        mov             \mx, \w
2819*c0909341SAndroid Build Coastguard Worker
2820*c0909341SAndroid Build Coastguard Worker16:
2821*c0909341SAndroid Build Coastguard Worker        ld1             {v18.16b},  [\src], #16
2822*c0909341SAndroid Build Coastguard Worker        ld1             {v22.16b},  [\sr2], #16
2823*c0909341SAndroid Build Coastguard Worker        ext             v17.16b, v16.16b, v18.16b, #8
2824*c0909341SAndroid Build Coastguard Worker        ext             v19.16b, v16.16b, v18.16b, #9
2825*c0909341SAndroid Build Coastguard Worker        ext             v21.16b, v20.16b, v22.16b, #8
2826*c0909341SAndroid Build Coastguard Worker        ext             v23.16b, v20.16b, v22.16b, #9
2827*c0909341SAndroid Build Coastguard Worker        umull           v16.8h,  v17.8b,  v0.8b
2828*c0909341SAndroid Build Coastguard Worker        umull2          v17.8h,  v17.16b, v0.16b
2829*c0909341SAndroid Build Coastguard Worker        umull           v20.8h,  v21.8b,  v0.8b
2830*c0909341SAndroid Build Coastguard Worker        umull2          v21.8h,  v21.16b, v0.16b
2831*c0909341SAndroid Build Coastguard Worker        umlal           v16.8h,  v19.8b,  v1.8b
2832*c0909341SAndroid Build Coastguard Worker        umlal2          v17.8h,  v19.16b, v1.16b
2833*c0909341SAndroid Build Coastguard Worker        umlal           v20.8h,  v23.8b,  v1.8b
2834*c0909341SAndroid Build Coastguard Worker        umlal2          v21.8h,  v23.16b, v1.16b
2835*c0909341SAndroid Build Coastguard Worker        subs            \mx, \mx, #16
2836*c0909341SAndroid Build Coastguard Worker.ifc \type, put
2837*c0909341SAndroid Build Coastguard Worker        uqrshrn         v16.8b,  v16.8h, #4
2838*c0909341SAndroid Build Coastguard Worker        uqrshrn2        v16.16b, v17.8h, #4
2839*c0909341SAndroid Build Coastguard Worker        uqrshrn         v20.8b,  v20.8h, #4
2840*c0909341SAndroid Build Coastguard Worker        uqrshrn2        v20.16b, v21.8h, #4
2841*c0909341SAndroid Build Coastguard Worker        st1             {v16.16b}, [\dst], #16
2842*c0909341SAndroid Build Coastguard Worker        st1             {v20.16b}, [\ds2], #16
2843*c0909341SAndroid Build Coastguard Worker.else
2844*c0909341SAndroid Build Coastguard Worker        st1             {v16.8h, v17.8h}, [\dst], #32
2845*c0909341SAndroid Build Coastguard Worker        st1             {v20.8h, v21.8h}, [\ds2], #32
2846*c0909341SAndroid Build Coastguard Worker.endif
2847*c0909341SAndroid Build Coastguard Worker        b.le            9f
2848*c0909341SAndroid Build Coastguard Worker
2849*c0909341SAndroid Build Coastguard Worker        mov             v16.16b, v18.16b
2850*c0909341SAndroid Build Coastguard Worker        mov             v20.16b, v22.16b
2851*c0909341SAndroid Build Coastguard Worker        b               16b
2852*c0909341SAndroid Build Coastguard Worker
2853*c0909341SAndroid Build Coastguard Worker9:
2854*c0909341SAndroid Build Coastguard Worker        add             \dst,  \dst,  \d_strd
2855*c0909341SAndroid Build Coastguard Worker        add             \ds2,  \ds2,  \d_strd
2856*c0909341SAndroid Build Coastguard Worker        add             \src,  \src,  \s_strd
2857*c0909341SAndroid Build Coastguard Worker        add             \sr2,  \sr2,  \s_strd
2858*c0909341SAndroid Build Coastguard Worker
2859*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2
2860*c0909341SAndroid Build Coastguard Worker        b.gt            161b
2861*c0909341SAndroid Build Coastguard Worker        ret
2862*c0909341SAndroid Build Coastguard Workerendfunc
2863*c0909341SAndroid Build Coastguard Worker
// Branch-offset table for the horizontal-only bilinear handlers.
//
// Each entry is a 32-bit value: the distance from the start of this table
// to one of the numeric labels defined before it (the "b" suffix selects
// the nearest preceding definition of that label). The dispatcher at
// L(\type\()_bilin_h) reads an entry with
//     ldrsw x8, [x9, x8, lsl #2]
// and adds it back to the table address before branching, so entries are
// position-independent and may be negative.
//
// Entries run from the widest handler label (1280) down to the narrowest
// (20). Labels 160/320/640/1280 are all defined at the same address, so
// those four entries select the same "16xN, 32xN, ..." code. The index in
// x8 is computed before the visible code.
// NOTE(review): presumably x8 is derived from the block width so that
// index 0 means the widest block - confirm against the function prologue.
2864*c0909341SAndroid Build Coastguard Workerjumptable \type\()_bilin_h_tbl
2865*c0909341SAndroid Build Coastguard Worker        .word 1280b - \type\()_bilin_h_tbl
2866*c0909341SAndroid Build Coastguard Worker        .word 640b  - \type\()_bilin_h_tbl
2867*c0909341SAndroid Build Coastguard Worker        .word 320b  - \type\()_bilin_h_tbl
2868*c0909341SAndroid Build Coastguard Worker        .word 160b  - \type\()_bilin_h_tbl
2869*c0909341SAndroid Build Coastguard Worker        .word 80b   - \type\()_bilin_h_tbl
2870*c0909341SAndroid Build Coastguard Worker        .word 40b   - \type\()_bilin_h_tbl
2871*c0909341SAndroid Build Coastguard Worker        .word 20b   - \type\()_bilin_h_tbl
2872*c0909341SAndroid Build Coastguard Workerendjumptable
2873*c0909341SAndroid Build Coastguard Worker
2874*c0909341SAndroid Build Coastguard Worker
// -----------------------------------------------------------------------
// L(\type\()_bilin_v): vertical-only two-tap (bilinear) filter.
//
// Every output row is  row[y] * v2 + row[y + 1] * v3  computed per byte
// into 16-bit lanes (umull + umlal).
//   - \type == put : the sum is narrowed back to bytes with a rounding,
//                    saturating right shift by 4 (uqrshrn #4).
//   - otherwise    : the 16-bit sums are stored as they are.
//
// Inputs (all bound outside the visible code):
//   \dst, \d_strd  destination pointer and stride
//   \src, \s_strd  source pointer and stride
//   \w, \h         width / height counters
//   v2, v3         per-byte weights for the upper / lower row
//   x8             index into \type\()_bilin_v_tbl
// Scratch used here: x9, \ds2, \sr2, \my (saved height in the wide path),
// v4-v7, v16-v20, and the condition flags.
//
// All handlers work on two rows at a time: \sr2 / \ds2 point one row
// below \src / \dst and both strides are doubled.
// NOTE(review): this assumes \h is even (and \w a multiple of 16 in the
// wide path) - confirm against the callers.
// -----------------------------------------------------------------------
2875*c0909341SAndroid Build Coastguard Workerfunction L(\type\()_bilin_v)
        // NOTE(review): the flags from this compare do not appear to be
        // consumed; every handler below sets the flags again (cmp/subs)
        // before its first conditional branch.
2876*c0909341SAndroid Build Coastguard Worker        cmp             \h,  #4
        // Dispatch: x9 = table base, x8 = signed 32-bit offset read from
        // table[x8], target = base + offset.
2877*c0909341SAndroid Build Coastguard Worker        movrel          x9,  \type\()_bilin_v_tbl
2878*c0909341SAndroid Build Coastguard Worker        ldrsw           x8,  [x9, x8, lsl #2]
2879*c0909341SAndroid Build Coastguard Worker        add             x9,  x9,  x8
2880*c0909341SAndroid Build Coastguard Worker        br              x9
2881*c0909341SAndroid Build Coastguard Worker
        // The 2xN body below is only assembled when \type is put. For any
        // other \type, label 20 falls straight through into the 4xN
        // handler at label 40.
2882*c0909341SAndroid Build Coastguard Worker20:     // 2xN v
2883*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
2884*c0909341SAndroid Build Coastguard Worker.ifc \type, put
        // Flags set here survive until the b.gt below (add/lsl/ld1r do
        // not modify them): h > 2 takes the four-rows-per-pass loop.
2885*c0909341SAndroid Build Coastguard Worker        cmp             \h,  #2
2886*c0909341SAndroid Build Coastguard Worker        add             \ds2,  \dst,  \d_strd
2887*c0909341SAndroid Build Coastguard Worker        add             \sr2,  \src,  \s_strd
2888*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd,  \s_strd,  #1
2889*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd,  \d_strd,  #1
2890*c0909341SAndroid Build Coastguard Worker
2891*c0909341SAndroid Build Coastguard Worker        // 2x2 v
        // Each ld1r fetches one 16-bit element (one 2-byte row) and
        // replicates it across the whole vector.
2892*c0909341SAndroid Build Coastguard Worker        ld1r            {v16.8h}, [\src], \s_strd
2893*c0909341SAndroid Build Coastguard Worker        b.gt            24f
        // Final (or only) pair of rows. v16 holds row 0 on entry.
2894*c0909341SAndroid Build Coastguard Worker22:
2895*c0909341SAndroid Build Coastguard Worker        ld1r            {v17.8h}, [\sr2], \s_strd
2896*c0909341SAndroid Build Coastguard Worker        ld1r            {v18.8h}, [\src], \s_strd
        // After the transposes: v16 = {row0, row1, ...} and
        // v17 = {row1, row2, ...} in 16-bit units.
2897*c0909341SAndroid Build Coastguard Worker        trn1            v16.4h, v16.4h, v17.4h
2898*c0909341SAndroid Build Coastguard Worker        trn1            v17.4h, v17.4h, v18.4h
2899*c0909341SAndroid Build Coastguard Worker        umull           v4.8h,  v16.8b,  v2.8b
2900*c0909341SAndroid Build Coastguard Worker        umlal           v4.8h,  v17.8b,  v3.8b
2901*c0909341SAndroid Build Coastguard Worker        uqrshrn         v4.8b,  v4.8h,  #4
        // Halfword 0 is output row 0, halfword 1 is output row 1. No
        // pointer post-increment: this path returns immediately.
2902*c0909341SAndroid Build Coastguard Worker        str             h4,        [\dst]
2903*c0909341SAndroid Build Coastguard Worker        st1             {v4.h}[1], [\ds2]
2904*c0909341SAndroid Build Coastguard Worker        ret
        // Four output rows per pass. v16 holds the first input row on
        // entry; five input rows (v16..v20) are needed in total.
2905*c0909341SAndroid Build Coastguard Worker24:     // 2x4, 2x6, 2x8, ... v
2906*c0909341SAndroid Build Coastguard Worker        ld1r            {v17.8h}, [\sr2], \s_strd
2907*c0909341SAndroid Build Coastguard Worker        ld1r            {v18.8h}, [\src], \s_strd
2908*c0909341SAndroid Build Coastguard Worker        ld1r            {v19.8h}, [\sr2], \s_strd
2909*c0909341SAndroid Build Coastguard Worker        ld1r            {v20.8h}, [\src], \s_strd
2910*c0909341SAndroid Build Coastguard Worker        sub             \h,  \h,  #4
        // Pack rows into 8 bytes each: v16 = rows 0,1,2,3 and
        // v17 = rows 1,2,3,4 (upper / lower inputs of the filter).
2911*c0909341SAndroid Build Coastguard Worker        trn1            v16.4h, v16.4h, v17.4h
2912*c0909341SAndroid Build Coastguard Worker        trn1            v17.4h, v17.4h, v18.4h
2913*c0909341SAndroid Build Coastguard Worker        trn1            v18.4h, v18.4h, v19.4h
2914*c0909341SAndroid Build Coastguard Worker        trn1            v19.4h, v19.4h, v20.4h
2915*c0909341SAndroid Build Coastguard Worker        trn1            v16.2s, v16.2s, v18.2s
2916*c0909341SAndroid Build Coastguard Worker        trn1            v17.2s, v17.2s, v19.2s
2917*c0909341SAndroid Build Coastguard Worker        umull           v4.8h,  v16.8b,  v2.8b
2918*c0909341SAndroid Build Coastguard Worker        umlal           v4.8h,  v17.8b,  v3.8b
        // Compare the remaining height against 2; the result is consumed
        // by b.lt / b.eq after the stores (none of the instructions in
        // between modify the flags).
2919*c0909341SAndroid Build Coastguard Worker        cmp             \h,  #2
2920*c0909341SAndroid Build Coastguard Worker        uqrshrn         v4.8b,  v4.8h,  #4
        // Output rows alternate between \dst (even) and \ds2 (odd).
2921*c0909341SAndroid Build Coastguard Worker        st1             {v4.h}[0], [\dst], \d_strd
2922*c0909341SAndroid Build Coastguard Worker        st1             {v4.h}[1], [\ds2], \d_strd
2923*c0909341SAndroid Build Coastguard Worker        st1             {v4.h}[2], [\dst], \d_strd
2924*c0909341SAndroid Build Coastguard Worker        st1             {v4.h}[3], [\ds2], \d_strd
        // remaining < 2: done. remaining == 2: finish with the two-row
        // tail at 22. Otherwise do another four-row pass. The last
        // loaded row (v20) becomes the first row of the next pass.
2925*c0909341SAndroid Build Coastguard Worker        b.lt            0f
2926*c0909341SAndroid Build Coastguard Worker        mov             v16.8b, v20.8b
2927*c0909341SAndroid Build Coastguard Worker        b.eq            22b
2928*c0909341SAndroid Build Coastguard Worker        b               24b
2929*c0909341SAndroid Build Coastguard Worker0:
2930*c0909341SAndroid Build Coastguard Worker        ret
2931*c0909341SAndroid Build Coastguard Worker.endif
2932*c0909341SAndroid Build Coastguard Worker
        // 4 bytes per row; two output rows are computed together in the
        // low and high halves of one 8-lane accumulator.
2933*c0909341SAndroid Build Coastguard Worker40:     // 4xN v
2934*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
2935*c0909341SAndroid Build Coastguard Worker        add             \ds2,  \dst,  \d_strd
2936*c0909341SAndroid Build Coastguard Worker        add             \sr2,  \src,  \s_strd
2937*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd,  \s_strd,  #1
2938*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd,  \d_strd,  #1
        // Prime v16 with the first row (one 32-bit element, replicated).
2939*c0909341SAndroid Build Coastguard Worker        ld1r            {v16.4s}, [\src], \s_strd
2940*c0909341SAndroid Build Coastguard Worker4:
2941*c0909341SAndroid Build Coastguard Worker        ld1r            {v17.4s}, [\sr2], \s_strd
2942*c0909341SAndroid Build Coastguard Worker        ld1r            {v18.4s}, [\src], \s_strd
        // v16 = {row0, row1}, v17 = {row1, row2} in 32-bit units.
2943*c0909341SAndroid Build Coastguard Worker        trn1            v16.2s, v16.2s, v17.2s
2944*c0909341SAndroid Build Coastguard Worker        trn1            v17.2s, v17.2s, v18.2s
2945*c0909341SAndroid Build Coastguard Worker        umull           v4.8h,  v16.8b,  v2.8b
2946*c0909341SAndroid Build Coastguard Worker        umlal           v4.8h,  v17.8b,  v3.8b
2947*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2
2948*c0909341SAndroid Build Coastguard Worker.ifc \type, put
2949*c0909341SAndroid Build Coastguard Worker        uqrshrn         v4.8b,  v4.8h,  #4
2950*c0909341SAndroid Build Coastguard Worker        st1             {v4.s}[0], [\dst], \d_strd
2951*c0909341SAndroid Build Coastguard Worker        st1             {v4.s}[1], [\ds2], \d_strd
2952*c0909341SAndroid Build Coastguard Worker.else
        // Unshifted 16-bit sums: low 8 bytes are the first row, high 8
        // bytes the second row.
2953*c0909341SAndroid Build Coastguard Worker        st1             {v4.8b},   [\dst], \d_strd
2954*c0909341SAndroid Build Coastguard Worker        st1             {v4.d}[1], [\ds2], \d_strd
2955*c0909341SAndroid Build Coastguard Worker.endif
        // Flags still come from the subs above (stores leave them alone).
2956*c0909341SAndroid Build Coastguard Worker        b.le            0f
        // Carry the last loaded row over as the next pass's first row.
2957*c0909341SAndroid Build Coastguard Worker        mov             v16.8b, v18.8b
2958*c0909341SAndroid Build Coastguard Worker        b               4b
2959*c0909341SAndroid Build Coastguard Worker0:
2960*c0909341SAndroid Build Coastguard Worker        ret
2961*c0909341SAndroid Build Coastguard Worker
        // 8 bytes per row; one accumulator per output row (v4, v5).
2962*c0909341SAndroid Build Coastguard Worker80:     // 8xN v
2963*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
2964*c0909341SAndroid Build Coastguard Worker        add             \ds2,  \dst,  \d_strd
2965*c0909341SAndroid Build Coastguard Worker        add             \sr2,  \src,  \s_strd
2966*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd,  \s_strd,  #1
2967*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd,  \d_strd,  #1
2968*c0909341SAndroid Build Coastguard Worker        ld1             {v16.8b}, [\src], \s_strd
2969*c0909341SAndroid Build Coastguard Worker8:
2970*c0909341SAndroid Build Coastguard Worker        ld1             {v17.8b}, [\sr2], \s_strd
2971*c0909341SAndroid Build Coastguard Worker        ld1             {v18.8b}, [\src], \s_strd
        // v4 = row0*v2 + row1*v3, v5 = row1*v2 + row2*v3.
2972*c0909341SAndroid Build Coastguard Worker        umull           v4.8h,  v16.8b,  v2.8b
2973*c0909341SAndroid Build Coastguard Worker        umull           v5.8h,  v17.8b,  v2.8b
2974*c0909341SAndroid Build Coastguard Worker        umlal           v4.8h,  v17.8b,  v3.8b
2975*c0909341SAndroid Build Coastguard Worker        umlal           v5.8h,  v18.8b,  v3.8b
2976*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2
2977*c0909341SAndroid Build Coastguard Worker.ifc \type, put
2978*c0909341SAndroid Build Coastguard Worker        uqrshrn         v4.8b,  v4.8h,  #4
2979*c0909341SAndroid Build Coastguard Worker        uqrshrn         v5.8b,  v5.8h,  #4
2980*c0909341SAndroid Build Coastguard Worker        st1             {v4.8b}, [\dst], \d_strd
2981*c0909341SAndroid Build Coastguard Worker        st1             {v5.8b}, [\ds2], \d_strd
2982*c0909341SAndroid Build Coastguard Worker.else
2983*c0909341SAndroid Build Coastguard Worker        st1             {v4.8h}, [\dst], \d_strd
2984*c0909341SAndroid Build Coastguard Worker        st1             {v5.8h}, [\ds2], \d_strd
2985*c0909341SAndroid Build Coastguard Worker.endif
2986*c0909341SAndroid Build Coastguard Worker        b.le            0f
2987*c0909341SAndroid Build Coastguard Worker        mov             v16.8b, v18.8b
2988*c0909341SAndroid Build Coastguard Worker        b               8b
2989*c0909341SAndroid Build Coastguard Worker0:
2990*c0909341SAndroid Build Coastguard Worker        ret
2991*c0909341SAndroid Build Coastguard Worker
        // Wide blocks are processed as 16-byte-wide columns: the inner
        // loop (label 2) walks one column top to bottom, then the code at
        // label 9 rewinds the pointers and steps to the next column.
2992*c0909341SAndroid Build Coastguard Worker160:    // 16xN, 32xN, ...
2993*c0909341SAndroid Build Coastguard Worker320:
2994*c0909341SAndroid Build Coastguard Worker640:
2995*c0909341SAndroid Build Coastguard Worker1280:
2996*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
        // Save the full height; \h is consumed by each column pass and
        // restored from \my at label 9.
2997*c0909341SAndroid Build Coastguard Worker        mov             \my,  \h
        // Column setup. The strides are doubled here and halved again at
        // label 9 before looping back, so this runs once per column.
2998*c0909341SAndroid Build Coastguard Worker1:
2999*c0909341SAndroid Build Coastguard Worker        add             \ds2, \dst, \d_strd
3000*c0909341SAndroid Build Coastguard Worker        add             \sr2, \src, \s_strd
3001*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd, \s_strd, #1
3002*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd, \d_strd, #1
3003*c0909341SAndroid Build Coastguard Worker
3004*c0909341SAndroid Build Coastguard Worker        ld1             {v16.16b}, [\src], \s_strd
3005*c0909341SAndroid Build Coastguard Worker2:
3006*c0909341SAndroid Build Coastguard Worker        ld1             {v17.16b}, [\sr2], \s_strd
3007*c0909341SAndroid Build Coastguard Worker        ld1             {v18.16b}, [\src], \s_strd
        // v4/v5 = low/high halves of output row 0,
        // v6/v7 = low/high halves of output row 1.
3008*c0909341SAndroid Build Coastguard Worker        umull           v4.8h,  v16.8b,  v2.8b
3009*c0909341SAndroid Build Coastguard Worker        umull2          v5.8h,  v16.16b, v2.16b
3010*c0909341SAndroid Build Coastguard Worker        umull           v6.8h,  v17.8b,  v2.8b
3011*c0909341SAndroid Build Coastguard Worker        umull2          v7.8h,  v17.16b, v2.16b
3012*c0909341SAndroid Build Coastguard Worker        umlal           v4.8h,  v17.8b,  v3.8b
3013*c0909341SAndroid Build Coastguard Worker        umlal2          v5.8h,  v17.16b, v3.16b
3014*c0909341SAndroid Build Coastguard Worker        umlal           v6.8h,  v18.8b,  v3.8b
3015*c0909341SAndroid Build Coastguard Worker        umlal2          v7.8h,  v18.16b, v3.16b
3016*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2
3017*c0909341SAndroid Build Coastguard Worker.ifc \type, put
3018*c0909341SAndroid Build Coastguard Worker        uqrshrn         v4.8b,  v4.8h,  #4
3019*c0909341SAndroid Build Coastguard Worker        uqrshrn2        v4.16b, v5.8h,  #4
3020*c0909341SAndroid Build Coastguard Worker        uqrshrn         v6.8b,  v6.8h,  #4
3021*c0909341SAndroid Build Coastguard Worker        uqrshrn2        v6.16b, v7.8h,  #4
3022*c0909341SAndroid Build Coastguard Worker        st1             {v4.16b}, [\dst], \d_strd
3023*c0909341SAndroid Build Coastguard Worker        st1             {v6.16b}, [\ds2], \d_strd
3024*c0909341SAndroid Build Coastguard Worker.else
3025*c0909341SAndroid Build Coastguard Worker        st1             {v4.8h, v5.8h}, [\dst], \d_strd
3026*c0909341SAndroid Build Coastguard Worker        st1             {v6.8h, v7.8h}, [\ds2], \d_strd
3027*c0909341SAndroid Build Coastguard Worker.endif
3028*c0909341SAndroid Build Coastguard Worker        b.le            9f
3029*c0909341SAndroid Build Coastguard Worker        mov             v16.16b, v18.16b
3030*c0909341SAndroid Build Coastguard Worker        b               2b
        // End of one column: stop when no width is left, otherwise move
        // to the top of the next 16-pixel column.
3031*c0909341SAndroid Build Coastguard Worker9:
3032*c0909341SAndroid Build Coastguard Worker        subs            \w,  \w,  #16
3033*c0909341SAndroid Build Coastguard Worker        b.le            0f
        // Undo the stride doubling done at label 1.
3034*c0909341SAndroid Build Coastguard Worker        asr             \s_strd, \s_strd, #1
3035*c0909341SAndroid Build Coastguard Worker        asr             \d_strd, \d_strd, #1
        // Rewind by (saved height) rows. \src additionally goes back two
        // rows because the column pass read two rows more than it wrote
        // (the priming load plus the trailing lower row).
        // NOTE(review): \xmy is presumably the 64-bit view of \my -
        // confirm against the macro arguments.
3036*c0909341SAndroid Build Coastguard Worker        msub            \src, \s_strd, \xmy, \src
3037*c0909341SAndroid Build Coastguard Worker        msub            \dst, \d_strd, \xmy, \dst
3038*c0909341SAndroid Build Coastguard Worker        sub             \src, \src, \s_strd, lsl #1
3039*c0909341SAndroid Build Coastguard Worker        mov             \h,  \my
        // Step right by 16 pixels: 16 source bytes, and 16 (byte output)
        // or 32 (16-bit output) destination bytes.
3040*c0909341SAndroid Build Coastguard Worker        add             \src, \src, #16
3041*c0909341SAndroid Build Coastguard Worker.ifc \type, put
3042*c0909341SAndroid Build Coastguard Worker        add             \dst, \dst, #16
3043*c0909341SAndroid Build Coastguard Worker.else
3044*c0909341SAndroid Build Coastguard Worker        add             \dst, \dst, #32
3045*c0909341SAndroid Build Coastguard Worker.endif
3046*c0909341SAndroid Build Coastguard Worker        b               1b
3047*c0909341SAndroid Build Coastguard Worker0:
3048*c0909341SAndroid Build Coastguard Worker        ret
3049*c0909341SAndroid Build Coastguard Workerendfunc
3050*c0909341SAndroid Build Coastguard Worker
// Branch-offset table for the vertical-only bilinear handlers.
//
// Each entry is a 32-bit value: the distance from the start of this table
// to one of the numeric labels defined before it (the "b" suffix selects
// the nearest preceding definition of that label). L(\type\()_bilin_v)
// reads an entry with
//     ldrsw x8, [x9, x8, lsl #2]
// and adds it back to the table address before branching, so entries are
// position-independent and may be negative.
//
// Entries run from the widest handler label (1280) down to the narrowest
// (20). Labels 160/320/640/1280 are all defined at the same address, so
// those four entries select the same "16xN, 32xN, ..." code. The index in
// x8 is computed before the visible code.
// NOTE(review): presumably x8 is derived from the block width so that
// index 0 means the widest block - confirm against the function prologue.
3051*c0909341SAndroid Build Coastguard Workerjumptable \type\()_bilin_v_tbl
3052*c0909341SAndroid Build Coastguard Worker        .word 1280b - \type\()_bilin_v_tbl
3053*c0909341SAndroid Build Coastguard Worker        .word 640b  - \type\()_bilin_v_tbl
3054*c0909341SAndroid Build Coastguard Worker        .word 320b  - \type\()_bilin_v_tbl
3055*c0909341SAndroid Build Coastguard Worker        .word 160b  - \type\()_bilin_v_tbl
3056*c0909341SAndroid Build Coastguard Worker        .word 80b   - \type\()_bilin_v_tbl
3057*c0909341SAndroid Build Coastguard Worker        .word 40b   - \type\()_bilin_v_tbl
3058*c0909341SAndroid Build Coastguard Worker        .word 20b   - \type\()_bilin_v_tbl
3059*c0909341SAndroid Build Coastguard Workerendjumptable
3060*c0909341SAndroid Build Coastguard Worker
3061*c0909341SAndroid Build Coastguard Workerfunction L(\type\()_bilin_hv)
3062*c0909341SAndroid Build Coastguard Worker        uxtl            v2.8h, v2.8b
3063*c0909341SAndroid Build Coastguard Worker        uxtl            v3.8h, v3.8b
3064*c0909341SAndroid Build Coastguard Worker        movrel          x9,  \type\()_bilin_hv_tbl
3065*c0909341SAndroid Build Coastguard Worker        ldrsw           x8,  [x9, x8, lsl #2]
3066*c0909341SAndroid Build Coastguard Worker        add             x9,  x9,  x8
3067*c0909341SAndroid Build Coastguard Worker        br              x9
3068*c0909341SAndroid Build Coastguard Worker
3069*c0909341SAndroid Build Coastguard Worker20:     // 2xN hv
3070*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
3071*c0909341SAndroid Build Coastguard Worker.ifc \type, put
3072*c0909341SAndroid Build Coastguard Worker        add             \sr2, \src, \s_strd
3073*c0909341SAndroid Build Coastguard Worker        add             \ds2, \dst, \d_strd
3074*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd, \s_strd, #1
3075*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd, \d_strd, #1
3076*c0909341SAndroid Build Coastguard Worker
3077*c0909341SAndroid Build Coastguard Worker        ld1r            {v28.4s},  [\src], \s_strd
3078*c0909341SAndroid Build Coastguard Worker        ext             v29.8b, v28.8b, v28.8b, #1
3079*c0909341SAndroid Build Coastguard Worker        umull           v16.8h, v28.8b, v0.8b
3080*c0909341SAndroid Build Coastguard Worker        umlal           v16.8h, v29.8b, v1.8b
3081*c0909341SAndroid Build Coastguard Worker
3082*c0909341SAndroid Build Coastguard Worker2:
3083*c0909341SAndroid Build Coastguard Worker        ld1r            {v28.4s},  [\sr2], \s_strd
3084*c0909341SAndroid Build Coastguard Worker        ld1r            {v30.4s},  [\src], \s_strd
3085*c0909341SAndroid Build Coastguard Worker        ext             v29.8b, v28.8b, v28.8b, #1
3086*c0909341SAndroid Build Coastguard Worker        ext             v31.8b, v30.8b, v30.8b, #1
3087*c0909341SAndroid Build Coastguard Worker        trn1            v28.4h, v28.4h, v30.4h
3088*c0909341SAndroid Build Coastguard Worker        trn1            v29.4h, v29.4h, v31.4h
3089*c0909341SAndroid Build Coastguard Worker        umull           v17.8h, v28.8b, v0.8b
3090*c0909341SAndroid Build Coastguard Worker        umlal           v17.8h, v29.8b, v1.8b
3091*c0909341SAndroid Build Coastguard Worker
3092*c0909341SAndroid Build Coastguard Worker        trn1            v16.2s, v16.2s, v17.2s
3093*c0909341SAndroid Build Coastguard Worker
3094*c0909341SAndroid Build Coastguard Worker        mul             v4.4h,  v16.4h, v2.4h
3095*c0909341SAndroid Build Coastguard Worker        mla             v4.4h,  v17.4h, v3.4h
3096*c0909341SAndroid Build Coastguard Worker        uqrshrn         v4.8b,  v4.8h,  #8
3097*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2
3098*c0909341SAndroid Build Coastguard Worker        st1             {v4.h}[0], [\dst], \d_strd
3099*c0909341SAndroid Build Coastguard Worker        st1             {v4.h}[1], [\ds2], \d_strd
3100*c0909341SAndroid Build Coastguard Worker        b.le            0f
3101*c0909341SAndroid Build Coastguard Worker        trn2            v16.2s, v17.2s, v17.2s
3102*c0909341SAndroid Build Coastguard Worker        b               2b
3103*c0909341SAndroid Build Coastguard Worker0:
3104*c0909341SAndroid Build Coastguard Worker        ret
3105*c0909341SAndroid Build Coastguard Worker.endif
3106*c0909341SAndroid Build Coastguard Worker
3107*c0909341SAndroid Build Coastguard Worker40:     // 4xN hv
3108*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
3109*c0909341SAndroid Build Coastguard Worker        add             \sr2, \src, \s_strd
3110*c0909341SAndroid Build Coastguard Worker        add             \ds2, \dst, \d_strd
3111*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd, \s_strd, #1
3112*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd, \d_strd, #1
3113*c0909341SAndroid Build Coastguard Worker
3114*c0909341SAndroid Build Coastguard Worker        ld1             {v28.8b},  [\src], \s_strd
3115*c0909341SAndroid Build Coastguard Worker        ext             v29.8b, v28.8b, v28.8b, #1
3116*c0909341SAndroid Build Coastguard Worker        umull           v16.8h, v28.8b, v0.8b
3117*c0909341SAndroid Build Coastguard Worker        umlal           v16.8h, v29.8b, v1.8b
3118*c0909341SAndroid Build Coastguard Worker
3119*c0909341SAndroid Build Coastguard Worker4:
3120*c0909341SAndroid Build Coastguard Worker        ld1             {v28.8b},  [\sr2], \s_strd
3121*c0909341SAndroid Build Coastguard Worker        ld1             {v30.8b},  [\src], \s_strd
3122*c0909341SAndroid Build Coastguard Worker        ext             v29.8b, v28.8b, v28.8b, #1
3123*c0909341SAndroid Build Coastguard Worker        ext             v31.8b, v30.8b, v30.8b, #1
3124*c0909341SAndroid Build Coastguard Worker        trn1            v28.2s, v28.2s, v30.2s
3125*c0909341SAndroid Build Coastguard Worker        trn1            v29.2s, v29.2s, v31.2s
3126*c0909341SAndroid Build Coastguard Worker        umull           v17.8h, v28.8b, v0.8b
3127*c0909341SAndroid Build Coastguard Worker        umlal           v17.8h, v29.8b, v1.8b
3128*c0909341SAndroid Build Coastguard Worker
3129*c0909341SAndroid Build Coastguard Worker        trn1            v16.2d, v16.2d, v17.2d
3130*c0909341SAndroid Build Coastguard Worker
3131*c0909341SAndroid Build Coastguard Worker        mul             v4.8h,  v16.8h, v2.8h
3132*c0909341SAndroid Build Coastguard Worker        mla             v4.8h,  v17.8h, v3.8h
3133*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2
3134*c0909341SAndroid Build Coastguard Worker.ifc \type, put
3135*c0909341SAndroid Build Coastguard Worker        uqrshrn         v4.8b,  v4.8h,  #8
3136*c0909341SAndroid Build Coastguard Worker        st1             {v4.s}[0], [\dst], \d_strd
3137*c0909341SAndroid Build Coastguard Worker        st1             {v4.s}[1], [\ds2], \d_strd
3138*c0909341SAndroid Build Coastguard Worker.else
3139*c0909341SAndroid Build Coastguard Worker        urshr           v4.8h,  v4.8h,  #4
3140*c0909341SAndroid Build Coastguard Worker        st1             {v4.8b},   [\dst], \d_strd
3141*c0909341SAndroid Build Coastguard Worker        st1             {v4.d}[1], [\ds2], \d_strd
3142*c0909341SAndroid Build Coastguard Worker.endif
3143*c0909341SAndroid Build Coastguard Worker        b.le            0f
3144*c0909341SAndroid Build Coastguard Worker        trn2            v16.2d, v17.2d, v17.2d
3145*c0909341SAndroid Build Coastguard Worker        b               4b
3146*c0909341SAndroid Build Coastguard Worker0:
3147*c0909341SAndroid Build Coastguard Worker        ret
3148*c0909341SAndroid Build Coastguard Worker
3149*c0909341SAndroid Build Coastguard Worker80:     // 8xN, 16xN, ... hv
3150*c0909341SAndroid Build Coastguard Worker160:
3151*c0909341SAndroid Build Coastguard Worker320:
3152*c0909341SAndroid Build Coastguard Worker640:
3153*c0909341SAndroid Build Coastguard Worker1280:
3154*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_JUMP_TARGET
3155*c0909341SAndroid Build Coastguard Worker        mov             \my,  \h
3156*c0909341SAndroid Build Coastguard Worker
3157*c0909341SAndroid Build Coastguard Worker1:
3158*c0909341SAndroid Build Coastguard Worker        add             \sr2, \src, \s_strd
3159*c0909341SAndroid Build Coastguard Worker        add             \ds2, \dst, \d_strd
3160*c0909341SAndroid Build Coastguard Worker        lsl             \s_strd, \s_strd, #1
3161*c0909341SAndroid Build Coastguard Worker        lsl             \d_strd, \d_strd, #1
3162*c0909341SAndroid Build Coastguard Worker
3163*c0909341SAndroid Build Coastguard Worker        ld1             {v28.16b},  [\src], \s_strd
3164*c0909341SAndroid Build Coastguard Worker        ext             v29.16b, v28.16b, v28.16b, #1
3165*c0909341SAndroid Build Coastguard Worker        umull           v16.8h, v28.8b, v0.8b
3166*c0909341SAndroid Build Coastguard Worker        umlal           v16.8h, v29.8b, v1.8b
3167*c0909341SAndroid Build Coastguard Worker
3168*c0909341SAndroid Build Coastguard Worker2:
3169*c0909341SAndroid Build Coastguard Worker        ld1             {v28.16b},  [\sr2], \s_strd
3170*c0909341SAndroid Build Coastguard Worker        ld1             {v30.16b},  [\src], \s_strd
3171*c0909341SAndroid Build Coastguard Worker        ext             v29.16b, v28.16b, v28.16b, #1
3172*c0909341SAndroid Build Coastguard Worker        ext             v31.16b, v30.16b, v30.16b, #1
3173*c0909341SAndroid Build Coastguard Worker        umull           v17.8h, v28.8b, v0.8b
3174*c0909341SAndroid Build Coastguard Worker        umlal           v17.8h, v29.8b, v1.8b
3175*c0909341SAndroid Build Coastguard Worker        umull           v18.8h, v30.8b, v0.8b
3176*c0909341SAndroid Build Coastguard Worker        umlal           v18.8h, v31.8b, v1.8b
3177*c0909341SAndroid Build Coastguard Worker
3178*c0909341SAndroid Build Coastguard Worker        mul             v4.8h,  v16.8h, v2.8h
3179*c0909341SAndroid Build Coastguard Worker        mla             v4.8h,  v17.8h, v3.8h
3180*c0909341SAndroid Build Coastguard Worker        mul             v5.8h,  v17.8h, v2.8h
3181*c0909341SAndroid Build Coastguard Worker        mla             v5.8h,  v18.8h, v3.8h
3182*c0909341SAndroid Build Coastguard Worker        subs            \h,  \h,  #2
3183*c0909341SAndroid Build Coastguard Worker.ifc \type, put
3184*c0909341SAndroid Build Coastguard Worker        uqrshrn         v4.8b,  v4.8h,  #8
3185*c0909341SAndroid Build Coastguard Worker        uqrshrn         v5.8b,  v5.8h,  #8
3186*c0909341SAndroid Build Coastguard Worker        st1             {v4.8b}, [\dst], \d_strd
3187*c0909341SAndroid Build Coastguard Worker        st1             {v5.8b}, [\ds2], \d_strd
3188*c0909341SAndroid Build Coastguard Worker.else
3189*c0909341SAndroid Build Coastguard Worker        urshr           v4.8h,  v4.8h,  #4
3190*c0909341SAndroid Build Coastguard Worker        urshr           v5.8h,  v5.8h,  #4
3191*c0909341SAndroid Build Coastguard Worker        st1             {v4.8h}, [\dst], \d_strd
3192*c0909341SAndroid Build Coastguard Worker        st1             {v5.8h}, [\ds2], \d_strd
3193*c0909341SAndroid Build Coastguard Worker.endif
3194*c0909341SAndroid Build Coastguard Worker        b.le            9f
3195*c0909341SAndroid Build Coastguard Worker        mov             v16.16b, v18.16b
3196*c0909341SAndroid Build Coastguard Worker        b               2b
3197*c0909341SAndroid Build Coastguard Worker9:
3198*c0909341SAndroid Build Coastguard Worker        subs            \w,  \w,  #8
3199*c0909341SAndroid Build Coastguard Worker        b.le            0f
3200*c0909341SAndroid Build Coastguard Worker        asr             \s_strd,  \s_strd,  #1
3201*c0909341SAndroid Build Coastguard Worker        asr             \d_strd,  \d_strd,  #1
3202*c0909341SAndroid Build Coastguard Worker        msub            \src,  \s_strd,  \xmy,  \src
3203*c0909341SAndroid Build Coastguard Worker        msub            \dst,  \d_strd,  \xmy,  \dst
3204*c0909341SAndroid Build Coastguard Worker        sub             \src,  \src,  \s_strd,  lsl #1
3205*c0909341SAndroid Build Coastguard Worker        mov             \h,  \my
3206*c0909341SAndroid Build Coastguard Worker        add             \src,  \src,  #8
3207*c0909341SAndroid Build Coastguard Worker.ifc \type, put
3208*c0909341SAndroid Build Coastguard Worker        add             \dst,  \dst,  #8
3209*c0909341SAndroid Build Coastguard Worker.else
3210*c0909341SAndroid Build Coastguard Worker        add             \dst,  \dst,  #16
3211*c0909341SAndroid Build Coastguard Worker.endif
3212*c0909341SAndroid Build Coastguard Worker        b               1b
3213*c0909341SAndroid Build Coastguard Worker0:
3214*c0909341SAndroid Build Coastguard Worker        ret
3215*c0909341SAndroid Build Coastguard Workerendfunc
3216*c0909341SAndroid Build Coastguard Worker
3217*c0909341SAndroid Build Coastguard Workerjumptable \type\()_bilin_hv_tbl
3218*c0909341SAndroid Build Coastguard Worker        .word 1280b - \type\()_bilin_hv_tbl
3219*c0909341SAndroid Build Coastguard Worker        .word 640b  - \type\()_bilin_hv_tbl
3220*c0909341SAndroid Build Coastguard Worker        .word 320b  - \type\()_bilin_hv_tbl
3221*c0909341SAndroid Build Coastguard Worker        .word 160b  - \type\()_bilin_hv_tbl
3222*c0909341SAndroid Build Coastguard Worker        .word 80b   - \type\()_bilin_hv_tbl
3223*c0909341SAndroid Build Coastguard Worker        .word 40b   - \type\()_bilin_hv_tbl
3224*c0909341SAndroid Build Coastguard Worker        .word 20b   - \type\()_bilin_hv_tbl
3225*c0909341SAndroid Build Coastguard Workerendjumptable
3226*c0909341SAndroid Build Coastguard Worker.endm
3227*c0909341SAndroid Build Coastguard Worker
// Instantiations for the put variant. Filter pairs that include SHARP are
// tagged 8tap and are followed by the filter_fn invocation tagged 8tap; pairs
// built only from REGULAR and SMOOTH are tagged 6tap and are followed by the
// filter_fn invocation tagged 6tap.
// NOTE(review): make_8tap_fn and filter_fn are defined outside this part of
// the file; confirm the meaning of each positional argument there.
3228*c0909341SAndroid Build Coastguard Workermake_8tap_fn    put,  regular_sharp,  REGULAR, SHARP,   8tap
3229*c0909341SAndroid Build Coastguard Workermake_8tap_fn    put,  smooth_sharp,   SMOOTH,  SHARP,   8tap
3230*c0909341SAndroid Build Coastguard Workermake_8tap_fn    put,  sharp,          SHARP,   SHARP,   8tap
3231*c0909341SAndroid Build Coastguard Workermake_8tap_fn    put,  sharp_regular,  SHARP,   REGULAR, 8tap
3232*c0909341SAndroid Build Coastguard Workermake_8tap_fn    put,  sharp_smooth,   SHARP,   SMOOTH,  8tap
3233*c0909341SAndroid Build Coastguard Workerfilter_fn       put,  x0, x1, x2, x3, w4, w5, w6, x6, w7, x7, x8, x9, 10, 8tap
3234*c0909341SAndroid Build Coastguard Worker
3235*c0909341SAndroid Build Coastguard Workermake_8tap_fn    put,  regular,        REGULAR, REGULAR, 6tap
3236*c0909341SAndroid Build Coastguard Workermake_8tap_fn    put,  regular_smooth, REGULAR, SMOOTH,  6tap
3237*c0909341SAndroid Build Coastguard Workermake_8tap_fn    put,  smooth,         SMOOTH,  SMOOTH,  6tap
3238*c0909341SAndroid Build Coastguard Workermake_8tap_fn    put,  smooth_regular, SMOOTH,  REGULAR, 6tap
3239*c0909341SAndroid Build Coastguard Workerfilter_fn       put,  x0, x1, x2, x3, w4, w5, w6, x6, w7, x7, x8, x9, 10, 6tap
// The bilinear put variant takes the same register list as the two filter_fn
// invocations above, without a tap tag.
3240*c0909341SAndroid Build Coastguard Workerfilter_bilin_fn put,  x0, x1, x2, x3, w4, w5, w6, x6, w7, x7, x8, x9, 10
3241*c0909341SAndroid Build Coastguard Worker
// Instantiations for the prep variant, grouped the same way as put above.
// The register list differs from put (x7 is passed in the second register
// slot, the following registers are shifted down by one, and the numeric
// argument is 6 instead of 10).
// NOTE(review): presumably this reflects a different C argument list for
// prep; verify against the macro definitions and the C prototypes.
3242*c0909341SAndroid Build Coastguard Workermake_8tap_fn    prep, regular_sharp,  REGULAR, SHARP,   8tap
3243*c0909341SAndroid Build Coastguard Workermake_8tap_fn    prep, smooth_sharp,   SMOOTH,  SHARP,   8tap
3244*c0909341SAndroid Build Coastguard Workermake_8tap_fn    prep, sharp,          SHARP,   SHARP,   8tap
3245*c0909341SAndroid Build Coastguard Workermake_8tap_fn    prep, sharp_regular,  SHARP,   REGULAR, 8tap
3246*c0909341SAndroid Build Coastguard Workermake_8tap_fn    prep, sharp_smooth,   SHARP,   SMOOTH,  8tap
3247*c0909341SAndroid Build Coastguard Workerfilter_fn       prep, x0, x7, x1, x2, w3, w4, w5, x5, w6, x6, x8, x9, 6,  8tap
3248*c0909341SAndroid Build Coastguard Worker
3249*c0909341SAndroid Build Coastguard Workermake_8tap_fn    prep, regular,        REGULAR, REGULAR, 6tap
3250*c0909341SAndroid Build Coastguard Workermake_8tap_fn    prep, regular_smooth, REGULAR, SMOOTH,  6tap
3251*c0909341SAndroid Build Coastguard Workermake_8tap_fn    prep, smooth,         SMOOTH,  SMOOTH,  6tap
3252*c0909341SAndroid Build Coastguard Workermake_8tap_fn    prep, smooth_regular, SMOOTH,  REGULAR, 6tap
3253*c0909341SAndroid Build Coastguard Workerfilter_fn       prep, x0, x7, x1, x2, w3, w4, w5, x5, w6, x6, x8, x9, 6,  6tap
3254*c0909341SAndroid Build Coastguard Workerfilter_bilin_fn prep, x0, x7, x1, x2, w3, w4, w5, x5, w6, x6, x8, x9, 6
3255*c0909341SAndroid Build Coastguard Worker
3256*c0909341SAndroid Build Coastguard Worker
// load_filter_row dst, src, inc
//   Loads one filter row from the table at x11 and advances the position.
//   dst: register that receives the row (every use in this file passes a
//        d register, i.e. 8 bytes are loaded)
//   src: 32-bit register holding the running filter position; it is updated
//        in place (src += inc) after the index has been taken from it
//   inc: 32-bit register holding the step added to src
//   The row index is the signed value src >> 10, scaled by 8 bytes per row.
//   x11 must already hold the table base. Clobbers w13; flags are untouched.
3257*c0909341SAndroid Build Coastguard Worker.macro load_filter_row dst, src, inc
3258*c0909341SAndroid Build Coastguard Worker        asr             w13, \src, #10               // w13 = signed row index
3259*c0909341SAndroid Build Coastguard Worker        add             \src, \src, \inc             // step the position for the next use
3260*c0909341SAndroid Build Coastguard Worker        ldr             \dst, [x11, w13, sxtw #3]    // load from x11 + index * 8
3261*c0909341SAndroid Build Coastguard Worker.endm
3262*c0909341SAndroid Build Coastguard Worker
// warp_filter_horz_neon
// Horizontal 8-tap filter pass over one source row, producing 8 outputs.
// Each output uses its own filter row, selected from a position that
// advances by w7 from one output to the next.
//
// In:   x2  = pointer to the row to read; 16 bytes are loaded from it
//       x3  = amount added to x2 after the load
//       w5  = filter position of the first output of this row
//       w7  = step added to the filter position for each successive output
//       w8  = amount added to w5 before returning
//       x11 = filter table base used by load_filter_row
//       v22 = per-byte constant XORed into the pixels (the warp macro in
//             this file sets it to 128)
// Out:  v0.8h = eight unrounded 16-bit filter sums, lane i for output i
//       x2 and w5 advanced as described above
// Clobbers: w12, w13, v1-v7, v16-v20. Flags are not modified, and x30 is
// only read by the final ret.
3263*c0909341SAndroid Build Coastguard Workerfunction warp_filter_horz_neon
3264*c0909341SAndroid Build Coastguard Worker        add             w12, w5,  #512               // +512 makes the later asr by 10 round to nearest
3265*c0909341SAndroid Build Coastguard Worker
3266*c0909341SAndroid Build Coastguard Worker        ld1             {v16.8b, v17.8b}, [x2], x3   // 16 source pixels; x2 moves on by x3
3267*c0909341SAndroid Build Coastguard Worker
3268*c0909341SAndroid Build Coastguard Worker        load_filter_row d0, w12, w7
3269*c0909341SAndroid Build Coastguard Worker        load_filter_row d1, w12, w7
3270*c0909341SAndroid Build Coastguard Worker        load_filter_row d2, w12, w7
3271*c0909341SAndroid Build Coastguard Worker        load_filter_row d3, w12, w7
3272*c0909341SAndroid Build Coastguard Worker        load_filter_row d4, w12, w7
3273*c0909341SAndroid Build Coastguard Worker        load_filter_row d5, w12, w7
3274*c0909341SAndroid Build Coastguard Worker        load_filter_row d6, w12, w7
3275*c0909341SAndroid Build Coastguard Worker        // subtract by 128 to allow using smull
3276*c0909341SAndroid Build Coastguard Worker        eor             v16.8b,  v16.8b,  v22.8b
3277*c0909341SAndroid Build Coastguard Worker        eor             v17.8b,  v17.8b,  v22.8b
3278*c0909341SAndroid Build Coastguard Worker        load_filter_row d7, w12, w7
3279*c0909341SAndroid Build Coastguard Worker
        // Output i multiplies filter row i (d0-d7) with the 8 pixels that
        // start at byte offset i of the v16:v17 pair (ext with offset i).
        // v18-v20 are reused as temporaries for the shifted windows.
3280*c0909341SAndroid Build Coastguard Worker        ext             v18.8b,  v16.8b,  v17.8b,  #1
3281*c0909341SAndroid Build Coastguard Worker        ext             v19.8b,  v16.8b,  v17.8b,  #2
3282*c0909341SAndroid Build Coastguard Worker        smull           v0.8h,   v0.8b,   v16.8b
3283*c0909341SAndroid Build Coastguard Worker        smull           v1.8h,   v1.8b,   v18.8b
3284*c0909341SAndroid Build Coastguard Worker        ext             v18.8b,  v16.8b,  v17.8b,  #3
3285*c0909341SAndroid Build Coastguard Worker        ext             v20.8b,  v16.8b,  v17.8b,  #4
3286*c0909341SAndroid Build Coastguard Worker        smull           v2.8h,   v2.8b,   v19.8b
3287*c0909341SAndroid Build Coastguard Worker        smull           v3.8h,   v3.8b,   v18.8b
3288*c0909341SAndroid Build Coastguard Worker        ext             v18.8b,  v16.8b,  v17.8b,  #5
3289*c0909341SAndroid Build Coastguard Worker        ext             v19.8b,  v16.8b,  v17.8b,  #6
3290*c0909341SAndroid Build Coastguard Worker        smull           v4.8h,   v4.8b,   v20.8b
3291*c0909341SAndroid Build Coastguard Worker        smull           v5.8h,   v5.8b,   v18.8b
3292*c0909341SAndroid Build Coastguard Worker        ext             v18.8b,  v16.8b,  v17.8b,  #7
3293*c0909341SAndroid Build Coastguard Worker        smull           v6.8h,   v6.8b,   v19.8b
3294*c0909341SAndroid Build Coastguard Worker        smull           v7.8h,   v7.8b,   v18.8b
3295*c0909341SAndroid Build Coastguard Worker
        // Three rounds of pairwise adds reduce the 8 products held in each
        // of v0-v7 to one sum per register, gathered in order in v0.8h.
3296*c0909341SAndroid Build Coastguard Worker        addp            v0.8h,   v0.8h,   v1.8h
3297*c0909341SAndroid Build Coastguard Worker        addp            v2.8h,   v2.8h,   v3.8h
3298*c0909341SAndroid Build Coastguard Worker        addp            v4.8h,   v4.8h,   v5.8h
3299*c0909341SAndroid Build Coastguard Worker        addp            v6.8h,   v6.8h,   v7.8h
3300*c0909341SAndroid Build Coastguard Worker
3301*c0909341SAndroid Build Coastguard Worker        addp            v0.8h,   v0.8h,   v2.8h
3302*c0909341SAndroid Build Coastguard Worker        addp            v4.8h,   v4.8h,   v6.8h
3303*c0909341SAndroid Build Coastguard Worker
3304*c0909341SAndroid Build Coastguard Worker        addp            v0.8h,   v0.8h,   v4.8h
3305*c0909341SAndroid Build Coastguard Worker
3306*c0909341SAndroid Build Coastguard Worker        add             w5,  w5,  w8                 // advance the start position for the next row
3307*c0909341SAndroid Build Coastguard Worker
3308*c0909341SAndroid Build Coastguard Worker        ret
3309*c0909341SAndroid Build Coastguard Workerendfunc
3310*c0909341SAndroid Build Coastguard Worker
3311*c0909341SAndroid Build Coastguard Worker// void dav1d_warp_affine_8x8_8bpc_neon(
3312*c0909341SAndroid Build Coastguard Worker//         pixel *dst, const ptrdiff_t dst_stride,
3313*c0909341SAndroid Build Coastguard Worker//         const pixel *src, const ptrdiff_t src_stride,
3314*c0909341SAndroid Build Coastguard Worker//         const int16_t *const abcd, int mx, int my)
// warp t, shift
//   Emits the 8x8 affine warp function described by the prototype comment
//   above this macro.
//   t:     name suffix. Empty gives warp_affine_8x8_8bpc_neon, which narrows
//          each result to 8 bits and stores 8 bytes per row. Non-empty gives
//          the suffixed function, which stores eight 16-bit values per row
//          and doubles the stride in x1 first.
//   shift: rounding right shift applied to the 32-bit vertical sums.
//   Arguments as in the prototype: x0 = dst, x1 = dst_stride, x2 = src,
//   x3 = src_stride, x4 = abcd, w5 = mx, w6 = my.
//   v24-v31 hold a sliding window of eight horizontally filtered rows; each
//   loop iteration filters one new row, produces one output row and shifts
//   the window up by one.
3315*c0909341SAndroid Build Coastguard Worker.macro warp t, shift
3316*c0909341SAndroid Build Coastguard Workerfunction warp_affine_8x8\t\()_8bpc_neon, export=1
3317*c0909341SAndroid Build Coastguard Worker        ldr             x4,  [x4]                    // fetch all four 16-bit abcd values at once
3318*c0909341SAndroid Build Coastguard Worker        sbfx            x7,  x4, #0,  #16            // x7 = bits 0-15, sign extended (horz step per pixel)
3319*c0909341SAndroid Build Coastguard Worker        sbfx            x8,  x4, #16, #16            // x8 = bits 16-31 (added to w5 once per row)
3320*c0909341SAndroid Build Coastguard Worker        sbfx            x9,  x4, #32, #16            // x9 = bits 32-47 (vert step per column)
3321*c0909341SAndroid Build Coastguard Worker        sbfx            x4,  x4, #48, #16            // x4 = bits 48-63 (added to w6 once per row)
3322*c0909341SAndroid Build Coastguard Worker        mov             w10, #8                      // output row counter
3323*c0909341SAndroid Build Coastguard Worker        sub             x2,  x2,  x3, lsl #1
3324*c0909341SAndroid Build Coastguard Worker        sub             x2,  x2,  x3                 // src is now 3 rows up
3325*c0909341SAndroid Build Coastguard Worker        sub             x2,  x2,  #3                 // and 3 pixels to the left
3326*c0909341SAndroid Build Coastguard Worker        movrel          x11, X(mc_warp_filter), 64*8 // table base for load_filter_row; NOTE(review): movrel is defined elsewhere, 64*8 is presumably a byte offset - confirm
3327*c0909341SAndroid Build Coastguard Worker        mov             x15, x30                     // keep the return address; bl below overwrites x30
3328*c0909341SAndroid Build Coastguard Worker.ifnb \t
3329*c0909341SAndroid Build Coastguard Worker        lsl             x1,  x1,  #1                 // NOTE(review): presumably the stride is given in 16-bit elements - confirm
3330*c0909341SAndroid Build Coastguard Worker.endif
3331*c0909341SAndroid Build Coastguard Worker
3332*c0909341SAndroid Build Coastguard Worker        movi            v22.8b,  #128                // XOR mask used by warp_filter_horz_neon
        // v23 is added to every result after the final shift.
        // NOTE(review): presumably this undoes the 128 bias applied to the
        // pixels in warp_filter_horz_neon - confirm against the C reference.
3333*c0909341SAndroid Build Coastguard Worker.ifb \t
3334*c0909341SAndroid Build Coastguard Worker        movi            v23.8h,  #128
3335*c0909341SAndroid Build Coastguard Worker.else
3336*c0909341SAndroid Build Coastguard Worker        movi            v23.8h,  #8, lsl #8          // 8 << 8 = 2048
3337*c0909341SAndroid Build Coastguard Worker.endif
3338*c0909341SAndroid Build Coastguard Worker
        // Prime the window with the first seven filtered rows (v24-v30),
        // each rounded down by 3 bits.
3339*c0909341SAndroid Build Coastguard Worker        bl              warp_filter_horz_neon
3340*c0909341SAndroid Build Coastguard Worker        srshr           v24.8h,  v0.8h,  #3
3341*c0909341SAndroid Build Coastguard Worker        bl              warp_filter_horz_neon
3342*c0909341SAndroid Build Coastguard Worker        srshr           v25.8h,  v0.8h,  #3
3343*c0909341SAndroid Build Coastguard Worker        bl              warp_filter_horz_neon
3344*c0909341SAndroid Build Coastguard Worker        srshr           v26.8h,  v0.8h,  #3
3345*c0909341SAndroid Build Coastguard Worker        bl              warp_filter_horz_neon
3346*c0909341SAndroid Build Coastguard Worker        srshr           v27.8h,  v0.8h,  #3
3347*c0909341SAndroid Build Coastguard Worker        bl              warp_filter_horz_neon
3348*c0909341SAndroid Build Coastguard Worker        srshr           v28.8h,  v0.8h,  #3
3349*c0909341SAndroid Build Coastguard Worker        bl              warp_filter_horz_neon
3350*c0909341SAndroid Build Coastguard Worker        srshr           v29.8h,  v0.8h,  #3
3351*c0909341SAndroid Build Coastguard Worker        bl              warp_filter_horz_neon
3352*c0909341SAndroid Build Coastguard Worker        srshr           v30.8h,  v0.8h,  #3
3353*c0909341SAndroid Build Coastguard Worker
3354*c0909341SAndroid Build Coastguard Worker1:
3355*c0909341SAndroid Build Coastguard Worker        add             w14, w6,  #512               // vertical position, +512 for round-to-nearest in load_filter_row
3356*c0909341SAndroid Build Coastguard Worker        bl              warp_filter_horz_neon
3357*c0909341SAndroid Build Coastguard Worker        srshr           v31.8h,  v0.8h,  #3          // eighth (newest) row of the window
3358*c0909341SAndroid Build Coastguard Worker
        // One vertical filter row per output column, stepping by w9.
3359*c0909341SAndroid Build Coastguard Worker        load_filter_row d0, w14, w9
3360*c0909341SAndroid Build Coastguard Worker        load_filter_row d1, w14, w9
3361*c0909341SAndroid Build Coastguard Worker        load_filter_row d2, w14, w9
3362*c0909341SAndroid Build Coastguard Worker        load_filter_row d3, w14, w9
3363*c0909341SAndroid Build Coastguard Worker        load_filter_row d4, w14, w9
3364*c0909341SAndroid Build Coastguard Worker        load_filter_row d5, w14, w9
3365*c0909341SAndroid Build Coastguard Worker        load_filter_row d6, w14, w9
3366*c0909341SAndroid Build Coastguard Worker        load_filter_row d7, w14, w9
        // NOTE(review): transpose_8x8b_xtl is defined elsewhere; presumably
        // it transposes the 8x8 byte matrix and sign-extends it to 16 bits,
        // so that v0-v7 each hold one tap for all 8 columns - confirm.
3367*c0909341SAndroid Build Coastguard Worker        transpose_8x8b_xtl v0, v1, v2, v3, v4, v5, v6, v7, sxtl
3368*c0909341SAndroid Build Coastguard Worker
3369*c0909341SAndroid Build Coastguard Worker        // This ordering of smull/smlal/smull2/smlal2 is highly
3370*c0909341SAndroid Build Coastguard Worker        // beneficial for Cortex A53 here.
        // v16 accumulates columns 0-3 and v17 columns 4-7, as 32-bit sums
        // over the eight window rows v24-v31.
3371*c0909341SAndroid Build Coastguard Worker        smull           v16.4s,  v24.4h,  v0.4h
3372*c0909341SAndroid Build Coastguard Worker        smlal           v16.4s,  v25.4h,  v1.4h
3373*c0909341SAndroid Build Coastguard Worker        smlal           v16.4s,  v26.4h,  v2.4h
3374*c0909341SAndroid Build Coastguard Worker        smlal           v16.4s,  v27.4h,  v3.4h
3375*c0909341SAndroid Build Coastguard Worker        smlal           v16.4s,  v28.4h,  v4.4h
3376*c0909341SAndroid Build Coastguard Worker        smlal           v16.4s,  v29.4h,  v5.4h
3377*c0909341SAndroid Build Coastguard Worker        smlal           v16.4s,  v30.4h,  v6.4h
3378*c0909341SAndroid Build Coastguard Worker        smlal           v16.4s,  v31.4h,  v7.4h
3379*c0909341SAndroid Build Coastguard Worker        smull2          v17.4s,  v24.8h,  v0.8h
3380*c0909341SAndroid Build Coastguard Worker        smlal2          v17.4s,  v25.8h,  v1.8h
3381*c0909341SAndroid Build Coastguard Worker        smlal2          v17.4s,  v26.8h,  v2.8h
3382*c0909341SAndroid Build Coastguard Worker        smlal2          v17.4s,  v27.8h,  v3.8h
3383*c0909341SAndroid Build Coastguard Worker        smlal2          v17.4s,  v28.8h,  v4.8h
3384*c0909341SAndroid Build Coastguard Worker        smlal2          v17.4s,  v29.8h,  v5.8h
3385*c0909341SAndroid Build Coastguard Worker        smlal2          v17.4s,  v30.8h,  v6.8h
3386*c0909341SAndroid Build Coastguard Worker        smlal2          v17.4s,  v31.8h,  v7.8h
3387*c0909341SAndroid Build Coastguard Worker
        // The mov instructions slide the row window up by one; they are
        // interleaved with the narrowing, bias add and store of this row.
3388*c0909341SAndroid Build Coastguard Worker        mov             v24.16b, v25.16b
3389*c0909341SAndroid Build Coastguard Worker        mov             v25.16b, v26.16b
3390*c0909341SAndroid Build Coastguard Worker        sqrshrn         v16.4h,  v16.4s,  #\shift
3391*c0909341SAndroid Build Coastguard Worker        mov             v26.16b, v27.16b
3392*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v16.8h,  v17.4s,  #\shift
3393*c0909341SAndroid Build Coastguard Worker        mov             v27.16b, v28.16b
3394*c0909341SAndroid Build Coastguard Worker        mov             v28.16b, v29.16b
3395*c0909341SAndroid Build Coastguard Worker        add             v16.8h,  v16.8h,  v23.8h
3396*c0909341SAndroid Build Coastguard Worker.ifb \t
3397*c0909341SAndroid Build Coastguard Worker        sqxtun          v16.8b,  v16.8h              // saturate to unsigned 8-bit pixels
3398*c0909341SAndroid Build Coastguard Worker.endif
3399*c0909341SAndroid Build Coastguard Worker        mov             v29.16b, v30.16b
3400*c0909341SAndroid Build Coastguard Worker        mov             v30.16b, v31.16b
3401*c0909341SAndroid Build Coastguard Worker        subs            w10, w10, #1                 // sets the flags used by b.gt below
3402*c0909341SAndroid Build Coastguard Worker.ifnb \t
3403*c0909341SAndroid Build Coastguard Worker        st1             {v16.8h}, [x0], x1
3404*c0909341SAndroid Build Coastguard Worker.else
3405*c0909341SAndroid Build Coastguard Worker        st1             {v16.8b}, [x0], x1
3406*c0909341SAndroid Build Coastguard Worker.endif
3407*c0909341SAndroid Build Coastguard Worker
3408*c0909341SAndroid Build Coastguard Worker        add             w6,  w6,  w4                 // advance the vertical start position (flags unchanged)
3409*c0909341SAndroid Build Coastguard Worker        b.gt            1b
3410*c0909341SAndroid Build Coastguard Worker
3411*c0909341SAndroid Build Coastguard Worker        ret             x15                          // return through the saved link register
3412*c0909341SAndroid Build Coastguard Workerendfunc
3413*c0909341SAndroid Build Coastguard Worker.endm
3414*c0909341SAndroid Build Coastguard Worker
// Instantiate the warp macro twice: with an empty suffix and a final shift
// of 11 (the variant that stores 8-bit pixels), and with suffix t and a
// final shift of 7 (the variant that stores 16-bit values).
3415*c0909341SAndroid Build Coastguard Workerwarp  , 11
3416*c0909341SAndroid Build Coastguard Workerwarp t, 7
3417*c0909341SAndroid Build Coastguard Worker
3418*c0909341SAndroid Build Coastguard Worker// void dav1d_emu_edge_8bpc_neon(
3419*c0909341SAndroid Build Coastguard Worker//         const intptr_t bw, const intptr_t bh,
3420*c0909341SAndroid Build Coastguard Worker//         const intptr_t iw, const intptr_t ih,
3421*c0909341SAndroid Build Coastguard Worker//         const intptr_t x, const intptr_t y,
3422*c0909341SAndroid Build Coastguard Worker//         pixel *dst, const ptrdiff_t dst_stride,
3423*c0909341SAndroid Build Coastguard Worker//         const pixel *ref, const ptrdiff_t ref_stride)
3424*c0909341SAndroid Build Coastguard Workerfunction emu_edge_8bpc_neon, export=1
3425*c0909341SAndroid Build Coastguard Worker        ldp             x8,  x9,  [sp]
3426*c0909341SAndroid Build Coastguard Worker
3427*c0909341SAndroid Build Coastguard Worker        // ref += iclip(y, 0, ih - 1) * PXSTRIDE(ref_stride)
3428*c0909341SAndroid Build Coastguard Worker        // ref += iclip(x, 0, iw - 1)
3429*c0909341SAndroid Build Coastguard Worker        sub             x12, x3,  #1           // ih - 1
3430*c0909341SAndroid Build Coastguard Worker        cmp             x5,  x3
3431*c0909341SAndroid Build Coastguard Worker        sub             x13, x2,  #1           // iw - 1
3432*c0909341SAndroid Build Coastguard Worker        csel            x12, x12, x5,  ge      // min(y, ih - 1)
3433*c0909341SAndroid Build Coastguard Worker        cmp             x4,  x2
3434*c0909341SAndroid Build Coastguard Worker        bic             x12, x12, x12, asr #63 // max(min(y, ih - 1), 0)
3435*c0909341SAndroid Build Coastguard Worker        csel            x13, x13, x4,  ge      // min(x, iw - 1)
3436*c0909341SAndroid Build Coastguard Worker        bic             x13, x13, x13, asr #63 // max(min(x, iw - 1), 0)
3437*c0909341SAndroid Build Coastguard Worker        madd            x8,  x12, x9,  x8      // ref += iclip() * stride
3438*c0909341SAndroid Build Coastguard Worker        add             x8,  x8,  x13          // ref += iclip()
3439*c0909341SAndroid Build Coastguard Worker
3440*c0909341SAndroid Build Coastguard Worker        // bottom_ext = iclip(y + bh - ih, 0, bh - 1)
3441*c0909341SAndroid Build Coastguard Worker        // top_ext = iclip(-y, 0, bh - 1)
3442*c0909341SAndroid Build Coastguard Worker        add             x10, x5,  x1           // y + bh
3443*c0909341SAndroid Build Coastguard Worker        neg             x5,  x5                // -y
3444*c0909341SAndroid Build Coastguard Worker        sub             x10, x10, x3           // y + bh - ih
3445*c0909341SAndroid Build Coastguard Worker        sub             x12, x1,  #1           // bh - 1
3446*c0909341SAndroid Build Coastguard Worker        cmp             x10, x1
3447*c0909341SAndroid Build Coastguard Worker        bic             x5,  x5,  x5,  asr #63 // max(-y, 0)
3448*c0909341SAndroid Build Coastguard Worker        csel            x10, x10, x12, lt      // min(y + bh - ih, bh-1)
3449*c0909341SAndroid Build Coastguard Worker        cmp             x5,  x1
3450*c0909341SAndroid Build Coastguard Worker        bic             x10, x10, x10, asr #63 // max(min(y + bh - ih, bh-1), 0)
3451*c0909341SAndroid Build Coastguard Worker        csel            x5,  x5,  x12, lt      // min(max(-y, 0), bh-1)
3452*c0909341SAndroid Build Coastguard Worker
3453*c0909341SAndroid Build Coastguard Worker        // right_ext = iclip(x + bw - iw, 0, bw - 1)
3454*c0909341SAndroid Build Coastguard Worker        // left_ext = iclip(-x, 0, bw - 1)
3455*c0909341SAndroid Build Coastguard Worker        add             x11, x4,  x0           // x + bw
3456*c0909341SAndroid Build Coastguard Worker        neg             x4,  x4                // -x
3457*c0909341SAndroid Build Coastguard Worker        sub             x11, x11, x2           // x + bw - iw
3458*c0909341SAndroid Build Coastguard Worker        sub             x13, x0,  #1           // bw - 1
3459*c0909341SAndroid Build Coastguard Worker        cmp             x11, x0
3460*c0909341SAndroid Build Coastguard Worker        bic             x4,  x4,  x4,  asr #63 // max(-x, 0)
3461*c0909341SAndroid Build Coastguard Worker        csel            x11, x11, x13, lt      // min(x + bw - iw, bw-1)
3462*c0909341SAndroid Build Coastguard Worker        cmp             x4,  x0
3463*c0909341SAndroid Build Coastguard Worker        bic             x11, x11, x11, asr #63 // max(min(x + bw - iw, bw-1), 0)
3464*c0909341SAndroid Build Coastguard Worker        csel            x4,  x4,  x13, lt      // min(max(-x, 0), bw - 1)
3465*c0909341SAndroid Build Coastguard Worker
3466*c0909341SAndroid Build Coastguard Worker        // center_h = bh - top_ext - bottom_ext
3467*c0909341SAndroid Build Coastguard Worker        // dst += top_ext * PXSTRIDE(dst_stride)
3468*c0909341SAndroid Build Coastguard Worker        // center_w = bw - left_ext - right_ext
3469*c0909341SAndroid Build Coastguard Worker        sub             x1,  x1,  x5           // bh - top_ext
3470*c0909341SAndroid Build Coastguard Worker        madd            x6,  x5,  x7,  x6
3471*c0909341SAndroid Build Coastguard Worker        sub             x2,  x0,  x4           // bw - left_ext
3472*c0909341SAndroid Build Coastguard Worker        sub             x1,  x1,  x10          // center_h = bh - top_ext - bottom_ext
3473*c0909341SAndroid Build Coastguard Worker        sub             x2,  x2,  x11          // center_w = bw - left_ext - right_ext
3474*c0909341SAndroid Build Coastguard Worker
3475*c0909341SAndroid Build Coastguard Worker        mov             x14, x6                // backup of dst
3476*c0909341SAndroid Build Coastguard Worker
// v_loop need_left, need_right
//   Row loop of emu_edge: copies center_w pixels per row from the reference
//   to dst for center_h rows. When need_left is nonzero, the first reference
//   pixel of the row is replicated over the left_ext bytes before the copy;
//   when need_right is nonzero, the last copied reference pixel is
//   replicated over the right_ext bytes after it.
//   Uses: x1 = center_h (counted down), x2 = center_w, x4 = left_ext,
//         x11 = right_ext, x6 = dst row pointer, x7 = dst stride,
//         x8 = reference row pointer, x9 = reference stride.
//   On exit x6 and x8 point one row past the last processed row.
//   Clobbers: x3, x12, x13, v0, v1, flags.
//   All inner loops test their counter after the store, so each runs at
//   least once, and they move whole 16 or 32 byte vectors, so they can read
//   and write past the exact extent. Left padding overshoot is overwritten
//   by the center copy that follows.
//   NOTE(review): the overshoot of the center copy and the right padding
//   presumably relies on padded buffers supplied by the caller - confirm.
3477*c0909341SAndroid Build Coastguard Worker.macro v_loop need_left, need_right
3478*c0909341SAndroid Build Coastguard Worker0:
3479*c0909341SAndroid Build Coastguard Worker.if \need_left
3480*c0909341SAndroid Build Coastguard Worker        ld1r            {v0.16b}, [x8]         // replicate the first reference pixel of the row
3481*c0909341SAndroid Build Coastguard Worker        mov             x12, x6                // out = dst
3482*c0909341SAndroid Build Coastguard Worker        mov             x3,  x4                // remaining = left_ext
3483*c0909341SAndroid Build Coastguard Worker1:
3484*c0909341SAndroid Build Coastguard Worker        subs            x3,  x3,  #16
3485*c0909341SAndroid Build Coastguard Worker        st1             {v0.16b}, [x12], #16
3486*c0909341SAndroid Build Coastguard Worker        b.gt            1b
3487*c0909341SAndroid Build Coastguard Worker.endif
3488*c0909341SAndroid Build Coastguard Worker        mov             x13, x8                // in = reference row
3489*c0909341SAndroid Build Coastguard Worker        add             x12, x6,  x4           // out = dst + left_ext
3490*c0909341SAndroid Build Coastguard Worker        mov             x3,  x2                // remaining = center_w
3491*c0909341SAndroid Build Coastguard Worker1:
3492*c0909341SAndroid Build Coastguard Worker        ld1             {v0.16b, v1.16b}, [x13], #32
3493*c0909341SAndroid Build Coastguard Worker        subs            x3,  x3,  #32
3494*c0909341SAndroid Build Coastguard Worker        st1             {v0.16b, v1.16b}, [x12], #32
3495*c0909341SAndroid Build Coastguard Worker        b.gt            1b
3496*c0909341SAndroid Build Coastguard Worker.if \need_right
3497*c0909341SAndroid Build Coastguard Worker        add             x3,  x8,  x2           // in + center_w
3498*c0909341SAndroid Build Coastguard Worker        sub             x3,  x3,  #1           // in + center_w - 1
3499*c0909341SAndroid Build Coastguard Worker        add             x12, x6,  x4           // dst + left_ext
3500*c0909341SAndroid Build Coastguard Worker        ld1r            {v0.16b}, [x3]         // replicate the last center pixel of the row
3501*c0909341SAndroid Build Coastguard Worker        add             x12, x12, x2           // out = dst + left_ext + center_w
3502*c0909341SAndroid Build Coastguard Worker        mov             x3,  x11               // remaining = right_ext
3503*c0909341SAndroid Build Coastguard Worker1:
3504*c0909341SAndroid Build Coastguard Worker        subs            x3,  x3,  #16
3505*c0909341SAndroid Build Coastguard Worker        st1             {v0.16b}, [x12], #16
3506*c0909341SAndroid Build Coastguard Worker        b.gt            1b
3507*c0909341SAndroid Build Coastguard Worker.endif
3508*c0909341SAndroid Build Coastguard Worker
3509*c0909341SAndroid Build Coastguard Worker        subs            x1,  x1,  #1           // center_h--
3510*c0909341SAndroid Build Coastguard Worker        add             x6,  x6,  x7           // dst += dst stride (flags unchanged)
3511*c0909341SAndroid Build Coastguard Worker        add             x8,  x8,  x9           // ref += ref stride (flags unchanged)
3512*c0909341SAndroid Build Coastguard Worker        b.gt            0b
3513*c0909341SAndroid Build Coastguard Worker.endm
3514*c0909341SAndroid Build Coastguard Worker
3515*c0909341SAndroid Build Coastguard Worker        cbz             x4,  2f
3516*c0909341SAndroid Build Coastguard Worker        // need_left
3517*c0909341SAndroid Build Coastguard Worker        cbz             x11, 3f
3518*c0909341SAndroid Build Coastguard Worker        // need_left + need_right
3519*c0909341SAndroid Build Coastguard Worker        v_loop          1,   1
3520*c0909341SAndroid Build Coastguard Worker        b               5f
3521*c0909341SAndroid Build Coastguard Worker
3522*c0909341SAndroid Build Coastguard Worker2:
3523*c0909341SAndroid Build Coastguard Worker        // !need_left
3524*c0909341SAndroid Build Coastguard Worker        cbz             x11, 4f
3525*c0909341SAndroid Build Coastguard Worker        // !need_left + need_right
3526*c0909341SAndroid Build Coastguard Worker        v_loop          0,   1
3527*c0909341SAndroid Build Coastguard Worker        b               5f
3528*c0909341SAndroid Build Coastguard Worker
3529*c0909341SAndroid Build Coastguard Worker3:
3530*c0909341SAndroid Build Coastguard Worker        // need_left + !need_right
3531*c0909341SAndroid Build Coastguard Worker        v_loop          1,   0
3532*c0909341SAndroid Build Coastguard Worker        b               5f
3533*c0909341SAndroid Build Coastguard Worker
3534*c0909341SAndroid Build Coastguard Worker4:
3535*c0909341SAndroid Build Coastguard Worker        // !need_left + !need_right
3536*c0909341SAndroid Build Coastguard Worker        v_loop          0,   0
3537*c0909341SAndroid Build Coastguard Worker
3538*c0909341SAndroid Build Coastguard Worker5:
3539*c0909341SAndroid Build Coastguard Worker
3540*c0909341SAndroid Build Coastguard Worker        cbz             x10, 3f
3541*c0909341SAndroid Build Coastguard Worker        // need_bottom
3542*c0909341SAndroid Build Coastguard Worker        sub             x8,  x6,  x7           // ref = dst - stride
3543*c0909341SAndroid Build Coastguard Worker        mov             x4,  x0
3544*c0909341SAndroid Build Coastguard Worker1:
3545*c0909341SAndroid Build Coastguard Worker        ld1             {v0.16b, v1.16b}, [x8], #32
3546*c0909341SAndroid Build Coastguard Worker        mov             x3,  x10
3547*c0909341SAndroid Build Coastguard Worker2:
3548*c0909341SAndroid Build Coastguard Worker        subs            x3,  x3,  #1
3549*c0909341SAndroid Build Coastguard Worker        st1             {v0.16b, v1.16b}, [x6], x7
3550*c0909341SAndroid Build Coastguard Worker        b.gt            2b
3551*c0909341SAndroid Build Coastguard Worker        msub            x6,  x7,  x10,  x6     // dst -= bottom_ext * stride
3552*c0909341SAndroid Build Coastguard Worker        subs            x4,  x4,  #32          // bw -= 32
3553*c0909341SAndroid Build Coastguard Worker        add             x6,  x6,  #32          // dst += 32
3554*c0909341SAndroid Build Coastguard Worker        b.gt            1b
3555*c0909341SAndroid Build Coastguard Worker
3556*c0909341SAndroid Build Coastguard Worker3:
3557*c0909341SAndroid Build Coastguard Worker        cbz             x5,  3f
3558*c0909341SAndroid Build Coastguard Worker        // need_top
3559*c0909341SAndroid Build Coastguard Worker        msub            x6,  x7,  x5,  x14     // dst = stored_dst - top_ext * stride
3560*c0909341SAndroid Build Coastguard Worker1:
3561*c0909341SAndroid Build Coastguard Worker        ld1             {v0.16b, v1.16b}, [x14], #32
3562*c0909341SAndroid Build Coastguard Worker        mov             x3,  x5
3563*c0909341SAndroid Build Coastguard Worker2:
3564*c0909341SAndroid Build Coastguard Worker        subs            x3,  x3,  #1
3565*c0909341SAndroid Build Coastguard Worker        st1             {v0.16b, v1.16b}, [x6], x7
3566*c0909341SAndroid Build Coastguard Worker        b.gt            2b
3567*c0909341SAndroid Build Coastguard Worker        msub            x6,  x7,  x5,  x6      // dst -= top_ext * stride
3568*c0909341SAndroid Build Coastguard Worker        subs            x0,  x0,  #32          // bw -= 32
3569*c0909341SAndroid Build Coastguard Worker        add             x6,  x6,  #32          // dst += 32
3570*c0909341SAndroid Build Coastguard Worker        b.gt            1b
3571*c0909341SAndroid Build Coastguard Worker
3572*c0909341SAndroid Build Coastguard Worker3:
3573*c0909341SAndroid Build Coastguard Worker        ret
3574*c0909341SAndroid Build Coastguard Workerendfunc
3575