xref: /aosp_15_r20/external/libdav1d/src/arm/64/itx16.S (revision c09093415860a1c2373dacd84c4fde00c507cdfd)
1*c0909341SAndroid Build Coastguard Worker/******************************************************************************
2*c0909341SAndroid Build Coastguard Worker * Copyright © 2018, VideoLAN and dav1d authors
3*c0909341SAndroid Build Coastguard Worker * Copyright © 2020, Martin Storsjo
4*c0909341SAndroid Build Coastguard Worker * All rights reserved.
5*c0909341SAndroid Build Coastguard Worker *
6*c0909341SAndroid Build Coastguard Worker * Redistribution and use in source and binary forms, with or without
7*c0909341SAndroid Build Coastguard Worker * modification, are permitted provided that the following conditions are met:
8*c0909341SAndroid Build Coastguard Worker *
9*c0909341SAndroid Build Coastguard Worker * 1. Redistributions of source code must retain the above copyright notice, this
10*c0909341SAndroid Build Coastguard Worker *    list of conditions and the following disclaimer.
11*c0909341SAndroid Build Coastguard Worker *
12*c0909341SAndroid Build Coastguard Worker * 2. Redistributions in binary form must reproduce the above copyright notice,
13*c0909341SAndroid Build Coastguard Worker *    this list of conditions and the following disclaimer in the documentation
14*c0909341SAndroid Build Coastguard Worker *    and/or other materials provided with the distribution.
15*c0909341SAndroid Build Coastguard Worker *
16*c0909341SAndroid Build Coastguard Worker * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17*c0909341SAndroid Build Coastguard Worker * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18*c0909341SAndroid Build Coastguard Worker * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19*c0909341SAndroid Build Coastguard Worker * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20*c0909341SAndroid Build Coastguard Worker * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21*c0909341SAndroid Build Coastguard Worker * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22*c0909341SAndroid Build Coastguard Worker * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23*c0909341SAndroid Build Coastguard Worker * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24*c0909341SAndroid Build Coastguard Worker * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25*c0909341SAndroid Build Coastguard Worker * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26*c0909341SAndroid Build Coastguard Worker *****************************************************************************/
27*c0909341SAndroid Build Coastguard Worker
28*c0909341SAndroid Build Coastguard Worker#include "src/arm/asm.S"
29*c0909341SAndroid Build Coastguard Worker#include "util.S"
30*c0909341SAndroid Build Coastguard Worker
31*c0909341SAndroid Build Coastguard Worker// The exported functions in this file have got the following signature:
32*c0909341SAndroid Build Coastguard Worker// void itxfm_add(pixel *dst, ptrdiff_t dst_stride, coef *coeff, int eob,
33*c0909341SAndroid Build Coastguard Worker//                int bitdepth_max);
34*c0909341SAndroid Build Coastguard Worker
35*c0909341SAndroid Build Coastguard Worker// Most of the functions use the following register layout:
36*c0909341SAndroid Build Coastguard Worker// x0-x3  external parameters
37*c0909341SAndroid Build Coastguard Worker// x4     function pointer to first transform
38*c0909341SAndroid Build Coastguard Worker// x5     function pointer to second transform
39*c0909341SAndroid Build Coastguard Worker// x6     output parameter for helper function
40*c0909341SAndroid Build Coastguard Worker// x7     input parameter for helper function
41*c0909341SAndroid Build Coastguard Worker// x8     input stride for helper function
42*c0909341SAndroid Build Coastguard Worker// x9-x12 scratch variables for helper functions
43*c0909341SAndroid Build Coastguard Worker// x13    pointer to list of eob thresholds
44*c0909341SAndroid Build Coastguard Worker// x14    return pointer for helper function
45*c0909341SAndroid Build Coastguard Worker// x15    return pointer for main function
46*c0909341SAndroid Build Coastguard Worker
47*c0909341SAndroid Build Coastguard Worker// The SIMD registers most often use the following layout:
48*c0909341SAndroid Build Coastguard Worker// v0-v1   multiplication coefficients
49*c0909341SAndroid Build Coastguard Worker// v2-v7   scratch registers
50*c0909341SAndroid Build Coastguard Worker// v8-v15  unused
51*c0909341SAndroid Build Coastguard Worker// v16-v31 inputs/outputs of transforms
52*c0909341SAndroid Build Coastguard Worker
// Multiplier table for the inverse DCTs, stored as 32-bit integers and grouped
// by transform size (see the label above each group of rows).
// The second idct4 entry, 2896*8*(1<<16), is 2896 pre-multiplied by 2^19;
// the idct_dc macro further down builds the same value with movz and uses it
// as a sqrdmulh operand.
// NOTE(review): the values look like 12-bit fixed-point sines/cosines
// (2896 ~= 4096/sqrt(2)) - confirm against the C reference transforms.
53*c0909341SAndroid Build Coastguard Workerconst idct_coeffs, align=4
54*c0909341SAndroid Build Coastguard Worker        // idct4
55*c0909341SAndroid Build Coastguard Worker        .int            2896, 2896*8*(1<<16), 1567, 3784
56*c0909341SAndroid Build Coastguard Worker        // idct8
57*c0909341SAndroid Build Coastguard Worker        .int            799, 4017, 3406, 2276
58*c0909341SAndroid Build Coastguard Worker        // idct16
59*c0909341SAndroid Build Coastguard Worker        .int            401, 4076, 3166, 2598
60*c0909341SAndroid Build Coastguard Worker        .int            1931, 3612, 3920, 1189
61*c0909341SAndroid Build Coastguard Worker        // idct32
62*c0909341SAndroid Build Coastguard Worker        .int            201, 4091, 3035, 2751
63*c0909341SAndroid Build Coastguard Worker        .int            1751, 3703, 3857, 1380
64*c0909341SAndroid Build Coastguard Worker        .int            995, 3973, 3513, 2106
65*c0909341SAndroid Build Coastguard Worker        .int            2440, 3290, 4052, 601
66*c0909341SAndroid Build Coastguard Workerendconst
67*c0909341SAndroid Build Coastguard Worker
// Constant table named for the 64-point inverse DCT: four groups of twelve
// 32-bit values. Within each group the first two rows are pre-multiplied by
// 8*(1<<16) (i.e. 2^19) and the third row holds plain, unscaled values.
// NOTE(review): the code that consumes this table is outside this part of the
// file; the 2^19 scaling presumably targets sqrdmulh, as in idct_dc - confirm.
68*c0909341SAndroid Build Coastguard Workerconst idct64_coeffs, align=4
69*c0909341SAndroid Build Coastguard Worker        .int            101*8*(1<<16), 4095*8*(1<<16), 2967*8*(1<<16), -2824*8*(1<<16)
70*c0909341SAndroid Build Coastguard Worker        .int            1660*8*(1<<16), 3745*8*(1<<16), 3822*8*(1<<16), -1474*8*(1<<16)
71*c0909341SAndroid Build Coastguard Worker        .int            4076, 401, 4017, 799
72*c0909341SAndroid Build Coastguard Worker
73*c0909341SAndroid Build Coastguard Worker        .int            4036*8*(1<<16), -700*8*(1<<16), 2359*8*(1<<16), 3349*8*(1<<16)
74*c0909341SAndroid Build Coastguard Worker        .int            3461*8*(1<<16), -2191*8*(1<<16), 897*8*(1<<16), 3996*8*(1<<16)
75*c0909341SAndroid Build Coastguard Worker        .int            -3166, -2598, -799, -4017
76*c0909341SAndroid Build Coastguard Worker
77*c0909341SAndroid Build Coastguard Worker        .int            501*8*(1<<16), 4065*8*(1<<16), 3229*8*(1<<16), -2520*8*(1<<16)
78*c0909341SAndroid Build Coastguard Worker        .int            2019*8*(1<<16), 3564*8*(1<<16), 3948*8*(1<<16), -1092*8*(1<<16)
79*c0909341SAndroid Build Coastguard Worker        .int            3612, 1931, 2276, 3406
80*c0909341SAndroid Build Coastguard Worker
81*c0909341SAndroid Build Coastguard Worker        .int            4085*8*(1<<16), -301*8*(1<<16), 2675*8*(1<<16), 3102*8*(1<<16)
82*c0909341SAndroid Build Coastguard Worker        .int            3659*8*(1<<16), -1842*8*(1<<16), 1285*8*(1<<16), 3889*8*(1<<16)
83*c0909341SAndroid Build Coastguard Worker        .int            -3920, -1189, -3406, -2276
84*c0909341SAndroid Build Coastguard Workerendconst
85*c0909341SAndroid Build Coastguard Worker
// Four unscaled 32-bit constants, named for the 4-point inverse ADST.
// NOTE(review): the consuming code is not visible in this part of the file.
86*c0909341SAndroid Build Coastguard Workerconst iadst4_coeffs, align=4
87*c0909341SAndroid Build Coastguard Worker        .int            1321, 3803, 2482, 3344
88*c0909341SAndroid Build Coastguard Workerendconst
89*c0909341SAndroid Build Coastguard Worker
// Constants named for the 8-point inverse ADST: two rows of unscaled 32-bit
// multipliers followed by a copy of the idct4 row of idct_coeffs in which the
// second (pre-scaled) entry is replaced by 0.
// NOTE(review): the consuming code is not visible in this part of the file.
90*c0909341SAndroid Build Coastguard Workerconst iadst8_coeffs, align=4
91*c0909341SAndroid Build Coastguard Worker        .int            4076, 401, 3612, 1931
92*c0909341SAndroid Build Coastguard Worker        .int            2598, 3166, 1189, 3920
93*c0909341SAndroid Build Coastguard Worker        // idct_coeffs
94*c0909341SAndroid Build Coastguard Worker        .int            2896, 0, 1567, 3784
95*c0909341SAndroid Build Coastguard Workerendconst
96*c0909341SAndroid Build Coastguard Worker
// Sixteen unscaled 32-bit constants, named for the 16-point inverse ADST.
// NOTE(review): the consuming code is not visible in this part of the file.
97*c0909341SAndroid Build Coastguard Workerconst iadst16_coeffs, align=4
98*c0909341SAndroid Build Coastguard Worker        .int            4091, 201, 3973, 995
99*c0909341SAndroid Build Coastguard Worker        .int            3703, 1751, 3290, 2440
100*c0909341SAndroid Build Coastguard Worker        .int            2751, 3035, 2106, 3513
101*c0909341SAndroid Build Coastguard Worker        .int            1380, 3857, 601, 4052
102*c0909341SAndroid Build Coastguard Workerendconst
103*c0909341SAndroid Build Coastguard Worker
// d = s0 * c0 + s1 * c1, computed per 32-bit lane.
//   d, s0, s1   bare vector register names; the .4s arrangement is appended
//   c0, c1      complete multiplier operands, pasted into the instruction as
//               given
// d is overwritten by the first instruction and then accumulated into, so it
// must not be the same register as s1.
104*c0909341SAndroid Build Coastguard Worker.macro mul_mla d, s0, s1, c0, c1
105*c0909341SAndroid Build Coastguard Worker        mul             \d\().4s, \s0\().4s, \c0
106*c0909341SAndroid Build Coastguard Worker        mla             \d\().4s, \s1\().4s, \c1
107*c0909341SAndroid Build Coastguard Worker.endm
108*c0909341SAndroid Build Coastguard Worker
// d = s0 * c0 - s1 * c1, computed per 32-bit lane.
//   d, s0, s1   bare vector register names; the .4s arrangement is appended
//   c0, c1      complete multiplier operands, pasted into the instruction as
//               given
// d is overwritten by the first instruction and then subtracted from, so it
// must not be the same register as s1.
109*c0909341SAndroid Build Coastguard Worker.macro mul_mls d, s0, s1, c0, c1
110*c0909341SAndroid Build Coastguard Worker        mul             \d\().4s, \s0\().4s, \c0
111*c0909341SAndroid Build Coastguard Worker        mls             \d\().4s, \s1\().4s, \c1
112*c0909341SAndroid Build Coastguard Worker.endm
113*c0909341SAndroid Build Coastguard Worker
// Multiplies four or eight registers in place by the constant c using
// sqrdmulh (saturating rounding doubling multiply, high half).
//   sz       arrangement suffix including the dot; it is pasted directly
//            after each register name
//   c        multiplier operand, pasted into the instruction as given
//   r0-r3    registers that are always scaled
//   r4-r7    optional second group; only r4 is tested for presence, so when
//            r4 is given r5-r7 must be given as well
// The parameter list has no comma between r2 and r3; GNU as also accepts
// whitespace as a separator between macro parameters, so r3 is still a
// separate parameter.
114*c0909341SAndroid Build Coastguard Worker.macro scale_input sz, c, r0, r1, r2 r3, r4, r5, r6, r7
115*c0909341SAndroid Build Coastguard Worker        sqrdmulh        \r0\sz,  \r0\sz,  \c
116*c0909341SAndroid Build Coastguard Worker        sqrdmulh        \r1\sz,  \r1\sz,  \c
117*c0909341SAndroid Build Coastguard Worker        sqrdmulh        \r2\sz,  \r2\sz,  \c
118*c0909341SAndroid Build Coastguard Worker        sqrdmulh        \r3\sz,  \r3\sz,  \c
119*c0909341SAndroid Build Coastguard Worker.ifnb \r4
120*c0909341SAndroid Build Coastguard Worker        sqrdmulh        \r4\sz,  \r4\sz,  \c
121*c0909341SAndroid Build Coastguard Worker        sqrdmulh        \r5\sz,  \r5\sz,  \c
122*c0909341SAndroid Build Coastguard Worker        sqrdmulh        \r6\sz,  \r6\sz,  \c
123*c0909341SAndroid Build Coastguard Worker        sqrdmulh        \r7\sz,  \r7\sz,  \c
124*c0909341SAndroid Build Coastguard Worker.endif
125*c0909341SAndroid Build Coastguard Worker.endm
126*c0909341SAndroid Build Coastguard Worker
// r0 = signed minimum of r1 and r2, per 32-bit lane. All three arguments are
// bare register names; the .4s arrangement is appended here.
127*c0909341SAndroid Build Coastguard Worker.macro smin_4s r0, r1, r2
128*c0909341SAndroid Build Coastguard Worker        smin            \r0\().4s, \r1\().4s, \r2\().4s
129*c0909341SAndroid Build Coastguard Worker.endm
// r0 = signed maximum of r1 and r2, per 32-bit lane. All three arguments are
// bare register names; the .4s arrangement is appended here.
130*c0909341SAndroid Build Coastguard Worker.macro smax_4s r0, r1, r2
131*c0909341SAndroid Build Coastguard Worker        smax            \r0\().4s, \r1\().4s, \r2\().4s
132*c0909341SAndroid Build Coastguard Worker.endm
133*c0909341SAndroid Build Coastguard Worker
// One step of the add-residual-to-destination sequence for rows of 16-bit
// pixels. Each stage is emitted only when its argument is non-blank, which
// lets the callers below stagger the stages of consecutive rows.
//   load            register loaded from the address in src; src += x1
//   shift           register shifted right in place with rounding (srshr)
//                   by shiftbits
//   addsrc, adddst  adddst += addsrc using usqadd (signed value accumulated
//                   into an unsigned register with unsigned saturation);
//                   only addsrc is tested, so both must be given together
//   min             register limited in place to the value held in v7.8h
//   store           register stored to the address in dst; dst += x1
//   dst, src        address registers
//   shiftbits       shift amount for the shift stage (default 4)
// v7.8h must already hold the upper clamp value; x1 is the row stride
// (dst_stride in the signature given in the file header).
134*c0909341SAndroid Build Coastguard Worker.macro load_add_store load, shift, addsrc, adddst, min, store, dst, src, shiftbits=4
135*c0909341SAndroid Build Coastguard Worker.ifnb \load
136*c0909341SAndroid Build Coastguard Worker        ld1             {\load},  [\src], x1
137*c0909341SAndroid Build Coastguard Worker.endif
138*c0909341SAndroid Build Coastguard Worker.ifnb \shift
139*c0909341SAndroid Build Coastguard Worker        srshr           \shift,  \shift,  #\shiftbits
140*c0909341SAndroid Build Coastguard Worker.endif
141*c0909341SAndroid Build Coastguard Worker.ifnb \addsrc
142*c0909341SAndroid Build Coastguard Worker        usqadd          \adddst, \addsrc
143*c0909341SAndroid Build Coastguard Worker.endif
144*c0909341SAndroid Build Coastguard Worker.ifnb \min
145*c0909341SAndroid Build Coastguard Worker        smin            \min,  \min,  v7.8h
146*c0909341SAndroid Build Coastguard Worker.endif
147*c0909341SAndroid Build Coastguard Worker.ifnb \store
148*c0909341SAndroid Build Coastguard Worker        st1             {\store},  [\dst], x1
149*c0909341SAndroid Build Coastguard Worker.endif
150*c0909341SAndroid Build Coastguard Worker.endm
// Adds the sixteen residual rows held in v16-v31 (8 lanes of 16 bits each) to
// sixteen destination rows and writes the result back, clamped to 0x3ff.
//   dst   register holding the destination pointer; advanced by 16 * x1
//   src   scratch address register; set to dst on entry and used for the
//         loads, so it also ends up advanced by 16 * x1
// Every residual is first shifted right by 4 with rounding (the default
// shiftbits of load_add_store).
// The rows are software-pipelined: each load_add_store line loads row n,
// shifts the residual of row n, adds row n-2, clamps row n-3 and stores
// row n-4. Destination pixels go into v2-v5 first and then into v16-v27,
// reusing each residual register only after its value has been added.
// Clobbers v2-v5, v7 and v16-v31.
151*c0909341SAndroid Build Coastguard Worker.macro load_add_store_8x16 dst, src
152*c0909341SAndroid Build Coastguard Worker        mov             \src, \dst
153*c0909341SAndroid Build Coastguard Worker        mvni            v7.8h,   #0xfc, lsl #8 // 0x3ff
154*c0909341SAndroid Build Coastguard Worker        load_add_store  v2.8h,  v16.8h,       ,      ,       ,       ,  \dst, \src
155*c0909341SAndroid Build Coastguard Worker        load_add_store  v3.8h,  v17.8h,       ,      ,       ,       ,  \dst, \src
156*c0909341SAndroid Build Coastguard Worker        load_add_store  v4.8h,  v18.8h, v16.8h, v2.8h,       ,       ,  \dst, \src
157*c0909341SAndroid Build Coastguard Worker        load_add_store  v5.8h,  v19.8h, v17.8h, v3.8h,  v2.8h,       ,  \dst, \src
158*c0909341SAndroid Build Coastguard Worker        load_add_store  v16.8h, v20.8h, v18.8h, v4.8h,  v3.8h,  v2.8h,  \dst, \src
159*c0909341SAndroid Build Coastguard Worker        load_add_store  v17.8h, v21.8h, v19.8h, v5.8h,  v4.8h,  v3.8h,  \dst, \src
160*c0909341SAndroid Build Coastguard Worker        load_add_store  v18.8h, v22.8h, v20.8h, v16.8h, v5.8h,  v4.8h,  \dst, \src
161*c0909341SAndroid Build Coastguard Worker        load_add_store  v19.8h, v23.8h, v21.8h, v17.8h, v16.8h, v5.8h,  \dst, \src
162*c0909341SAndroid Build Coastguard Worker        load_add_store  v20.8h, v24.8h, v22.8h, v18.8h, v17.8h, v16.8h, \dst, \src
163*c0909341SAndroid Build Coastguard Worker        load_add_store  v21.8h, v25.8h, v23.8h, v19.8h, v18.8h, v17.8h, \dst, \src
164*c0909341SAndroid Build Coastguard Worker        load_add_store  v22.8h, v26.8h, v24.8h, v20.8h, v19.8h, v18.8h, \dst, \src
165*c0909341SAndroid Build Coastguard Worker        load_add_store  v23.8h, v27.8h, v25.8h, v21.8h, v20.8h, v19.8h, \dst, \src
166*c0909341SAndroid Build Coastguard Worker        load_add_store  v24.8h, v28.8h, v26.8h, v22.8h, v21.8h, v20.8h, \dst, \src
167*c0909341SAndroid Build Coastguard Worker        load_add_store  v25.8h, v29.8h, v27.8h, v23.8h, v22.8h, v21.8h, \dst, \src
168*c0909341SAndroid Build Coastguard Worker        load_add_store  v26.8h, v30.8h, v28.8h, v24.8h, v23.8h, v22.8h, \dst, \src
169*c0909341SAndroid Build Coastguard Worker        load_add_store  v27.8h, v31.8h, v29.8h, v25.8h, v24.8h, v23.8h, \dst, \src
170*c0909341SAndroid Build Coastguard Worker        load_add_store        ,       , v30.8h, v26.8h, v25.8h, v24.8h, \dst, \src
171*c0909341SAndroid Build Coastguard Worker        load_add_store        ,       , v31.8h, v27.8h, v26.8h, v25.8h, \dst, \src
172*c0909341SAndroid Build Coastguard Worker        load_add_store        ,       ,       ,       , v27.8h, v26.8h, \dst, \src
173*c0909341SAndroid Build Coastguard Worker        load_add_store        ,       ,       ,       ,       , v27.8h, \dst, \src
174*c0909341SAndroid Build Coastguard Worker.endm
// Adds the eight residual rows held in v16-v23 (8 lanes of 16 bits each) to
// eight destination rows and writes the result back, clamped to 0x3ff.
//   dst        register holding the destination pointer; advanced by 8 * x1
//   src        scratch address register; set to dst on entry and used for
//              the loads, so it also ends up advanced by 8 * x1
//   shiftbits  rounding right shift applied to every residual (default 4)
// Same staggered schedule as load_add_store_8x16: destination pixels go into
// v2-v5 and then into v16-v19, after those residuals have been added.
// Clobbers v2-v5, v7 and v16-v23.
175*c0909341SAndroid Build Coastguard Worker.macro load_add_store_8x8 dst, src, shiftbits=4
176*c0909341SAndroid Build Coastguard Worker        mov             \src, \dst
177*c0909341SAndroid Build Coastguard Worker        mvni            v7.8h,   #0xfc, lsl #8 // 0x3ff
178*c0909341SAndroid Build Coastguard Worker        load_add_store  v2.8h,  v16.8h,       ,      ,       ,       ,  \dst, \src, \shiftbits
179*c0909341SAndroid Build Coastguard Worker        load_add_store  v3.8h,  v17.8h,       ,      ,       ,       ,  \dst, \src, \shiftbits
180*c0909341SAndroid Build Coastguard Worker        load_add_store  v4.8h,  v18.8h, v16.8h, v2.8h,       ,       ,  \dst, \src, \shiftbits
181*c0909341SAndroid Build Coastguard Worker        load_add_store  v5.8h,  v19.8h, v17.8h, v3.8h,  v2.8h,       ,  \dst, \src, \shiftbits
182*c0909341SAndroid Build Coastguard Worker        load_add_store  v16.8h, v20.8h, v18.8h, v4.8h,  v3.8h,  v2.8h,  \dst, \src, \shiftbits
183*c0909341SAndroid Build Coastguard Worker        load_add_store  v17.8h, v21.8h, v19.8h, v5.8h,  v4.8h,  v3.8h,  \dst, \src, \shiftbits
184*c0909341SAndroid Build Coastguard Worker        load_add_store  v18.8h, v22.8h, v20.8h, v16.8h, v5.8h,  v4.8h,  \dst, \src, \shiftbits
185*c0909341SAndroid Build Coastguard Worker        load_add_store  v19.8h, v23.8h, v21.8h, v17.8h, v16.8h, v5.8h,  \dst, \src, \shiftbits
186*c0909341SAndroid Build Coastguard Worker        load_add_store        ,       , v22.8h, v18.8h, v17.8h, v16.8h, \dst, \src, \shiftbits
187*c0909341SAndroid Build Coastguard Worker        load_add_store        ,       , v23.8h, v19.8h, v18.8h, v17.8h, \dst, \src, \shiftbits
188*c0909341SAndroid Build Coastguard Worker        load_add_store        ,       ,       ,       , v19.8h, v18.8h, \dst, \src, \shiftbits
189*c0909341SAndroid Build Coastguard Worker        load_add_store        ,       ,       ,       ,       , v19.8h, \dst, \src, \shiftbits
190*c0909341SAndroid Build Coastguard Worker.endm
// Adds the four residual rows held in v16-v19 (8 lanes of 16 bits each) to
// four destination rows and writes the result back, clamped to 0x3ff.
//   dst        register holding the destination pointer; advanced by 4 * x1
//   src        scratch address register; set to dst on entry and used for
//              the loads, so it also ends up advanced by 4 * x1
//   shiftbits  rounding right shift applied to every residual (default 4)
// Destination pixels are held in v2-v5; the stages of the four rows are
// staggered in the same way as in load_add_store_8x16.
// Clobbers v2-v5, v7 and v16-v19.
191*c0909341SAndroid Build Coastguard Worker.macro load_add_store_8x4 dst, src, shiftbits=4
192*c0909341SAndroid Build Coastguard Worker        mov             \src, \dst
193*c0909341SAndroid Build Coastguard Worker        mvni            v7.8h,   #0xfc, lsl #8 // 0x3ff
194*c0909341SAndroid Build Coastguard Worker        load_add_store  v2.8h, v16.8h,       ,      ,      ,      , \dst, \src, \shiftbits
195*c0909341SAndroid Build Coastguard Worker        load_add_store  v3.8h, v17.8h,       ,      ,      ,      , \dst, \src, \shiftbits
196*c0909341SAndroid Build Coastguard Worker        load_add_store  v4.8h, v18.8h, v16.8h, v2.8h,      ,      , \dst, \src, \shiftbits
197*c0909341SAndroid Build Coastguard Worker        load_add_store  v5.8h, v19.8h, v17.8h, v3.8h, v2.8h,      , \dst, \src, \shiftbits
198*c0909341SAndroid Build Coastguard Worker        load_add_store       ,       , v18.8h, v4.8h, v3.8h, v2.8h, \dst, \src, \shiftbits
199*c0909341SAndroid Build Coastguard Worker        load_add_store       ,       , v19.8h, v5.8h, v4.8h, v3.8h, \dst, \src, \shiftbits
200*c0909341SAndroid Build Coastguard Worker        load_add_store       ,       ,       ,      , v5.8h, v4.8h, \dst, \src, \shiftbits
201*c0909341SAndroid Build Coastguard Worker        load_add_store       ,       ,       ,      ,      , v5.8h, \dst, \src, \shiftbits
202*c0909341SAndroid Build Coastguard Worker.endm
// One step of the add-residual-to-destination sequence for rows that are
// four 16-bit pixels (64 bits) wide. Two rows share one vector register.
// Each stage is emitted only when its argument is non-blank.
//   load            register written as vN.d; 64-bit lanes 0 and 1 are
//                   loaded from two consecutive rows at src (src += x1 each)
//   inssrc, insdst  bare register names; the low 64 bits of inssrc are
//                   copied into the high 64 bits of insdst, packing two
//                   residual rows into one register
//   shift           register shifted right by 4 in place with rounding
//   addsrc, adddst  adddst += addsrc using usqadd; only addsrc is tested, so
//                   both must be given together
//   min             register limited in place to the value held in v7.8h
//   store           register written as vN.d; 64-bit lanes 0 and 1 are
//                   stored to two consecutive rows at dst (dst += x1 each)
//   dst, src        address registers
// The two loads and the two stores are interleaved with the arithmetic
// stages. v7.8h must already hold the upper clamp value.
203*c0909341SAndroid Build Coastguard Worker.macro load_add_store4 load, inssrc, insdst, shift, addsrc, adddst, min, store, dst, src
204*c0909341SAndroid Build Coastguard Worker.ifnb \load
205*c0909341SAndroid Build Coastguard Worker        ld1             {\load}[0],  [\src], x1
206*c0909341SAndroid Build Coastguard Worker.endif
207*c0909341SAndroid Build Coastguard Worker.ifnb \inssrc
208*c0909341SAndroid Build Coastguard Worker        ins             \insdst\().d[1],   \inssrc\().d[0]
209*c0909341SAndroid Build Coastguard Worker.endif
210*c0909341SAndroid Build Coastguard Worker.ifnb \shift
211*c0909341SAndroid Build Coastguard Worker        srshr           \shift,  \shift,  #4
212*c0909341SAndroid Build Coastguard Worker.endif
213*c0909341SAndroid Build Coastguard Worker.ifnb \load
214*c0909341SAndroid Build Coastguard Worker        ld1             {\load}[1],  [\src], x1
215*c0909341SAndroid Build Coastguard Worker.endif
216*c0909341SAndroid Build Coastguard Worker.ifnb \addsrc
217*c0909341SAndroid Build Coastguard Worker        usqadd          \adddst, \addsrc
218*c0909341SAndroid Build Coastguard Worker.endif
219*c0909341SAndroid Build Coastguard Worker.ifnb \store
220*c0909341SAndroid Build Coastguard Worker        st1             {\store}[0],  [\dst], x1
221*c0909341SAndroid Build Coastguard Worker.endif
222*c0909341SAndroid Build Coastguard Worker.ifnb \min
223*c0909341SAndroid Build Coastguard Worker        smin            \min,  \min,  v7.8h
224*c0909341SAndroid Build Coastguard Worker.endif
225*c0909341SAndroid Build Coastguard Worker.ifnb \store
226*c0909341SAndroid Build Coastguard Worker        st1             {\store}[1],  [\dst], x1
227*c0909341SAndroid Build Coastguard Worker.endif
228*c0909341SAndroid Build Coastguard Worker.endm
// Adds sixteen 4 pixel wide residual rows to sixteen destination rows and
// writes the result back, clamped to 0x3ff.
// On entry the residual rows are in the low 64 bits of v16-v31. Each odd
// register is merged into the high half of the preceding even register, so
// the packed row pairs end up in v16, v18, ..., v30. They are shifted right
// by 4 with rounding before being added.
//   dst   register holding the destination pointer; advanced by 16 * x1
//   src   scratch address register; set to dst on entry and used for the
//         loads, so it also ends up advanced by 16 * x1
// Destination pixels are loaded into v0-v3 and then into v17, v19, v21 and
// v23, each of which has already been merged into its even neighbour when it
// is reloaded.
// Clobbers v0-v3, v7 and v16-v31. Note that this includes v0 and v1, which
// the file header lists as the multiplication coefficient registers.
229*c0909341SAndroid Build Coastguard Worker.macro load_add_store_4x16 dst, src
230*c0909341SAndroid Build Coastguard Worker        mov             \src, \dst
231*c0909341SAndroid Build Coastguard Worker        mvni            v7.8h,   #0xfc, lsl #8 // 0x3ff
232*c0909341SAndroid Build Coastguard Worker        load_add_store4 v0.d,  v17, v16,       ,       ,      ,       ,      ,  \dst, \src
233*c0909341SAndroid Build Coastguard Worker        load_add_store4 v1.d,  v19, v18,       ,       ,      ,       ,      ,  \dst, \src
234*c0909341SAndroid Build Coastguard Worker        load_add_store4 v2.d,  v21, v20, v16.8h,       ,      ,       ,      ,  \dst, \src
235*c0909341SAndroid Build Coastguard Worker        load_add_store4 v3.d,  v23, v22, v18.8h, v16.8h, v0.8h,       ,      ,  \dst, \src
236*c0909341SAndroid Build Coastguard Worker        load_add_store4 v17.d, v25, v24, v20.8h, v18.8h, v1.8h,  v0.8h,      ,  \dst, \src
237*c0909341SAndroid Build Coastguard Worker        load_add_store4 v19.d, v27, v26, v22.8h, v20.8h, v2.8h,  v1.8h,  v0.d,  \dst, \src
238*c0909341SAndroid Build Coastguard Worker        load_add_store4 v21.d, v29, v28, v24.8h, v22.8h, v3.8h,  v2.8h,  v1.d,  \dst, \src
239*c0909341SAndroid Build Coastguard Worker        load_add_store4 v23.d, v31, v30, v26.8h, v24.8h, v17.8h, v3.8h,  v2.d,  \dst, \src
240*c0909341SAndroid Build Coastguard Worker        load_add_store4      ,    ,    , v28.8h, v26.8h, v19.8h, v17.8h, v3.d,  \dst, \src
241*c0909341SAndroid Build Coastguard Worker        load_add_store4      ,    ,    , v30.8h, v28.8h, v21.8h, v19.8h, v17.d, \dst, \src
242*c0909341SAndroid Build Coastguard Worker        load_add_store4      ,    ,    ,       , v30.8h, v23.8h, v21.8h, v19.d, \dst, \src
243*c0909341SAndroid Build Coastguard Worker        load_add_store4      ,    ,    ,       ,      ,        , v23.8h, v21.d, \dst, \src
244*c0909341SAndroid Build Coastguard Worker        load_add_store4      ,    ,    ,       ,      ,        ,       , v23.d, \dst, \src
245*c0909341SAndroid Build Coastguard Worker.endm
// Adds eight 4 pixel wide residual rows to eight destination rows and writes
// the result back, clamped to 0x3ff.
// On entry the residual rows are in the low 64 bits of v16-v23. Each odd
// register is merged into the high half of the preceding even register, so
// the packed row pairs end up in v16, v18, v20 and v22. They are shifted
// right by 4 with rounding before being added.
//   dst   register holding the destination pointer; advanced by 8 * x1
//   src   scratch address register; set to dst on entry and used for the
//         loads, so it also ends up advanced by 8 * x1
// Destination pixels are held in v0-v3.
// Clobbers v0-v3, v7, v16, v18, v20 and v22. Note that this includes v0 and
// v1, which the file header lists as the multiplication coefficient
// registers.
246*c0909341SAndroid Build Coastguard Worker.macro load_add_store_4x8 dst, src
247*c0909341SAndroid Build Coastguard Worker        mov             \src, \dst
248*c0909341SAndroid Build Coastguard Worker        mvni            v7.8h,   #0xfc, lsl #8 // 0x3ff
249*c0909341SAndroid Build Coastguard Worker        load_add_store4 v0.d, v17, v16,       ,       ,      ,      ,     , \dst, \src
250*c0909341SAndroid Build Coastguard Worker        load_add_store4 v1.d, v19, v18,       ,       ,      ,      ,     , \dst, \src
251*c0909341SAndroid Build Coastguard Worker        load_add_store4 v2.d, v21, v20, v16.8h,       ,      ,      ,     , \dst, \src
252*c0909341SAndroid Build Coastguard Worker        load_add_store4 v3.d, v23, v22, v18.8h, v16.8h, v0.8h,      ,     , \dst, \src
253*c0909341SAndroid Build Coastguard Worker        load_add_store4     ,    ,    , v20.8h, v18.8h, v1.8h, v0.8h,     , \dst, \src
254*c0909341SAndroid Build Coastguard Worker        load_add_store4     ,    ,    , v22.8h, v20.8h, v2.8h, v1.8h, v0.d, \dst, \src
255*c0909341SAndroid Build Coastguard Worker        load_add_store4     ,    ,    ,       , v22.8h, v3.8h, v2.8h, v1.d, \dst, \src
256*c0909341SAndroid Build Coastguard Worker        load_add_store4     ,    ,    ,       ,       ,      , v3.8h, v2.d, \dst, \src
257*c0909341SAndroid Build Coastguard Worker        load_add_store4     ,    ,    ,       ,       ,      ,      , v3.d, \dst, \src
258*c0909341SAndroid Build Coastguard Worker.endm
259*c0909341SAndroid Build Coastguard Worker
// DC-only shortcut for a w x h block, expanded at the start of a transform
// function.
//   w      block width; selects the idct_dc_wN_neon routine branched to
//   h      block height; passed to that routine in w4
//   shift  rounding right shift applied when narrowing to 16 bits; 0 selects
//          a plain saturating narrow instead
// If w3 is nonzero the macro does nothing and execution continues after the
// expansion (local label 1).
// NOTE(review): w3 is presumably eob from the signature in the file header.
// Otherwise the 32-bit value at x2 is broadcast, the value in memory is
// cleared, and the broadcast value is scaled to form a per-pixel offset in
// all lanes of v16.8h:
//   - multiplied by 2896/4096 with sqrdmulh (constant 2896 << 19), and a
//     second time when one block dimension is exactly twice the other
//   - narrowed to 16 bits, with the rounding shift if shift > 0
//   - multiplied by 2896/4096 again; v0.h[1] is the upper half of the
//     32-bit constant, i.e. 2896 * 8
//   - shifted right by 4 with rounding
// Control then leaves through a plain branch, so the ret in the target
// routine returns to the caller of the function that expanded this macro.
// Clobbers w4, w16, v0, v16 and v20 on the DC path.
260*c0909341SAndroid Build Coastguard Worker.macro idct_dc w, h, shift
261*c0909341SAndroid Build Coastguard Worker        cbnz            w3,  1f
262*c0909341SAndroid Build Coastguard Worker        movz            w16, #2896*8, lsl #16
263*c0909341SAndroid Build Coastguard Worker        ld1r            {v16.4s}, [x2]
264*c0909341SAndroid Build Coastguard Worker        dup             v0.2s,   w16
265*c0909341SAndroid Build Coastguard Worker        sqrdmulh        v20.4s,  v16.4s,  v0.s[0]
266*c0909341SAndroid Build Coastguard Worker        str             wzr, [x2]
267*c0909341SAndroid Build Coastguard Worker.if (\w == 2*\h) || (2*\w == \h)
268*c0909341SAndroid Build Coastguard Worker        sqrdmulh        v20.4s,  v20.4s,  v0.s[0]
269*c0909341SAndroid Build Coastguard Worker.endif
270*c0909341SAndroid Build Coastguard Worker.if \shift > 0
271*c0909341SAndroid Build Coastguard Worker        sqrshrn         v16.4h,  v20.4s,  #\shift
272*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v16.8h,  v20.4s,  #\shift
273*c0909341SAndroid Build Coastguard Worker.else
274*c0909341SAndroid Build Coastguard Worker        sqxtn           v16.4h,  v20.4s
275*c0909341SAndroid Build Coastguard Worker        sqxtn2          v16.8h,  v20.4s
276*c0909341SAndroid Build Coastguard Worker.endif
277*c0909341SAndroid Build Coastguard Worker        sqrdmulh        v16.8h,  v16.8h,  v0.h[1]
278*c0909341SAndroid Build Coastguard Worker        srshr           v16.8h,  v16.8h,  #4
279*c0909341SAndroid Build Coastguard Worker        mov             w4,  #\h
280*c0909341SAndroid Build Coastguard Worker        b               idct_dc_w\w\()_neon
281*c0909341SAndroid Build Coastguard Worker1:
282*c0909341SAndroid Build Coastguard Worker.endm
283*c0909341SAndroid Build Coastguard Worker
// Adds the per-pixel offset in v16.8h to a block that is four 16-bit pixels
// wide and clamps the result to 0x3ff. Reached by a branch from idct_dc.
// In:     x0 = destination pointer, x1 = row stride, w4 = number of rows,
//         v16.8h = offset (the same value in every lane)
// Four rows are processed per iteration, two rows per vector register; the
// loop repeats while the remaining row count is greater than zero.
// Out:    x0 advanced past the processed rows
// Clobb:  x0, w4, v0, v1, v31, flags
284*c0909341SAndroid Build Coastguard Workerfunction idct_dc_w4_neon
285*c0909341SAndroid Build Coastguard Worker        mvni            v31.8h,  #0xfc, lsl #8 // 0x3ff
286*c0909341SAndroid Build Coastguard Worker1:
287*c0909341SAndroid Build Coastguard Worker        ld1             {v0.d}[0], [x0], x1
288*c0909341SAndroid Build Coastguard Worker        ld1             {v0.d}[1], [x0], x1
289*c0909341SAndroid Build Coastguard Worker        ld1             {v1.d}[0], [x0], x1
        // The flags set here are consumed by the b.gt at the end of the loop;
        // none of the instructions in between modifies them.
290*c0909341SAndroid Build Coastguard Worker        subs            w4,  w4,  #4
291*c0909341SAndroid Build Coastguard Worker        ld1             {v1.d}[1], [x0], x1
292*c0909341SAndroid Build Coastguard Worker        usqadd          v0.8h,   v16.8h
        // Rewind x0 to the first of the four rows just loaded.
293*c0909341SAndroid Build Coastguard Worker        sub             x0,  x0,  x1, lsl #2
294*c0909341SAndroid Build Coastguard Worker        usqadd          v1.8h,   v16.8h
295*c0909341SAndroid Build Coastguard Worker        smin            v0.8h,   v0.8h,   v31.8h
296*c0909341SAndroid Build Coastguard Worker        st1             {v0.d}[0], [x0], x1
297*c0909341SAndroid Build Coastguard Worker        smin            v1.8h,   v1.8h,   v31.8h
298*c0909341SAndroid Build Coastguard Worker        st1             {v0.d}[1], [x0], x1
299*c0909341SAndroid Build Coastguard Worker        st1             {v1.d}[0], [x0], x1
300*c0909341SAndroid Build Coastguard Worker        st1             {v1.d}[1], [x0], x1
301*c0909341SAndroid Build Coastguard Worker        b.gt            1b
302*c0909341SAndroid Build Coastguard Worker        ret
303*c0909341SAndroid Build Coastguard Workerendfunc
304*c0909341SAndroid Build Coastguard Worker
// Adds the per-pixel offset in v16.8h to a block that is eight 16-bit pixels
// wide and clamps the result to 0x3ff. Reached by a branch from idct_dc.
// In:     x0 = destination pointer, x1 = row stride, w4 = number of rows,
//         v16.8h = offset
// Four rows are processed per iteration, one row per vector register; the
// loop repeats while the remaining row count is greater than zero.
// Out:    x0 advanced past the processed rows
// Clobb:  x0, w4, v0-v3, v31, flags
305*c0909341SAndroid Build Coastguard Workerfunction idct_dc_w8_neon
306*c0909341SAndroid Build Coastguard Worker        mvni            v31.8h,  #0xfc, lsl #8 // 0x3ff
307*c0909341SAndroid Build Coastguard Worker1:
308*c0909341SAndroid Build Coastguard Worker        ld1             {v0.8h}, [x0], x1
        // The flags set here are consumed by the b.gt at the end of the loop.
309*c0909341SAndroid Build Coastguard Worker        subs            w4,  w4,  #4
310*c0909341SAndroid Build Coastguard Worker        ld1             {v1.8h}, [x0], x1
311*c0909341SAndroid Build Coastguard Worker        usqadd          v0.8h,   v16.8h
312*c0909341SAndroid Build Coastguard Worker        ld1             {v2.8h}, [x0], x1
313*c0909341SAndroid Build Coastguard Worker        usqadd          v1.8h,   v16.8h
314*c0909341SAndroid Build Coastguard Worker        ld1             {v3.8h}, [x0], x1
315*c0909341SAndroid Build Coastguard Worker        usqadd          v2.8h,   v16.8h
316*c0909341SAndroid Build Coastguard Worker        usqadd          v3.8h,   v16.8h
        // Rewind x0 to the first of the four rows just loaded.
317*c0909341SAndroid Build Coastguard Worker        sub             x0,  x0,  x1, lsl #2
318*c0909341SAndroid Build Coastguard Worker        smin            v0.8h,   v0.8h,   v31.8h
319*c0909341SAndroid Build Coastguard Worker        smin            v1.8h,   v1.8h,   v31.8h
320*c0909341SAndroid Build Coastguard Worker        st1             {v0.8h}, [x0], x1
321*c0909341SAndroid Build Coastguard Worker        smin            v2.8h,   v2.8h,   v31.8h
322*c0909341SAndroid Build Coastguard Worker        st1             {v1.8h}, [x0], x1
323*c0909341SAndroid Build Coastguard Worker        smin            v3.8h,   v3.8h,   v31.8h
324*c0909341SAndroid Build Coastguard Worker        st1             {v2.8h}, [x0], x1
325*c0909341SAndroid Build Coastguard Worker        st1             {v3.8h}, [x0], x1
326*c0909341SAndroid Build Coastguard Worker        b.gt            1b
327*c0909341SAndroid Build Coastguard Worker        ret
328*c0909341SAndroid Build Coastguard Workerendfunc
329*c0909341SAndroid Build Coastguard Worker
// Adds the per-pixel offset in v16.8h to a block that is sixteen 16-bit
// pixels wide and clamps the result to 0x3ff. Reached by a branch from
// idct_dc.
// In:     x0 = destination pointer, x1 = row stride, w4 = number of rows,
//         v16.8h = offset
// Two rows are processed per iteration, two vector registers per row; the
// loop repeats while the remaining row count is greater than zero.
// Out:    x0 advanced past the processed rows
// Clobb:  x0, w4, v0-v3, v31, flags
330*c0909341SAndroid Build Coastguard Workerfunction idct_dc_w16_neon
331*c0909341SAndroid Build Coastguard Worker        mvni            v31.8h,  #0xfc, lsl #8 // 0x3ff
332*c0909341SAndroid Build Coastguard Worker1:
333*c0909341SAndroid Build Coastguard Worker        ld1             {v0.8h, v1.8h}, [x0], x1
        // The flags set here are consumed by the b.gt at the end of the loop.
334*c0909341SAndroid Build Coastguard Worker        subs            w4,  w4,  #2
335*c0909341SAndroid Build Coastguard Worker        ld1             {v2.8h, v3.8h}, [x0], x1
336*c0909341SAndroid Build Coastguard Worker        usqadd          v0.8h,   v16.8h
337*c0909341SAndroid Build Coastguard Worker        usqadd          v1.8h,   v16.8h
        // Rewind x0 to the first of the two rows just loaded.
338*c0909341SAndroid Build Coastguard Worker        sub             x0,  x0,  x1, lsl #1
339*c0909341SAndroid Build Coastguard Worker        usqadd          v2.8h,   v16.8h
340*c0909341SAndroid Build Coastguard Worker        usqadd          v3.8h,   v16.8h
341*c0909341SAndroid Build Coastguard Worker        smin            v0.8h,   v0.8h,   v31.8h
342*c0909341SAndroid Build Coastguard Worker        smin            v1.8h,   v1.8h,   v31.8h
343*c0909341SAndroid Build Coastguard Worker        smin            v2.8h,   v2.8h,   v31.8h
344*c0909341SAndroid Build Coastguard Worker        st1             {v0.8h, v1.8h}, [x0], x1
345*c0909341SAndroid Build Coastguard Worker        smin            v3.8h,   v3.8h,   v31.8h
346*c0909341SAndroid Build Coastguard Worker        st1             {v2.8h, v3.8h}, [x0], x1
347*c0909341SAndroid Build Coastguard Worker        b.gt            1b
348*c0909341SAndroid Build Coastguard Worker        ret
349*c0909341SAndroid Build Coastguard Workerendfunc
350*c0909341SAndroid Build Coastguard Worker
// Adds the per-pixel offset in v16.8h to a block that is thirty-two 16-bit
// pixels wide and clamps the result to 0x3ff. Reached by a branch from
// idct_dc.
// In:     x0 = destination pointer, x1 = row stride, w4 = number of rows,
//         v16.8h = offset
// One row (four vector registers) is processed per iteration; the loop
// repeats while the remaining row count is greater than zero.
// Out:    x0 advanced past the processed rows
// Clobb:  x0, w4, v0-v3, v31, flags
351*c0909341SAndroid Build Coastguard Workerfunction idct_dc_w32_neon
352*c0909341SAndroid Build Coastguard Worker        mvni            v31.8h,  #0xfc, lsl #8 // 0x3ff
353*c0909341SAndroid Build Coastguard Worker1:
        // The load does not write back to x0; only the store below advances
        // it to the next row, so no rewind is needed.
354*c0909341SAndroid Build Coastguard Worker        ld1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x0]
        // The flags set here are consumed by the b.gt at the end of the loop.
355*c0909341SAndroid Build Coastguard Worker        subs            w4,  w4,  #1
356*c0909341SAndroid Build Coastguard Worker        usqadd          v0.8h,   v16.8h
357*c0909341SAndroid Build Coastguard Worker        usqadd          v1.8h,   v16.8h
358*c0909341SAndroid Build Coastguard Worker        usqadd          v2.8h,   v16.8h
359*c0909341SAndroid Build Coastguard Worker        usqadd          v3.8h,   v16.8h
360*c0909341SAndroid Build Coastguard Worker        smin            v0.8h,   v0.8h,   v31.8h
361*c0909341SAndroid Build Coastguard Worker        smin            v1.8h,   v1.8h,   v31.8h
362*c0909341SAndroid Build Coastguard Worker        smin            v2.8h,   v2.8h,   v31.8h
363*c0909341SAndroid Build Coastguard Worker        smin            v3.8h,   v3.8h,   v31.8h
364*c0909341SAndroid Build Coastguard Worker        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], x1
365*c0909341SAndroid Build Coastguard Worker        b.gt            1b
366*c0909341SAndroid Build Coastguard Worker        ret
367*c0909341SAndroid Build Coastguard Workerendfunc
368*c0909341SAndroid Build Coastguard Worker
// Adds the signed 16-bit offsets held in v16 to a block of 16-bit samples that
// is 64 samples (128 bytes) wide, in place, one row per loop iteration, and
// clamps the result to 0x3ff.
//   x0  = address of the first row; read, written and advanced per row
//   x1  = byte offset from one row to the next (modified, see below)
//   w4  = row count; the body runs once before the count is tested
//   v16 = eight signed 16-bit offsets, prepared by the caller (not shown here)
// Clobbers v0-v7, v31, x1, w4 and the condition flags.
369*c0909341SAndroid Build Coastguard Workerfunction idct_dc_w64_neon
370*c0909341SAndroid Build Coastguard Worker        mvni            v31.8h,  #0xfc, lsl #8 // 0x3ff
        // Each row is stored as two 64-byte halves and the first store already
        // post-increments x0 by 64, so take 64 off the row offset up front.
371*c0909341SAndroid Build Coastguard Worker        sub             x1,  x1,  #64
372*c0909341SAndroid Build Coastguard Worker1:
373*c0909341SAndroid Build Coastguard Worker        ld1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #64
        // The subs result is what b.gt at the bottom of the loop tests.
374*c0909341SAndroid Build Coastguard Worker        subs            w4,  w4,  #1
375*c0909341SAndroid Build Coastguard Worker        usqadd          v0.8h,   v16.8h
376*c0909341SAndroid Build Coastguard Worker        ld1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x0]
377*c0909341SAndroid Build Coastguard Worker        usqadd          v1.8h,   v16.8h
        // Rewind x0 to the start of the row so the stores overwrite the samples
        // that were just loaded.
378*c0909341SAndroid Build Coastguard Worker        sub             x0,  x0,  #64
379*c0909341SAndroid Build Coastguard Worker        usqadd          v2.8h,   v16.8h
380*c0909341SAndroid Build Coastguard Worker        usqadd          v3.8h,   v16.8h
381*c0909341SAndroid Build Coastguard Worker        usqadd          v4.8h,   v16.8h
382*c0909341SAndroid Build Coastguard Worker        usqadd          v5.8h,   v16.8h
383*c0909341SAndroid Build Coastguard Worker        usqadd          v6.8h,   v16.8h
384*c0909341SAndroid Build Coastguard Worker        usqadd          v7.8h,   v16.8h
        // usqadd saturates at zero on the low side; smin applies the 0x3ff
        // upper limit.
385*c0909341SAndroid Build Coastguard Worker        smin            v0.8h,   v0.8h,   v31.8h
386*c0909341SAndroid Build Coastguard Worker        smin            v1.8h,   v1.8h,   v31.8h
387*c0909341SAndroid Build Coastguard Worker        smin            v2.8h,   v2.8h,   v31.8h
388*c0909341SAndroid Build Coastguard Worker        smin            v3.8h,   v3.8h,   v31.8h
389*c0909341SAndroid Build Coastguard Worker        smin            v4.8h,   v4.8h,   v31.8h
390*c0909341SAndroid Build Coastguard Worker        smin            v5.8h,   v5.8h,   v31.8h
391*c0909341SAndroid Build Coastguard Worker        st1             {v0.8h, v1.8h, v2.8h, v3.8h}, [x0], #64
392*c0909341SAndroid Build Coastguard Worker        smin            v6.8h,   v6.8h,   v31.8h
393*c0909341SAndroid Build Coastguard Worker        smin            v7.8h,   v7.8h,   v31.8h
        // Second half of the row; the adjusted x1 moves x0 to the next row.
394*c0909341SAndroid Build Coastguard Worker        st1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x0], x1
395*c0909341SAndroid Build Coastguard Worker        b.gt            1b
396*c0909341SAndroid Build Coastguard Worker        ret
397*c0909341SAndroid Build Coastguard Workerendfunc
398*c0909341SAndroid Build Coastguard Worker
// iwht4: in-place 4-point butterfly on the 32-bit lanes of v16-v19, built only
// from add/sub and one arithmetic shift (presumably the AV1 inverse
// Walsh-Hadamard step, going by the name and its wht_wht caller).
// With a..d the incoming v16..v19, the instructions compute, in order:
//   v16 = a + b
//   v21 = c - d
//   v20 = (v16 - v21) >> 1
//   v18 = v20 - b
//   v17 = v20 - d
//   v19 = v21 + v18
//   v16 = v16 - v17
// Takes no arguments; clobbers v20 and v21 as temporaries. The arithmetic is
// non-saturating.
399*c0909341SAndroid Build Coastguard Worker.macro iwht4
400*c0909341SAndroid Build Coastguard Worker        add             v16.4s,  v16.4s,  v17.4s
401*c0909341SAndroid Build Coastguard Worker        sub             v21.4s,  v18.4s,  v19.4s
402*c0909341SAndroid Build Coastguard Worker        sub             v20.4s,  v16.4s,  v21.4s
403*c0909341SAndroid Build Coastguard Worker        sshr            v20.4s,  v20.4s,  #1
404*c0909341SAndroid Build Coastguard Worker        sub             v18.4s,  v20.4s,  v17.4s
405*c0909341SAndroid Build Coastguard Worker        sub             v17.4s,  v20.4s,  v19.4s
406*c0909341SAndroid Build Coastguard Worker        add             v19.4s,  v21.4s,  v18.4s
407*c0909341SAndroid Build Coastguard Worker        sub             v16.4s,  v16.4s,  v17.4s
408*c0909341SAndroid Build Coastguard Worker.endm
409*c0909341SAndroid Build Coastguard Worker
// idct_4 r0, r1, r2, r3: 4-point inverse DCT butterfly on four vectors of
// 32-bit lanes, in place.
//   r0-r3 = register names (without arrangement suffix) holding inputs 0..3;
//           they receive outputs 0..3.
//   v0    = coefficient vector; lanes s[0], s[2] and s[3] are used. The
//           callers visible in this chunk load it from idct_coeffs.
// mul_mla / mul_mls are defined outside this chunk; presumably they compute
// d = a*c0 + b*c1 and d = a*c0 - b*c1 respectively -- TODO confirm.
// Writes v2, v3, v4, v6 and v7 as temporaries.
410*c0909341SAndroid Build Coastguard Worker.macro idct_4 r0, r1, r2, r3
411*c0909341SAndroid Build Coastguard Worker        mul_mla         v6,  \r1, \r3, v0.s[3], v0.s[2]
412*c0909341SAndroid Build Coastguard Worker        mul_mla         v2,  \r0, \r2, v0.s[0], v0.s[0]
413*c0909341SAndroid Build Coastguard Worker        mul_mls         v4,  \r1, \r3, v0.s[2], v0.s[3]
414*c0909341SAndroid Build Coastguard Worker        mul_mls         v3,  \r0, \r2, v0.s[0], v0.s[0]
        // Rounding shift right by 12 removes the fixed-point coefficient scale.
        // Note the third product moves from v4 to v7 here.
415*c0909341SAndroid Build Coastguard Worker        srshr           v6.4s,  v6.4s,  #12
416*c0909341SAndroid Build Coastguard Worker        srshr           v2.4s,  v2.4s,  #12
417*c0909341SAndroid Build Coastguard Worker        srshr           v7.4s,  v4.4s,  #12
418*c0909341SAndroid Build Coastguard Worker        srshr           v3.4s,  v3.4s,  #12
        // Final saturating butterflies: out0/out3 = v2 +/- v6,
        // out1/out2 = v3 +/- v7.
419*c0909341SAndroid Build Coastguard Worker        sqadd           \r0\().4s,  v2.4s,   v6.4s
420*c0909341SAndroid Build Coastguard Worker        sqsub           \r3\().4s,  v2.4s,   v6.4s
421*c0909341SAndroid Build Coastguard Worker        sqadd           \r1\().4s,  v3.4s,   v7.4s
422*c0909341SAndroid Build Coastguard Worker        sqsub           \r2\().4s,  v3.4s,   v7.4s
423*c0909341SAndroid Build Coastguard Worker.endm
424*c0909341SAndroid Build Coastguard Worker
// Callable wrapper around idct_4: loads the first four 32-bit words of
// idct_coeffs into v0 and transforms v16-v19 in place.
// Clobbers x16, v0 and the temporaries written by idct_4 (v2-v4, v6, v7).
// Within this chunk it is only referenced by address (def_fn_4x4) and called
// through blr x5.
425*c0909341SAndroid Build Coastguard Workerfunction inv_dct_4s_x4_neon
426*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_CALL_TARGET
427*c0909341SAndroid Build Coastguard Worker        movrel          x16, idct_coeffs
428*c0909341SAndroid Build Coastguard Worker        ld1             {v0.4s}, [x16]
429*c0909341SAndroid Build Coastguard Worker        idct_4          v16, v17, v18, v19
430*c0909341SAndroid Build Coastguard Worker        ret
431*c0909341SAndroid Build Coastguard Workerendfunc
432*c0909341SAndroid Build Coastguard Worker
// iadst_4x4 o0, o1, o2, o3: 4-point inverse ADST on 32-bit lanes.
//   Inputs are always v16-v19 (in0..in3); o0-o3 name the registers that
//   receive outputs 0..3. Passing them in reverse order yields the flipped
//   variant (see inv_flipadst_4s_x4_neon).
// Loads four 32-bit coefficients c0..c3 from iadst4_coeffs into v0 and forms
//   v4 = in0*c0 + in2*c1 + in3*c2
//   v5 = in0*c2 - in2*c0 - in3*c1
//   v7 = in1*c3
//   v3 = in0 - in2 + in3
// then out0 = v4 + v7, out1 = v5 + v7, out2 = v3*c3, out3 = v4 + v5 - v7,
// each followed by a rounding shift right by 12.
// The multiplies and adds are plain (non-saturating) 32-bit operations.
// Clobbers x16, v0, v3, v4, v5 and v7.
433*c0909341SAndroid Build Coastguard Worker.macro iadst_4x4 o0, o1, o2, o3
434*c0909341SAndroid Build Coastguard Worker        movrel          x16, iadst4_coeffs
435*c0909341SAndroid Build Coastguard Worker        ld1             {v0.4s}, [x16]
436*c0909341SAndroid Build Coastguard Worker
437*c0909341SAndroid Build Coastguard Worker        sub             v3.4s,   v16.4s,  v18.4s
438*c0909341SAndroid Build Coastguard Worker        mul             v4.4s,   v16.4s,  v0.s[0]
439*c0909341SAndroid Build Coastguard Worker        mla             v4.4s,   v18.4s,  v0.s[1]
440*c0909341SAndroid Build Coastguard Worker        mla             v4.4s,   v19.4s,  v0.s[2]
441*c0909341SAndroid Build Coastguard Worker        mul             v7.4s,   v17.4s,  v0.s[3]
442*c0909341SAndroid Build Coastguard Worker        add             v3.4s,   v3.4s,   v19.4s
443*c0909341SAndroid Build Coastguard Worker        mul             v5.4s,   v16.4s,  v0.s[2]
444*c0909341SAndroid Build Coastguard Worker        mls             v5.4s,   v18.4s,  v0.s[0]
445*c0909341SAndroid Build Coastguard Worker        mls             v5.4s,   v19.4s,  v0.s[1]
446*c0909341SAndroid Build Coastguard Worker
        // All reads of v16-v19 are finished above, so the outputs may alias
        // the input registers in any order.
447*c0909341SAndroid Build Coastguard Worker        add             \o3\().4s, v4.4s,     v5.4s
448*c0909341SAndroid Build Coastguard Worker        mul             \o2\().4s, v3.4s,     v0.s[3]
449*c0909341SAndroid Build Coastguard Worker        add             \o0\().4s, v4.4s,     v7.4s
450*c0909341SAndroid Build Coastguard Worker        add             \o1\().4s, v5.4s,     v7.4s
451*c0909341SAndroid Build Coastguard Worker        sub             \o3\().4s, \o3\().4s, v7.4s
452*c0909341SAndroid Build Coastguard Worker
453*c0909341SAndroid Build Coastguard Worker        srshr           \o0\().4s, \o0\().4s, #12
454*c0909341SAndroid Build Coastguard Worker        srshr           \o2\().4s, \o2\().4s, #12
455*c0909341SAndroid Build Coastguard Worker        srshr           \o1\().4s, \o1\().4s, #12
456*c0909341SAndroid Build Coastguard Worker        srshr           \o3\().4s, \o3\().4s, #12
457*c0909341SAndroid Build Coastguard Worker.endm
458*c0909341SAndroid Build Coastguard Worker
// Callable 4-point inverse ADST: transforms v16-v19 in place, with outputs
// 0..3 written to v16..v19 in that order.
// Clobbers whatever iadst_4x4 clobbers (x16, v0, v3, v4, v5, v7).
459*c0909341SAndroid Build Coastguard Workerfunction inv_adst_4s_x4_neon
460*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_CALL_TARGET
461*c0909341SAndroid Build Coastguard Worker        iadst_4x4       v16, v17, v18, v19
462*c0909341SAndroid Build Coastguard Worker        ret
463*c0909341SAndroid Build Coastguard Workerendfunc
464*c0909341SAndroid Build Coastguard Worker
// Callable flipped 4-point inverse ADST: same computation as
// inv_adst_4s_x4_neon, but outputs 0..3 are written to v19..v16, i.e. in
// reversed register order.
// Clobbers whatever iadst_4x4 clobbers (x16, v0, v3, v4, v5, v7).
465*c0909341SAndroid Build Coastguard Workerfunction inv_flipadst_4s_x4_neon
466*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_CALL_TARGET
467*c0909341SAndroid Build Coastguard Worker        iadst_4x4       v19, v18, v17, v16
468*c0909341SAndroid Build Coastguard Worker        ret
469*c0909341SAndroid Build Coastguard Workerendfunc
470*c0909341SAndroid Build Coastguard Worker
// Callable 4-point identity transform: scales every 32-bit lane of v16-v19 in
// place by 5793/4096 (about 1.4143), computed as x + x*(5793-4096)/4096 with
// saturating arithmetic.
// Clobbers w16, v0 and v4-v7.
471*c0909341SAndroid Build Coastguard Workerfunction inv_identity_4s_x4_neon
472*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_CALL_TARGET
        // The constant is pre-scaled by 8 << 16 because sqrdmulh on 32-bit
        // lanes returns (2*a*b + rounding) >> 32; the net factor is 1697/4096.
473*c0909341SAndroid Build Coastguard Worker        movz            w16, #(5793-4096)*8, lsl #16
474*c0909341SAndroid Build Coastguard Worker        dup             v0.2s,   w16
475*c0909341SAndroid Build Coastguard Worker        sqrdmulh        v4.4s,   v16.4s,  v0.s[0]
476*c0909341SAndroid Build Coastguard Worker        sqrdmulh        v5.4s,   v17.4s,  v0.s[0]
477*c0909341SAndroid Build Coastguard Worker        sqrdmulh        v6.4s,   v18.4s,  v0.s[0]
478*c0909341SAndroid Build Coastguard Worker        sqrdmulh        v7.4s,   v19.4s,  v0.s[0]
        // Add the fractional part back onto the original value.
479*c0909341SAndroid Build Coastguard Worker        sqadd           v16.4s,  v16.4s,  v4.4s
480*c0909341SAndroid Build Coastguard Worker        sqadd           v17.4s,  v17.4s,  v5.4s
481*c0909341SAndroid Build Coastguard Worker        sqadd           v18.4s,  v18.4s,  v6.4s
482*c0909341SAndroid Build Coastguard Worker        sqadd           v19.4s,  v19.4s,  v7.4s
483*c0909341SAndroid Build Coastguard Worker        ret
484*c0909341SAndroid Build Coastguard Workerendfunc
485*c0909341SAndroid Build Coastguard Worker
// Exported 4x4 WHT+WHT inverse transform and add.
// Register use as visible in this function and the shared tail it jumps to:
//   x0 = destination pointer (four rows of 8 bytes each are read and rewritten)
//   x1 = byte offset between destination rows
//   x2 = pointer to sixteen 32-bit coefficients; the buffer is zeroed here
//   w4 = value broadcast as the upper clamp in L(itx_4x4_end)
//        (presumably bitdepth_max -- TODO confirm against the C prototype)
// The return address is parked in x15 because the function ends by branching
// into L(itx_4x4_end) inside inv_txfm_add_4x4_neon, which returns via x15.
486*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_wht_wht_4x4_16bpc_neon, export=1
487*c0909341SAndroid Build Coastguard Worker        mov             x15, x30
488*c0909341SAndroid Build Coastguard Worker        movi            v30.4s,  #0
489*c0909341SAndroid Build Coastguard Worker        movi            v31.4s,  #0
        // Load all 64 bytes of coefficients, then clear the first 32 bytes.
490*c0909341SAndroid Build Coastguard Worker        ld1             {v16.4s,v17.4s,v18.4s,v19.4s}, [x2]
491*c0909341SAndroid Build Coastguard Worker        st1             {v30.4s, v31.4s}, [x2], #32
492*c0909341SAndroid Build Coastguard Worker
        // Arithmetic pre-shift of the input by 2 before the first pass.
493*c0909341SAndroid Build Coastguard Worker        sshr            v16.4s,  v16.4s,  #2
494*c0909341SAndroid Build Coastguard Worker        sshr            v17.4s,  v17.4s,  #2
495*c0909341SAndroid Build Coastguard Worker        sshr            v18.4s,  v18.4s,  #2
496*c0909341SAndroid Build Coastguard Worker        sshr            v19.4s,  v19.4s,  #2
497*c0909341SAndroid Build Coastguard Worker
498*c0909341SAndroid Build Coastguard Worker        iwht4
499*c0909341SAndroid Build Coastguard Worker
        // Clear the remaining 32 bytes of the coefficient buffer.
500*c0909341SAndroid Build Coastguard Worker        st1             {v30.4s, v31.4s}, [x2], #32
        // transpose_4x4s is defined outside this chunk; v20-v23 are passed as
        // scratch registers.
501*c0909341SAndroid Build Coastguard Worker        transpose_4x4s  v16, v17, v18, v19, v20, v21, v22, v23
502*c0909341SAndroid Build Coastguard Worker
503*c0909341SAndroid Build Coastguard Worker        iwht4
504*c0909341SAndroid Build Coastguard Worker
        // Load the four destination rows into v0/v1 while narrowing the
        // residual to 16 bits with saturation: rows 0-1 end up in v16,
        // rows 2-3 in v18, which is the layout L(itx_4x4_end) expects.
505*c0909341SAndroid Build Coastguard Worker        ld1             {v0.d}[0], [x0], x1
506*c0909341SAndroid Build Coastguard Worker        sqxtn           v16.4h,  v16.4s
507*c0909341SAndroid Build Coastguard Worker        ld1             {v0.d}[1], [x0], x1
508*c0909341SAndroid Build Coastguard Worker        sqxtn2          v16.8h,  v17.4s
509*c0909341SAndroid Build Coastguard Worker        ld1             {v1.d}[0], [x0], x1
510*c0909341SAndroid Build Coastguard Worker        sqxtn           v18.4h,  v18.4s
511*c0909341SAndroid Build Coastguard Worker        ld1             {v1.d}[1], [x0], x1
512*c0909341SAndroid Build Coastguard Worker        sqxtn2          v18.8h,  v19.4s
513*c0909341SAndroid Build Coastguard Worker
        // Shared add/clamp/store tail; unlike the generic path, no final
        // rounding shift is applied before it.
514*c0909341SAndroid Build Coastguard Worker        b               L(itx_4x4_end)
515*c0909341SAndroid Build Coastguard Workerendfunc
516*c0909341SAndroid Build Coastguard Worker
517*c0909341SAndroid Build Coastguard Worker// HBD inv_txfm_add_4x4_neon deviates from the common pattern with registers
518*c0909341SAndroid Build Coastguard Worker// x0-x4  external parameters
519*c0909341SAndroid Build Coastguard Worker// x5     function pointer to first transform
520*c0909341SAndroid Build Coastguard Worker// x6     function pointer to second transform
// Further register use visible in the code below:
//   x0  = destination pointer, x1 = byte offset between destination rows
//   x2  = pointer to sixteen 32-bit coefficients (zeroed here)
//   w4  = value broadcast as the upper clamp (presumably bitdepth_max)
//   x15 = return address; every entry point in this chunk saves x30 there
//         before branching here, which leaves x30 free for the blr calls.
// The first transform works on 32-bit lanes in v16-v19; its result is narrowed
// to 16 bits, transposed, and passed to the second transform in v16-v19.
521*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_4x4_neon
522*c0909341SAndroid Build Coastguard Worker        movi            v30.4s,  #0
523*c0909341SAndroid Build Coastguard Worker        movi            v31.4s,  #0
        // Load all 64 bytes of coefficients and clear the first 32 bytes.
524*c0909341SAndroid Build Coastguard Worker        ld1             {v16.4s,v17.4s,v18.4s,v19.4s}, [x2]
525*c0909341SAndroid Build Coastguard Worker        st1             {v30.4s, v31.4s}, [x2], #32
526*c0909341SAndroid Build Coastguard Worker
527*c0909341SAndroid Build Coastguard Worker        blr             x5
528*c0909341SAndroid Build Coastguard Worker
        // Clear the remaining 32 bytes; this relies on the first transform
        // leaving v30/v31 zero (true for the 4-point transforms in this chunk).
529*c0909341SAndroid Build Coastguard Worker        st1             {v30.4s, v31.4s}, [x2], #32
530*c0909341SAndroid Build Coastguard Worker        sqxtn           v16.4h,  v16.4s
531*c0909341SAndroid Build Coastguard Worker        sqxtn           v17.4h,  v17.4s
532*c0909341SAndroid Build Coastguard Worker        sqxtn           v18.4h,  v18.4s
533*c0909341SAndroid Build Coastguard Worker        sqxtn           v19.4h,  v19.4s
        // transpose_4x4h is defined outside this chunk; v20-v23 are scratch.
534*c0909341SAndroid Build Coastguard Worker        transpose_4x4h  v16, v17, v18, v19, v20, v21, v22, v23
535*c0909341SAndroid Build Coastguard Worker
536*c0909341SAndroid Build Coastguard Worker        blr             x6
537*c0909341SAndroid Build Coastguard Worker
        // Load the four destination rows (8 bytes each) into v0/v1 and pack
        // the residual rows pairwise: rows 0-1 in v16, rows 2-3 in v18.
538*c0909341SAndroid Build Coastguard Worker        ld1             {v0.d}[0], [x0], x1
539*c0909341SAndroid Build Coastguard Worker        ld1             {v0.d}[1], [x0], x1
540*c0909341SAndroid Build Coastguard Worker        ins             v16.d[1], v17.d[0]
541*c0909341SAndroid Build Coastguard Worker        ins             v18.d[1], v19.d[0]
542*c0909341SAndroid Build Coastguard Worker        ld1             {v1.d}[0], [x0], x1
543*c0909341SAndroid Build Coastguard Worker        ld1             {v1.d}[1], [x0], x1
        // Final rounding shift right by 4 of the residual.
544*c0909341SAndroid Build Coastguard Worker        srshr           v16.8h,  v16.8h,  #4
545*c0909341SAndroid Build Coastguard Worker        srshr           v18.8h,  v18.8h,  #4
546*c0909341SAndroid Build Coastguard Worker
        // Shared tail, also entered from the WHT function and from the
        // dct_dct DC-only shortcut in def_fn_4x4. On entry: v0/v1 hold the
        // destination rows, v16/v18 the 16-bit residual, x0 points four rows
        // past the start, and x15 holds the return address.
547*c0909341SAndroid Build Coastguard WorkerL(itx_4x4_end):
548*c0909341SAndroid Build Coastguard Worker        dup             v31.8h,  w4
        // Rewind x0 by four rows so the stores overwrite the loaded rows.
549*c0909341SAndroid Build Coastguard Worker        sub             x0,  x0,  x1, lsl #2
        // usqadd saturates the unsigned sum (lower bound 0); smin applies the
        // upper bound taken from w4.
550*c0909341SAndroid Build Coastguard Worker        usqadd          v0.8h,   v16.8h
551*c0909341SAndroid Build Coastguard Worker        usqadd          v1.8h,   v18.8h
552*c0909341SAndroid Build Coastguard Worker        smin            v0.8h,   v0.8h,   v31.8h
553*c0909341SAndroid Build Coastguard Worker        st1             {v0.d}[0], [x0], x1
554*c0909341SAndroid Build Coastguard Worker        smin            v1.8h,   v1.8h,   v31.8h
555*c0909341SAndroid Build Coastguard Worker        st1             {v0.d}[1], [x0], x1
556*c0909341SAndroid Build Coastguard Worker        st1             {v1.d}[0], [x0], x1
557*c0909341SAndroid Build Coastguard Worker        st1             {v1.d}[1], [x0], x1
558*c0909341SAndroid Build Coastguard Worker
559*c0909341SAndroid Build Coastguard Worker        ret             x15
560*c0909341SAndroid Build Coastguard Workerendfunc
561*c0909341SAndroid Build Coastguard Worker
// def_fn_4x4 txfm1, txfm2: emits the exported entry point
// inv_txfm_add_<txfm1>_<txfm2>_4x4_16bpc_neon.
// The entry point saves the return address in x15, puts the address of the
// local 32-bit first-pass transform in x5 and of the external 16-bit
// second-pass transform inv_<txfm2>_4h_x4_neon in x6, and tail-branches to
// inv_txfm_add_4x4_neon.
// For dct_dct only, a shortcut handles w3 == 0 (presumably eob == 0, i.e. only
// the DC coefficient is set -- TODO confirm) without running the transforms.
562*c0909341SAndroid Build Coastguard Worker.macro def_fn_4x4 txfm1, txfm2
563*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_\txfm1\()_\txfm2\()_4x4_16bpc_neon, export=1
564*c0909341SAndroid Build Coastguard Worker        mov             x15, x30
565*c0909341SAndroid Build Coastguard Worker
566*c0909341SAndroid Build Coastguard Worker.ifc \txfm1\()_\txfm2, dct_dct
567*c0909341SAndroid Build Coastguard Worker        cbnz            w3,  1f
        // 2896*8 << 16 with the 32-bit sqrdmulh scales by 2896/4096.
568*c0909341SAndroid Build Coastguard Worker        movz            w16, #2896*8, lsl #16
        // Broadcast the first coefficient to all lanes, then clear it in
        // memory.
569*c0909341SAndroid Build Coastguard Worker        ld1r            {v16.4s}, [x2]
570*c0909341SAndroid Build Coastguard Worker        dup             v4.2s,   w16
571*c0909341SAndroid Build Coastguard Worker        str             wzr, [x2]
572*c0909341SAndroid Build Coastguard Worker        sqrdmulh        v16.4s,  v16.4s,  v4.s[0]
        // Destination row loads are interleaved with the DC arithmetic.
573*c0909341SAndroid Build Coastguard Worker        ld1             {v0.d}[0], [x0], x1
574*c0909341SAndroid Build Coastguard Worker        sqxtn           v20.4h,  v16.4s
575*c0909341SAndroid Build Coastguard Worker        sqxtn2          v20.8h,  v16.4s
576*c0909341SAndroid Build Coastguard Worker        ld1             {v0.d}[1], [x0], x1
        // v4.h[1] is the upper half of the constant (2896*8); the 16-bit
        // sqrdmulh scales by 2896/4096 a second time.
577*c0909341SAndroid Build Coastguard Worker        sqrdmulh        v20.8h,  v20.8h,  v4.h[1]
578*c0909341SAndroid Build Coastguard Worker        ld1             {v1.d}[0], [x0], x1
        // Same final rounding shift as the generic path; v16 and v18 both get
        // the identical DC residual.
579*c0909341SAndroid Build Coastguard Worker        srshr           v16.8h,  v20.8h,  #4
580*c0909341SAndroid Build Coastguard Worker        ld1             {v1.d}[1], [x0], x1
581*c0909341SAndroid Build Coastguard Worker        srshr           v18.8h,  v20.8h,  #4
        // NOTE(review): v30 is not read anywhere between here and the ret in
        // L(itx_4x4_end); this zeroing looks vestigial -- confirm before
        // removing.
582*c0909341SAndroid Build Coastguard Worker        movi            v30.8h,  #0
583*c0909341SAndroid Build Coastguard Worker        b               L(itx_4x4_end)
584*c0909341SAndroid Build Coastguard Worker1:
585*c0909341SAndroid Build Coastguard Worker.endif
586*c0909341SAndroid Build Coastguard Worker        adr             x5,  inv_\txfm1\()_4s_x4_neon
587*c0909341SAndroid Build Coastguard Worker        movrel          x6,  X(inv_\txfm2\()_4h_x4_neon)
588*c0909341SAndroid Build Coastguard Worker        b               inv_txfm_add_4x4_neon
589*c0909341SAndroid Build Coastguard Workerendfunc
590*c0909341SAndroid Build Coastguard Worker.endm
591*c0909341SAndroid Build Coastguard Worker
// Instantiate the exported 4x4 entry points: every pairing of dct, adst,
// flipadst and identity as first and second transform (16 functions).
592*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 dct, dct
593*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 identity, identity
594*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 dct, adst
595*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 dct, flipadst
596*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 dct, identity
597*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 adst, dct
598*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 adst, adst
599*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 adst, flipadst
600*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 flipadst, dct
601*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 flipadst, adst
602*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 flipadst, flipadst
603*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 identity, dct
604*c0909341SAndroid Build Coastguard Worker
605*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 adst, identity
606*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 flipadst, identity
607*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 identity, adst
608*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 identity, flipadst
609*c0909341SAndroid Build Coastguard Worker
// idct_8 r0..r7: 8-point inverse DCT on eight vectors of 32-bit lanes, in
// place.
//   r0-r7 = register names holding inputs 0..7; they receive outputs 0..7.
//   v0/v1 = coefficients; the caller visible in this chunk loads them from
//           idct_coeffs (v0 feeds idct_4 and the t5/t6 rotation, v1 the
//           odd-half rotations).
// The even inputs (r0, r2, r4, r6) go through idct_4; the odd inputs are
// rotated and combined into t4..t7, then merged with the even half.
// Intermediates are clamped to [0xfffe0000, 0x1ffff] as signed 32-bit values.
// smin_4s / smax_4s and mul_mla / mul_mls are defined outside this chunk.
// Writes v2-v7 as temporaries.
610*c0909341SAndroid Build Coastguard Worker.macro idct_8 r0, r1, r2, r3, r4, r5, r6, r7
611*c0909341SAndroid Build Coastguard Worker        idct_4          \r0, \r2, \r4, \r6
612*c0909341SAndroid Build Coastguard Worker
        // The clamp constants are materialised after idct_4, which itself
        // writes v4.
613*c0909341SAndroid Build Coastguard Worker        movi            v5.4s,  #1, msl #16 // row_clip_max = ~(~bdmax << 7), 0x1ffff
614*c0909341SAndroid Build Coastguard Worker        mvni            v4.4s,  #1, msl #16 // row_clip_min = (~bdmax << 7), 0xfffe0000
615*c0909341SAndroid Build Coastguard Worker.irp r, \r0, \r2, \r4, \r6
616*c0909341SAndroid Build Coastguard Worker        smin_4s         \r, \r, v5
617*c0909341SAndroid Build Coastguard Worker.endr
618*c0909341SAndroid Build Coastguard Worker.irp r, \r0, \r2, \r4, \r6
619*c0909341SAndroid Build Coastguard Worker        smax_4s         \r, \r, v4
620*c0909341SAndroid Build Coastguard Worker.endr
621*c0909341SAndroid Build Coastguard Worker
622*c0909341SAndroid Build Coastguard Worker        mul_mls         v2,  \r1, \r7, v1.s[0], v1.s[1]  // -> t4a
623*c0909341SAndroid Build Coastguard Worker        mul_mla         v3,  \r1, \r7, v1.s[1], v1.s[0]  // -> t7a
624*c0909341SAndroid Build Coastguard Worker        mul_mls         v6,  \r5, \r3, v1.s[2], v1.s[3]  // -> t5a
625*c0909341SAndroid Build Coastguard Worker        mul_mla         v7,  \r5, \r3, v1.s[3], v1.s[2]  // -> t6a
626*c0909341SAndroid Build Coastguard Worker        srshr           \r1\().4s, v2.4s,  #12           // t4a
627*c0909341SAndroid Build Coastguard Worker        srshr           \r7\().4s, v3.4s,  #12           // t7a
628*c0909341SAndroid Build Coastguard Worker        srshr           \r3\().4s, v6.4s,  #12           // t5a
629*c0909341SAndroid Build Coastguard Worker        srshr           \r5\().4s, v7.4s,  #12           // t6a
630*c0909341SAndroid Build Coastguard Worker
631*c0909341SAndroid Build Coastguard Worker        sqadd           v2.4s,     \r1\().4s,  \r3\().4s // t4
632*c0909341SAndroid Build Coastguard Worker        sqsub           \r1\().4s, \r1\().4s,  \r3\().4s // t5a
633*c0909341SAndroid Build Coastguard Worker        sqadd           v3.4s,     \r7\().4s,  \r5\().4s // t7
634*c0909341SAndroid Build Coastguard Worker        sqsub           \r3\().4s, \r7\().4s,  \r5\().4s // t6a
635*c0909341SAndroid Build Coastguard Worker
636*c0909341SAndroid Build Coastguard Worker.irp r, v2, \r1, v3, \r3
637*c0909341SAndroid Build Coastguard Worker        smin_4s         \r, \r, v5
638*c0909341SAndroid Build Coastguard Worker.endr
639*c0909341SAndroid Build Coastguard Worker.irp r, v2, \r1, v3, \r3
640*c0909341SAndroid Build Coastguard Worker        smax_4s         \r, \r, v4
641*c0909341SAndroid Build Coastguard Worker.endr
642*c0909341SAndroid Build Coastguard Worker
643*c0909341SAndroid Build Coastguard Worker        mul_mls         v7,  \r3, \r1, v0.s[0], v0.s[0]  // -> t5
644*c0909341SAndroid Build Coastguard Worker        mul_mla         v6,  \r3, \r1, v0.s[0], v0.s[0]  // -> t6
645*c0909341SAndroid Build Coastguard Worker        srshr           v7.4s,  v7.4s,  #12              // t5
646*c0909341SAndroid Build Coastguard Worker        srshr           v6.4s,  v6.4s,  #12              // t6
647*c0909341SAndroid Build Coastguard Worker
        // Merge even and odd halves. out6 is built in v6 first because r6
        // is still needed as an input for out3/out4, and is copied into place
        // at the end.
648*c0909341SAndroid Build Coastguard Worker        sqsub           \r7\().4s,  \r0\().4s,  v3.4s    // out7
649*c0909341SAndroid Build Coastguard Worker        sqadd           \r0\().4s,  \r0\().4s,  v3.4s    // out0
650*c0909341SAndroid Build Coastguard Worker        sqadd           \r1\().4s,  \r2\().4s,  v6.4s    // out1
651*c0909341SAndroid Build Coastguard Worker        sqsub           v6.4s,      \r2\().4s,  v6.4s    // out6
652*c0909341SAndroid Build Coastguard Worker        sqadd           \r2\().4s,  \r4\().4s,  v7.4s    // out2
653*c0909341SAndroid Build Coastguard Worker        sqsub           \r5\().4s,  \r4\().4s,  v7.4s    // out5
654*c0909341SAndroid Build Coastguard Worker        sqadd           \r3\().4s,  \r6\().4s,  v2.4s    // out3
655*c0909341SAndroid Build Coastguard Worker        sqsub           \r4\().4s,  \r6\().4s,  v2.4s    // out4
656*c0909341SAndroid Build Coastguard Worker        mov             \r6\().16b, v6.16b               // out6
657*c0909341SAndroid Build Coastguard Worker.endm
658*c0909341SAndroid Build Coastguard Worker
// Callable 8-point inverse DCT: loads the first eight 32-bit words of
// idct_coeffs into v0/v1 and transforms v16-v23 in place via idct_8.
// Clobbers x16, v0, v1 and the temporaries written by idct_8 (v2-v7).
659*c0909341SAndroid Build Coastguard Workerfunction inv_dct_4s_x8_neon
660*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_CALL_TARGET
661*c0909341SAndroid Build Coastguard Worker        movrel          x16, idct_coeffs
662*c0909341SAndroid Build Coastguard Worker        ld1             {v0.4s, v1.4s}, [x16]
663*c0909341SAndroid Build Coastguard Worker        idct_8          v16, v17, v18, v19, v20, v21, v22, v23
664*c0909341SAndroid Build Coastguard Worker        ret
665*c0909341SAndroid Build Coastguard Workerendfunc
666*c0909341SAndroid Build Coastguard Worker
// iadst_8 o0..o7: 8-point inverse ADST on 32-bit lanes.
//   Inputs are always v16-v23 (in0..in7); o0-o7 name the registers that
//   receive outputs 0..7. Passing them in reverse order yields the flipped
//   variant (see inv_flipadst_4s_x8_neon).
// Coefficients come from iadst8_coeffs: the first eight words go to v0/v1 for
// the first stage, then v0 is reloaded with the following four words for the
// later stages.
// Intermediates are clamped to [0xfffe0000, 0x1ffff] as signed 32-bit values.
// Registers are recycled heavily, so the statement order is significant.
// mul_mla / mul_mls and smin_4s / smax_4s are defined outside this chunk.
// Clobbers x16 and v0-v7 in addition to v16-v23.
667*c0909341SAndroid Build Coastguard Worker.macro iadst_8 o0, o1, o2, o3, o4, o5, o6, o7
668*c0909341SAndroid Build Coastguard Worker        movrel          x16, iadst8_coeffs
        // Post-increment leaves x16 pointing at the second coefficient group.
669*c0909341SAndroid Build Coastguard Worker        ld1             {v0.4s, v1.4s}, [x16], #32
670*c0909341SAndroid Build Coastguard Worker
        // First stage: rotate the input pairs (in7,in0), (in5,in2), (in3,in4)
        // and (in1,in6); results overwrite the input registers as t0a..t7a.
671*c0909341SAndroid Build Coastguard Worker        mul_mla         v2,  v23, v16, v0.s[0], v0.s[1]
672*c0909341SAndroid Build Coastguard Worker        mul_mls         v4,  v23, v16, v0.s[1], v0.s[0]
673*c0909341SAndroid Build Coastguard Worker        mul_mla         v6,  v21, v18, v0.s[2], v0.s[3]
674*c0909341SAndroid Build Coastguard Worker        srshr           v16.4s, v2.4s,  #12  // t0a
675*c0909341SAndroid Build Coastguard Worker        srshr           v23.4s, v4.4s,  #12  // t1a
676*c0909341SAndroid Build Coastguard Worker        mul_mls         v2,  v21, v18, v0.s[3], v0.s[2]
677*c0909341SAndroid Build Coastguard Worker        mul_mla         v4,  v19, v20, v1.s[0], v1.s[1]
678*c0909341SAndroid Build Coastguard Worker        srshr           v18.4s, v6.4s,  #12  // t2a
679*c0909341SAndroid Build Coastguard Worker        srshr           v21.4s, v2.4s,  #12  // t3a
680*c0909341SAndroid Build Coastguard Worker        mul_mls         v6,  v19, v20, v1.s[1], v1.s[0]
681*c0909341SAndroid Build Coastguard Worker        mul_mla         v2,  v17, v22, v1.s[2], v1.s[3]
682*c0909341SAndroid Build Coastguard Worker        srshr           v20.4s, v4.4s,  #12  // t4a
683*c0909341SAndroid Build Coastguard Worker        srshr           v19.4s, v6.4s,  #12  // t5a
684*c0909341SAndroid Build Coastguard Worker        mul_mls         v4,  v17, v22, v1.s[3], v1.s[2]
685*c0909341SAndroid Build Coastguard Worker        srshr           v22.4s, v2.4s,  #12  // t6a
686*c0909341SAndroid Build Coastguard Worker        srshr           v17.4s, v4.4s,  #12  // t7a
687*c0909341SAndroid Build Coastguard Worker
        // Second coefficient group; v1 is free from here on and is reused for
        // the upper clamp.
688*c0909341SAndroid Build Coastguard Worker        ld1             {v0.4s}, [x16]
689*c0909341SAndroid Build Coastguard Worker
690*c0909341SAndroid Build Coastguard Worker        movi            v1.4s,   #1, msl #16     // row_clip_max = ~(~bdmax << 7), 0x1ffff
691*c0909341SAndroid Build Coastguard Worker
692*c0909341SAndroid Build Coastguard Worker        sqadd           v2.4s,   v16.4s,  v20.4s // t0
693*c0909341SAndroid Build Coastguard Worker        sqsub           v3.4s,   v16.4s,  v20.4s // t4
        // v20 (t4a) has just been consumed, so it can hold the lower clamp.
694*c0909341SAndroid Build Coastguard Worker        mvni            v20.4s,  #1, msl #16     // row_clip_min = (~bdmax << 7), 0xfffe0000
695*c0909341SAndroid Build Coastguard Worker        sqadd           v4.4s,   v23.4s,  v19.4s // t1
696*c0909341SAndroid Build Coastguard Worker        sqsub           v5.4s,   v23.4s,  v19.4s // t5
697*c0909341SAndroid Build Coastguard Worker        sqadd           v6.4s,   v18.4s,  v22.4s // t2
698*c0909341SAndroid Build Coastguard Worker        sqsub           v7.4s,   v18.4s,  v22.4s // t6
699*c0909341SAndroid Build Coastguard Worker        sqadd           v18.4s,  v21.4s,  v17.4s // t3
700*c0909341SAndroid Build Coastguard Worker        sqsub           v19.4s,  v21.4s,  v17.4s // t7
701*c0909341SAndroid Build Coastguard Worker
702*c0909341SAndroid Build Coastguard Worker.irp r, v2, v3, v4, v5, v6, v7, v18, v19
703*c0909341SAndroid Build Coastguard Worker        smin_4s         \r, \r, v1
704*c0909341SAndroid Build Coastguard Worker.endr
705*c0909341SAndroid Build Coastguard Worker.irp r, v2, v3, v4, v5, v6, v7, v18, v19
706*c0909341SAndroid Build Coastguard Worker        smax_4s         \r, \r, v20
707*c0909341SAndroid Build Coastguard Worker.endr
708*c0909341SAndroid Build Coastguard Worker
        // The second mul_mls below uses v20 as its destination, which destroys
        // the lower clamp; it is re-materialised in v18 further down.
709*c0909341SAndroid Build Coastguard Worker        mul_mla         v16, v3,  v5,  v0.s[3], v0.s[2]
710*c0909341SAndroid Build Coastguard Worker        mul_mls         v20, v3,  v5,  v0.s[2], v0.s[3]
711*c0909341SAndroid Build Coastguard Worker        mul_mls         v22, v19, v7,  v0.s[3], v0.s[2]
712*c0909341SAndroid Build Coastguard Worker
713*c0909341SAndroid Build Coastguard Worker        srshr           v3.4s,  v16.4s, #12  // t4a
714*c0909341SAndroid Build Coastguard Worker        srshr           v5.4s,  v20.4s, #12  // t5a
715*c0909341SAndroid Build Coastguard Worker
716*c0909341SAndroid Build Coastguard Worker        mul_mla         v16, v19, v7,  v0.s[2], v0.s[3]
717*c0909341SAndroid Build Coastguard Worker
718*c0909341SAndroid Build Coastguard Worker        srshr           v7.4s,  v22.4s, #12  // t6a
719*c0909341SAndroid Build Coastguard Worker        srshr           v19.4s, v16.4s, #12  // t7a
720*c0909341SAndroid Build Coastguard Worker
721*c0909341SAndroid Build Coastguard Worker        sqadd           \o0\().4s, v2.4s, v6.4s  // out0
722*c0909341SAndroid Build Coastguard Worker        sqsub           v2.4s,     v2.4s, v6.4s  // t2
723*c0909341SAndroid Build Coastguard Worker        sqadd           \o7\().4s, v4.4s, v18.4s // out7
724*c0909341SAndroid Build Coastguard Worker        sqsub           v4.4s,     v4.4s, v18.4s // t3
725*c0909341SAndroid Build Coastguard Worker
        // v18 (t3) has been consumed above; reuse it for the lower clamp.
726*c0909341SAndroid Build Coastguard Worker        mvni            v18.4s,  #1, msl #16     // row_clip_min = (~bdmax << 7), 0xfffe0000
727*c0909341SAndroid Build Coastguard Worker
728*c0909341SAndroid Build Coastguard Worker        sqadd           \o1\().4s, v3.4s, v7.4s  // out1
729*c0909341SAndroid Build Coastguard Worker        sqsub           v3.4s,     v3.4s, v7.4s  // t6
730*c0909341SAndroid Build Coastguard Worker        sqadd           \o6\().4s, v5.4s, v19.4s // out6
731*c0909341SAndroid Build Coastguard Worker        sqsub           v5.4s,     v5.4s, v19.4s // t7
732*c0909341SAndroid Build Coastguard Worker
733*c0909341SAndroid Build Coastguard Worker        // Not clipping the output registers, as they will be downshifted and
734*c0909341SAndroid Build Coastguard Worker        // narrowed afterwards anyway.
735*c0909341SAndroid Build Coastguard Worker.irp r, v2, v4, v3, v5
736*c0909341SAndroid Build Coastguard Worker        smin_4s         \r, \r, v1
737*c0909341SAndroid Build Coastguard Worker.endr
738*c0909341SAndroid Build Coastguard Worker.irp r, v2, v4, v3, v5
739*c0909341SAndroid Build Coastguard Worker        smax_4s         \r, \r, v18
740*c0909341SAndroid Build Coastguard Worker.endr
741*c0909341SAndroid Build Coastguard Worker
742*c0909341SAndroid Build Coastguard Worker        sqneg           \o7\().4s, \o7\().4s     // out7
743*c0909341SAndroid Build Coastguard Worker        sqneg           \o1\().4s, \o1\().4s     // out1
744*c0909341SAndroid Build Coastguard Worker
        // Last stage goes through temporaries (v18, v6, v20, then v2/v3)
        // because the o2..o5 registers differ between the normal and flipped
        // instantiations, as the existing comments note.
745*c0909341SAndroid Build Coastguard Worker        mul_mla         v18, v2,  v4,  v0.s[0], v0.s[0] // -> out3 (v19 or v20)
746*c0909341SAndroid Build Coastguard Worker        mul_mls         v6,  v2,  v4,  v0.s[0], v0.s[0] // -> out4 (v20 or v19)
747*c0909341SAndroid Build Coastguard Worker        mul_mls         v20, v3,  v5,  v0.s[0], v0.s[0] // -> out5 (v21 or v18)
748*c0909341SAndroid Build Coastguard Worker        srshr           v2.4s,  v18.4s, #12 // out3
749*c0909341SAndroid Build Coastguard Worker        mul_mla         v18, v3,  v5,  v0.s[0], v0.s[0] // -> out2 (v18 or v21)
750*c0909341SAndroid Build Coastguard Worker        srshr           v3.4s,  v20.4s, #12 // out5
751*c0909341SAndroid Build Coastguard Worker        srshr           \o2\().4s, v18.4s, #12 // out2 (v18 or v21)
752*c0909341SAndroid Build Coastguard Worker        srshr           \o4\().4s, v6.4s,  #12 // out4 (v20 or v19)
753*c0909341SAndroid Build Coastguard Worker
754*c0909341SAndroid Build Coastguard Worker        sqneg           \o3\().4s, v2.4s     // out3
755*c0909341SAndroid Build Coastguard Worker        sqneg           \o5\().4s, v3.4s     // out5
756*c0909341SAndroid Build Coastguard Worker.endm
757*c0909341SAndroid Build Coastguard Worker
// Callable wrapper around the iadst_8 macro, outputs in ascending order:
// the macro's output operands o0..o7 are bound to v16..v23.
// The def_fn_8x8 / def_fn_48 entry points take this routine's address into x4
// ("adr x4, inv_\txfm1\()_4s_x8_neon"); the shared bodies call it via "blr x4".
function inv_adst_4s_x8_neon
        AARCH64_VALID_CALL_TARGET       // NOTE(review): presumably the indirect-call landing pad - confirm
        iadst_8         v16, v17, v18, v19, v20, v21, v22, v23
        ret
endfunc
763*c0909341SAndroid Build Coastguard Worker
// Flipped variant of inv_adst_4s_x8_neon: the same iadst_8 expansion, but the
// output operands o0..o7 are bound to v23..v16, i.e. the results land in
// reverse register order.
function inv_flipadst_4s_x8_neon
        AARCH64_VALID_CALL_TARGET       // NOTE(review): presumably the indirect-call landing pad - confirm
        iadst_8         v23, v22, v21, v20, v19, v18, v17, v16
        ret
endfunc
769*c0909341SAndroid Build Coastguard Worker
// Identity "transform" for the 8-point case on 32-bit lanes:
// every lane of v16..v23 is doubled in place with signed saturation.
// No other register is touched.
function inv_identity_4s_x8_neon
        AARCH64_VALID_CALL_TARGET
.irp r, v16, v17, v18, v19, v20, v21, v22, v23
        sqshl           \r\().4s,  \r\().4s,  #1
.endr
        ret
endfunc
782*c0909341SAndroid Build Coastguard Worker
// Shared body of the 8x8 inverse transform + add. Reached by a plain branch
// from the def_fn_8x8 entry points, which set up x4, x5, w13 and x15.
//
// Register contract, as used below:
//   x2       coefficient buffer; each 16-byte vector read from it is replaced
//            by zeros right after the load
//   w3, w13  when w3 < w13 (signed) the first pass over the vectors at byte
//            offset 16 of each 32-byte line is skipped and treated as zero
//   x4       first-pass routine, called with its data in v16-v23 (.4s)
//   x5       second-pass routine, called with its data in v16-v23 (.8h)
//   x0, x7   passed on to load_add_store_8x8
//            (NOTE(review): presumably destination pointer and stride - confirm)
//   x15      return address; the function leaves through "ret x15"
// Scratch here: x6, x11, v2-v5, v24-v27, v31, plus whatever x4/x5 clobber.
function inv_txfm_add_8x8_neon
        movi            v31.4s,  #0              // zeros written back over consumed coefficients
        mov             x11, #32                 // byte step between consecutive loads

        cmp             w3,  w13
        b.lt            1f

        // First pass, part A: the vectors starting 16 bytes into the buffer.
        add             x6,  x2,  #16
.irp r, v16, v17, v18, v19, v20, v21, v22, v23
        ld1             {\r\().4s}, [x6]
        st1             {v31.4s},   [x6], x11
.endr

        blr             x4

        // Saturating rounding shift right by 1 while narrowing to 16 bit.
        // v16-v19 become the low halves of v24-v27, v20-v23 the high halves.
        sqrshrn         v24.4h,  v16.4s,  #1
        sqrshrn         v25.4h,  v17.4s,  #1
        sqrshrn         v26.4h,  v18.4s,  #1
        sqrshrn         v27.4h,  v19.4s,  #1
        sqrshrn2        v24.8h,  v20.4s,  #1
        sqrshrn2        v25.8h,  v21.4s,  #1
        sqrshrn2        v26.8h,  v22.4s,  #1
        sqrshrn2        v27.8h,  v23.4s,  #1

        transpose_4x8h  v24, v25, v26, v27, v2, v3, v4, v5

        b               2f

1:
        // Part A skipped: its result is all zero.
.irp r, v24, v25, v26, v27
        movi            \r\().8h,  #0
.endr

2:
        // First pass, part B: the vectors starting at the buffer base.
.irp r, v16, v17, v18, v19, v20, v21, v22, v23
        ld1             {\r\().4s}, [x2]
        st1             {v31.4s},   [x2], x11
.endr

        blr             x4

        // Same narrowing as above, this time in place: v16-v19 are narrowed
        // first, then v20-v23 are packed into their high halves.
.irp r, v16, v17, v18, v19
        sqrshrn         \r\().4h,  \r\().4s,  #1
.endr
        sqrshrn2        v16.8h,  v20.4s,  #1
        sqrshrn2        v17.8h,  v21.4s,  #1
        sqrshrn2        v18.8h,  v22.4s,  #1
        sqrshrn2        v19.8h,  v23.4s,  #1

        transpose_4x8h  v16, v17, v18, v19, v20, v21, v22, v23

        // Bring the part A result next to part B: v16-v23 now hold the
        // complete 16-bit input of the second pass.
        mov             v20.16b, v24.16b
        mov             v21.16b, v25.16b
        mov             v22.16b, v26.16b
        mov             v23.16b, v27.16b

        blr             x5

        load_add_store_8x8 x0, x7
        ret             x15
endfunc
846*c0909341SAndroid Build Coastguard Worker
// Emits the exported entry point inv_txfm_add_<txfm1>_<txfm2>_8x8_16bpc_neon.
//
//   txfm1     first-pass transform; its local inv_<txfm1>_4s_x8_neon goes to x4
//   txfm2     second-pass transform; the address of inv_<txfm2>_8h_x8_neon goes to x5
//   eob_half  threshold placed in w13; inv_txfm_add_8x8_neon compares w3
//             against it to decide whether half of the first pass can be skipped
//
// The link register is parked in x15 because the shared body performs calls
// of its own and finally returns with "ret x15".
.macro def_fn_8x8 txfm1, txfm2, eob_half
function inv_txfm_add_\txfm1\()_\txfm2\()_8x8_16bpc_neon, export=1
        mov             x15, x30

.ifc \txfm1\()_\txfm2, dct_dct
        // Only the dct_dct variant gets the idct_dc expansion.
        // NOTE(review): presumably a DC-only shortcut - confirm in the macro.
        idct_dc         8,   8,   1
.endif
        adr             x4,  inv_\txfm1\()_4s_x8_neon
        movrel          x5,  X(inv_\txfm2\()_8h_x8_neon)
        mov             w13, #\eob_half
        b               inv_txfm_add_8x8_neon
endfunc
.endm

// One entry point per (first pass, second pass) combination.
// Third argument = eob_half threshold for that combination.
def_fn_8x8 dct, dct, 10
def_fn_8x8 identity, identity, 10
def_fn_8x8 dct, adst, 10
def_fn_8x8 dct, flipadst, 10
def_fn_8x8 dct, identity, 4
def_fn_8x8 adst, dct, 10
def_fn_8x8 adst, adst, 10
def_fn_8x8 adst, flipadst, 10
def_fn_8x8 flipadst, dct, 10
def_fn_8x8 flipadst, adst, 10
def_fn_8x8 flipadst, flipadst, 10
// Exactly one of the two passes is identity: lower threshold.
def_fn_8x8 identity, dct, 4
def_fn_8x8 adst, identity, 4
def_fn_8x8 flipadst, identity, 4
def_fn_8x8 identity, adst, 4
def_fn_8x8 identity, flipadst, 4
877*c0909341SAndroid Build Coastguard Worker
// Shared body of the 8x4 inverse transform + add, reached by a plain branch
// from the def_fn_48 entry points.
//
// Register contract, as used below:
//   x2       coefficient buffer: 128 bytes are loaded into v16-v23 and the
//            same 128 bytes are overwritten with zeros
//   x4       first-pass routine, called with its data in v16-v23 (.4s)
//   x5       second-pass routine, called with its data in v16-v19 (.8h)
//   x0, x7   passed on to load_add_store_8x4
//            (NOTE(review): presumably destination pointer and stride - confirm)
//   x15      return address; the function leaves through "ret x15"
// Unlike the 8x8 and 4x8 bodies, no w3/w13 comparison is made here.
// Scratch here: w16, v0, v4-v7, v28-v31, plus whatever x4/x5 clobber.
function inv_txfm_add_8x4_neon
.irp r, v28, v29, v30, v31
        movi            \r\().4s,  #0
.endr
        ld1             {v16.4s,v17.4s,v18.4s,v19.4s}, [x2]
        st1             {v28.4s,v29.4s,v30.4s,v31.4s}, [x2], #64
        // Scale factor for scale_input: (2896*8) << 16.
        // NOTE(review): read as Q31 this is 2896/4096, about 1/sqrt(2) - confirm
        // against the scale_input macro.
        movz            w16, #2896*8, lsl #16
        dup             v0.2s,   w16
        ld1             {v20.4s,v21.4s,v22.4s,v23.4s}, [x2]
        st1             {v28.4s,v29.4s,v30.4s,v31.4s}, [x2]

        scale_input     .4s, v0.s[0], v16, v17, v18, v19, v20, v21, v22, v23

        blr             x4

        // Saturating narrow of the first-pass output from 32 to 16 bit.
.irp r, v16, v17, v18, v19, v20, v21, v22, v23
        sqxtn           \r\().4h,  \r\().4s
.endr

        transpose_4x4h  v16, v17, v18, v19, v4,  v5,  v6,  v7
        transpose_4x4h  v20, v21, v22, v23, v4,  v5,  v6,  v7
        // Pack the two 4x4 results side by side: v20-v23 become the upper
        // 64 bits of v16-v19.
        ins             v16.d[1], v20.d[0]
        ins             v17.d[1], v21.d[0]
        ins             v18.d[1], v22.d[0]
        ins             v19.d[1], v23.d[0]

        blr             x5

        load_add_store_8x4 x0, x7
        ret             x15
endfunc
915*c0909341SAndroid Build Coastguard Worker
// Shared body of the 4x8 inverse transform + add, reached by a plain branch
// from the def_fn_48 entry points (which load w13 only for this width).
//
// Register contract, as used below:
//   x2       coefficient buffer; each 16-byte vector read from it is replaced
//            by zeros right after the load
//   w3, w13  when w3 < w13 (signed) the first pass over the vectors at byte
//            offset 16 of each 32-byte line is skipped and treated as zero
//   x4       first-pass routine, called with its data in v16-v19 (.4s)
//   x5       second-pass routine; v16-v19 and v20-v23 (.4h) are live at the call
//   x0, x7   passed on to load_add_store_4x8
//            (NOTE(review): presumably destination pointer and stride - confirm)
//   x15      return address; the function leaves through "ret x15"
// Scratch here: x6, x11, w16, v4-v7, v30, v31, plus whatever x4/x5 clobber.
function inv_txfm_add_4x8_neon
        movi            v31.4s,  #0              // zeros written back over consumed coefficients
        mov             x11, #32                 // byte step between consecutive loads
        // Scale factor for scale_input: (2896*8) << 16.
        // NOTE(review): read as Q31 this is 2896/4096, about 1/sqrt(2) - confirm
        // against the scale_input macro.
        movz            w16, #2896*8, lsl #16
        dup             v30.2s,  w16

        cmp             w3,  w13
        b.lt            1f

        // First pass, part A: the vectors starting 16 bytes into the buffer.
        add             x6,  x2,  #16
.irp r, v16, v17, v18, v19
        ld1             {\r\().4s}, [x6]
        st1             {v31.4s},   [x6], x11
.endr
        scale_input     .4s, v30.s[0], v16, v17, v18, v19
        blr             x4
        // Saturating narrow to 16 bit, parked in v20-v23.
        sqxtn           v20.4h,  v16.4s
        sqxtn           v21.4h,  v17.4s
        sqxtn           v22.4h,  v18.4s
        sqxtn           v23.4h,  v19.4s
        transpose_4x4h  v20, v21, v22, v23, v4,  v5,  v6,  v7

        b               2f

1:
        // Part A skipped: its result is all zero.
.irp r, v20.4h, v21.4h, v22.4h, v23.4h
        movi            \r,  #0
.endr

2:
        // First pass, part B: the vectors starting at the buffer base.
.irp r, v16, v17, v18, v19
        ld1             {\r\().4s}, [x2]
        st1             {v31.4s},   [x2], x11
.endr
        scale_input     .4s, v30.s[0], v16, v17, v18, v19
        blr             x4
.irp r, v16, v17, v18, v19
        sqxtn           \r\().4h,  \r\().4s
.endr
        transpose_4x4h  v16, v17, v18, v19, v4,  v5,  v6,  v7

        blr             x5

        load_add_store_4x8 x0, x7
        ret             x15
endfunc
964*c0909341SAndroid Build Coastguard Worker
// Emits the exported entry point inv_txfm_add_<txfm1>_<txfm2>_<w>x<h>_16bpc_neon
// for the 4x8 and 8x4 block sizes.
//
//   w, h      block dimensions; they select the inv_txfm_add_<w>x<h>_neon body
//   txfm1     first-pass transform; its local inv_<txfm1>_4s_x<w>_neon goes to x4
//   txfm2     second-pass transform; the address of inv_<txfm2>_<w>h_x<h>_neon
//             goes to x5
//   eob_half  threshold placed in w13, emitted only when w == 4: of the two
//             bodies, only inv_txfm_add_4x8_neon reads w13
//
// The link register is parked in x15 because the shared bodies perform calls
// of their own and finally return with "ret x15".
.macro def_fn_48 w, h, txfm1, txfm2, eob_half
function inv_txfm_add_\txfm1\()_\txfm2\()_\w\()x\h\()_16bpc_neon, export=1
        mov             x15, x30

.ifc \txfm1\()_\txfm2, dct_dct
        // Only the dct_dct variant gets the idct_dc expansion.
        // NOTE(review): presumably a DC-only shortcut - confirm in the macro.
        idct_dc         \w,  \h,  0
.endif
        movrel          x5,  X(inv_\txfm2\()_\w\()h_x\h\()_neon)
        adr             x4,  inv_\txfm1\()_4s_x\w\()_neon
.if \w == 4
        mov             w13, #\eob_half
.endif
        b               inv_txfm_add_\w\()x\h\()_neon
endfunc
.endm
980*c0909341SAndroid Build Coastguard Worker
// Instantiates def_fn_48 for all 16 (first pass, second pass) combinations
// of one block size. The last argument is the eob_half threshold; def_fn_48
// only emits it when w == 4.
.macro def_fns_48 w, h
def_fn_48 \w, \h, dct, dct, 13
def_fn_48 \w, \h, identity, identity, 13
def_fn_48 \w, \h, dct, adst, 13
def_fn_48 \w, \h, dct, flipadst, 13
def_fn_48 \w, \h, dct, identity, 4
def_fn_48 \w, \h, adst, dct, 13
def_fn_48 \w, \h, adst, adst, 13
def_fn_48 \w, \h, adst, flipadst, 13
def_fn_48 \w, \h, flipadst, dct, 13
def_fn_48 \w, \h, flipadst, adst, 13
def_fn_48 \w, \h, flipadst, flipadst, 13
// Identity in exactly one pass: threshold 16 when it is the first pass,
// 4 when it is the second.
def_fn_48 \w, \h, identity, dct, 16
def_fn_48 \w, \h, adst, identity, 4
def_fn_48 \w, \h, flipadst, identity, 4
def_fn_48 \w, \h, identity, adst, 16
def_fn_48 \w, \h, identity, flipadst, 16
.endm

// Generate the 4x8 and the 8x4 entry points.
def_fns_48 4, 8
def_fns_48 8, 4
1002*c0909341SAndroid Build Coastguard Worker
1003*c0909341SAndroid Build Coastguard Worker
// 16-point butterfly network on 32-bit lanes (the name suggests an inverse DCT).
//
// In:   v16-v31, .4s each. The even-numbered registers are handed to the
//       idct_8 macro; the odd-numbered ones feed the t8..t15 half below.
// Out:  out0..out15 in v16..v31, in that order.
// Uses: x16 (coefficient table pointer), v0/v1 (coefficients), v2, v3, v6, v7
//       (temporaries), v4/v5 (clip limits, see below), plus anything idct_8
//       itself touches.
// The instruction order interleaves multiplies and shifts on purpose; keep it
// when editing.
function inv_dct_4s_x16_neon
        AARCH64_VALID_CALL_TARGET
        movrel          x16, idct_coeffs
        ld1             {v0.4s, v1.4s}, [x16], #32      // first 32 bytes; x16 now points past them

        idct_8          v16, v18, v20, v22, v24, v26, v28, v30

        // idct_8 leaves the row_clip_max/min constants in v5 and v4;
        // clamp its eight results to that range.
.irp r, v16.4s, v18.4s, v20.4s, v22.4s, v24.4s, v26.4s, v28.4s, v30.4s
        smin            \r,  \r,  v5.4s
.endr
.irp r, v16.4s, v18.4s, v20.4s, v22.4s, v24.4s, v26.4s, v28.4s, v30.4s
        smax            \r,  \r,  v4.4s
.endr

        // Next 32 bytes of the table, then rewind x16 to its start.
        // NOTE(review): relies on idct_8 leaving x16 untouched - confirm.
        ld1             {v0.4s, v1.4s}, [x16]
        sub             x16, x16, #32

        // Stage 1: rotate the odd inputs pairwise; each product is rounded
        // and shifted down by 12 bits.
        mul_mls         v2,  v17, v31, v0.s[0], v0.s[1] // -> t8a
        mul_mla         v3,  v17, v31, v0.s[1], v0.s[0] // -> t15a
        mul_mls         v6,  v25, v23, v0.s[2], v0.s[3] // -> t9a
        srshr           v17.4s, v2.4s,  #12             // t8a
        srshr           v31.4s, v3.4s,  #12             // t15a
        mul_mla         v2,  v25, v23, v0.s[3], v0.s[2] // -> t14a
        mul_mls         v3,  v21, v27, v1.s[0], v1.s[1] // -> t10a
        srshr           v23.4s, v6.4s,  #12             // t9a
        srshr           v25.4s, v2.4s,  #12             // t14a
        mul_mla         v6,  v21, v27, v1.s[1], v1.s[0] // -> t13a
        mul_mls         v2,  v29, v19, v1.s[2], v1.s[3] // -> t11a
        srshr           v21.4s, v3.4s,  #12             // t10a
        srshr           v27.4s, v6.4s,  #12             // t13a
        mul_mla         v3,  v29, v19, v1.s[3], v1.s[2] // -> t12a
        srshr           v19.4s, v2.4s,  #12             // t11a
        srshr           v29.4s, v3.4s,  #12             // t12a

        // Back to the first four coefficients of the table.
        ld1             {v0.4s}, [x16]

        // Stage 2: saturating add/sub butterflies, then clamp.
        sqsub           v2.4s,   v17.4s,  v23.4s  // t9
        sqadd           v17.4s,  v17.4s,  v23.4s  // t8
        sqsub           v3.4s,   v31.4s,  v25.4s  // t14
        sqadd           v31.4s,  v31.4s,  v25.4s  // t15
        sqsub           v23.4s,  v19.4s,  v21.4s  // t10
        sqadd           v19.4s,  v19.4s,  v21.4s  // t11
        sqadd           v25.4s,  v29.4s,  v27.4s  // t12
        sqsub           v29.4s,  v29.4s,  v27.4s  // t13

.irp r, v2.4s, v17.4s, v3.4s, v31.4s, v23.4s, v19.4s, v25.4s, v29.4s
        smin            \r,  \r,  v5.4s
.endr
.irp r, v2.4s, v17.4s, v3.4s, v31.4s, v23.4s, v19.4s, v25.4s, v29.4s
        smax            \r,  \r,  v4.4s
.endr

        // Stage 3: two more rotations with v0.s[2]/v0.s[3].
        mul_mls         v7,  v3,  v2,  v0.s[2], v0.s[3] // -> t9a
        mul_mla         v6,  v3,  v2,  v0.s[3], v0.s[2] // -> t14a
        srshr           v21.4s, v7.4s,  #12             // t9a
        srshr           v27.4s, v6.4s,  #12             // t14a

        mul_mls         v7,  v29, v23, v0.s[2], v0.s[3] // -> t13a
        mul_mla         v6,  v29, v23, v0.s[3], v0.s[2] // -> t10a
        srshr           v29.4s, v7.4s,  #12             // t13a
        neg             v6.4s,   v6.4s                  // t10a takes the negated sum
        srshr           v23.4s, v6.4s,  #12             // t10a

        // Stage 4: butterflies again, then clamp.
        sqsub           v2.4s,   v17.4s,  v19.4s  // t11a
        sqadd           v17.4s,  v17.4s,  v19.4s  // t8a
        sqsub           v3.4s,   v31.4s,  v25.4s  // t12a
        sqadd           v31.4s,  v31.4s,  v25.4s  // t15a
        sqadd           v19.4s,  v21.4s,  v23.4s  // t9
        sqsub           v21.4s,  v21.4s,  v23.4s  // t10
        sqsub           v25.4s,  v27.4s,  v29.4s  // t13
        sqadd           v27.4s,  v27.4s,  v29.4s  // t14

.irp r, v2.4s, v17.4s, v3.4s, v31.4s, v19.4s, v21.4s, v25.4s, v27.4s
        smin            \r,  \r,  v5.4s
.endr
.irp r, v2.4s, v17.4s, v3.4s, v31.4s, v19.4s, v21.4s, v25.4s, v27.4s
        smax            \r,  \r,  v4.4s
.endr

        // Stage 5: sum/difference pairs, both scaled by the same coefficient v0.s[0].
        mul_mls         v7,  v3,  v2,  v0.s[0], v0.s[0] // -> t11
        mul_mla         v6,  v3,  v2,  v0.s[0], v0.s[0] // -> t12
        mul_mls         v2,  v25, v21, v0.s[0], v0.s[0] // -> t10a

        srshr           v7.4s,  v7.4s,  #12   // t11
        srshr           v6.4s,  v6.4s,  #12   // t12
        mul_mla         v3,  v25, v21, v0.s[0], v0.s[0] // -> t13a
        srshr           v2.4s,  v2.4s,  #12   // t10a
        srshr           v3.4s,  v3.4s,  #12   // t13a

        // Final stage: combine the idct_8 half (even registers) with the
        // t8..t15 half. v1 and v3 carry out0/out8 and out6 temporarily because
        // their destination registers are still needed as inputs.
        sqadd           v1.4s,   v16.4s,  v31.4s  // out0
        sqsub           v31.4s,  v16.4s,  v31.4s  // out15
        mov             v16.16b, v1.16b
        sqadd           v23.4s,  v30.4s,  v17.4s  // out7
        sqsub           v1.4s,   v30.4s,  v17.4s  // out8
        sqadd           v17.4s,  v18.4s,  v27.4s  // out1
        sqsub           v30.4s,  v18.4s,  v27.4s  // out14
        sqadd           v18.4s,  v20.4s,  v3.4s   // out2
        sqsub           v29.4s,  v20.4s,  v3.4s   // out13
        sqadd           v3.4s,   v28.4s,  v19.4s  // out6
        sqsub           v25.4s,  v28.4s,  v19.4s  // out9
        sqadd           v19.4s,  v22.4s,  v6.4s   // out3
        sqsub           v28.4s,  v22.4s,  v6.4s   // out12
        sqadd           v20.4s,  v24.4s,  v7.4s   // out4
        sqsub           v27.4s,  v24.4s,  v7.4s   // out11
        sqadd           v21.4s,  v26.4s,  v2.4s   // out5
        sqsub           v26.4s,  v26.4s,  v2.4s   // out10
        mov             v24.16b, v1.16b           // out8
        mov             v22.16b, v3.16b           // out6

        ret
endfunc
1116*c0909341SAndroid Build Coastguard Worker
1117*c0909341SAndroid Build Coastguard Worker.macro iadst_16 o0, o1, o2, o3, o4, o5, o6, o7, o8, o9, o10, o11, o12, o13, o14, o15
1118*c0909341SAndroid Build Coastguard Worker        movrel          x16, iadst16_coeffs
1119*c0909341SAndroid Build Coastguard Worker        ld1             {v0.4s, v1.4s}, [x16], #32
1120*c0909341SAndroid Build Coastguard Worker
1121*c0909341SAndroid Build Coastguard Worker        mul_mla         v2,  v31, v16, v0.s[0], v0.s[1] // -> t0
1122*c0909341SAndroid Build Coastguard Worker        mul_mls         v4,  v31, v16, v0.s[1], v0.s[0] // -> t1
1123*c0909341SAndroid Build Coastguard Worker        mul_mla         v6,  v29, v18, v0.s[2], v0.s[3] // -> t2
1124*c0909341SAndroid Build Coastguard Worker        srshr           v16.4s, v2.4s,  #12             // t0
1125*c0909341SAndroid Build Coastguard Worker        srshr           v31.4s, v4.4s,  #12             // t1
1126*c0909341SAndroid Build Coastguard Worker        mul_mls         v2,  v29, v18, v0.s[3], v0.s[2] // -> t3
1127*c0909341SAndroid Build Coastguard Worker        mul_mla         v4,  v27, v20, v1.s[0], v1.s[1] // -> t4
1128*c0909341SAndroid Build Coastguard Worker        srshr           v18.4s, v6.4s,  #12             // t2
1129*c0909341SAndroid Build Coastguard Worker        srshr           v29.4s, v2.4s,  #12             // t3
1130*c0909341SAndroid Build Coastguard Worker        mul_mls         v6,  v27, v20, v1.s[1], v1.s[0] // -> t5
1131*c0909341SAndroid Build Coastguard Worker        mul_mla         v2,  v25, v22, v1.s[2], v1.s[3] // -> t6
1132*c0909341SAndroid Build Coastguard Worker        srshr           v20.4s, v4.4s,  #12             // t4
1133*c0909341SAndroid Build Coastguard Worker        srshr           v27.4s, v6.4s,  #12             // t5
1134*c0909341SAndroid Build Coastguard Worker        mul_mls         v4,  v25, v22, v1.s[3], v1.s[2] // -> t7
1135*c0909341SAndroid Build Coastguard Worker        ld1             {v0.4s, v1.4s}, [x16]
1136*c0909341SAndroid Build Coastguard Worker        movrel          x16, idct_coeffs
1137*c0909341SAndroid Build Coastguard Worker        mul_mla         v6,  v23, v24, v0.s[0], v0.s[1] // -> t8
1138*c0909341SAndroid Build Coastguard Worker        srshr           v22.4s, v2.4s,  #12             // t6
1139*c0909341SAndroid Build Coastguard Worker        srshr           v25.4s, v4.4s,  #12             // t7
1140*c0909341SAndroid Build Coastguard Worker        mul_mls         v2,  v23, v24, v0.s[1], v0.s[0] // -> t9
1141*c0909341SAndroid Build Coastguard Worker        mul_mla         v4,  v21, v26, v0.s[2], v0.s[3] // -> t10
1142*c0909341SAndroid Build Coastguard Worker        srshr           v23.4s, v6.4s,  #12             // t8
1143*c0909341SAndroid Build Coastguard Worker        srshr           v24.4s, v2.4s,  #12             // t9
1144*c0909341SAndroid Build Coastguard Worker        mul_mls         v6,  v21, v26, v0.s[3], v0.s[2] // -> t11
1145*c0909341SAndroid Build Coastguard Worker        mul_mla         v2,  v19, v28, v1.s[0], v1.s[1] // -> t12
1146*c0909341SAndroid Build Coastguard Worker        srshr           v21.4s, v4.4s,  #12             // t10
1147*c0909341SAndroid Build Coastguard Worker        srshr           v26.4s, v6.4s,  #12             // t11
1148*c0909341SAndroid Build Coastguard Worker        mul_mls         v4,  v19, v28, v1.s[1], v1.s[0] // -> t13
1149*c0909341SAndroid Build Coastguard Worker        mul_mla         v6,  v17, v30, v1.s[2], v1.s[3] // -> t14
1150*c0909341SAndroid Build Coastguard Worker        srshr           v19.4s, v2.4s,  #12             // t12
1151*c0909341SAndroid Build Coastguard Worker        srshr           v28.4s, v4.4s,  #12             // t13
1152*c0909341SAndroid Build Coastguard Worker        mul_mls         v2,  v17, v30, v1.s[3], v1.s[2] // -> t15
1153*c0909341SAndroid Build Coastguard Worker        srshr           v17.4s, v6.4s,  #12             // t14
1154*c0909341SAndroid Build Coastguard Worker        srshr           v30.4s, v2.4s,  #12             // t15
1155*c0909341SAndroid Build Coastguard Worker
1156*c0909341SAndroid Build Coastguard Worker        ld1             {v0.4s, v1.4s}, [x16]
1157*c0909341SAndroid Build Coastguard Worker
1158*c0909341SAndroid Build Coastguard Worker        movi            v5.4s,   #1, msl #16     // row_clip_max = ~(~bdmax << 7), 0x1ffff
1159*c0909341SAndroid Build Coastguard Worker        mvni            v7.4s,   #1, msl #16     // row_clip_min = (~bdmax << 7), 0xfffe0000
1160*c0909341SAndroid Build Coastguard Worker
1161*c0909341SAndroid Build Coastguard Worker        sqsub           v2.4s,   v16.4s,  v23.4s // t8a
1162*c0909341SAndroid Build Coastguard Worker        sqadd           v16.4s,  v16.4s,  v23.4s // t0a
1163*c0909341SAndroid Build Coastguard Worker        sqsub           v3.4s,   v31.4s,  v24.4s // t9a
1164*c0909341SAndroid Build Coastguard Worker        sqadd           v31.4s,  v31.4s,  v24.4s // t1a
1165*c0909341SAndroid Build Coastguard Worker        sqadd           v23.4s,  v18.4s,  v21.4s // t2a
1166*c0909341SAndroid Build Coastguard Worker        sqsub           v18.4s,  v18.4s,  v21.4s // t10a
1167*c0909341SAndroid Build Coastguard Worker        sqadd           v24.4s,  v29.4s,  v26.4s // t3a
1168*c0909341SAndroid Build Coastguard Worker        sqsub           v29.4s,  v29.4s,  v26.4s // t11a
1169*c0909341SAndroid Build Coastguard Worker        sqadd           v21.4s,  v20.4s,  v19.4s // t4a
1170*c0909341SAndroid Build Coastguard Worker        sqsub           v20.4s,  v20.4s,  v19.4s // t12a
1171*c0909341SAndroid Build Coastguard Worker        sqadd           v26.4s,  v27.4s,  v28.4s // t5a
1172*c0909341SAndroid Build Coastguard Worker        sqsub           v27.4s,  v27.4s,  v28.4s // t13a
1173*c0909341SAndroid Build Coastguard Worker        sqadd           v19.4s,  v22.4s,  v17.4s // t6a
1174*c0909341SAndroid Build Coastguard Worker        sqsub           v22.4s,  v22.4s,  v17.4s // t14a
1175*c0909341SAndroid Build Coastguard Worker        sqadd           v28.4s,  v25.4s,  v30.4s // t7a
1176*c0909341SAndroid Build Coastguard Worker        sqsub           v25.4s,  v25.4s,  v30.4s // t15a
1177*c0909341SAndroid Build Coastguard Worker
1178*c0909341SAndroid Build Coastguard Worker.irp r, v2, v16, v3, v31, v23, v18, v24, v29, v21, v20, v26, v27, v19, v22, v28, v25
1179*c0909341SAndroid Build Coastguard Worker        smin_4s         \r, \r, v5
1180*c0909341SAndroid Build Coastguard Worker.endr
1181*c0909341SAndroid Build Coastguard Worker.irp r, v2, v16, v3, v31, v23, v18, v24, v29, v21, v20, v26, v27, v19, v22, v28, v25
1182*c0909341SAndroid Build Coastguard Worker        smax_4s         \r, \r, v7
1183*c0909341SAndroid Build Coastguard Worker.endr
1184*c0909341SAndroid Build Coastguard Worker
1185*c0909341SAndroid Build Coastguard Worker        mul_mla         v4,  v2,  v3,  v1.s[1], v1.s[0] // -> t8
1186*c0909341SAndroid Build Coastguard Worker        mul_mls         v6,  v2,  v3,  v1.s[0], v1.s[1] // -> t9
1187*c0909341SAndroid Build Coastguard Worker        mul_mla         v2,  v18, v29, v1.s[3], v1.s[2] // -> t10
1188*c0909341SAndroid Build Coastguard Worker        srshr           v17.4s, v4.4s,  #12             // t8
1189*c0909341SAndroid Build Coastguard Worker        srshr           v30.4s, v6.4s,  #12             // t9
1190*c0909341SAndroid Build Coastguard Worker        mul_mls         v4,  v18, v29, v1.s[2], v1.s[3] // -> t11
1191*c0909341SAndroid Build Coastguard Worker        mul_mls         v6,  v27, v20, v1.s[1], v1.s[0] // -> t12
1192*c0909341SAndroid Build Coastguard Worker        srshr           v18.4s, v2.4s,  #12             // t10
1193*c0909341SAndroid Build Coastguard Worker        srshr           v29.4s, v4.4s,  #12             // t11
1194*c0909341SAndroid Build Coastguard Worker        mul_mla         v2,  v27, v20, v1.s[0], v1.s[1] // -> t13
1195*c0909341SAndroid Build Coastguard Worker        mul_mls         v4,  v25, v22, v1.s[3], v1.s[2] // -> t14
1196*c0909341SAndroid Build Coastguard Worker        srshr           v27.4s, v6.4s,  #12             // t12
1197*c0909341SAndroid Build Coastguard Worker        srshr           v20.4s, v2.4s,  #12             // t13
1198*c0909341SAndroid Build Coastguard Worker        mul_mla         v6,  v25, v22, v1.s[2], v1.s[3] // -> t15
1199*c0909341SAndroid Build Coastguard Worker        srshr           v25.4s, v4.4s,  #12             // t14
1200*c0909341SAndroid Build Coastguard Worker        srshr           v22.4s, v6.4s,  #12             // t15
1201*c0909341SAndroid Build Coastguard Worker
1202*c0909341SAndroid Build Coastguard Worker        sqsub           v2.4s,   v16.4s,  v21.4s // t4
1203*c0909341SAndroid Build Coastguard Worker        sqadd           v16.4s,  v16.4s,  v21.4s // t0
1204*c0909341SAndroid Build Coastguard Worker        sqsub           v3.4s,   v31.4s,  v26.4s // t5
1205*c0909341SAndroid Build Coastguard Worker        sqadd           v31.4s,  v31.4s,  v26.4s // t1
1206*c0909341SAndroid Build Coastguard Worker        sqadd           v21.4s,  v23.4s,  v19.4s // t2
1207*c0909341SAndroid Build Coastguard Worker        sqsub           v23.4s,  v23.4s,  v19.4s // t6
1208*c0909341SAndroid Build Coastguard Worker        sqadd           v26.4s,  v24.4s,  v28.4s // t3
1209*c0909341SAndroid Build Coastguard Worker        sqsub           v24.4s,  v24.4s,  v28.4s // t7
1210*c0909341SAndroid Build Coastguard Worker        sqadd           v19.4s,  v17.4s,  v27.4s // t8a
1211*c0909341SAndroid Build Coastguard Worker        sqsub           v17.4s,  v17.4s,  v27.4s // t12a
1212*c0909341SAndroid Build Coastguard Worker        sqadd           v28.4s,  v30.4s,  v20.4s // t9a
1213*c0909341SAndroid Build Coastguard Worker        sqsub           v30.4s,  v30.4s,  v20.4s // t13a
1214*c0909341SAndroid Build Coastguard Worker        sqadd           v27.4s,  v18.4s,  v25.4s // t10a
1215*c0909341SAndroid Build Coastguard Worker        sqsub           v18.4s,  v18.4s,  v25.4s // t14a
1216*c0909341SAndroid Build Coastguard Worker        sqadd           v20.4s,  v29.4s,  v22.4s // t11a
1217*c0909341SAndroid Build Coastguard Worker        sqsub           v29.4s,  v29.4s,  v22.4s // t15a
1218*c0909341SAndroid Build Coastguard Worker
1219*c0909341SAndroid Build Coastguard Worker.irp r, v2, v16, v3, v31, v21, v23, v26, v24, v19, v17, v28, v30, v27, v18, v20, v29
1220*c0909341SAndroid Build Coastguard Worker        smin_4s         \r, \r, v5
1221*c0909341SAndroid Build Coastguard Worker.endr
1222*c0909341SAndroid Build Coastguard Worker.irp r, v2, v16, v3, v31, v21, v23, v26, v24, v19, v17, v28, v30, v27, v18, v20, v29
1223*c0909341SAndroid Build Coastguard Worker        smax_4s         \r, \r, v7
1224*c0909341SAndroid Build Coastguard Worker.endr
1225*c0909341SAndroid Build Coastguard Worker
1226*c0909341SAndroid Build Coastguard Worker        mul_mla         v4,  v2,  v3,  v0.s[3], v0.s[2] // -> t4a
1227*c0909341SAndroid Build Coastguard Worker        mul_mls         v6,  v2,  v3,  v0.s[2], v0.s[3] // -> t5a
1228*c0909341SAndroid Build Coastguard Worker        mul_mls         v2,  v24, v23, v0.s[3], v0.s[2] // -> t6a
1229*c0909341SAndroid Build Coastguard Worker        srshr           v22.4s, v4.4s,  #12             // t4a
1230*c0909341SAndroid Build Coastguard Worker        srshr           v25.4s, v6.4s,  #12             // t5a
1231*c0909341SAndroid Build Coastguard Worker        mul_mla         v4,  v24, v23, v0.s[2], v0.s[3] // -> t7a
1232*c0909341SAndroid Build Coastguard Worker        mul_mla         v6,  v17, v30, v0.s[3], v0.s[2] // -> t12
1233*c0909341SAndroid Build Coastguard Worker        srshr           v24.4s, v2.4s,  #12             // t6a
1234*c0909341SAndroid Build Coastguard Worker        srshr           v23.4s, v4.4s,  #12             // t7a
1235*c0909341SAndroid Build Coastguard Worker        mul_mls         v2,  v17, v30, v0.s[2], v0.s[3] // -> t13
1236*c0909341SAndroid Build Coastguard Worker        mul_mls         v4,  v29, v18, v0.s[3], v0.s[2] // -> t14
1237*c0909341SAndroid Build Coastguard Worker        srshr           v17.4s, v6.4s,  #12             // t12
1238*c0909341SAndroid Build Coastguard Worker        mul_mla         v6,  v29, v18, v0.s[2], v0.s[3] // -> t15
1239*c0909341SAndroid Build Coastguard Worker        srshr           v29.4s, v2.4s,  #12             // t13
1240*c0909341SAndroid Build Coastguard Worker        srshr           v30.4s, v4.4s,  #12             // t14
1241*c0909341SAndroid Build Coastguard Worker        srshr           v18.4s, v6.4s,  #12             // t15
1242*c0909341SAndroid Build Coastguard Worker
1243*c0909341SAndroid Build Coastguard Worker        sqsub           v2.4s,   v16.4s,  v21.4s // t2a
1244*c0909341SAndroid Build Coastguard Worker.ifc \o0, v16
1245*c0909341SAndroid Build Coastguard Worker        sqadd           \o0\().4s,  v16.4s,  v21.4s // out0
1246*c0909341SAndroid Build Coastguard Worker        sqsub           v21.4s,     v31.4s,  v26.4s // t3a
1247*c0909341SAndroid Build Coastguard Worker        sqadd           \o15\().4s, v31.4s,  v26.4s // out15
1248*c0909341SAndroid Build Coastguard Worker.else
1249*c0909341SAndroid Build Coastguard Worker        sqadd           v4.4s,      v16.4s,  v21.4s // out0
1250*c0909341SAndroid Build Coastguard Worker        sqsub           v21.4s,     v31.4s,  v26.4s // t3a
1251*c0909341SAndroid Build Coastguard Worker        sqadd           \o15\().4s, v31.4s,  v26.4s // out15
1252*c0909341SAndroid Build Coastguard Worker        mov             \o0\().16b, v4.16b
1253*c0909341SAndroid Build Coastguard Worker.endif
1254*c0909341SAndroid Build Coastguard Worker
1255*c0909341SAndroid Build Coastguard Worker        sqsub           v3.4s,      v29.4s,  v18.4s // t15a
1256*c0909341SAndroid Build Coastguard Worker        sqadd           \o13\().4s, v29.4s,  v18.4s // out13
1257*c0909341SAndroid Build Coastguard Worker        sqadd           \o2\().4s,  v17.4s,  v30.4s // out2
1258*c0909341SAndroid Build Coastguard Worker        sqsub           v26.4s,     v17.4s,  v30.4s // t14a
1259*c0909341SAndroid Build Coastguard Worker
1260*c0909341SAndroid Build Coastguard Worker        sqadd           \o1\().4s,  v19.4s,  v27.4s // out1
1261*c0909341SAndroid Build Coastguard Worker        sqsub           v27.4s,     v19.4s,  v27.4s // t10
1262*c0909341SAndroid Build Coastguard Worker        sqadd           \o14\().4s, v28.4s,  v20.4s // out14
1263*c0909341SAndroid Build Coastguard Worker        sqsub           v20.4s,     v28.4s,  v20.4s // t11
1264*c0909341SAndroid Build Coastguard Worker
1265*c0909341SAndroid Build Coastguard Worker        sqadd           \o3\().4s,  v22.4s,  v24.4s // out3
1266*c0909341SAndroid Build Coastguard Worker        sqsub           v22.4s,     v22.4s,  v24.4s // t6
1267*c0909341SAndroid Build Coastguard Worker        sqadd           \o12\().4s, v25.4s,  v23.4s // out12
1268*c0909341SAndroid Build Coastguard Worker        sqsub           v23.4s,     v25.4s,  v23.4s // t7
1269*c0909341SAndroid Build Coastguard Worker
1270*c0909341SAndroid Build Coastguard Worker        // Not clipping the output registers, as they will be downshifted and
1271*c0909341SAndroid Build Coastguard Worker        // narrowed afterwards anyway.
1272*c0909341SAndroid Build Coastguard Worker.irp r, v2, v21, v3, v26, v27, v20, v22, v23
1273*c0909341SAndroid Build Coastguard Worker        smin_4s         \r, \r, v5
1274*c0909341SAndroid Build Coastguard Worker.endr
1275*c0909341SAndroid Build Coastguard Worker.irp r, v2, v21, v3, v26, v27, v20, v22, v23
1276*c0909341SAndroid Build Coastguard Worker        smax_4s         \r, \r, v7
1277*c0909341SAndroid Build Coastguard Worker.endr
1278*c0909341SAndroid Build Coastguard Worker
1279*c0909341SAndroid Build Coastguard Worker        sqneg           \o15\().4s, \o15\().4s      // out15
1280*c0909341SAndroid Build Coastguard Worker        sqneg           \o13\().4s, \o13\().4s      // out13
1281*c0909341SAndroid Build Coastguard Worker        sqneg           \o1\().4s,  \o1\().4s       // out1
1282*c0909341SAndroid Build Coastguard Worker        sqneg           \o3\().4s,  \o3\().4s       // out3
1283*c0909341SAndroid Build Coastguard Worker
1284*c0909341SAndroid Build Coastguard Worker        mul_mls         v24, v2,  v21, v0.s[0], v0.s[0] // -> out8 (v24 or v23)
1285*c0909341SAndroid Build Coastguard Worker        mul_mla         v4,  v2,  v21, v0.s[0], v0.s[0] // -> out7 (v23 or v24)
1286*c0909341SAndroid Build Coastguard Worker        mul_mla         v6,  v26, v3,  v0.s[0], v0.s[0] // -> out5 (v21 or v26)
1287*c0909341SAndroid Build Coastguard Worker
1288*c0909341SAndroid Build Coastguard Worker        srshr           v24.4s, v24.4s, #12             // out8
1289*c0909341SAndroid Build Coastguard Worker        srshr           v4.4s,  v4.4s,  #12             // out7
1290*c0909341SAndroid Build Coastguard Worker        srshr           v5.4s,  v6.4s,  #12             // out5
1291*c0909341SAndroid Build Coastguard Worker        mul_mls         v6,  v26, v3,  v0.s[0], v0.s[0] // -> out10 (v26 or v21)
1292*c0909341SAndroid Build Coastguard Worker        mul_mla         v2,  v22, v23, v0.s[0], v0.s[0] // -> out4 (v20 or v27)
1293*c0909341SAndroid Build Coastguard Worker        srshr           v26.4s, v6.4s,  #12             // out10
1294*c0909341SAndroid Build Coastguard Worker
1295*c0909341SAndroid Build Coastguard Worker        mul_mls         v6,  v22, v23, v0.s[0], v0.s[0] // -> out11 (v27 or v20)
1296*c0909341SAndroid Build Coastguard Worker        mul_mla         v22, v27, v20, v0.s[0], v0.s[0] // -> out6 (v22 or v25)
1297*c0909341SAndroid Build Coastguard Worker        mul_mls         v21, v27, v20, v0.s[0], v0.s[0] // -> out9 (v25 or v22)
1298*c0909341SAndroid Build Coastguard Worker
1299*c0909341SAndroid Build Coastguard Worker        srshr           \o4\().4s,   v2.4s,  #12        // out4
1300*c0909341SAndroid Build Coastguard Worker        srshr           v6.4s,       v6.4s,  #12        // out11
1301*c0909341SAndroid Build Coastguard Worker        srshr           v7.4s,       v21.4s, #12        // out9
1302*c0909341SAndroid Build Coastguard Worker        srshr           \o6\().4s,   v22.4s, #12        // out6
1303*c0909341SAndroid Build Coastguard Worker
1304*c0909341SAndroid Build Coastguard Worker.ifc \o8, v23
1305*c0909341SAndroid Build Coastguard Worker        mov             \o8\().16b,  v24.16b
1306*c0909341SAndroid Build Coastguard Worker        mov             \o10\().16b, v26.16b
1307*c0909341SAndroid Build Coastguard Worker.endif
1308*c0909341SAndroid Build Coastguard Worker
1309*c0909341SAndroid Build Coastguard Worker        sqneg           \o7\().4s,   v4.4s // out7
1310*c0909341SAndroid Build Coastguard Worker        sqneg           \o5\().4s,   v5.4s // out5
1311*c0909341SAndroid Build Coastguard Worker        sqneg           \o11\().4s,  v6.4s // out11
1312*c0909341SAndroid Build Coastguard Worker        sqneg           \o9\().4s,   v7.4s // out9
1313*c0909341SAndroid Build Coastguard Worker.endm
1314*c0909341SAndroid Build Coastguard Worker
// inv_adst_4s_x16_neon: runs the iadst_16 macro on 32-bit lanes (.4s).
// In/out:  v16-v31 hold the 16 inputs and are overwritten with the results.
//          The output operands are passed in ascending order, so out0 lands
//          in v16 and out15 in v31 (assuming the macro parameters are
//          o0..o15 in positional order; the macro header is not in view).
// Scratch: the part of iadst_16 visible here uses x16 and v0-v7.
1315*c0909341SAndroid Build Coastguard Workerfunction inv_adst_4s_x16_neon
        // This routine is reached indirectly: def_fn_16x16 puts its address
        // in x4 and the horizontal pass calls it with blr x4. The macro below
        // is defined elsewhere; presumably it emits the landing pad needed
        // for indirect branches - confirm at its definition.
1316*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_CALL_TARGET
1317*c0909341SAndroid Build Coastguard Worker        iadst_16        v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31
1318*c0909341SAndroid Build Coastguard Worker        ret
1319*c0909341SAndroid Build Coastguard Workerendfunc
1320*c0909341SAndroid Build Coastguard Worker
// inv_flipadst_4s_x16_neon: same iadst_16 computation as the non-flipped
// variant, but the output operands are listed in descending order, so out0
// is written to v31 and out15 to v16 (assuming the macro parameters are
// o0..o15 in positional order; the macro header is not in view).
// In/out:  v16-v31 (.4s lanes), overwritten with the flipped results.
// Scratch: the part of iadst_16 visible here uses x16 and v0-v7.
1321*c0909341SAndroid Build Coastguard Workerfunction inv_flipadst_4s_x16_neon
        // Reached through blr x4 from the horizontal pass (def_fn_16x16
        // loads x4 with this address when the first transform is flipadst).
1322*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_CALL_TARGET
1323*c0909341SAndroid Build Coastguard Worker        iadst_16        v31, v30, v29, v28, v27, v26, v25, v24, v23, v22, v21, v20, v19, v18, v17, v16
1324*c0909341SAndroid Build Coastguard Worker        ret
1325*c0909341SAndroid Build Coastguard Workerendfunc
1326*c0909341SAndroid Build Coastguard Worker
// inv_identity_4s_x16_neon: identity-transform scaling of v16-v31 (.4s lanes).
// Every lane x is replaced by  sat(sat(2*x) + sqrdmulh(x, C))  where
// C = (2*(5793-4096)*8) << 16. sqrdmulh yields the rounded high half of
// 2*x*C, which is about x * 2*(5793-4096)/4096, so the overall gain is about
// 2*5793/4096 (5793/4096 is roughly sqrt(2)).
// Clobbers: w16, v0, v2.
1327*c0909341SAndroid Build Coastguard Workerfunction inv_identity_4s_x16_neon
1328*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_CALL_TARGET
        // Build the fractional multiplier C in the upper half of w16 and
        // broadcast it so that v0.s[0] can be used as the by-element operand.
1329*c0909341SAndroid Build Coastguard Worker        movz            w16, #2*(5793-4096)*8, lsl #16
1330*c0909341SAndroid Build Coastguard Worker        dup             v0.2s,   w16
1331*c0909341SAndroid Build Coastguard Worker.irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
        // v2 = fractional part, taken from x before x is doubled in place.
1332*c0909341SAndroid Build Coastguard Worker        sqrdmulh        v2.4s,      v\i\().4s,  v0.s[0]
1333*c0909341SAndroid Build Coastguard Worker        sqadd           v\i\().4s,  v\i\().4s,  v\i\().4s
1334*c0909341SAndroid Build Coastguard Worker        sqadd           v\i\().4s,  v\i\().4s,  v2.4s
1335*c0909341SAndroid Build Coastguard Worker.endr
1336*c0909341SAndroid Build Coastguard Worker        ret
1337*c0909341SAndroid Build Coastguard Workerendfunc
1338*c0909341SAndroid Build Coastguard Worker
// identity_4x16_shift1 c
// For each of v16-v31 (.4s lanes):
//     x = sat(x + ((sqrdmulh(x, c) + 1) >> 1))
// The c argument is passed straight through as the last sqrdmulh operand;
// no constant is loaded here, so the caller chooses the multiplier.
// Clobbers: v3.
1339*c0909341SAndroid Build Coastguard Worker.macro identity_4x16_shift1 c
1340*c0909341SAndroid Build Coastguard Worker.irp i, v16.4s, v17.4s, v18.4s, v19.4s, v20.4s, v21.4s, v22.4s, v23.4s, v24.4s, v25.4s, v26.4s, v27.4s, v28.4s, v29.4s, v30.4s, v31.4s
1341*c0909341SAndroid Build Coastguard Worker        sqrdmulh        v3.4s,   \i,      \c
        // Rounding halving of the product before it is added back.
1342*c0909341SAndroid Build Coastguard Worker        srshr           v3.4s,   v3.4s,   #1
1343*c0909341SAndroid Build Coastguard Worker        sqadd           \i,      \i,      v3.4s
1344*c0909341SAndroid Build Coastguard Worker.endr
1345*c0909341SAndroid Build Coastguard Worker.endm
1346*c0909341SAndroid Build Coastguard Worker
// identity_4x16 c
// For each of v16-v31 (.4s lanes):
//     x = sat(sat(2*x) + sqrdmulh(x, c))
// The product is computed from x before x is doubled in place. The c
// argument is the by-element multiplier supplied by the caller.
// Clobbers: v3.
1347*c0909341SAndroid Build Coastguard Worker.macro identity_4x16 c
1348*c0909341SAndroid Build Coastguard Worker.irp i, v16.4s, v17.4s, v18.4s, v19.4s, v20.4s, v21.4s, v22.4s, v23.4s, v24.4s, v25.4s, v26.4s, v27.4s, v28.4s, v29.4s, v30.4s, v31.4s
1349*c0909341SAndroid Build Coastguard Worker        sqrdmulh        v3.4s,   \i,      \c
1350*c0909341SAndroid Build Coastguard Worker        sqadd           \i,      \i,      \i
1351*c0909341SAndroid Build Coastguard Worker        sqadd           \i,      \i,      v3.4s
1352*c0909341SAndroid Build Coastguard Worker.endr
1353*c0909341SAndroid Build Coastguard Worker.endm
1354*c0909341SAndroid Build Coastguard Worker
// def_horz_16 scale, shift, suffix
// Emits inv_txfm_horz<suffix>_16x4_neon, the first (horizontal) pass over one
// 16x4 slice of coefficients.
//   x4   routine called with blr; it transforms v16-v31 (.4s) in place
//   x6   output pointer: 8 vectors of 8 x 16 bit are stored, x6 advances by
//        128 bytes in total
//   x7   input pointer: 16 vectors of 4 x 32 bit are read, and each location
//        is overwritten with zero right after it is read
//   x8   byte stride between consecutive input vectors
//   x14  holds the return address, since blr x4 overwrites x30
// scale: when nonzero, the inputs go through scale_input with 2896*8 << 16 in
//        v0.s[0] before the transform (macro defined elsewhere; presumably a
//        multiply by about 2896/4096 - confirm at its definition).
// shift: right shift applied while narrowing the results to 16 bit.
1355*c0909341SAndroid Build Coastguard Worker.macro def_horz_16 scale=0, shift=2, suffix
1356*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_horz\suffix\()_16x4_neon
1357*c0909341SAndroid Build Coastguard Worker        mov             x14, x30
        // v7 is the zero vector written back over the consumed coefficients.
1358*c0909341SAndroid Build Coastguard Worker        movi            v7.4s,  #0
1359*c0909341SAndroid Build Coastguard Worker.if \scale
1360*c0909341SAndroid Build Coastguard Worker        movz            w16, #2896*8, lsl #16
1361*c0909341SAndroid Build Coastguard Worker        dup             v0.2s,   w16
1362*c0909341SAndroid Build Coastguard Worker.endif
1363*c0909341SAndroid Build Coastguard Worker.irp i, v16.4s, v17.4s, v18.4s, v19.4s, v20.4s, v21.4s, v22.4s, v23.4s, v24.4s, v25.4s, v26.4s, v27.4s, v28.4s, v29.4s, v30.4s, v31.4s
        // Load one vector, clear it in memory, then step x7 by the stride.
1364*c0909341SAndroid Build Coastguard Worker        ld1             {\i}, [x7]
1365*c0909341SAndroid Build Coastguard Worker        st1             {v7.4s}, [x7], x8
1366*c0909341SAndroid Build Coastguard Worker.endr
1367*c0909341SAndroid Build Coastguard Worker.if \scale
1368*c0909341SAndroid Build Coastguard Worker        scale_input     .4s, v0.s[0], v16, v17, v18, v19, v20, v21, v22, v23
1369*c0909341SAndroid Build Coastguard Worker        scale_input     .4s, v0.s[0], v24, v25, v26, v27, v28, v29, v30, v31
1370*c0909341SAndroid Build Coastguard Worker.endif
1371*c0909341SAndroid Build Coastguard Worker        blr             x4
        // Narrow 32 -> 16 bit with rounding and saturation, packing two
        // source registers into each destination: v16-v19 take v16-v19 in
        // the low half and v20-v23 in the high half; v20-v23 take v24-v27
        // in the low half and v28-v31 in the high half.
1372*c0909341SAndroid Build Coastguard Worker        sqrshrn         v16.4h,  v16.4s,  #\shift
1373*c0909341SAndroid Build Coastguard Worker        sqrshrn         v17.4h,  v17.4s,  #\shift
1374*c0909341SAndroid Build Coastguard Worker        sqrshrn         v18.4h,  v18.4s,  #\shift
1375*c0909341SAndroid Build Coastguard Worker        sqrshrn         v19.4h,  v19.4s,  #\shift
1376*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v16.8h,  v20.4s,  #\shift
1377*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v17.8h,  v21.4s,  #\shift
1378*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v18.8h,  v22.4s,  #\shift
1379*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v19.8h,  v23.4s,  #\shift
1380*c0909341SAndroid Build Coastguard Worker        sqrshrn         v20.4h,  v24.4s,  #\shift
1381*c0909341SAndroid Build Coastguard Worker        sqrshrn         v21.4h,  v25.4s,  #\shift
1382*c0909341SAndroid Build Coastguard Worker        sqrshrn         v22.4h,  v26.4s,  #\shift
1383*c0909341SAndroid Build Coastguard Worker        sqrshrn         v23.4h,  v27.4s,  #\shift
1384*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v20.8h,  v28.4s,  #\shift
1385*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v21.8h,  v29.4s,  #\shift
1386*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v22.8h,  v30.4s,  #\shift
1387*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v23.8h,  v31.4s,  #\shift
        // The transpose/store/return tail is emitted only in the unscaled
        // instantiation; the scaled one jumps to that shared copy.
1388*c0909341SAndroid Build Coastguard Worker.if \scale
1389*c0909341SAndroid Build Coastguard Worker        b               L(horz_16x4_epilog)
1390*c0909341SAndroid Build Coastguard Worker.else
1391*c0909341SAndroid Build Coastguard WorkerL(horz_16x4_epilog):
        // transpose_4x8h is defined elsewhere; v4-v7 are passed as its last
        // four operands (presumably temporaries - confirm at its definition).
1392*c0909341SAndroid Build Coastguard Worker        transpose_4x8h  v16, v17, v18, v19, v4,  v5,  v6,  v7
1393*c0909341SAndroid Build Coastguard Worker        transpose_4x8h  v20, v21, v22, v23, v4,  v5,  v6,  v7
1394*c0909341SAndroid Build Coastguard Worker
        // Store interleaved (v16, v20, v17, v21, ...), 16 bytes per vector.
1395*c0909341SAndroid Build Coastguard Worker.irp i, v16.8h, v20.8h, v17.8h, v21.8h, v18.8h, v22.8h, v19.8h, v23.8h
1396*c0909341SAndroid Build Coastguard Worker        st1             {\i}, [x6], #16
1397*c0909341SAndroid Build Coastguard Worker.endr
1398*c0909341SAndroid Build Coastguard Worker
1399*c0909341SAndroid Build Coastguard Worker        ret             x14
1400*c0909341SAndroid Build Coastguard Worker.endif
1401*c0909341SAndroid Build Coastguard Workerendfunc
1402*c0909341SAndroid Build Coastguard Worker.endm
1403*c0909341SAndroid Build Coastguard Worker
// Emit inv_txfm_horz_scale_16x4_neon (input scaling, narrowing shift 1) and
// inv_txfm_horz_16x4_neon (no scaling, narrowing shift 2). The scaled variant
// comes first and branches forward to the horz_16x4_epilog label, which only
// the unscaled instantiation defines.
1404*c0909341SAndroid Build Coastguard Workerdef_horz_16 scale=1, shift=1, suffix=_scale
1405*c0909341SAndroid Build Coastguard Workerdef_horz_16 scale=0, shift=2
1406*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_vert_8x16_neon: second pass over an 8-wide, 16-tall column
// block of 16-bit intermediates.
//   x5   routine called with blr; operates on v16-v31 (.8h)
//   x6   first operand handed to load_add_store_8x16 (macro defined
//        elsewhere; presumably the destination it adds the result into -
//        confirm at its definition)
//   x7   source pointer for the 16 input vectors; afterwards reused as the
//        second operand of load_add_store_8x16
//   x8   byte stride between consecutive input vectors
//   x14  holds the return address, since blr x5 overwrites x30
1407*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_vert_8x16_neon
1408*c0909341SAndroid Build Coastguard Worker        mov             x14, x30
1409*c0909341SAndroid Build Coastguard Worker.irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
1410*c0909341SAndroid Build Coastguard Worker        ld1             {v\i\().8h}, [x7], x8
1411*c0909341SAndroid Build Coastguard Worker.endr
1412*c0909341SAndroid Build Coastguard Worker        blr             x5
1413*c0909341SAndroid Build Coastguard Worker        load_add_store_8x16 x6, x7
1414*c0909341SAndroid Build Coastguard Worker        ret             x14
1415*c0909341SAndroid Build Coastguard Workerendfunc
1416*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_16x16_neon: shared body of the 16x16 entry points emitted by
// def_fn_16x16, which sets x4, x5 and x13 and then branches here.
//   x0   base that is offset and handed to the vertical pass in x6
//        (presumably the destination pixels - confirm with the C caller)
//   x2   base of the 32-bit coefficients consumed by the horizontal pass
//   w3   compared against the thresholds at x13 (the table names suggest an
//        end-of-block index - confirm with the C caller)
//   x4   first-pass routine, x5 second-pass routine (called by the helpers)
//   x13  table of 16-bit thresholds, read with post-increment
//   x15  holds the return address across the bl calls (the helpers use x14)
// A 512-byte stack buffer carries the 16-bit intermediate between the passes.
1417*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_16x16_neon
1418*c0909341SAndroid Build Coastguard Worker        mov             x15, x30
        // Four horizontal calls, each storing 128 bytes, fill this buffer.
1419*c0909341SAndroid Build Coastguard Worker        sub             sp,  sp,  #512
        // w12 = first threshold.
1420*c0909341SAndroid Build Coastguard Worker        ldrh            w12, [x13], #2
1421*c0909341SAndroid Build Coastguard Worker.irp i, 0, 4, 8, 12
        // x6 = slot in the stack buffer for this slice (128 bytes apart).
1422*c0909341SAndroid Build Coastguard Worker        add             x6,  sp,  #(\i*16*2)
1423*c0909341SAndroid Build Coastguard Worker.if \i > 0
        // w8 = count consumed by the zero-fill loop at 2: if w3 is below the
        // current threshold; in that case this and all later slices are
        // skipped and their buffer slots are cleared instead.
1424*c0909341SAndroid Build Coastguard Worker        mov             w8,  #(16 - \i)
1425*c0909341SAndroid Build Coastguard Worker        cmp             w3,  w12
1426*c0909341SAndroid Build Coastguard Worker        b.lt            1f
1427*c0909341SAndroid Build Coastguard Worker.if \i < 12
        // Fetch the threshold for the next slice; none is needed after the
        // last one, so the fourth table entry is never read here.
1428*c0909341SAndroid Build Coastguard Worker        ldrh            w12, [x13], #2
1429*c0909341SAndroid Build Coastguard Worker.endif
1430*c0909341SAndroid Build Coastguard Worker.endif
        // x7 = coefficients offset by 4 bytes per slice index, x8 = 64-byte
        // stride between the vectors loaded by the horizontal pass.
1431*c0909341SAndroid Build Coastguard Worker        add             x7,  x2,  #(\i*4)
1432*c0909341SAndroid Build Coastguard Worker        mov             x8,  #16*4
1433*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_horz_16x4_neon
1434*c0909341SAndroid Build Coastguard Worker.endr
1435*c0909341SAndroid Build Coastguard Worker        b               3f
1436*c0909341SAndroid Build Coastguard Worker1:
        // Early-out path: zero the rest of the intermediate buffer from x6.
1437*c0909341SAndroid Build Coastguard Worker        movi            v4.8h,  #0
1438*c0909341SAndroid Build Coastguard Worker        movi            v5.8h,  #0
1439*c0909341SAndroid Build Coastguard Worker        movi            v6.8h,  #0
1440*c0909341SAndroid Build Coastguard Worker        movi            v7.8h,  #0
1441*c0909341SAndroid Build Coastguard Worker2:
        // Each iteration takes 4 off the count and clears 2 x 64 = 128
        // bytes, i.e. exactly one skipped slice. The stores do not touch the
        // flags, so b.gt still tests the result of the subs.
1442*c0909341SAndroid Build Coastguard Worker        subs            w8,  w8,  #4
1443*c0909341SAndroid Build Coastguard Worker.rept 2
1444*c0909341SAndroid Build Coastguard Worker        st1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x6], #64
1445*c0909341SAndroid Build Coastguard Worker.endr
1446*c0909341SAndroid Build Coastguard Worker        b.gt            2b
1447*c0909341SAndroid Build Coastguard Worker3:
        // Second pass in two 8-wide halves: byte offset 0 and 16 into both
        // x0 and the stack buffer, with 32 bytes between buffer rows.
1448*c0909341SAndroid Build Coastguard Worker.irp i, 0, 8
1449*c0909341SAndroid Build Coastguard Worker        add             x6,  x0,  #(\i*2)
1450*c0909341SAndroid Build Coastguard Worker        add             x7,  sp,  #(\i*2)
1451*c0909341SAndroid Build Coastguard Worker        mov             x8,  #32
1452*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_add_vert_8x16_neon
1453*c0909341SAndroid Build Coastguard Worker.endr
1454*c0909341SAndroid Build Coastguard Worker
1455*c0909341SAndroid Build Coastguard Worker        add             sp,  sp,  #512
1456*c0909341SAndroid Build Coastguard Worker        ret             x15
1457*c0909341SAndroid Build Coastguard Workerendfunc
1458*c0909341SAndroid Build Coastguard Worker
// Thresholds that inv_txfm_add_16x16_neon compares against w3 before its
// second, third and fourth horizontal slice; when w3 is below the current
// threshold the remaining slices are skipped and zero-filled. def_fn_16x16
// selects this table when both transforms are identity or neither is.
// inv_txfm_add_16x16_neon never loads the fourth entry.
1459*c0909341SAndroid Build Coastguard Workerconst eob_16x16
1460*c0909341SAndroid Build Coastguard Worker        .short 10, 36, 78, 256
1461*c0909341SAndroid Build Coastguard Workerendconst
1462*c0909341SAndroid Build Coastguard Worker
// Threshold table used in place of eob_16x16 when exactly one of the two
// transforms is identity (see the table selection in def_fn_16x16). It is
// read the same way: inv_txfm_add_16x16_neon compares w3 against the first
// three entries and never loads the fourth.
1463*c0909341SAndroid Build Coastguard Workerconst eob_16x16_identity
1464*c0909341SAndroid Build Coastguard Worker        .short 4, 8, 12, 256
1465*c0909341SAndroid Build Coastguard Workerendconst
1466*c0909341SAndroid Build Coastguard Worker
// def_fn_16x16 txfm1, txfm2
// Emits the exported entry point inv_txfm_add_<txfm1>_<txfm2>_16x16_16bpc_neon.
// It only sets up the registers consumed by inv_txfm_add_16x16_neon and then
// branches there:
//   x4   address of the first-pass routine   inv_<txfm1>_4s_x16_neon
//   x5   address of the second-pass routine  inv_<txfm2>_8h_x16_neon
//   x13  threshold table: eob_16x16_identity when exactly one of the two
//        transforms is identity, eob_16x16 otherwise
1467*c0909341SAndroid Build Coastguard Worker.macro def_fn_16x16 txfm1, txfm2
1468*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_\txfm1\()_\txfm2\()_16x16_16bpc_neon, export=1
        // Only the dct/dct combination gets idct_dc (macro defined elsewhere;
        // presumably a DC-only shortcut - confirm at its definition).
1469*c0909341SAndroid Build Coastguard Worker.ifc \txfm1\()_\txfm2, dct_dct
1470*c0909341SAndroid Build Coastguard Worker        idct_dc         16,  16,  2
1471*c0909341SAndroid Build Coastguard Worker.endif
        // adr is a short-range PC-relative form, so the first-pass routine
        // has to be assembled close by; the second-pass symbol goes through
        // movrel and the X() wrapper (both defined elsewhere).
1472*c0909341SAndroid Build Coastguard Worker        adr             x4,  inv_\txfm1\()_4s_x16_neon
1473*c0909341SAndroid Build Coastguard Worker        movrel          x5,  X(inv_\txfm2\()_8h_x16_neon)
1474*c0909341SAndroid Build Coastguard Worker.ifc \txfm1, identity
1475*c0909341SAndroid Build Coastguard Worker.ifc \txfm2, identity
1476*c0909341SAndroid Build Coastguard Worker        movrel          x13, eob_16x16
1477*c0909341SAndroid Build Coastguard Worker.else
1478*c0909341SAndroid Build Coastguard Worker        movrel          x13, eob_16x16_identity
1479*c0909341SAndroid Build Coastguard Worker.endif
1480*c0909341SAndroid Build Coastguard Worker.else
1481*c0909341SAndroid Build Coastguard Worker.ifc \txfm2, identity
1482*c0909341SAndroid Build Coastguard Worker        movrel          x13, eob_16x16_identity
1483*c0909341SAndroid Build Coastguard Worker.else
1484*c0909341SAndroid Build Coastguard Worker        movrel          x13, eob_16x16
1485*c0909341SAndroid Build Coastguard Worker.endif
1486*c0909341SAndroid Build Coastguard Worker.endif
        // Plain branch, not a call: x30 is left untouched, so the shared body
        // returns straight to whoever called this entry point.
1487*c0909341SAndroid Build Coastguard Worker        b               inv_txfm_add_16x16_neon
1488*c0909341SAndroid Build Coastguard Workerendfunc
1489*c0909341SAndroid Build Coastguard Worker.endm
1490*c0909341SAndroid Build Coastguard Worker
// Instantiate the exported 16x16 entry points, one per (first pass, second
// pass) transform pair. In this list identity is only combined with dct or
// with itself; no adst/identity or flipadst/identity pairing is emitted.
1491*c0909341SAndroid Build Coastguard Workerdef_fn_16x16 dct, dct
1492*c0909341SAndroid Build Coastguard Workerdef_fn_16x16 identity, identity
1493*c0909341SAndroid Build Coastguard Workerdef_fn_16x16 dct, adst
1494*c0909341SAndroid Build Coastguard Workerdef_fn_16x16 dct, flipadst
1495*c0909341SAndroid Build Coastguard Workerdef_fn_16x16 dct, identity
1496*c0909341SAndroid Build Coastguard Workerdef_fn_16x16 adst, dct
1497*c0909341SAndroid Build Coastguard Workerdef_fn_16x16 adst, adst
1498*c0909341SAndroid Build Coastguard Workerdef_fn_16x16 adst, flipadst
1499*c0909341SAndroid Build Coastguard Workerdef_fn_16x16 flipadst, dct
1500*c0909341SAndroid Build Coastguard Workerdef_fn_16x16 flipadst, adst
1501*c0909341SAndroid Build Coastguard Workerdef_fn_16x16 flipadst, flipadst
1502*c0909341SAndroid Build Coastguard Workerdef_fn_16x16 identity, dct
1503*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_16x4_neon: both passes of a 16-wide, 4-tall transform, done
// entirely in registers.
//   x0   base handed to load_add_store_8x4: first half at x0, second half at
//        x0 + 16 bytes (macro defined elsewhere; presumably it adds the
//        result into the destination - confirm at its definition)
//   x2   32-bit coefficients: 16 consecutive 16-byte vectors, each cleared
//        to zero right after it is read; x2 ends 256 bytes further on
//   x4   first-pass routine, operating on v16-v31 (.4s)
//   x5   second-pass routine, called once per half on v16-v19 (.8h)
//   x15  holds the return address, since the blr calls overwrite x30
// The second half is assembled from v24-v31 only after the first blr x5 and
// load_add_store_8x4, so this code relies on both leaving v24-v31 intact.
1504*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_16x4_neon
1505*c0909341SAndroid Build Coastguard Worker        mov             x15, x30
        // v4 is the zero vector written back over the consumed coefficients.
1506*c0909341SAndroid Build Coastguard Worker        movi            v4.4s,  #0
1507*c0909341SAndroid Build Coastguard Worker
1508*c0909341SAndroid Build Coastguard Worker.irp i, v16.4s, v17.4s, v18.4s, v19.4s, v20.4s, v21.4s, v22.4s, v23.4s, v24.4s, v25.4s, v26.4s, v27.4s, v28.4s, v29.4s, v30.4s, v31.4s
1509*c0909341SAndroid Build Coastguard Worker        ld1             {\i},    [x2]
1510*c0909341SAndroid Build Coastguard Worker        st1             {v4.4s}, [x2], #16
1511*c0909341SAndroid Build Coastguard Worker.endr
1512*c0909341SAndroid Build Coastguard Worker
1513*c0909341SAndroid Build Coastguard Worker        blr             x4
1514*c0909341SAndroid Build Coastguard Worker
        // First half: narrow v16-v23 to 16 bit (rounding shift by 1, with
        // saturation) into v16-v19, v16-v19 supplying the low halves and
        // v20-v23 the high halves.
1515*c0909341SAndroid Build Coastguard Worker        sqrshrn         v16.4h,  v16.4s,  #1
1516*c0909341SAndroid Build Coastguard Worker        sqrshrn         v17.4h,  v17.4s,  #1
1517*c0909341SAndroid Build Coastguard Worker        sqrshrn         v18.4h,  v18.4s,  #1
1518*c0909341SAndroid Build Coastguard Worker        sqrshrn         v19.4h,  v19.4s,  #1
1519*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v16.8h,  v20.4s,  #1
1520*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v17.8h,  v21.4s,  #1
1521*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v18.8h,  v22.4s,  #1
1522*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v19.8h,  v23.4s,  #1
1523*c0909341SAndroid Build Coastguard Worker        transpose_4x8h  v16, v17, v18, v19, v2,  v3,  v4,  v5
1524*c0909341SAndroid Build Coastguard Worker        blr             x5
1525*c0909341SAndroid Build Coastguard Worker        mov             x6,  x0
1526*c0909341SAndroid Build Coastguard Worker        load_add_store_8x4 x6, x7
1527*c0909341SAndroid Build Coastguard Worker
        // Second half: same sequence, sourced from v24-v31 and written 16
        // bytes further along from x0.
1528*c0909341SAndroid Build Coastguard Worker        sqrshrn         v16.4h,  v24.4s,  #1
1529*c0909341SAndroid Build Coastguard Worker        sqrshrn         v17.4h,  v25.4s,  #1
1530*c0909341SAndroid Build Coastguard Worker        sqrshrn         v18.4h,  v26.4s,  #1
1531*c0909341SAndroid Build Coastguard Worker        sqrshrn         v19.4h,  v27.4s,  #1
1532*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v16.8h,  v28.4s,  #1
1533*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v17.8h,  v29.4s,  #1
1534*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v18.8h,  v30.4s,  #1
1535*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v19.8h,  v31.4s,  #1
1536*c0909341SAndroid Build Coastguard Worker        transpose_4x8h  v16, v17, v18, v19, v2,  v3,  v4,  v5
1537*c0909341SAndroid Build Coastguard Worker        blr             x5
1538*c0909341SAndroid Build Coastguard Worker        add             x6,  x0,  #16
1539*c0909341SAndroid Build Coastguard Worker        load_add_store_8x4 x6, x7
1540*c0909341SAndroid Build Coastguard Worker
1541*c0909341SAndroid Build Coastguard Worker        ret             x15
1542*c0909341SAndroid Build Coastguard Workerendfunc
1543*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_4x16_neon: shared body that the 4x16 entry points generated by
// def_fn_416 branch to.
//
// Register usage, as established by this function and by def_fn_416:
//   x0  - passed to load_add_store_4x16 (presumably the destination pixels)
//   x2  - coefficient buffer; read 16 bytes at a time and zeroed as it is read
//   w3  - compared against the thresholds at x13 (presumably the eob value)
//   x4  - first-pass transform, called once per group of four input vectors
//   x5  - second-pass transform, called once at the end
//   x13 - pointer to a table of 16-bit thresholds (one of the eob_4x16* tables)
//   x15 - copy of x30, because the blr calls below overwrite x30
//
// The buffer is handled as four interleaved groups that start at byte offsets
// 48, 32, 16 and 0 and use a 64 byte stride (x11). A group whose threshold is
// greater than w3 is not read at all and its result registers are zeroed.
// The group at offset 0 is always processed. Narrowed first-pass results end
// up in v28-v31, v24-v27, v20-v23 and v16-v19 respectively.
1544*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_4x16_neon
        // w12 = third table entry: threshold for the group at offset 48.
1545*c0909341SAndroid Build Coastguard Worker        ldrh            w12, [x13, #4]
        // Keep the return address; ret x15 is used at the end.
1546*c0909341SAndroid Build Coastguard Worker        mov             x15, x30
1547*c0909341SAndroid Build Coastguard Worker
        // x11 = byte stride between consecutive vectors of one group.
1548*c0909341SAndroid Build Coastguard Worker        mov             x11, #64
1549*c0909341SAndroid Build Coastguard Worker
1550*c0909341SAndroid Build Coastguard Worker        cmp             w3,  w12
        // ldrh does not touch the flags, so b.lt still tests the cmp above;
        // w12 now already holds the threshold for the next group.
1551*c0909341SAndroid Build Coastguard Worker        ldrh            w12, [x13, #2]
1552*c0909341SAndroid Build Coastguard Worker        b.lt            1f
1553*c0909341SAndroid Build Coastguard Worker
        // Group at offset 48: load four vectors, storing zeros (v2) over each
        // one after it has been read.
1554*c0909341SAndroid Build Coastguard Worker        add             x6,  x2,  #48
1555*c0909341SAndroid Build Coastguard Worker        movi            v2.4s,   #0
1556*c0909341SAndroid Build Coastguard Worker.irp i, v16.4s, v17.4s, v18.4s, v19.4s
1557*c0909341SAndroid Build Coastguard Worker        ld1             {\i},    [x6]
1558*c0909341SAndroid Build Coastguard Worker        st1             {v2.4s}, [x6], x11
1559*c0909341SAndroid Build Coastguard Worker.endr
1560*c0909341SAndroid Build Coastguard Worker        blr             x4
        // Narrow 32-bit results to 16 bits with a rounding, saturating shift
        // right by 1, placing them in v28-v31.
1561*c0909341SAndroid Build Coastguard Worker        sqrshrn         v28.4h,  v16.4s,  #1
1562*c0909341SAndroid Build Coastguard Worker        sqrshrn         v29.4h,  v17.4s,  #1
1563*c0909341SAndroid Build Coastguard Worker        sqrshrn         v30.4h,  v18.4s,  #1
1564*c0909341SAndroid Build Coastguard Worker        sqrshrn         v31.4h,  v19.4s,  #1
        // transpose_4x4h is a macro defined elsewhere; v4-v7 are its trailing
        // arguments (presumably scratch registers).
1565*c0909341SAndroid Build Coastguard Worker        transpose_4x4h  v28, v29, v30, v31, v4,  v5,  v6,  v7
1566*c0909341SAndroid Build Coastguard Worker
1567*c0909341SAndroid Build Coastguard Worker        b               2f
1568*c0909341SAndroid Build Coastguard Worker1:
        // Group skipped: use zeros as its first-pass result.
1569*c0909341SAndroid Build Coastguard Worker.irp i, v28.4h, v29.4h, v30.4h, v31.4h
1570*c0909341SAndroid Build Coastguard Worker        movi            \i,  #0
1571*c0909341SAndroid Build Coastguard Worker.endr
1572*c0909341SAndroid Build Coastguard Worker2:
        // Same scheme for the group at offset 32 (threshold = second entry),
        // with results in v24-v27.
1573*c0909341SAndroid Build Coastguard Worker        cmp             w3,  w12
1574*c0909341SAndroid Build Coastguard Worker        ldrh            w12, [x13, #0]
1575*c0909341SAndroid Build Coastguard Worker        b.lt            1f
1576*c0909341SAndroid Build Coastguard Worker
1577*c0909341SAndroid Build Coastguard Worker        add             x6,  x2,  #32
1578*c0909341SAndroid Build Coastguard Worker        movi            v2.4s,   #0
1579*c0909341SAndroid Build Coastguard Worker.irp i, v16.4s, v17.4s, v18.4s, v19.4s
1580*c0909341SAndroid Build Coastguard Worker        ld1             {\i},    [x6]
1581*c0909341SAndroid Build Coastguard Worker        st1             {v2.4s}, [x6], x11
1582*c0909341SAndroid Build Coastguard Worker.endr
1583*c0909341SAndroid Build Coastguard Worker        blr             x4
1584*c0909341SAndroid Build Coastguard Worker        sqrshrn         v24.4h,  v16.4s,  #1
1585*c0909341SAndroid Build Coastguard Worker        sqrshrn         v25.4h,  v17.4s,  #1
1586*c0909341SAndroid Build Coastguard Worker        sqrshrn         v26.4h,  v18.4s,  #1
1587*c0909341SAndroid Build Coastguard Worker        sqrshrn         v27.4h,  v19.4s,  #1
1588*c0909341SAndroid Build Coastguard Worker        transpose_4x4h  v24, v25, v26, v27, v4,  v5,  v6,  v7
1589*c0909341SAndroid Build Coastguard Worker
1590*c0909341SAndroid Build Coastguard Worker        b               2f
1591*c0909341SAndroid Build Coastguard Worker1:
1592*c0909341SAndroid Build Coastguard Worker.irp i, v24.4h, v25.4h, v26.4h, v27.4h
1593*c0909341SAndroid Build Coastguard Worker        movi            \i,  #0
1594*c0909341SAndroid Build Coastguard Worker.endr
1595*c0909341SAndroid Build Coastguard Worker2:
        // Group at offset 16 (threshold = first entry), results in v20-v23.
        // No further threshold is loaded: the last group is unconditional.
1596*c0909341SAndroid Build Coastguard Worker        cmp             w3,  w12
1597*c0909341SAndroid Build Coastguard Worker        b.lt            1f
1598*c0909341SAndroid Build Coastguard Worker
1599*c0909341SAndroid Build Coastguard Worker        add             x6,  x2,  #16
1600*c0909341SAndroid Build Coastguard Worker        movi            v2.4s,   #0
1601*c0909341SAndroid Build Coastguard Worker.irp i, v16.4s, v17.4s, v18.4s, v19.4s
1602*c0909341SAndroid Build Coastguard Worker        ld1             {\i},    [x6]
1603*c0909341SAndroid Build Coastguard Worker        st1             {v2.4s}, [x6], x11
1604*c0909341SAndroid Build Coastguard Worker.endr
1605*c0909341SAndroid Build Coastguard Worker        blr             x4
1606*c0909341SAndroid Build Coastguard Worker        sqrshrn         v20.4h,  v16.4s,  #1
1607*c0909341SAndroid Build Coastguard Worker        sqrshrn         v21.4h,  v17.4s,  #1
1608*c0909341SAndroid Build Coastguard Worker        sqrshrn         v22.4h,  v18.4s,  #1
1609*c0909341SAndroid Build Coastguard Worker        sqrshrn         v23.4h,  v19.4s,  #1
1610*c0909341SAndroid Build Coastguard Worker        transpose_4x4h  v20, v21, v22, v23, v4,  v5,  v6,  v7
1611*c0909341SAndroid Build Coastguard Worker
1612*c0909341SAndroid Build Coastguard Worker        b               2f
1613*c0909341SAndroid Build Coastguard Worker1:
1614*c0909341SAndroid Build Coastguard Worker.irp i, v20.4h, v21.4h, v22.4h, v23.4h
1615*c0909341SAndroid Build Coastguard Worker        movi            \i,  #0
1616*c0909341SAndroid Build Coastguard Worker.endr
1617*c0909341SAndroid Build Coastguard Worker2:
1618*c0909341SAndroid Build Coastguard Worker
        // Group at offset 0: always processed. This loop post-increments x2
        // itself, so x2 no longer points at the buffer start afterwards.
1619*c0909341SAndroid Build Coastguard Worker        movi            v2.4s,   #0
1620*c0909341SAndroid Build Coastguard Worker.irp i, v16.4s, v17.4s, v18.4s, v19.4s
1621*c0909341SAndroid Build Coastguard Worker        ld1             {\i},    [x2]
1622*c0909341SAndroid Build Coastguard Worker        st1             {v2.4s}, [x2], x11
1623*c0909341SAndroid Build Coastguard Worker.endr
1624*c0909341SAndroid Build Coastguard Worker        blr             x4
1625*c0909341SAndroid Build Coastguard Worker        sqrshrn         v16.4h,  v16.4s,  #1
1626*c0909341SAndroid Build Coastguard Worker        sqrshrn         v17.4h,  v17.4s,  #1
1627*c0909341SAndroid Build Coastguard Worker        sqrshrn         v18.4h,  v18.4s,  #1
1628*c0909341SAndroid Build Coastguard Worker        sqrshrn         v19.4h,  v19.4s,  #1
        // NOTE(review): the three groups above use transpose_4x4h, this one
        // uses transpose_4x8h. sqrshrn clears the upper 64 bits of v16-v19,
        // so presumably the lower halves come out the same - confirm against
        // the macro definitions.
1629*c0909341SAndroid Build Coastguard Worker        transpose_4x8h  v16, v17, v18, v19, v4,  v5,  v6,  v7
1630*c0909341SAndroid Build Coastguard Worker
        // Second pass; presumably it consumes v16-v31 as prepared above.
1631*c0909341SAndroid Build Coastguard Worker        blr             x5
1632*c0909341SAndroid Build Coastguard Worker
        // Macro defined elsewhere; presumably adds the result to the pixels
        // at x0 and stores them back.
1633*c0909341SAndroid Build Coastguard Worker        load_add_store_4x16 x0, x6
1634*c0909341SAndroid Build Coastguard Worker
        // Return through the address saved on entry.
1635*c0909341SAndroid Build Coastguard Worker        ret             x15
1636*c0909341SAndroid Build Coastguard Workerendfunc
1637*c0909341SAndroid Build Coastguard Worker
// Threshold tables for inv_txfm_add_4x16_neon, which loads the 16-bit entries
// at byte offsets 4, 2 and 0 (in that order) and compares each against w3.
// The fourth entry of each table is not read by that function.
// def_fn_416 selects the table: eob_4x16 when both transforms are identity or
// neither is, eob_4x16_identity1 when only txfm1 is identity, and
// eob_4x16_identity2 when only txfm2 is identity.
1638*c0909341SAndroid Build Coastguard Workerconst eob_4x16
1639*c0909341SAndroid Build Coastguard Worker        .short 13, 29, 45, 64
1640*c0909341SAndroid Build Coastguard Workerendconst
1641*c0909341SAndroid Build Coastguard Worker
// Used when only the first transform (txfm1) is identity.
1642*c0909341SAndroid Build Coastguard Workerconst eob_4x16_identity1
1643*c0909341SAndroid Build Coastguard Worker        .short 16, 32, 48, 64
1644*c0909341SAndroid Build Coastguard Workerendconst
1645*c0909341SAndroid Build Coastguard Worker
// Used when only the second transform (txfm2) is identity.
1646*c0909341SAndroid Build Coastguard Workerconst eob_4x16_identity2
1647*c0909341SAndroid Build Coastguard Worker        .short 4, 8, 12, 64
1648*c0909341SAndroid Build Coastguard Workerendconst
1649*c0909341SAndroid Build Coastguard Worker
// def_fn_416 w, h, txfm1, txfm2
// Emits the exported entry point inv_txfm_add_<txfm1>_<txfm2>_<w>x<h>_16bpc_neon.
// The entry point loads the per-transform function pointers and then
// tail-branches to the shared body inv_txfm_add_<w>x<h>_neon:
//   x4  = inv_<txfm1>_4s_x<w>_neon (first pass; taken with adr, so the target
//         has to be within adr range of the entry point)
//   x5  = inv_<txfm2>_4h_x<h>_neon when w == 4, inv_<txfm2>_8h_x<h>_neon
//         otherwise (second pass)
//   x13 = threshold table, only set when w == 4
// For the dct/dct pair the idct_dc macro (defined elsewhere) is expanded
// first; presumably it handles the DC-only case - confirm at its definition.
1650*c0909341SAndroid Build Coastguard Worker.macro def_fn_416 w, h, txfm1, txfm2
1651*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_\txfm1\()_\txfm2\()_\w\()x\h\()_16bpc_neon, export=1
1652*c0909341SAndroid Build Coastguard Worker.ifc \txfm1\()_\txfm2, dct_dct
1653*c0909341SAndroid Build Coastguard Worker        idct_dc         \w,  \h,  1
1654*c0909341SAndroid Build Coastguard Worker.endif
1655*c0909341SAndroid Build Coastguard Worker.if \w == 4
1656*c0909341SAndroid Build Coastguard Worker        adr             x4,  inv_\txfm1\()_4s_x\w\()_neon
1657*c0909341SAndroid Build Coastguard Worker        movrel          x5,  X(inv_\txfm2\()_4h_x\h\()_neon)
        // Table choice: identity1 if only txfm1 is identity, identity2 if
        // only txfm2 is identity, the plain eob_4x16 table in the other cases
        // (including identity/identity).
1658*c0909341SAndroid Build Coastguard Worker.ifc \txfm1, identity
1659*c0909341SAndroid Build Coastguard Worker.ifc \txfm2, identity
1660*c0909341SAndroid Build Coastguard Worker        movrel          x13, eob_4x16
1661*c0909341SAndroid Build Coastguard Worker.else
1662*c0909341SAndroid Build Coastguard Worker        movrel          x13, eob_4x16_identity1
1663*c0909341SAndroid Build Coastguard Worker.endif
1664*c0909341SAndroid Build Coastguard Worker.else
1665*c0909341SAndroid Build Coastguard Worker.ifc \txfm2, identity
1666*c0909341SAndroid Build Coastguard Worker        movrel          x13, eob_4x16_identity2
1667*c0909341SAndroid Build Coastguard Worker.else
1668*c0909341SAndroid Build Coastguard Worker        movrel          x13, eob_4x16
1669*c0909341SAndroid Build Coastguard Worker.endif
1670*c0909341SAndroid Build Coastguard Worker.endif
1671*c0909341SAndroid Build Coastguard Worker.else
        // w != 4: no threshold table is loaded into x13 on this path.
1672*c0909341SAndroid Build Coastguard Worker        adr             x4,  inv_\txfm1\()_4s_x\w\()_neon
1673*c0909341SAndroid Build Coastguard Worker        movrel          x5,  X(inv_\txfm2\()_8h_x\h\()_neon)
1674*c0909341SAndroid Build Coastguard Worker.endif
        // Tail call: x30 is left untouched, so the shared body returns
        // straight to the original caller.
1675*c0909341SAndroid Build Coastguard Worker        b               inv_txfm_add_\w\()x\h\()_neon
1676*c0909341SAndroid Build Coastguard Workerendfunc
1677*c0909341SAndroid Build Coastguard Worker.endm
1678*c0909341SAndroid Build Coastguard Worker
// def_fns_416 w, h
// Expands def_fn_416 for each of the 16 (txfm1, txfm2) pairs listed below,
// producing one exported entry point per pair for the given block size.
1679*c0909341SAndroid Build Coastguard Worker.macro def_fns_416 w, h
1680*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, dct, dct
1681*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, identity, identity
1682*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, dct, adst
1683*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, dct, flipadst
1684*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, dct, identity
1685*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, adst, dct
1686*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, adst, adst
1687*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, adst, flipadst
1688*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, flipadst, dct
1689*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, flipadst, adst
1690*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, flipadst, flipadst
1691*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, identity, dct
1692*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, adst, identity
1693*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, flipadst, identity
1694*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, identity, adst
1695*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, identity, flipadst
1696*c0909341SAndroid Build Coastguard Worker.endm
1697*c0909341SAndroid Build Coastguard Worker
// Instantiate the entry points for both block sizes: 4 wide by 16 high and
// 16 wide by 4 high.
1698*c0909341SAndroid Build Coastguard Workerdef_fns_416 4, 16
1699*c0909341SAndroid Build Coastguard Workerdef_fns_416 16, 4
1700*c0909341SAndroid Build Coastguard Worker
1701*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_16x8_neon: shared body for the 16x8 inverse-transform-and-add
// functions.
//
// Register usage visible in this function:
//   x0  - base passed to load_add_store_8x8 (first via x6, then x0 + 16)
//   x2  - coefficient buffer; each vector is zeroed in memory after it is read
//   w3  - compared against w13 (presumably the eob value against a threshold
//         that the caller placed in w13 - the caller is outside this section)
//   x4  - function called after each batch of 16 vectors is loaded and scaled
//   x5  - function called twice at the end, once per 8x8 store
//   x15 - copy of x30, because blr overwrites x30
// d8-d15 are spilled to a 64 byte stack frame because v8-v15 are used as
// temporaries and their low 64 bits are callee-saved under AAPCS64.
1702*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_16x8_neon
1703*c0909341SAndroid Build Coastguard Worker        mov             x15, x30
1704*c0909341SAndroid Build Coastguard Worker        stp             d8,  d9,  [sp, #-0x40]!
1705*c0909341SAndroid Build Coastguard Worker        stp             d10, d11, [sp, #0x10]
1706*c0909341SAndroid Build Coastguard Worker        stp             d12, d13, [sp, #0x20]
1707*c0909341SAndroid Build Coastguard Worker        stp             d14, d15, [sp, #0x30]
1708*c0909341SAndroid Build Coastguard Worker
        // If w3 < w13, skip the batch at x2 + 16 and use zeros for v8-v15.
        // mov does not alter the flags, so b.lt still tests this cmp.
1709*c0909341SAndroid Build Coastguard Worker        cmp             w3,  w13
        // x11 = byte stride between consecutive vectors of one batch.
1710*c0909341SAndroid Build Coastguard Worker        mov             x11, #32
1711*c0909341SAndroid Build Coastguard Worker        b.lt            1f
1712*c0909341SAndroid Build Coastguard Worker
1713*c0909341SAndroid Build Coastguard Worker        movi            v4.4s,  #0
        // v0.2s = (2896*8) << 16 in both lanes; consumed by scale_input
        // (macro defined elsewhere).
1714*c0909341SAndroid Build Coastguard Worker        movz            w16, #2896*8, lsl #16
1715*c0909341SAndroid Build Coastguard Worker        dup             v0.2s,   w16
1716*c0909341SAndroid Build Coastguard Worker
        // Load 16 vectors starting at x2 + 16 with a 32 byte stride, storing
        // zeros (v4) over each one after it has been read.
1717*c0909341SAndroid Build Coastguard Worker        add             x6,  x2,  #16
1718*c0909341SAndroid Build Coastguard Worker.irp i, v16.4s, v17.4s, v18.4s, v19.4s, v20.4s, v21.4s, v22.4s, v23.4s, v24.4s, v25.4s, v26.4s, v27.4s, v28.4s, v29.4s, v30.4s, v31.4s
1719*c0909341SAndroid Build Coastguard Worker        ld1             {\i},    [x6]
1720*c0909341SAndroid Build Coastguard Worker        st1             {v4.4s}, [x6], x11
1721*c0909341SAndroid Build Coastguard Worker.endr
1722*c0909341SAndroid Build Coastguard Worker
1723*c0909341SAndroid Build Coastguard Worker        scale_input     .4s, v0.s[0], v16, v17, v18, v19, v20, v21, v22, v23
1724*c0909341SAndroid Build Coastguard Worker        scale_input     .4s, v0.s[0], v24, v25, v26, v27, v28, v29, v30, v31
1725*c0909341SAndroid Build Coastguard Worker        blr             x4
1726*c0909341SAndroid Build Coastguard Worker
        // Narrow the 32-bit results to 16 bits (rounding, saturating shift
        // right by 1) and pack them pairwise: v16-v23 into v8-v11 and v24-v31
        // into v12-v15.
1727*c0909341SAndroid Build Coastguard Worker        sqrshrn         v8.4h,   v16.4s,  #1
1728*c0909341SAndroid Build Coastguard Worker        sqrshrn         v9.4h,   v17.4s,  #1
1729*c0909341SAndroid Build Coastguard Worker        sqrshrn         v10.4h,  v18.4s,  #1
1730*c0909341SAndroid Build Coastguard Worker        sqrshrn         v11.4h,  v19.4s,  #1
1731*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v8.8h,   v20.4s,  #1
1732*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v9.8h,   v21.4s,  #1
1733*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v10.8h,  v22.4s,  #1
1734*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v11.8h,  v23.4s,  #1
1735*c0909341SAndroid Build Coastguard Worker        sqrshrn         v12.4h,  v24.4s,  #1
1736*c0909341SAndroid Build Coastguard Worker        sqrshrn         v13.4h,  v25.4s,  #1
1737*c0909341SAndroid Build Coastguard Worker        sqrshrn         v14.4h,  v26.4s,  #1
1738*c0909341SAndroid Build Coastguard Worker        sqrshrn         v15.4h,  v27.4s,  #1
1739*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v12.8h,  v28.4s,  #1
1740*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v13.8h,  v29.4s,  #1
1741*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v14.8h,  v30.4s,  #1
1742*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v15.8h,  v31.4s,  #1
1743*c0909341SAndroid Build Coastguard Worker
        // transpose_4x8h is a macro defined elsewhere; v2-v5 are its trailing
        // arguments (presumably scratch registers).
1744*c0909341SAndroid Build Coastguard Worker        transpose_4x8h  v8,  v9,  v10, v11, v2,  v3,  v4,  v5
1745*c0909341SAndroid Build Coastguard Worker        transpose_4x8h  v12, v13, v14, v15, v2,  v3,  v4,  v5
1746*c0909341SAndroid Build Coastguard Worker
1747*c0909341SAndroid Build Coastguard Worker        b               2f
1748*c0909341SAndroid Build Coastguard Worker1:
        // Batch skipped: use zeros as its result.
1749*c0909341SAndroid Build Coastguard Worker.irp i, v8.8h, v9.8h, v10.8h, v11.8h, v12.8h, v13.8h, v14.8h, v15.8h
1750*c0909341SAndroid Build Coastguard Worker        movi            \i,  #0
1751*c0909341SAndroid Build Coastguard Worker.endr
1752*c0909341SAndroid Build Coastguard Worker2:
        // Both paths arrive here, so the constant and the zero register are
        // set up again: the skip path never initialised them, and the other
        // path passed v4 to transpose_4x8h.
1753*c0909341SAndroid Build Coastguard Worker        movz            w16, #2896*8, lsl #16
1754*c0909341SAndroid Build Coastguard Worker        dup             v0.2s,   w16
1755*c0909341SAndroid Build Coastguard Worker
1756*c0909341SAndroid Build Coastguard Worker        movi            v4.4s,  #0
        // Unconditional batch starting at x2 itself (same 32 byte stride).
1757*c0909341SAndroid Build Coastguard Worker.irp i, v16.4s, v17.4s, v18.4s, v19.4s, v20.4s, v21.4s, v22.4s, v23.4s, v24.4s, v25.4s, v26.4s, v27.4s, v28.4s, v29.4s, v30.4s, v31.4s
1758*c0909341SAndroid Build Coastguard Worker        ld1             {\i},    [x2]
1759*c0909341SAndroid Build Coastguard Worker        st1             {v4.4s}, [x2], x11
1760*c0909341SAndroid Build Coastguard Worker.endr
1761*c0909341SAndroid Build Coastguard Worker
1762*c0909341SAndroid Build Coastguard Worker        scale_input     .4s, v0.s[0], v16, v17, v18, v19, v20, v21, v22, v23
1763*c0909341SAndroid Build Coastguard Worker        scale_input     .4s, v0.s[0], v24, v25, v26, v27, v28, v29, v30, v31
1764*c0909341SAndroid Build Coastguard Worker        blr             x4
1765*c0909341SAndroid Build Coastguard Worker
        // Narrow v16-v23 in place into v16-v19.
1766*c0909341SAndroid Build Coastguard Worker        sqrshrn         v16.4h,  v16.4s,  #1
1767*c0909341SAndroid Build Coastguard Worker        sqrshrn         v17.4h,  v17.4s,  #1
1768*c0909341SAndroid Build Coastguard Worker        sqrshrn         v18.4h,  v18.4s,  #1
1769*c0909341SAndroid Build Coastguard Worker        sqrshrn         v19.4h,  v19.4s,  #1
1770*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v16.8h,  v20.4s,  #1
1771*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v17.8h,  v21.4s,  #1
1772*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v18.8h,  v22.4s,  #1
1773*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v19.8h,  v23.4s,  #1
1774*c0909341SAndroid Build Coastguard Worker
        // v20-v23 are free now: fill them with the first half of the earlier
        // batch (v8-v11), which also frees v8-v11 for reuse below.
1775*c0909341SAndroid Build Coastguard Worker        mov             v20.16b, v8.16b
1776*c0909341SAndroid Build Coastguard Worker        mov             v21.16b, v9.16b
1777*c0909341SAndroid Build Coastguard Worker        mov             v22.16b, v10.16b
1778*c0909341SAndroid Build Coastguard Worker        mov             v23.16b, v11.16b
1779*c0909341SAndroid Build Coastguard Worker
1780*c0909341SAndroid Build Coastguard Worker        transpose_4x8h  v16, v17, v18, v19, v2,  v3,  v4,  v5
1781*c0909341SAndroid Build Coastguard Worker
        // Narrow v24-v31 into v8-v11; they are held there across the first
        // blr x5 and the first store.
1782*c0909341SAndroid Build Coastguard Worker        sqrshrn         v8.4h,   v24.4s,  #1
1783*c0909341SAndroid Build Coastguard Worker        sqrshrn         v9.4h,   v25.4s,  #1
1784*c0909341SAndroid Build Coastguard Worker        sqrshrn         v10.4h,  v26.4s,  #1
1785*c0909341SAndroid Build Coastguard Worker        sqrshrn         v11.4h,  v27.4s,  #1
1786*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v8.8h,   v28.4s,  #1
1787*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v9.8h,   v29.4s,  #1
1788*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v10.8h,  v30.4s,  #1
1789*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v11.8h,  v31.4s,  #1
1790*c0909341SAndroid Build Coastguard Worker
1791*c0909341SAndroid Build Coastguard Worker        transpose_4x8h  v8,  v9, v10, v11, v2,  v3,  v4,  v5
1792*c0909341SAndroid Build Coastguard Worker
        // Second pass on the first set of inputs (presumably v16-v23).
1793*c0909341SAndroid Build Coastguard Worker        blr             x5
1794*c0909341SAndroid Build Coastguard Worker
        // Work on a copy so that x0 still holds the original base below.
1795*c0909341SAndroid Build Coastguard Worker        mov             x6,  x0
1796*c0909341SAndroid Build Coastguard Worker        load_add_store_8x8 x6, x7
1797*c0909341SAndroid Build Coastguard Worker
        // Move the held-back results (v8-v15) into v16-v23 for the second
        // invocation of x5.
1798*c0909341SAndroid Build Coastguard Worker        mov             v16.16b, v8.16b
1799*c0909341SAndroid Build Coastguard Worker        mov             v17.16b, v9.16b
1800*c0909341SAndroid Build Coastguard Worker        mov             v18.16b, v10.16b
1801*c0909341SAndroid Build Coastguard Worker        mov             v19.16b, v11.16b
1802*c0909341SAndroid Build Coastguard Worker        mov             v20.16b, v12.16b
1803*c0909341SAndroid Build Coastguard Worker        mov             v21.16b, v13.16b
1804*c0909341SAndroid Build Coastguard Worker        mov             v22.16b, v14.16b
1805*c0909341SAndroid Build Coastguard Worker        mov             v23.16b, v15.16b
1806*c0909341SAndroid Build Coastguard Worker
1807*c0909341SAndroid Build Coastguard Worker        blr             x5
1808*c0909341SAndroid Build Coastguard Worker
        // Second store starts 16 bytes past the original x0.
1809*c0909341SAndroid Build Coastguard Worker        add             x0,  x0,  #16
1810*c0909341SAndroid Build Coastguard Worker        load_add_store_8x8 x0, x7
1811*c0909341SAndroid Build Coastguard Worker
        // Restore d8-d15, release the frame and return through x15.
1812*c0909341SAndroid Build Coastguard Worker        ldp             d14, d15, [sp, #0x30]
1813*c0909341SAndroid Build Coastguard Worker        ldp             d12, d13, [sp, #0x20]
1814*c0909341SAndroid Build Coastguard Worker        ldp             d10, d11, [sp, #0x10]
1815*c0909341SAndroid Build Coastguard Worker        ldp             d8,  d9,  [sp], 0x40
1816*c0909341SAndroid Build Coastguard Worker        ret             x15
1817*c0909341SAndroid Build Coastguard Workerendfunc
1818*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_8x16_neon: shared body for the 8x16 inverse-transform-and-add
// functions.
//
// Register usage visible in this function:
//   x0  - passed to load_add_store_8x16 (presumably the destination pixels)
//   x2  - coefficient buffer; each vector is zeroed in memory after it is read
//   w3  - compared against the thresholds at x13 (presumably the eob value)
//   x4  - function called after each group of 8 vectors is loaded and scaled
//   x5  - function called once at the end
//   x13 - pointer to a table of 16-bit thresholds; entries at byte offsets
//         4, 2 and 0 are read (presumably one of the eob_8x16* tables)
//   x15 - copy of x30, because blr overwrites x30
// d8-d11 are spilled to a 32 byte stack frame because v8-v11 are used as
// temporaries and their low 64 bits are callee-saved under AAPCS64.
//
// The buffer is handled as four interleaved groups that start at byte offsets
// 48, 32, 16 and 0 and use a 64 byte stride (x11). A group whose threshold is
// greater than w3 is skipped and its result registers are zeroed; the group at
// offset 0 is always processed.
1819*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_8x16_neon
1820*c0909341SAndroid Build Coastguard Worker        mov             x15, x30
1821*c0909341SAndroid Build Coastguard Worker        stp             d8,  d9,  [sp, #-0x20]!
1822*c0909341SAndroid Build Coastguard Worker        stp             d10, d11, [sp, #0x10]
        // w12 = third table entry: threshold for the group at offset 48.
1823*c0909341SAndroid Build Coastguard Worker        ldrh            w12, [x13, #4]
1824*c0909341SAndroid Build Coastguard Worker
1825*c0909341SAndroid Build Coastguard Worker        mov             x11, #64
1826*c0909341SAndroid Build Coastguard Worker
1827*c0909341SAndroid Build Coastguard Worker        cmp             w3,  w12
        // ldrh does not touch the flags, so b.lt still tests the cmp above;
        // w12 now already holds the threshold for the next group.
1828*c0909341SAndroid Build Coastguard Worker        ldrh            w12, [x13, #2]
1829*c0909341SAndroid Build Coastguard Worker        b.lt            1f
1830*c0909341SAndroid Build Coastguard Worker
        // Group at offset 48, results in v28-v31.
1831*c0909341SAndroid Build Coastguard Worker        add             x6,  x2,  #48
1832*c0909341SAndroid Build Coastguard Worker        movi            v4.4s,   #0
        // v0.2s = (2896*8) << 16 in both lanes; consumed by scale_input
        // (macro defined elsewhere).
1833*c0909341SAndroid Build Coastguard Worker        movz            w16, #2896*8, lsl #16
1834*c0909341SAndroid Build Coastguard Worker        dup             v0.2s,   w16
1835*c0909341SAndroid Build Coastguard Worker.irp i, v16.4s, v17.4s, v18.4s, v19.4s, v20.4s, v21.4s, v22.4s, v23.4s
1836*c0909341SAndroid Build Coastguard Worker        ld1             {\i},    [x6]
1837*c0909341SAndroid Build Coastguard Worker        st1             {v4.4s}, [x6], x11
1838*c0909341SAndroid Build Coastguard Worker.endr
1839*c0909341SAndroid Build Coastguard Worker        scale_input     .4s, v0.s[0], v16, v17, v18, v19, v20, v21, v22, v23
1840*c0909341SAndroid Build Coastguard Worker        blr             x4
1841*c0909341SAndroid Build Coastguard Worker
        // Narrow to 16 bits (rounding, saturating shift right by 1), packing
        // v16-v19 into the low halves and v20-v23 into the high halves.
1842*c0909341SAndroid Build Coastguard Worker        sqrshrn         v28.4h,  v16.4s,  #1
1843*c0909341SAndroid Build Coastguard Worker        sqrshrn         v29.4h,  v17.4s,  #1
1844*c0909341SAndroid Build Coastguard Worker        sqrshrn         v30.4h,  v18.4s,  #1
1845*c0909341SAndroid Build Coastguard Worker        sqrshrn         v31.4h,  v19.4s,  #1
1846*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v28.8h,  v20.4s,  #1
1847*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v29.8h,  v21.4s,  #1
1848*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v30.8h,  v22.4s,  #1
1849*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v31.8h,  v23.4s,  #1
        // transpose_4x8h is a macro defined elsewhere; v2-v5 are its trailing
        // arguments (presumably scratch registers).
1850*c0909341SAndroid Build Coastguard Worker        transpose_4x8h  v28, v29, v30, v31, v2, v3, v4, v5
1851*c0909341SAndroid Build Coastguard Worker
1852*c0909341SAndroid Build Coastguard Worker        b               2f
1853*c0909341SAndroid Build Coastguard Worker
1854*c0909341SAndroid Build Coastguard Worker1:
        // Group skipped: use zeros as its result.
1855*c0909341SAndroid Build Coastguard Worker.irp i, v28.8h, v29.8h, v30.8h, v31.8h
1856*c0909341SAndroid Build Coastguard Worker        movi            \i,  #0
1857*c0909341SAndroid Build Coastguard Worker.endr
1858*c0909341SAndroid Build Coastguard Worker
1859*c0909341SAndroid Build Coastguard Worker2:
        // Group at offset 32 (threshold = second entry), results in v24-v27.
1860*c0909341SAndroid Build Coastguard Worker        cmp             w3,  w12
1861*c0909341SAndroid Build Coastguard Worker        ldrh            w12, [x13, #0]
1862*c0909341SAndroid Build Coastguard Worker        b.lt            1f
1863*c0909341SAndroid Build Coastguard Worker
1864*c0909341SAndroid Build Coastguard Worker        add             x6,  x2,  #32
1865*c0909341SAndroid Build Coastguard Worker        movi            v4.4s,   #0
1866*c0909341SAndroid Build Coastguard Worker        movz            w16, #2896*8, lsl #16
1867*c0909341SAndroid Build Coastguard Worker        dup             v0.2s,   w16
1868*c0909341SAndroid Build Coastguard Worker.irp i, v16.4s, v17.4s, v18.4s, v19.4s, v20.4s, v21.4s, v22.4s, v23.4s
1869*c0909341SAndroid Build Coastguard Worker        ld1             {\i},    [x6]
1870*c0909341SAndroid Build Coastguard Worker        st1             {v4.4s}, [x6], x11
1871*c0909341SAndroid Build Coastguard Worker.endr
1872*c0909341SAndroid Build Coastguard Worker        scale_input     .4s, v0.s[0], v16, v17, v18, v19, v20, v21, v22, v23
1873*c0909341SAndroid Build Coastguard Worker        blr             x4
1874*c0909341SAndroid Build Coastguard Worker
1875*c0909341SAndroid Build Coastguard Worker        sqrshrn         v24.4h,  v16.4s,  #1
1876*c0909341SAndroid Build Coastguard Worker        sqrshrn         v25.4h,  v17.4s,  #1
1877*c0909341SAndroid Build Coastguard Worker        sqrshrn         v26.4h,  v18.4s,  #1
1878*c0909341SAndroid Build Coastguard Worker        sqrshrn         v27.4h,  v19.4s,  #1
1879*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v24.8h,  v20.4s,  #1
1880*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v25.8h,  v21.4s,  #1
1881*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v26.8h,  v22.4s,  #1
1882*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v27.8h,  v23.4s,  #1
1883*c0909341SAndroid Build Coastguard Worker        transpose_4x8h  v24, v25, v26, v27, v2, v3, v4, v5
1884*c0909341SAndroid Build Coastguard Worker
1885*c0909341SAndroid Build Coastguard Worker        b               2f
1886*c0909341SAndroid Build Coastguard Worker
1887*c0909341SAndroid Build Coastguard Worker1:
1888*c0909341SAndroid Build Coastguard Worker.irp i, v24.8h, v25.8h, v26.8h, v27.8h
1889*c0909341SAndroid Build Coastguard Worker        movi            \i,  #0
1890*c0909341SAndroid Build Coastguard Worker.endr
1891*c0909341SAndroid Build Coastguard Worker
1892*c0909341SAndroid Build Coastguard Worker2:
        // Group at offset 16 (threshold = first entry). Its results are
        // parked in v8-v11 rather than v20-v23, because the final group
        // below still loads into v16-v23.
1893*c0909341SAndroid Build Coastguard Worker        cmp             w3,  w12
1894*c0909341SAndroid Build Coastguard Worker        b.lt            1f
1895*c0909341SAndroid Build Coastguard Worker
1896*c0909341SAndroid Build Coastguard Worker        add             x6,  x2,  #16
1897*c0909341SAndroid Build Coastguard Worker        movi            v4.4s,   #0
1898*c0909341SAndroid Build Coastguard Worker        movz            w16, #2896*8, lsl #16
1899*c0909341SAndroid Build Coastguard Worker        dup             v0.2s,   w16
1900*c0909341SAndroid Build Coastguard Worker.irp i, v16.4s, v17.4s, v18.4s, v19.4s, v20.4s, v21.4s, v22.4s, v23.4s
1901*c0909341SAndroid Build Coastguard Worker        ld1             {\i},    [x6]
1902*c0909341SAndroid Build Coastguard Worker        st1             {v4.4s}, [x6], x11
1903*c0909341SAndroid Build Coastguard Worker.endr
1904*c0909341SAndroid Build Coastguard Worker        scale_input     .4s, v0.s[0], v16, v17, v18, v19, v20, v21, v22, v23
1905*c0909341SAndroid Build Coastguard Worker        blr             x4
1906*c0909341SAndroid Build Coastguard Worker
1907*c0909341SAndroid Build Coastguard Worker        sqrshrn         v8.4h,   v16.4s,  #1
1908*c0909341SAndroid Build Coastguard Worker        sqrshrn         v9.4h,   v17.4s,  #1
1909*c0909341SAndroid Build Coastguard Worker        sqrshrn         v10.4h,  v18.4s,  #1
1910*c0909341SAndroid Build Coastguard Worker        sqrshrn         v11.4h,  v19.4s,  #1
1911*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v8.8h,   v20.4s,  #1
1912*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v9.8h,   v21.4s,  #1
1913*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v10.8h,  v22.4s,  #1
1914*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v11.8h,  v23.4s,  #1
1915*c0909341SAndroid Build Coastguard Worker        transpose_4x8h  v8,  v9,  v10, v11, v2, v3, v4, v5
1916*c0909341SAndroid Build Coastguard Worker
1917*c0909341SAndroid Build Coastguard Worker        b               2f
1918*c0909341SAndroid Build Coastguard Worker
1919*c0909341SAndroid Build Coastguard Worker1:
1920*c0909341SAndroid Build Coastguard Worker.irp i, v8.8h, v9.8h, v10.8h, v11.8h
1921*c0909341SAndroid Build Coastguard Worker        movi            \i,  #0
1922*c0909341SAndroid Build Coastguard Worker.endr
1923*c0909341SAndroid Build Coastguard Worker
1924*c0909341SAndroid Build Coastguard Worker2:
        // Group at offset 0: always processed. v4 and v0 are set up again
        // because every earlier group may have been skipped. This loop
        // post-increments x2 itself.
1925*c0909341SAndroid Build Coastguard Worker        movi            v4.4s,   #0
1926*c0909341SAndroid Build Coastguard Worker        movz            w16, #2896*8, lsl #16
1927*c0909341SAndroid Build Coastguard Worker        dup             v0.2s,   w16
1928*c0909341SAndroid Build Coastguard Worker.irp i, v16.4s, v17.4s, v18.4s, v19.4s, v20.4s, v21.4s, v22.4s, v23.4s
1929*c0909341SAndroid Build Coastguard Worker        ld1             {\i},    [x2]
1930*c0909341SAndroid Build Coastguard Worker        st1             {v4.4s}, [x2], x11
1931*c0909341SAndroid Build Coastguard Worker.endr
1932*c0909341SAndroid Build Coastguard Worker        scale_input     .4s, v0.s[0], v16, v17, v18, v19, v20, v21, v22, v23
1933*c0909341SAndroid Build Coastguard Worker        blr             x4
1934*c0909341SAndroid Build Coastguard Worker
1935*c0909341SAndroid Build Coastguard Worker        sqrshrn         v16.4h,  v16.4s,  #1
1936*c0909341SAndroid Build Coastguard Worker        sqrshrn         v17.4h,  v17.4s,  #1
1937*c0909341SAndroid Build Coastguard Worker        sqrshrn         v18.4h,  v18.4s,  #1
1938*c0909341SAndroid Build Coastguard Worker        sqrshrn         v19.4h,  v19.4s,  #1
1939*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v16.8h,  v20.4s,  #1
1940*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v17.8h,  v21.4s,  #1
1941*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v18.8h,  v22.4s,  #1
1942*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v19.8h,  v23.4s,  #1
1943*c0909341SAndroid Build Coastguard Worker        transpose_4x8h  v16, v17, v18, v19, v2, v3, v4, v5
1944*c0909341SAndroid Build Coastguard Worker
        // v20-v23 are free now: move the parked offset-16 results into place
        // so that v16-v31 hold the four groups in order.
1945*c0909341SAndroid Build Coastguard Worker        mov             v20.16b, v8.16b
1946*c0909341SAndroid Build Coastguard Worker        mov             v21.16b, v9.16b
1947*c0909341SAndroid Build Coastguard Worker        mov             v22.16b, v10.16b
1948*c0909341SAndroid Build Coastguard Worker        mov             v23.16b, v11.16b
1949*c0909341SAndroid Build Coastguard Worker
        // Second pass; presumably it consumes v16-v31 as prepared above.
1950*c0909341SAndroid Build Coastguard Worker        blr             x5
1951*c0909341SAndroid Build Coastguard Worker
        // Macro defined elsewhere; presumably adds the result to the pixels
        // at x0 and stores them back.
1952*c0909341SAndroid Build Coastguard Worker        load_add_store_8x16 x0, x6
1953*c0909341SAndroid Build Coastguard Worker
        // Restore d8-d11, release the frame and return through x15.
1954*c0909341SAndroid Build Coastguard Worker        ldp             d10, d11, [sp, #0x10]
1955*c0909341SAndroid Build Coastguard Worker        ldp             d8,  d9,  [sp], 0x20
1956*c0909341SAndroid Build Coastguard Worker
1957*c0909341SAndroid Build Coastguard Worker        ret             x15
1958*c0909341SAndroid Build Coastguard Workerendfunc
1959*c0909341SAndroid Build Coastguard Worker
// Table of four 16-bit values. def_fn_816 points x13 at this table when both
// transforms are identity, or when neither transform is identity.
// NOTE(review): presumably end-of-block (eob) thresholds checked by the
// inv_txfm_add_8x16/16x8 bodies; that consumer is outside this excerpt - confirm.
1960*c0909341SAndroid Build Coastguard Workerconst eob_8x16
1961*c0909341SAndroid Build Coastguard Worker        .short 10, 43, 75, 128
1962*c0909341SAndroid Build Coastguard Workerendconst
1963*c0909341SAndroid Build Coastguard Worker
// Table of four 16-bit values. def_fn_816 points x13 at this table when the
// first transform (txfm1) is identity and the second (txfm2) is not.
// NOTE(review): presumably end-of-block (eob) thresholds; the code that reads
// them through x13 is outside this excerpt - confirm.
1964*c0909341SAndroid Build Coastguard Workerconst eob_8x16_identity1
1965*c0909341SAndroid Build Coastguard Worker        .short 4, 64, 96, 128
1966*c0909341SAndroid Build Coastguard Workerendconst
1967*c0909341SAndroid Build Coastguard Worker
// Table of four 16-bit values. def_fn_816 points x13 at this table when the
// second transform (txfm2) is identity and the first (txfm1) is not.
// NOTE(review): presumably end-of-block (eob) thresholds; the code that reads
// them through x13 is outside this excerpt - confirm.
1968*c0909341SAndroid Build Coastguard Workerconst eob_8x16_identity2
1969*c0909341SAndroid Build Coastguard Worker        .short 4, 8, 12, 128
1970*c0909341SAndroid Build Coastguard Workerendconst
1971*c0909341SAndroid Build Coastguard Worker
// def_fn_816 w, h, txfm1, txfm2
//
// Emits the exported entry point
//   inv_txfm_add_<txfm1>_<txfm2>_<w>x<h>_16bpc_neon
// which only sets up registers and then tail-branches to the shared body
// inv_txfm_add_<w>x<h>_neon:
//   x4  = address of inv_<txfm1>_4s_x<w>_neon (first transform, 32-bit lanes)
//   x5  = address of inv_<txfm2>_8h_x<h>_neon (second transform, 16-bit lanes)
//   x13 = pointer to one of the eob_8x16* tables; when h == 8 it is instead
//         replaced by the first 16-bit entry of that table.
// For the dct_dct combination, idct_dc (defined elsewhere) is expanded first.
// NOTE(review): idct_dc is presumably the DC-only shortcut - confirm.
1972*c0909341SAndroid Build Coastguard Worker.macro def_fn_816 w, h, txfm1, txfm2
1973*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_\txfm1\()_\txfm2\()_\w\()x\h\()_16bpc_neon, export=1
1974*c0909341SAndroid Build Coastguard Worker.ifc \txfm1\()_\txfm2, dct_dct
1975*c0909341SAndroid Build Coastguard Worker        idct_dc         \w,  \h,  1
1976*c0909341SAndroid Build Coastguard Worker.endif
        // The first transform is addressed with adr (PC-relative, short
        // range); the second goes through movrel and the X() symbol wrapper.
1977*c0909341SAndroid Build Coastguard Worker        adr             x4,  inv_\txfm1\()_4s_x\w\()_neon
1978*c0909341SAndroid Build Coastguard Worker        movrel          x5,  X(inv_\txfm2\()_8h_x\h\()_neon)
        // Table selection:
        //   identity + identity  -> eob_8x16
        //   identity + other     -> eob_8x16_identity1
        //   other    + identity  -> eob_8x16_identity2
        //   other    + other     -> eob_8x16
1979*c0909341SAndroid Build Coastguard Worker.ifc \txfm1, identity
1980*c0909341SAndroid Build Coastguard Worker.ifc \txfm2, identity
1981*c0909341SAndroid Build Coastguard Worker        movrel          x13, eob_8x16
1982*c0909341SAndroid Build Coastguard Worker.else
1983*c0909341SAndroid Build Coastguard Worker        movrel          x13, eob_8x16_identity1
1984*c0909341SAndroid Build Coastguard Worker.endif
1985*c0909341SAndroid Build Coastguard Worker.else
1986*c0909341SAndroid Build Coastguard Worker.ifc \txfm2, identity
1987*c0909341SAndroid Build Coastguard Worker        movrel          x13, eob_8x16_identity2
1988*c0909341SAndroid Build Coastguard Worker.else
1989*c0909341SAndroid Build Coastguard Worker        movrel          x13, eob_8x16
1990*c0909341SAndroid Build Coastguard Worker.endif
1991*c0909341SAndroid Build Coastguard Worker.endif
        // For h == 8 only the first table entry is passed on, as a value in
        // w13 rather than as a pointer in x13.
1992*c0909341SAndroid Build Coastguard Worker.if \h == 8
1993*c0909341SAndroid Build Coastguard Worker        ldrh            w13, [x13]
1994*c0909341SAndroid Build Coastguard Worker.endif
        // Tail branch: x30 is untouched, so the shared body returns straight
        // to the original caller.
1995*c0909341SAndroid Build Coastguard Worker        b               inv_txfm_add_\w\()x\h\()_neon
1996*c0909341SAndroid Build Coastguard Workerendfunc
1997*c0909341SAndroid Build Coastguard Worker.endm
1998*c0909341SAndroid Build Coastguard Worker
// def_fns_816 w, h
//
// Expands def_fn_816 once for each of the 16 (txfm1, txfm2) combinations
// listed below, producing one exported entry point per combination for the
// given block size w x h.
1999*c0909341SAndroid Build Coastguard Worker.macro def_fns_816 w, h
2000*c0909341SAndroid Build Coastguard Workerdef_fn_816 \w, \h, dct, dct
2001*c0909341SAndroid Build Coastguard Workerdef_fn_816 \w, \h, identity, identity
2002*c0909341SAndroid Build Coastguard Workerdef_fn_816 \w, \h, dct, adst
2003*c0909341SAndroid Build Coastguard Workerdef_fn_816 \w, \h, dct, flipadst
2004*c0909341SAndroid Build Coastguard Workerdef_fn_816 \w, \h, dct, identity
2005*c0909341SAndroid Build Coastguard Workerdef_fn_816 \w, \h, adst, dct
2006*c0909341SAndroid Build Coastguard Workerdef_fn_816 \w, \h, adst, adst
2007*c0909341SAndroid Build Coastguard Workerdef_fn_816 \w, \h, adst, flipadst
2008*c0909341SAndroid Build Coastguard Workerdef_fn_816 \w, \h, flipadst, dct
2009*c0909341SAndroid Build Coastguard Workerdef_fn_816 \w, \h, flipadst, adst
2010*c0909341SAndroid Build Coastguard Workerdef_fn_816 \w, \h, flipadst, flipadst
2011*c0909341SAndroid Build Coastguard Workerdef_fn_816 \w, \h, identity, dct
2012*c0909341SAndroid Build Coastguard Workerdef_fn_816 \w, \h, adst, identity
2013*c0909341SAndroid Build Coastguard Workerdef_fn_816 \w, \h, flipadst, identity
2014*c0909341SAndroid Build Coastguard Workerdef_fn_816 \w, \h, identity, adst
2015*c0909341SAndroid Build Coastguard Workerdef_fn_816 \w, \h, identity, flipadst
2016*c0909341SAndroid Build Coastguard Worker.endm
2017*c0909341SAndroid Build Coastguard Worker
// Generate the 16 exported entry points for each of the two block sizes,
// 8x16 and 16x8.
2018*c0909341SAndroid Build Coastguard Workerdef_fns_816 8, 16
2019*c0909341SAndroid Build Coastguard Workerdef_fns_816 16, 8
2020*c0909341SAndroid Build Coastguard Worker
// inv_dct32_odd_4s_x16_neon
//
// Odd half of a 32-point inverse DCT, computed on four 32-bit lanes at once.
//
// In:      v16-v31 (.4s)  the 16 input vectors; every one of them is read.
// Out:     v16-v31 (.4s)  results, following the inline t*/out* labels:
//            v16=out16 v17=out17 v18=out18 v19=out19
//            v20=t20   v21=t21a  v22=t22   v23=t23a
//            v24=t24a  v25=t25   v26=t26a  v27=t27
//            v28=out28 v29=out29 v30=out30 v31=out31
// Clobbers: v0-v7, x16.
// No instruction visible here writes v4/v5 after the clip bounds are loaded.
//
// Each rotation is a mul_mls/mul_mla pair (macros defined elsewhere) followed
// by srshr #12, a rounding right shift that removes the coefficient scaling.
// Butterflies use saturating add/sub, and each butterfly stage is followed by
// a clamp to [v4, v5].
// NOTE(review): the third movrel argument is presumably a byte offset into
// idct_coeffs - confirm against the movrel macro.
2021*c0909341SAndroid Build Coastguard Workerfunction inv_dct32_odd_4s_x16_neon
2022*c0909341SAndroid Build Coastguard Worker        movrel          x16, idct_coeffs, 4*16
2023*c0909341SAndroid Build Coastguard Worker        ld1             {v0.4s, v1.4s}, [x16], #32
2024*c0909341SAndroid Build Coastguard Worker
        // Stage 1: rotate each input pair by one coefficient pair. The
        // multiplies are interleaved with the shifts of earlier results, so
        // v2/v4/v6 are recycled as accumulators.
2025*c0909341SAndroid Build Coastguard Worker        mul_mls         v2,  v16, v31, v0.s[0], v0.s[1] // -> t16a
2026*c0909341SAndroid Build Coastguard Worker        mul_mla         v4,  v16, v31, v0.s[1], v0.s[0] // -> t31a
2027*c0909341SAndroid Build Coastguard Worker        mul_mls         v6,  v24, v23, v0.s[2], v0.s[3] // -> t17a
2028*c0909341SAndroid Build Coastguard Worker        srshr           v16.4s, v2.4s,  #12             // t16a
2029*c0909341SAndroid Build Coastguard Worker        srshr           v31.4s, v4.4s,  #12             // t31a
2030*c0909341SAndroid Build Coastguard Worker        mul_mla         v2,  v24, v23, v0.s[3], v0.s[2] // -> t30a
2031*c0909341SAndroid Build Coastguard Worker        mul_mls         v4,  v20, v27, v1.s[0], v1.s[1] // -> t18a
2032*c0909341SAndroid Build Coastguard Worker        srshr           v24.4s, v6.4s,  #12             // t17a
2033*c0909341SAndroid Build Coastguard Worker        srshr           v23.4s, v2.4s,  #12             // t30a
2034*c0909341SAndroid Build Coastguard Worker        mul_mla         v6,  v20, v27, v1.s[1], v1.s[0] // -> t29a
2035*c0909341SAndroid Build Coastguard Worker        mul_mls         v2,  v28, v19, v1.s[2], v1.s[3] // -> t19a
2036*c0909341SAndroid Build Coastguard Worker        srshr           v20.4s, v4.4s,  #12             // t18a
2037*c0909341SAndroid Build Coastguard Worker        srshr           v27.4s, v6.4s,  #12             // t29a
2038*c0909341SAndroid Build Coastguard Worker        mul_mla         v4,  v28, v19, v1.s[3], v1.s[2] // -> t28a
        // Load the next eight coefficients, then rewind x16 by 4*24 bytes.
        // With the initial 4*16 offset plus the 32-byte post-increment this
        // lands back at the start of idct_coeffs for the load after stage 1.
2039*c0909341SAndroid Build Coastguard Worker        ld1             {v0.4s, v1.4s}, [x16]
2040*c0909341SAndroid Build Coastguard Worker        sub             x16, x16, #4*24
2041*c0909341SAndroid Build Coastguard Worker        mul_mls         v6,  v18, v29, v0.s[0], v0.s[1] // -> t20a
2042*c0909341SAndroid Build Coastguard Worker        srshr           v28.4s, v2.4s,  #12             // t19a
2043*c0909341SAndroid Build Coastguard Worker        srshr           v19.4s, v4.4s,  #12             // t28a
2044*c0909341SAndroid Build Coastguard Worker        mul_mla         v2,  v18, v29, v0.s[1], v0.s[0] // -> t27a
2045*c0909341SAndroid Build Coastguard Worker        mul_mls         v4,  v26, v21, v0.s[2], v0.s[3] // -> t21a
2046*c0909341SAndroid Build Coastguard Worker        srshr           v18.4s, v6.4s,  #12             // t20a
2047*c0909341SAndroid Build Coastguard Worker        srshr           v29.4s, v2.4s,  #12             // t27a
2048*c0909341SAndroid Build Coastguard Worker        mul_mla         v6,  v26, v21, v0.s[3], v0.s[2] // -> t26a
2049*c0909341SAndroid Build Coastguard Worker        mul_mls         v2,  v22, v25, v1.s[0], v1.s[1] // -> t22a
2050*c0909341SAndroid Build Coastguard Worker        srshr           v26.4s, v4.4s,  #12             // t21a
2051*c0909341SAndroid Build Coastguard Worker        srshr           v21.4s, v6.4s,  #12             // t26a
2052*c0909341SAndroid Build Coastguard Worker        mul_mla         v4,  v22, v25, v1.s[1], v1.s[0] // -> t25a
2053*c0909341SAndroid Build Coastguard Worker        mul_mls         v6,  v30, v17, v1.s[2], v1.s[3] // -> t23a
2054*c0909341SAndroid Build Coastguard Worker        srshr           v22.4s, v2.4s,  #12             // t22a
2055*c0909341SAndroid Build Coastguard Worker        srshr           v25.4s, v4.4s,  #12             // t25a
2056*c0909341SAndroid Build Coastguard Worker        mul_mla         v2,  v30, v17, v1.s[3], v1.s[2] // -> t24a
2057*c0909341SAndroid Build Coastguard Worker        srshr           v30.4s, v6.4s,  #12             // t23a
2058*c0909341SAndroid Build Coastguard Worker        srshr           v17.4s, v2.4s,  #12             // t24a
2059*c0909341SAndroid Build Coastguard Worker
        // Coefficients for all remaining stages (first eight words at x16).
2060*c0909341SAndroid Build Coastguard Worker        ld1             {v0.4s, v1.4s}, [x16]
2061*c0909341SAndroid Build Coastguard Worker
        // v4 is free to hold a constant from here on: its last use as a
        // multiply accumulator (t25a) has already been shifted out above.
2062*c0909341SAndroid Build Coastguard Worker        movi            v5.4s,   #1, msl #16     // row_clip_max = ~(~bdmax << 7), 0x1ffff
2063*c0909341SAndroid Build Coastguard Worker        mvni            v4.4s,   #1, msl #16     // row_clip_min = (~bdmax << 7), 0xfffe0000
2064*c0909341SAndroid Build Coastguard Worker
        // Stage 2 butterflies (saturating).
2065*c0909341SAndroid Build Coastguard Worker        sqsub           v2.4s,   v16.4s,  v24.4s // t17
2066*c0909341SAndroid Build Coastguard Worker        sqadd           v16.4s,  v16.4s,  v24.4s // t16
2067*c0909341SAndroid Build Coastguard Worker        sqsub           v3.4s,   v31.4s,  v23.4s // t30
2068*c0909341SAndroid Build Coastguard Worker        sqadd           v31.4s,  v31.4s,  v23.4s // t31
2069*c0909341SAndroid Build Coastguard Worker        sqsub           v24.4s,  v28.4s,  v20.4s // t18
2070*c0909341SAndroid Build Coastguard Worker        sqadd           v28.4s,  v28.4s,  v20.4s // t19
2071*c0909341SAndroid Build Coastguard Worker        sqadd           v23.4s,  v18.4s,  v26.4s // t20
2072*c0909341SAndroid Build Coastguard Worker        sqsub           v18.4s,  v18.4s,  v26.4s // t21
2073*c0909341SAndroid Build Coastguard Worker        sqsub           v20.4s,  v30.4s,  v22.4s // t22
2074*c0909341SAndroid Build Coastguard Worker        sqadd           v30.4s,  v30.4s,  v22.4s // t23
2075*c0909341SAndroid Build Coastguard Worker        sqadd           v26.4s,  v17.4s,  v25.4s // t24
2076*c0909341SAndroid Build Coastguard Worker        sqsub           v17.4s,  v17.4s,  v25.4s // t25
2077*c0909341SAndroid Build Coastguard Worker        sqsub           v22.4s,  v29.4s,  v21.4s // t26
2078*c0909341SAndroid Build Coastguard Worker        sqadd           v29.4s,  v29.4s,  v21.4s // t27
2079*c0909341SAndroid Build Coastguard Worker        sqadd           v25.4s,  v19.4s,  v27.4s // t28
2080*c0909341SAndroid Build Coastguard Worker        sqsub           v19.4s,  v19.4s,  v27.4s // t29
2081*c0909341SAndroid Build Coastguard Worker
        // Clamp all 16 butterfly results to [row_clip_min, row_clip_max].
2082*c0909341SAndroid Build Coastguard Worker.irp r, v2, v16, v3, v31, v24, v28, v23, v18, v20, v30, v26, v17, v22, v29, v25, v19
2083*c0909341SAndroid Build Coastguard Worker        smin            \r\().4s, \r\().4s, v5.4s
2084*c0909341SAndroid Build Coastguard Worker.endr
2085*c0909341SAndroid Build Coastguard Worker.irp r, v2, v16, v3, v31, v24, v28, v23, v18, v20, v30, v26, v17, v22, v29, v25, v19
2086*c0909341SAndroid Build Coastguard Worker        smax            \r\().4s, \r\().4s, v4.4s
2087*c0909341SAndroid Build Coastguard Worker.endr
2088*c0909341SAndroid Build Coastguard Worker
        // Stage 3 rotations using the v1 coefficients. The negated terms
        // (t18a, t22a) are formed as mul_mla followed by neg before the shift.
2089*c0909341SAndroid Build Coastguard Worker        mul_mls         v7,  v3,  v2,  v1.s[0], v1.s[1] // -> t17a
2090*c0909341SAndroid Build Coastguard Worker        mul_mla         v6,  v3,  v2,  v1.s[1], v1.s[0] // -> t30a
2091*c0909341SAndroid Build Coastguard Worker        mul_mla         v2,  v19, v24, v1.s[1], v1.s[0] // -> t18a
2092*c0909341SAndroid Build Coastguard Worker        srshr           v21.4s, v7.4s,  #12             // t17a
2093*c0909341SAndroid Build Coastguard Worker        srshr           v27.4s, v6.4s,  #12             // t30a
2094*c0909341SAndroid Build Coastguard Worker        neg             v2.4s,   v2.4s                  // -> t18a
2095*c0909341SAndroid Build Coastguard Worker        mul_mls         v7,  v19, v24, v1.s[0], v1.s[1] // -> t29a
2096*c0909341SAndroid Build Coastguard Worker        mul_mls         v6,  v22, v18, v1.s[2], v1.s[3] // -> t21a
2097*c0909341SAndroid Build Coastguard Worker        srshr           v19.4s, v2.4s,  #12             // t18a
2098*c0909341SAndroid Build Coastguard Worker        srshr           v24.4s, v7.4s,  #12             // t29a
2099*c0909341SAndroid Build Coastguard Worker        mul_mla         v2,  v22, v18, v1.s[3], v1.s[2] // -> t26a
2100*c0909341SAndroid Build Coastguard Worker        mul_mla         v7,  v17, v20, v1.s[3], v1.s[2] // -> t22a
2101*c0909341SAndroid Build Coastguard Worker        srshr           v22.4s, v6.4s,  #12             // t21a
2102*c0909341SAndroid Build Coastguard Worker        srshr           v18.4s, v2.4s,  #12             // t26a
2103*c0909341SAndroid Build Coastguard Worker        neg             v7.4s,   v7.4s                  // -> t22a
2104*c0909341SAndroid Build Coastguard Worker        mul_mls         v6,  v17, v20, v1.s[2], v1.s[3] // -> t25a
2105*c0909341SAndroid Build Coastguard Worker        srshr           v17.4s, v7.4s,  #12             // t22a
2106*c0909341SAndroid Build Coastguard Worker        srshr           v20.4s, v6.4s,  #12             // t25a
2107*c0909341SAndroid Build Coastguard Worker
        // Stage 4 butterflies (saturating).
2108*c0909341SAndroid Build Coastguard Worker        sqsub           v2.4s,   v27.4s,  v24.4s // t29
2109*c0909341SAndroid Build Coastguard Worker        sqadd           v27.4s,  v27.4s,  v24.4s // t30
2110*c0909341SAndroid Build Coastguard Worker        sqsub           v3.4s,   v21.4s,  v19.4s // t18
2111*c0909341SAndroid Build Coastguard Worker        sqadd           v21.4s,  v21.4s,  v19.4s // t17
2112*c0909341SAndroid Build Coastguard Worker        sqsub           v24.4s,  v16.4s,  v28.4s // t19a
2113*c0909341SAndroid Build Coastguard Worker        sqadd           v16.4s,  v16.4s,  v28.4s // t16a
2114*c0909341SAndroid Build Coastguard Worker        sqsub           v19.4s,  v30.4s,  v23.4s // t20a
2115*c0909341SAndroid Build Coastguard Worker        sqadd           v30.4s,  v30.4s,  v23.4s // t23a
2116*c0909341SAndroid Build Coastguard Worker        sqsub           v28.4s,  v17.4s,  v22.4s // t21
2117*c0909341SAndroid Build Coastguard Worker        sqadd           v17.4s,  v17.4s,  v22.4s // t22
2118*c0909341SAndroid Build Coastguard Worker        sqadd           v23.4s,  v26.4s,  v29.4s // t24a
2119*c0909341SAndroid Build Coastguard Worker        sqsub           v26.4s,  v26.4s,  v29.4s // t27a
2120*c0909341SAndroid Build Coastguard Worker        sqadd           v22.4s,  v20.4s,  v18.4s // t25
2121*c0909341SAndroid Build Coastguard Worker        sqsub           v20.4s,  v20.4s,  v18.4s // t26
2122*c0909341SAndroid Build Coastguard Worker        sqsub           v29.4s,  v31.4s,  v25.4s // t28a
2123*c0909341SAndroid Build Coastguard Worker        sqadd           v31.4s,  v31.4s,  v25.4s // t31a
2124*c0909341SAndroid Build Coastguard Worker
        // Clamp again after the butterflies.
2125*c0909341SAndroid Build Coastguard Worker.irp r, v2, v27, v3, v21, v24, v16, v19, v30, v28, v17, v23, v26, v22, v20, v29, v31
2126*c0909341SAndroid Build Coastguard Worker        smin            \r\().4s, \r\().4s, v5.4s
2127*c0909341SAndroid Build Coastguard Worker.endr
2128*c0909341SAndroid Build Coastguard Worker.irp r, v2, v27, v3, v21, v24, v16, v19, v30, v28, v17, v23, v26, v22, v20, v29, v31
2129*c0909341SAndroid Build Coastguard Worker        smax            \r\().4s, \r\().4s, v4.4s
2130*c0909341SAndroid Build Coastguard Worker.endr
2131*c0909341SAndroid Build Coastguard Worker
        // Stage 5 rotations, all with the coefficient pair v0.s[2]/v0.s[3].
2132*c0909341SAndroid Build Coastguard Worker        mul_mls         v7,  v2,  v3,  v0.s[2], v0.s[3] // -> t18a
2133*c0909341SAndroid Build Coastguard Worker        mul_mla         v6,  v2,  v3,  v0.s[3], v0.s[2] // -> t29a
2134*c0909341SAndroid Build Coastguard Worker        mul_mls         v2,  v29, v24, v0.s[2], v0.s[3] // -> t19
2135*c0909341SAndroid Build Coastguard Worker        srshr           v18.4s, v7.4s,  #12             // t18a
2136*c0909341SAndroid Build Coastguard Worker        srshr           v25.4s, v6.4s,  #12             // t29a
2137*c0909341SAndroid Build Coastguard Worker        mul_mla         v7,  v29, v24, v0.s[3], v0.s[2] // -> t28
2138*c0909341SAndroid Build Coastguard Worker        mul_mla         v6,  v26, v19, v0.s[3], v0.s[2] // -> t20
2139*c0909341SAndroid Build Coastguard Worker        srshr           v29.4s, v2.4s,  #12             // t19
2140*c0909341SAndroid Build Coastguard Worker        srshr           v24.4s, v7.4s,  #12             // t28
2141*c0909341SAndroid Build Coastguard Worker        neg             v6.4s,   v6.4s                  // -> t20
2142*c0909341SAndroid Build Coastguard Worker        mul_mls         v2,  v26, v19, v0.s[2], v0.s[3] // -> t27
2143*c0909341SAndroid Build Coastguard Worker        mul_mla         v7,  v20, v28, v0.s[3], v0.s[2] // -> t21a
2144*c0909341SAndroid Build Coastguard Worker        srshr           v26.4s, v6.4s,  #12             // t20
2145*c0909341SAndroid Build Coastguard Worker        srshr           v19.4s, v2.4s,  #12             // t27
2146*c0909341SAndroid Build Coastguard Worker        neg             v7.4s,   v7.4s                  // -> t21a
2147*c0909341SAndroid Build Coastguard Worker        mul_mls         v6,  v20, v28, v0.s[2], v0.s[3] // -> t26a
2148*c0909341SAndroid Build Coastguard Worker        srshr           v20.4s, v7.4s,  #12             // t21a
2149*c0909341SAndroid Build Coastguard Worker        srshr           v28.4s, v6.4s,  #12             // t26a
2150*c0909341SAndroid Build Coastguard Worker
        // Stage 6 butterflies. Eight of the sums are already final outputs.
        // out19 is built in v7 because v19 is still needed as an input to
        // the last two butterflies; it is moved into v19 right after them.
2151*c0909341SAndroid Build Coastguard Worker        sqsub           v2.4s,   v16.4s,  v30.4s // t23
2152*c0909341SAndroid Build Coastguard Worker        sqadd           v16.4s,  v16.4s,  v30.4s // t16 = out16
2153*c0909341SAndroid Build Coastguard Worker        sqsub           v3.4s,   v31.4s,  v23.4s // t24
2154*c0909341SAndroid Build Coastguard Worker        sqadd           v31.4s,  v31.4s,  v23.4s // t31 = out31
2155*c0909341SAndroid Build Coastguard Worker        sqsub           v23.4s,  v21.4s,  v17.4s // t22a
2156*c0909341SAndroid Build Coastguard Worker        sqadd           v17.4s,  v21.4s,  v17.4s // t17a = out17
2157*c0909341SAndroid Build Coastguard Worker        sqadd           v30.4s,  v27.4s,  v22.4s // t30a = out30
2158*c0909341SAndroid Build Coastguard Worker        sqsub           v21.4s,  v27.4s,  v22.4s // t25a
2159*c0909341SAndroid Build Coastguard Worker        sqsub           v27.4s,  v18.4s,  v20.4s // t21
2160*c0909341SAndroid Build Coastguard Worker        sqadd           v18.4s,  v18.4s,  v20.4s // t18 = out18
2161*c0909341SAndroid Build Coastguard Worker        sqadd           v7.4s,   v29.4s,  v26.4s // t19a = out19
2162*c0909341SAndroid Build Coastguard Worker        sqsub           v26.4s,  v29.4s,  v26.4s // t20a
2163*c0909341SAndroid Build Coastguard Worker        sqadd           v29.4s,  v25.4s,  v28.4s // t29 = out29
2164*c0909341SAndroid Build Coastguard Worker        sqsub           v25.4s,  v25.4s,  v28.4s // t26
2165*c0909341SAndroid Build Coastguard Worker        sqadd           v28.4s,  v24.4s,  v19.4s // t28a = out28
2166*c0909341SAndroid Build Coastguard Worker        sqsub           v24.4s,  v24.4s,  v19.4s // t27a
2167*c0909341SAndroid Build Coastguard Worker        mov             v19.16b, v7.16b          // out19
2168*c0909341SAndroid Build Coastguard Worker
        // Final clamp. The eight values produced by the stage-7 shifts below
        // are not clamped again inside this function.
2169*c0909341SAndroid Build Coastguard Worker.irp r, v2, v16, v3, v31, v23, v17, v30, v21, v27, v18, v19, v26, v29, v25, v28, v24
2170*c0909341SAndroid Build Coastguard Worker        smin            \r\().4s, \r\().4s, v5.4s
2171*c0909341SAndroid Build Coastguard Worker.endr
2172*c0909341SAndroid Build Coastguard Worker.irp r, v2, v16, v3, v31, v23, v17, v30, v21, v27, v18, v19, v26, v29, v25, v28, v24
2173*c0909341SAndroid Build Coastguard Worker        smax            \r\().4s, \r\().4s, v4.4s
2174*c0909341SAndroid Build Coastguard Worker.endr
2175*c0909341SAndroid Build Coastguard Worker
        // Stage 7: the middle eight values are sum/difference pairs scaled by
        // the single coefficient v0.s[0]. v22 and v24 serve as temporaries
        // here before receiving their final values.
2176*c0909341SAndroid Build Coastguard Worker        mul_mls         v7,  v24, v26, v0.s[0], v0.s[0] // -> t20
2177*c0909341SAndroid Build Coastguard Worker        mul_mla         v6,  v24, v26, v0.s[0], v0.s[0] // -> t27
2178*c0909341SAndroid Build Coastguard Worker        srshr           v20.4s, v7.4s,  #12             // t20
2179*c0909341SAndroid Build Coastguard Worker        srshr           v22.4s, v6.4s,  #12             // t27
2180*c0909341SAndroid Build Coastguard Worker
2181*c0909341SAndroid Build Coastguard Worker        mul_mla         v7,  v25, v27, v0.s[0], v0.s[0] // -> t26a
2182*c0909341SAndroid Build Coastguard Worker        mul_mls         v6,  v25, v27, v0.s[0], v0.s[0] // -> t21a
2183*c0909341SAndroid Build Coastguard Worker        mov             v27.16b,  v22.16b               // t27
2184*c0909341SAndroid Build Coastguard Worker        srshr           v26.4s, v7.4s,  #12             // t26a
2185*c0909341SAndroid Build Coastguard Worker
2186*c0909341SAndroid Build Coastguard Worker        mul_mls         v24, v21, v23, v0.s[0], v0.s[0] // -> t22
2187*c0909341SAndroid Build Coastguard Worker        mul_mla         v7,  v21, v23, v0.s[0], v0.s[0] // -> t25
2188*c0909341SAndroid Build Coastguard Worker        srshr           v21.4s, v6.4s,  #12             // t21a
2189*c0909341SAndroid Build Coastguard Worker        srshr           v22.4s, v24.4s, #12             // t22
2190*c0909341SAndroid Build Coastguard Worker        srshr           v25.4s, v7.4s,  #12             // t25
2191*c0909341SAndroid Build Coastguard Worker
2192*c0909341SAndroid Build Coastguard Worker        mul_mls         v7,  v3,  v2,  v0.s[0], v0.s[0] // -> t23a
2193*c0909341SAndroid Build Coastguard Worker        mul_mla         v6,  v3,  v2,  v0.s[0], v0.s[0] // -> t24a
2194*c0909341SAndroid Build Coastguard Worker        srshr           v23.4s, v7.4s,  #12             // t23a
2195*c0909341SAndroid Build Coastguard Worker        srshr           v24.4s, v6.4s,  #12             // t24a
2196*c0909341SAndroid Build Coastguard Worker
2197*c0909341SAndroid Build Coastguard Worker        ret
2198*c0909341SAndroid Build Coastguard Workerendfunc
2199*c0909341SAndroid Build Coastguard Worker
// def_horz_32 scale=0, shift=2, suffix
//
// Emits inv_txfm_horz<suffix>_dct_32x4_neon: a 32-point inverse DCT applied
// to four lanes of 32-bit coefficients, with the results narrowed to 16 bits.
//
// Macro parameters:
//   scale   if non-zero, every input vector is passed through scale_input
//           (defined elsewhere) before the transform.
//   shift   right-shift amount of the final saturating rounding narrow.
//   suffix  text inserted into the generated function name.
//
// Registers of the generated function:
//   x7   coefficient pointer. Every vector that is read is overwritten with
//        zeros. Not rewound after the second load loop.
//   x8   coefficient stride. Doubled on entry and left doubled on return.
//   x6   output pointer. Receives 4 rows of 32 16-bit values (64 bytes per
//        row) and ends up 256 bytes past its entry value.
//   x14  holds the return address, because the body uses bl.
// Clobbers v0-v7 and v16-v31; x16 is written when scale is set and by
// inv_dct32_odd_4s_x16_neon.
//
// The region at x6 doubles as scratch: the even-half results are parked there
// as 32-bit values and later replaced in place by the 16-bit final rows.
2200*c0909341SAndroid Build Coastguard Worker.macro def_horz_32 scale=0, shift=2, suffix
2201*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_horz\suffix\()_dct_32x4_neon
2202*c0909341SAndroid Build Coastguard Worker        mov             x14, x30
2203*c0909341SAndroid Build Coastguard Worker        movi            v7.4s,  #0
2204*c0909341SAndroid Build Coastguard Worker        lsl             x8,  x8,  #1
2205*c0909341SAndroid Build Coastguard Worker.if \scale
2206*c0909341SAndroid Build Coastguard Worker        movz            w16, #2896*8, lsl #16
2207*c0909341SAndroid Build Coastguard Worker        dup             v0.2s,   w16
2208*c0909341SAndroid Build Coastguard Worker.endif
2209*c0909341SAndroid Build Coastguard Worker
        // First pass: load 16 vectors at the doubled stride, i.e. every
        // second vector of the input, zeroing each one once it is read.
2210*c0909341SAndroid Build Coastguard Worker.irp i, v16.4s, v17.4s, v18.4s, v19.4s, v20.4s, v21.4s, v22.4s, v23.4s, v24.4s, v25.4s, v26.4s, v27.4s, v28.4s, v29.4s, v30.4s, v31.4s
2211*c0909341SAndroid Build Coastguard Worker        ld1             {\i}, [x7]
2212*c0909341SAndroid Build Coastguard Worker        st1             {v7.4s}, [x7], x8
2213*c0909341SAndroid Build Coastguard Worker.endr
        // Rewind the 16 steps, then advance by half the doubled stride (one
        // original stride) so the second pass picks up the vectors in between.
2214*c0909341SAndroid Build Coastguard Worker        sub             x7,  x7,  x8, lsl #4
2215*c0909341SAndroid Build Coastguard Worker        add             x7,  x7,  x8, lsr #1
2216*c0909341SAndroid Build Coastguard Worker.if \scale
2217*c0909341SAndroid Build Coastguard Worker        scale_input     .4s, v0.s[0], v16, v17, v18, v19, v20, v21, v22, v23
2218*c0909341SAndroid Build Coastguard Worker        scale_input     .4s, v0.s[0], v24, v25, v26, v27, v28, v29, v30, v31
2219*c0909341SAndroid Build Coastguard Worker.endif
2220*c0909341SAndroid Build Coastguard Worker        bl              inv_dct_4s_x16_neon
2221*c0909341SAndroid Build Coastguard Worker
2222*c0909341SAndroid Build Coastguard Worker        // idct_16 leaves the row_clip_max/min constants in v5 and v4
2223*c0909341SAndroid Build Coastguard Worker.irp r, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31
2224*c0909341SAndroid Build Coastguard Worker        smin_4s         \r, \r, v5
2225*c0909341SAndroid Build Coastguard Worker.endr
2226*c0909341SAndroid Build Coastguard Worker.irp r, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31
2227*c0909341SAndroid Build Coastguard Worker        smax_4s         \r, \r, v4
2228*c0909341SAndroid Build Coastguard Worker.endr
2229*c0909341SAndroid Build Coastguard Worker
        // v2-v5 are passed to the transposes as scratch, so the clip
        // constants in v4/v5 are no longer needed from here on.
2230*c0909341SAndroid Build Coastguard Worker        transpose_4x4s  v16, v17, v18, v19, v2,  v3,  v4,  v5
2231*c0909341SAndroid Build Coastguard Worker        transpose_4x4s  v20, v21, v22, v23, v2,  v3,  v4,  v5
2232*c0909341SAndroid Build Coastguard Worker        transpose_4x4s  v24, v25, v26, v27, v2,  v3,  v4,  v5
2233*c0909341SAndroid Build Coastguard Worker        transpose_4x4s  v28, v29, v30, v31, v2,  v3,  v4,  v5
2234*c0909341SAndroid Build Coastguard Worker
        // store1: write one row of 16 even-half results (4 x 16 bytes = 64
        // bytes of 32-bit values) to x6 and advance x6 past it.
2235*c0909341SAndroid Build Coastguard Worker.macro store1 r0, r1, r2, r3
2236*c0909341SAndroid Build Coastguard Worker        st1             {\r0}, [x6], #16
2237*c0909341SAndroid Build Coastguard Worker        st1             {\r1}, [x6], #16
2238*c0909341SAndroid Build Coastguard Worker        st1             {\r2}, [x6], #16
2239*c0909341SAndroid Build Coastguard Worker        st1             {\r3}, [x6], #16
2240*c0909341SAndroid Build Coastguard Worker.endm
2241*c0909341SAndroid Build Coastguard Worker        store1          v16.4s,  v20.4s,  v24.4s,  v28.4s
2242*c0909341SAndroid Build Coastguard Worker        store1          v17.4s,  v21.4s,  v25.4s,  v29.4s
2243*c0909341SAndroid Build Coastguard Worker        store1          v18.4s,  v22.4s,  v26.4s,  v30.4s
2244*c0909341SAndroid Build Coastguard Worker        store1          v19.4s,  v23.4s,  v27.4s,  v31.4s
        // Purged so that the next expansion of def_horz_32 can define store1
        // again. x6 is then rewound over the four 64-byte rows just written.
2245*c0909341SAndroid Build Coastguard Worker.purgem store1
2246*c0909341SAndroid Build Coastguard Worker        sub             x6,  x6,  #64*4
2247*c0909341SAndroid Build Coastguard Worker
        // Second pass: load (and zero) the remaining 16 vectors.
2248*c0909341SAndroid Build Coastguard Worker        movi            v7.4s,  #0
2249*c0909341SAndroid Build Coastguard Worker.irp i, v16.4s, v17.4s, v18.4s, v19.4s, v20.4s, v21.4s, v22.4s, v23.4s, v24.4s, v25.4s, v26.4s, v27.4s, v28.4s, v29.4s, v30.4s, v31.4s
2250*c0909341SAndroid Build Coastguard Worker        ld1             {\i}, [x7]
2251*c0909341SAndroid Build Coastguard Worker        st1             {v7.4s}, [x7], x8
2252*c0909341SAndroid Build Coastguard Worker.endr
2253*c0909341SAndroid Build Coastguard Worker.if \scale
2254*c0909341SAndroid Build Coastguard Worker        // This relies on the fact that the idct also leaves the right coeff in v0.s[1]
2255*c0909341SAndroid Build Coastguard Worker        scale_input     .4s, v0.s[1], v16, v17, v18, v19, v20, v21, v22, v23
2256*c0909341SAndroid Build Coastguard Worker        scale_input     .4s, v0.s[1], v24, v25, v26, v27, v28, v29, v30, v31
2257*c0909341SAndroid Build Coastguard Worker.endif
2258*c0909341SAndroid Build Coastguard Worker        bl              inv_dct32_odd_4s_x16_neon
        // The odd half is transposed with its registers listed in reverse
        // order (v31 down to v16).
2259*c0909341SAndroid Build Coastguard Worker        transpose_4x4s  v31, v30, v29, v28, v2,  v3,  v4,  v5
2260*c0909341SAndroid Build Coastguard Worker        transpose_4x4s  v27, v26, v25, v24, v2,  v3,  v4,  v5
2261*c0909341SAndroid Build Coastguard Worker        transpose_4x4s  v23, v22, v21, v20, v2,  v3,  v4,  v5
2262*c0909341SAndroid Build Coastguard Worker        transpose_4x4s  v19, v18, v17, v16, v2,  v3,  v4,  v5
        // store2: finish one output row in place at x6.
        //   - reload the 16 parked even-half values (v0-v3)
        //   - sums (even + odd) stay in v0-v3, differences (even - odd) go
        //     to v4-v7, both saturating
        //   - narrow everything to 16 bits with a saturating rounding shift
        //   - store the 16 sums first, then the 16 differences in fully
        //     reversed order: the registers are packed v7, v6, v5, v4 and
        //     rev64 reverses the four halfwords inside each 64-bit half
        // The 64 bytes written replace the 64 bytes read; x6 advances by 64.
2263*c0909341SAndroid Build Coastguard Worker.macro store2 r0, r1, r2, r3, shift
2264*c0909341SAndroid Build Coastguard Worker        ld1             {v0.4s, v1.4s, v2.4s, v3.4s}, [x6]
2265*c0909341SAndroid Build Coastguard Worker        sqsub           v4.4s,   v0.4s,   \r0
2266*c0909341SAndroid Build Coastguard Worker        sqadd           v0.4s,   v0.4s,   \r0
2267*c0909341SAndroid Build Coastguard Worker        sqsub           v5.4s,   v1.4s,   \r1
2268*c0909341SAndroid Build Coastguard Worker        sqadd           v1.4s,   v1.4s,   \r1
2269*c0909341SAndroid Build Coastguard Worker        sqsub           v6.4s,   v2.4s,   \r2
2270*c0909341SAndroid Build Coastguard Worker        sqadd           v2.4s,   v2.4s,   \r2
2271*c0909341SAndroid Build Coastguard Worker        sqsub           v7.4s,   v3.4s,   \r3
2272*c0909341SAndroid Build Coastguard Worker        sqadd           v3.4s,   v3.4s,   \r3
2273*c0909341SAndroid Build Coastguard Worker        sqrshrn         v0.4h,   v0.4s,   #\shift
2274*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v0.8h,   v1.4s,   #\shift
2275*c0909341SAndroid Build Coastguard Worker        sqrshrn         v1.4h,   v2.4s,   #\shift
2276*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v1.8h,   v3.4s,   #\shift
2277*c0909341SAndroid Build Coastguard Worker        sqrshrn         v2.4h,   v7.4s,   #\shift
2278*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v2.8h,   v6.4s,   #\shift
2279*c0909341SAndroid Build Coastguard Worker        sqrshrn         v3.4h,   v5.4s,   #\shift
2280*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v3.8h,   v4.4s,   #\shift
2281*c0909341SAndroid Build Coastguard Worker        st1             {v0.8h, v1.8h}, [x6], #32
2282*c0909341SAndroid Build Coastguard Worker        rev64           v2.8h,   v2.8h
2283*c0909341SAndroid Build Coastguard Worker        rev64           v3.8h,   v3.8h
2284*c0909341SAndroid Build Coastguard Worker        st1             {v2.8h, v3.8h}, [x6], #32
2285*c0909341SAndroid Build Coastguard Worker.endm
2286*c0909341SAndroid Build Coastguard Worker
2287*c0909341SAndroid Build Coastguard Worker        store2          v31.4s,  v27.4s,  v23.4s,  v19.4s,  \shift
2288*c0909341SAndroid Build Coastguard Worker        store2          v30.4s,  v26.4s,  v22.4s,  v18.4s,  \shift
2289*c0909341SAndroid Build Coastguard Worker        store2          v29.4s,  v25.4s,  v21.4s,  v17.4s,  \shift
2290*c0909341SAndroid Build Coastguard Worker        store2          v28.4s,  v24.4s,  v20.4s,  v16.4s,  \shift
2291*c0909341SAndroid Build Coastguard Worker.purgem store2
        // Return through the link register value saved on entry.
2292*c0909341SAndroid Build Coastguard Worker        ret             x14
2293*c0909341SAndroid Build Coastguard Workerendfunc
2294*c0909341SAndroid Build Coastguard Worker.endm
2295*c0909341SAndroid Build Coastguard Worker
// Two variants of the horizontal 32-point pass:
//   inv_txfm_horz_dct_32x4_neon        no input scaling, final shift of 2
//   inv_txfm_horz_scale_dct_32x4_neon  inputs scaled first, final shift of 1
2296*c0909341SAndroid Build Coastguard Workerdef_horz_32 scale=0, shift=2
2297*c0909341SAndroid Build Coastguard Workerdef_horz_32 scale=1, shift=1, suffix=_scale
2298*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_vert_dct_8x32_neon
//
// Second (vertical) pass for an 8-column, 32-row strip: runs two 16-row
// helper transforms on the even and odd rows of a 16-bit intermediate
// buffer, merges both halves, rounds, adds the result to the destination
// rows and caps the sum at 0x3ff.
//
// Registers as used below:
//   x7  - intermediate buffer, 8 x 16-bit values per row; its even rows are
//         overwritten with the output of the first helper call
//   x8  - byte stride between rows of the x7 buffer (doubled internally)
//   x6  - destination rows (advanced by x1 per stored row)
//   x1  - destination stride in bytes
//   x14 - copy of the return address; the function returns with "ret x14"
//   x9, x10, v1-v7 and v16-v31 are used as scratch by this code.
2299*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_vert_dct_8x32_neon
2300*c0909341SAndroid Build Coastguard Worker        mov             x14, x30
        // Double the stride so that the loads below visit every other row.
2301*c0909341SAndroid Build Coastguard Worker        lsl             x8,  x8,  #1
2302*c0909341SAndroid Build Coastguard Worker
        // Load the 16 even rows into v16-v31, then rewind x7 by 16 strides.
2303*c0909341SAndroid Build Coastguard Worker.irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
2304*c0909341SAndroid Build Coastguard Worker        ld1             {v\i\().8h}, [x7], x8
2305*c0909341SAndroid Build Coastguard Worker.endr
2306*c0909341SAndroid Build Coastguard Worker        sub             x7,  x7,  x8, lsl #4
2307*c0909341SAndroid Build Coastguard Worker
2308*c0909341SAndroid Build Coastguard Worker        bl              X(inv_dct_8h_x16_neon)
2309*c0909341SAndroid Build Coastguard Worker
        // Park the helper's 16 output rows back in the even row slots; they
        // are reloaded by the combine macro once v16-v31 hold the odd half.
2310*c0909341SAndroid Build Coastguard Worker.irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
2311*c0909341SAndroid Build Coastguard Worker        st1             {v\i\().8h}, [x7], x8
2312*c0909341SAndroid Build Coastguard Worker.endr
        // Rewind, then advance by half the doubled stride (= one original
        // row) so that x7 points at the first odd row.
2313*c0909341SAndroid Build Coastguard Worker        sub             x7,  x7,  x8, lsl #4
2314*c0909341SAndroid Build Coastguard Worker        add             x7,  x7,  x8, lsr #1
2315*c0909341SAndroid Build Coastguard Worker
        // Load the 16 odd rows, then restore x7 to the start of the buffer.
2316*c0909341SAndroid Build Coastguard Worker.irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
2317*c0909341SAndroid Build Coastguard Worker        ld1             {v\i\().8h}, [x7], x8
2318*c0909341SAndroid Build Coastguard Worker.endr
2319*c0909341SAndroid Build Coastguard Worker        sub             x7,  x7,  x8, lsl #4
2320*c0909341SAndroid Build Coastguard Worker        sub             x7,  x7,  x8, lsr #1
2321*c0909341SAndroid Build Coastguard Worker        bl              X(inv_dct32_odd_8h_x16_neon)
2322*c0909341SAndroid Build Coastguard Worker
        // x9  = negative stride, for walking the parked rows backwards.
        // x10 = destination read pointer; x6 stays the write pointer, so
        //       destination loads can run ahead of the stores.
        // v1  = upper clamp applied to every output value.
2323*c0909341SAndroid Build Coastguard Worker        neg             x9,  x8
2324*c0909341SAndroid Build Coastguard Worker        mov             x10, x6
2325*c0909341SAndroid Build Coastguard Worker        mvni            v1.8h,   #0xfc, lsl #8 // 0x3ff
        // combine r0, r1, r2, r3, op, stride
        //   Produces four destination rows. For each row: load one parked
        //   row from [x7] (post-incremented by \stride) and one destination
        //   row from [x10], apply \op (sqadd or sqsub) between the parked
        //   row and the matching \rN register, round-shift right by 4
        //   (srshr), add the signed result to the unsigned destination
        //   values with unsigned saturation (usqadd), cap at v1 (smin) and
        //   store through x6. The four rows are interleaved instruction by
        //   instruction; v5 and v2 are reused for the fourth row.
2326*c0909341SAndroid Build Coastguard Worker.macro combine r0, r1, r2, r3, op, stride
2327*c0909341SAndroid Build Coastguard Worker        ld1             {v5.8h}, [x7],    \stride
2328*c0909341SAndroid Build Coastguard Worker        ld1             {v2.8h}, [x10],   x1
2329*c0909341SAndroid Build Coastguard Worker        ld1             {v6.8h}, [x7],    \stride
2330*c0909341SAndroid Build Coastguard Worker        ld1             {v3.8h}, [x10],   x1
2331*c0909341SAndroid Build Coastguard Worker        \op             v5.8h,   v5.8h,   \r0
2332*c0909341SAndroid Build Coastguard Worker        ld1             {v7.8h}, [x7],    \stride
2333*c0909341SAndroid Build Coastguard Worker        ld1             {v4.8h}, [x10],   x1
2334*c0909341SAndroid Build Coastguard Worker        srshr           v5.8h,   v5.8h,   #4
2335*c0909341SAndroid Build Coastguard Worker        \op             v6.8h,   v6.8h,   \r1
2336*c0909341SAndroid Build Coastguard Worker        usqadd          v2.8h,   v5.8h
2337*c0909341SAndroid Build Coastguard Worker        srshr           v6.8h,   v6.8h,   #4
2338*c0909341SAndroid Build Coastguard Worker        \op             v7.8h,   v7.8h,   \r2
2339*c0909341SAndroid Build Coastguard Worker        ld1             {v5.8h}, [x7],    \stride
2340*c0909341SAndroid Build Coastguard Worker        usqadd          v3.8h,   v6.8h
2341*c0909341SAndroid Build Coastguard Worker        smin            v2.8h,   v2.8h,   v1.8h
2342*c0909341SAndroid Build Coastguard Worker        srshr           v7.8h,   v7.8h,   #4
2343*c0909341SAndroid Build Coastguard Worker        \op             v5.8h,   v5.8h,   \r3
2344*c0909341SAndroid Build Coastguard Worker        st1             {v2.8h}, [x6],    x1
2345*c0909341SAndroid Build Coastguard Worker        ld1             {v2.8h}, [x10],   x1
2346*c0909341SAndroid Build Coastguard Worker        usqadd          v4.8h,   v7.8h
2347*c0909341SAndroid Build Coastguard Worker        smin            v3.8h,   v3.8h,   v1.8h
2348*c0909341SAndroid Build Coastguard Worker        srshr           v5.8h,   v5.8h,   #4
2349*c0909341SAndroid Build Coastguard Worker        st1             {v3.8h}, [x6],    x1
2350*c0909341SAndroid Build Coastguard Worker        usqadd          v2.8h,   v5.8h
2351*c0909341SAndroid Build Coastguard Worker        smin            v4.8h,   v4.8h,   v1.8h
2352*c0909341SAndroid Build Coastguard Worker        st1             {v4.8h}, [x6],    x1
2353*c0909341SAndroid Build Coastguard Worker        smin            v2.8h,   v2.8h,   v1.8h
2354*c0909341SAndroid Build Coastguard Worker        st1             {v2.8h}, [x6],    x1
2355*c0909341SAndroid Build Coastguard Worker.endm
        // Output rows 0-15: parked[j] + v(31-j), parked rows read forwards.
2356*c0909341SAndroid Build Coastguard Worker        combine         v31.8h, v30.8h, v29.8h, v28.8h, sqadd, x8
2357*c0909341SAndroid Build Coastguard Worker        combine         v27.8h, v26.8h, v25.8h, v24.8h, sqadd, x8
2358*c0909341SAndroid Build Coastguard Worker        combine         v23.8h, v22.8h, v21.8h, v20.8h, sqadd, x8
2359*c0909341SAndroid Build Coastguard Worker        combine         v19.8h, v18.8h, v17.8h, v16.8h, sqadd, x8
        // x7 is one stride past the last parked row; step back onto it and
        // read the parked rows in reverse (stride x9 = -x8).
2360*c0909341SAndroid Build Coastguard Worker        sub             x7,  x7,  x8
        // Output rows 16-31: parked[15-j] - v(16+j).
2361*c0909341SAndroid Build Coastguard Worker        combine         v16.8h, v17.8h, v18.8h, v19.8h, sqsub, x9
2362*c0909341SAndroid Build Coastguard Worker        combine         v20.8h, v21.8h, v22.8h, v23.8h, sqsub, x9
2363*c0909341SAndroid Build Coastguard Worker        combine         v24.8h, v25.8h, v26.8h, v27.8h, sqsub, x9
2364*c0909341SAndroid Build Coastguard Worker        combine         v28.8h, v29.8h, v30.8h, v31.8h, sqsub, x9
2365*c0909341SAndroid Build Coastguard Worker.purgem combine
2366*c0909341SAndroid Build Coastguard Worker
2367*c0909341SAndroid Build Coastguard Worker        ret             x14
2368*c0909341SAndroid Build Coastguard Workerendfunc
2369*c0909341SAndroid Build Coastguard Worker
// Tables of 16-bit thresholds. The functions below load an entry with ldrh
// and compare w3 against it to decide whether another group of coefficients
// has to be processed (w3 is presumably the end-of-block index, going by the
// table names - confirm against the C prototype).
// The dct_dct functions read consecutive entries (post-increment #2); the
// identity functions work on 8x8 blocks and read every other entry
// (post-increment #4). NOTE(review): the identity functions also pass an
// extra "2" to movrel, presumably a byte offset that makes them start at
// the second entry - movrel is defined outside this part of the file.
2370*c0909341SAndroid Build Coastguard Workerconst eob_32x32
2371*c0909341SAndroid Build Coastguard Worker        .short 10, 36, 78, 136, 210, 300, 406, 1024
2372*c0909341SAndroid Build Coastguard Workerendconst
2373*c0909341SAndroid Build Coastguard Worker
2374*c0909341SAndroid Build Coastguard Workerconst eob_16x32
2375*c0909341SAndroid Build Coastguard Worker        .short 10, 36, 78, 151, 215, 279, 343, 512
2376*c0909341SAndroid Build Coastguard Workerendconst
2377*c0909341SAndroid Build Coastguard Worker
// Four-entry table used by def_identity_1632 for the 16-wide dimension.
2378*c0909341SAndroid Build Coastguard Workerconst eob_16x32_shortside
2379*c0909341SAndroid Build Coastguard Worker        .short 10, 36, 78, 512
2380*c0909341SAndroid Build Coastguard Workerendconst
2381*c0909341SAndroid Build Coastguard Worker
2382*c0909341SAndroid Build Coastguard Workerconst eob_8x32
2383*c0909341SAndroid Build Coastguard Worker        .short 10, 43, 75, 107, 139, 171, 203, 256
2384*c0909341SAndroid Build Coastguard Workerendconst
2385*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_identity_identity_32x32_16bpc_neon
//
// Processes the block in 8x8 tiles: each tile of 32-bit coefficients is
// loaded, cleared in memory, narrowed to 16 bit with saturation, transposed
// and handed to load_add_store_8x8 (defined elsewhere) with shiftbits=2.
//
// Registers as used below:
//   x0  - first argument of load_add_store_8x8; stepped by 16 bytes per
//         tile and by 8*x1 per tile row (presumably the destination pointer)
//   x1  - stride used to step x0 between tile rows
//   x2  - coefficient buffer; every value read is overwritten with zero
//   w3  - compared against eob_32x32 thresholds to stop early
//   x8  - byte step between the eight loads of a tile (4*32)
//   w9  - tiles done in the current tile row, times 8
//   x12 - threshold pointer for the inner loop, reset for every tile row
//   x13 - threshold pointer for the outer loop
2386*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_identity_identity_32x32_16bpc_neon, export=1
        // v0/v1: zeros used to clear the coefficients after reading them.
2387*c0909341SAndroid Build Coastguard Worker        movi            v0.8h,  #0
2388*c0909341SAndroid Build Coastguard Worker        movi            v1.8h,  #0
2389*c0909341SAndroid Build Coastguard Worker        movrel          x13, eob_32x32, 2
2390*c0909341SAndroid Build Coastguard Worker
2391*c0909341SAndroid Build Coastguard Worker        mov             x8,  #4*32
        // Outer loop: one row of 8x8 tiles per iteration.
2392*c0909341SAndroid Build Coastguard Worker1:
2393*c0909341SAndroid Build Coastguard Worker        mov             w9,  #0
2394*c0909341SAndroid Build Coastguard Worker        movrel          x12, eob_32x32, 2
        // Inner loop: one 8x8 tile per iteration.
2395*c0909341SAndroid Build Coastguard Worker2:
2396*c0909341SAndroid Build Coastguard Worker        add             w9,  w9,  #8
        // Each ld1 reads eight 32-bit coefficients; the st1 that follows
        // zeroes the same 32 bytes and advances x2 by x8.
2397*c0909341SAndroid Build Coastguard Worker        ld1             {v16.4s, v17.4s}, [x2]
2398*c0909341SAndroid Build Coastguard Worker        st1             {v0.4s, v1.4s},   [x2], x8
2399*c0909341SAndroid Build Coastguard Worker        ld1             {v18.4s, v19.4s}, [x2]
2400*c0909341SAndroid Build Coastguard Worker        st1             {v0.4s, v1.4s},   [x2], x8
2401*c0909341SAndroid Build Coastguard Worker        ld1             {v20.4s, v21.4s}, [x2]
2402*c0909341SAndroid Build Coastguard Worker        st1             {v0.4s, v1.4s},   [x2], x8
2403*c0909341SAndroid Build Coastguard Worker        ld1             {v22.4s, v23.4s}, [x2]
2404*c0909341SAndroid Build Coastguard Worker        st1             {v0.4s, v1.4s},   [x2], x8
2405*c0909341SAndroid Build Coastguard Worker        ld1             {v24.4s, v25.4s}, [x2]
2406*c0909341SAndroid Build Coastguard Worker        st1             {v0.4s, v1.4s},   [x2], x8
2407*c0909341SAndroid Build Coastguard Worker        ld1             {v26.4s, v27.4s}, [x2]
2408*c0909341SAndroid Build Coastguard Worker        st1             {v0.4s, v1.4s},   [x2], x8
2409*c0909341SAndroid Build Coastguard Worker        ld1             {v28.4s, v29.4s}, [x2]
2410*c0909341SAndroid Build Coastguard Worker        st1             {v0.4s, v1.4s},   [x2], x8
2411*c0909341SAndroid Build Coastguard Worker        ld1             {v30.4s, v31.4s}, [x2]
2412*c0909341SAndroid Build Coastguard Worker        st1             {v0.4s, v1.4s},   [x2], x8
        // Saturating narrow 32 -> 16 bit, packing each register pair into
        // one 8h register (v16-v23).
2413*c0909341SAndroid Build Coastguard Worker        sqxtn           v16.4h,  v16.4s
2414*c0909341SAndroid Build Coastguard Worker        sqxtn2          v16.8h,  v17.4s
2415*c0909341SAndroid Build Coastguard Worker        sqxtn           v17.4h,  v18.4s
2416*c0909341SAndroid Build Coastguard Worker        sqxtn2          v17.8h,  v19.4s
2417*c0909341SAndroid Build Coastguard Worker        sqxtn           v18.4h,  v20.4s
2418*c0909341SAndroid Build Coastguard Worker        sqxtn2          v18.8h,  v21.4s
2419*c0909341SAndroid Build Coastguard Worker        sqxtn           v19.4h,  v22.4s
2420*c0909341SAndroid Build Coastguard Worker        sqxtn2          v19.8h,  v23.4s
2421*c0909341SAndroid Build Coastguard Worker        sqxtn           v20.4h,  v24.4s
2422*c0909341SAndroid Build Coastguard Worker        sqxtn2          v20.8h,  v25.4s
2423*c0909341SAndroid Build Coastguard Worker        sqxtn           v21.4h,  v26.4s
2424*c0909341SAndroid Build Coastguard Worker        sqxtn2          v21.8h,  v27.4s
2425*c0909341SAndroid Build Coastguard Worker        sqxtn           v22.4h,  v28.4s
2426*c0909341SAndroid Build Coastguard Worker        sqxtn2          v22.8h,  v29.4s
2427*c0909341SAndroid Build Coastguard Worker        sqxtn           v23.4h,  v30.4s
2428*c0909341SAndroid Build Coastguard Worker        sqxtn2          v23.8h,  v31.4s
2429*c0909341SAndroid Build Coastguard Worker        transpose_8x8h  v16, v17, v18, v19, v20, v21, v22, v23, v4, v5
2430*c0909341SAndroid Build Coastguard Worker
2431*c0909341SAndroid Build Coastguard Worker        load_add_store_8x8 x0, x7, shiftbits=2
        // Fetch the next inner threshold, move x0 back up by eight rows
        // (presumably undoing the macro's advance - TODO confirm) and on to
        // the next tile (8 values * 2 bytes); repeat while w3 >= threshold.
2432*c0909341SAndroid Build Coastguard Worker        ldrh            w11, [x12], #4
2433*c0909341SAndroid Build Coastguard Worker        sub             x0,  x0,  x1, lsl #3
2434*c0909341SAndroid Build Coastguard Worker        add             x0,  x0,  #2*8
2435*c0909341SAndroid Build Coastguard Worker        cmp             w3,  w11
2436*c0909341SAndroid Build Coastguard Worker        b.ge            2b
2437*c0909341SAndroid Build Coastguard Worker
        // End of a tile row: stop when w3 is below the next outer threshold.
2438*c0909341SAndroid Build Coastguard Worker        ldrh            w11, [x13], #4
2439*c0909341SAndroid Build Coastguard Worker        cmp             w3,  w11
2440*c0909341SAndroid Build Coastguard Worker        b.lt            9f
2441*c0909341SAndroid Build Coastguard Worker
        // Rewind x0 by the 2*w9 bytes stepped in this tile row and move down
        // eight rows; rewind x2 by x8*w9 (everything the inner loop
        // advanced) and step 8 coefficients (4*8 bytes) forward.
2442*c0909341SAndroid Build Coastguard Worker        sub             x0,  x0,  w9, uxtw #1
2443*c0909341SAndroid Build Coastguard Worker        add             x0,  x0,  x1, lsl #3
2444*c0909341SAndroid Build Coastguard Worker        msub            x2,  x8,  x9,  x2
2445*c0909341SAndroid Build Coastguard Worker        add             x2,  x2,  #4*8
2446*c0909341SAndroid Build Coastguard Worker        b               1b
2447*c0909341SAndroid Build Coastguard Worker9:
2448*c0909341SAndroid Build Coastguard Worker        ret
2449*c0909341SAndroid Build Coastguard Workerendfunc
2450*c0909341SAndroid Build Coastguard Worker
// shift_16_regs op, shift
//   Applies the immediate-shift instruction \op with shift amount \shift,
//   in place, to each of v16.4s .. v31.4s (one instruction per register).
2451*c0909341SAndroid Build Coastguard Worker.macro shift_16_regs op, shift
2452*c0909341SAndroid Build Coastguard Worker.irp i, v16.4s, v17.4s, v18.4s, v19.4s, v20.4s, v21.4s, v22.4s, v23.4s, v24.4s, v25.4s, v26.4s, v27.4s, v28.4s, v29.4s, v30.4s, v31.4s
2453*c0909341SAndroid Build Coastguard Worker        \op             \i,  \i,  #\shift
2454*c0909341SAndroid Build Coastguard Worker.endr
2455*c0909341SAndroid Build Coastguard Worker.endm
2456*c0909341SAndroid Build Coastguard Worker
// def_identity_1632 w, h, wshort, hshort
//   Emits inv_txfm_add_identity_identity_<w>x<h>_16bpc_neon for the 16x32
//   and 32x16 sizes. The generated function works on 8x8 tiles like the
//   32x32 identity function, but additionally scales the coefficients
//   (scale_input with v2.s[0]) and applies identity_4x16 or
//   identity_4x16_shift1 (with v2.s[1]) before narrowing; those macros and
//   load_add_store_8x8 are defined elsewhere.
//
//   w, h    - block width and height; h sets the coefficient step (4*h)
//   wshort  - suffix appended to "eob_16x32" for the inner-loop thresholds
//   hshort  - suffix appended to "eob_16x32" for the outer-loop thresholds
//
//   Registers of the generated function: x0/x1 are stepped per tile exactly
//   as in the 32x32 identity function, x2 is the coefficient buffer (zeroed
//   as it is read), w3 is compared against the thresholds, and w16/w17 hold
//   the two multiplier constants.
2457*c0909341SAndroid Build Coastguard Worker.macro def_identity_1632 w, h, wshort, hshort
2458*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_identity_identity_\w\()x\h\()_16bpc_neon, export=1
        // Multiplier constants, placed in the upper 16 bits of each word.
        // NOTE(review): 2896/4096 is about 1/sqrt(2) and 5793/4096 about
        // sqrt(2); the exact fixed-point convention depends on scale_input
        // and identity_4x16, which are not shown here.
2459*c0909341SAndroid Build Coastguard Worker        movz            w16, #2896*8, lsl #16
2460*c0909341SAndroid Build Coastguard Worker        movz            w17, #2*(5793-4096)*8, lsl #16
        // v0/v1: zeros used to clear the coefficients after reading them.
2461*c0909341SAndroid Build Coastguard Worker        movi            v0.4s,   #0
2462*c0909341SAndroid Build Coastguard Worker        movi            v1.4s,   #0
2463*c0909341SAndroid Build Coastguard Worker        movrel          x13, eob_16x32\hshort, 2
2464*c0909341SAndroid Build Coastguard Worker
2465*c0909341SAndroid Build Coastguard Worker        mov             x8,  #4*\h
        // Outer loop: one row of 8x8 tiles per iteration.
2466*c0909341SAndroid Build Coastguard Worker1:
2467*c0909341SAndroid Build Coastguard Worker        mov             w9,  #0
2468*c0909341SAndroid Build Coastguard Worker        movrel          x12, eob_16x32\wshort, 2
        // Inner loop: one 8x8 tile per iteration. v2 is rebuilt on every
        // pass (s[0] = w16, s[1] = w17), interleaved with the loads.
2469*c0909341SAndroid Build Coastguard Worker2:
2470*c0909341SAndroid Build Coastguard Worker        add             w9,  w9,  #8
2471*c0909341SAndroid Build Coastguard Worker        ld1             {v16.4s, v17.4s}, [x2]
2472*c0909341SAndroid Build Coastguard Worker        st1             {v0.4s, v1.4s},   [x2], x8
2473*c0909341SAndroid Build Coastguard Worker        dup             v2.2s,   w16
2474*c0909341SAndroid Build Coastguard Worker        ld1             {v18.4s, v19.4s}, [x2]
2475*c0909341SAndroid Build Coastguard Worker        st1             {v0.4s, v1.4s},   [x2], x8
2476*c0909341SAndroid Build Coastguard Worker        mov             v2.s[1], w17
2477*c0909341SAndroid Build Coastguard Worker        ld1             {v20.4s, v21.4s}, [x2]
2478*c0909341SAndroid Build Coastguard Worker        st1             {v0.4s, v1.4s},   [x2], x8
2479*c0909341SAndroid Build Coastguard Worker        ld1             {v22.4s, v23.4s}, [x2]
2480*c0909341SAndroid Build Coastguard Worker        st1             {v0.4s, v1.4s},   [x2], x8
2481*c0909341SAndroid Build Coastguard Worker        ld1             {v24.4s, v25.4s}, [x2]
2482*c0909341SAndroid Build Coastguard Worker        st1             {v0.4s, v1.4s},   [x2], x8
2483*c0909341SAndroid Build Coastguard Worker        ld1             {v26.4s, v27.4s}, [x2]
2484*c0909341SAndroid Build Coastguard Worker        st1             {v0.4s, v1.4s},   [x2], x8
2485*c0909341SAndroid Build Coastguard Worker        ld1             {v28.4s, v29.4s}, [x2]
2486*c0909341SAndroid Build Coastguard Worker        st1             {v0.4s, v1.4s},   [x2], x8
2487*c0909341SAndroid Build Coastguard Worker        ld1             {v30.4s, v31.4s}, [x2]
2488*c0909341SAndroid Build Coastguard Worker        st1             {v0.4s, v1.4s},   [x2], x8
2489*c0909341SAndroid Build Coastguard Worker        scale_input     .4s, v2.s[0], v16, v17, v18, v19, v20, v21, v22, v23
2490*c0909341SAndroid Build Coastguard Worker        scale_input     .4s, v2.s[0], v24, v25, v26, v27, v28, v29, v30, v31
2491*c0909341SAndroid Build Coastguard Worker
        // The two sizes differ only in this step and in the shiftbits value
        // passed to load_add_store_8x8 further down.
2492*c0909341SAndroid Build Coastguard Worker.if \w == 16
2493*c0909341SAndroid Build Coastguard Worker        // 16x32
2494*c0909341SAndroid Build Coastguard Worker        identity_4x16_shift1 v2.s[1]
2495*c0909341SAndroid Build Coastguard Worker.else
2496*c0909341SAndroid Build Coastguard Worker        // 32x16
2497*c0909341SAndroid Build Coastguard Worker        shift_16_regs   sqshl, 1
2498*c0909341SAndroid Build Coastguard Worker        identity_4x16   v2.s[1]
2499*c0909341SAndroid Build Coastguard Worker.endif
        // Saturating narrow 32 -> 16 bit, packing each register pair into
        // one 8h register (v16-v23).
2500*c0909341SAndroid Build Coastguard Worker        sqxtn           v16.4h,  v16.4s
2501*c0909341SAndroid Build Coastguard Worker        sqxtn2          v16.8h,  v17.4s
2502*c0909341SAndroid Build Coastguard Worker        sqxtn           v17.4h,  v18.4s
2503*c0909341SAndroid Build Coastguard Worker        sqxtn2          v17.8h,  v19.4s
2504*c0909341SAndroid Build Coastguard Worker        sqxtn           v18.4h,  v20.4s
2505*c0909341SAndroid Build Coastguard Worker        sqxtn2          v18.8h,  v21.4s
2506*c0909341SAndroid Build Coastguard Worker        sqxtn           v19.4h,  v22.4s
2507*c0909341SAndroid Build Coastguard Worker        sqxtn2          v19.8h,  v23.4s
2508*c0909341SAndroid Build Coastguard Worker        sqxtn           v20.4h,  v24.4s
2509*c0909341SAndroid Build Coastguard Worker        sqxtn2          v20.8h,  v25.4s
2510*c0909341SAndroid Build Coastguard Worker        sqxtn           v21.4h,  v26.4s
2511*c0909341SAndroid Build Coastguard Worker        sqxtn2          v21.8h,  v27.4s
2512*c0909341SAndroid Build Coastguard Worker        sqxtn           v22.4h,  v28.4s
2513*c0909341SAndroid Build Coastguard Worker        sqxtn2          v22.8h,  v29.4s
2514*c0909341SAndroid Build Coastguard Worker        sqxtn           v23.4h,  v30.4s
2515*c0909341SAndroid Build Coastguard Worker        sqxtn2          v23.8h,  v31.4s
2516*c0909341SAndroid Build Coastguard Worker
2517*c0909341SAndroid Build Coastguard Worker        transpose_8x8h  v16, v17, v18, v19, v20, v21, v22, v23, v4, v5
2518*c0909341SAndroid Build Coastguard Worker
2519*c0909341SAndroid Build Coastguard Worker.if \w == 16
2520*c0909341SAndroid Build Coastguard Worker        load_add_store_8x8 x0, x7, shiftbits=2
2521*c0909341SAndroid Build Coastguard Worker.else
2522*c0909341SAndroid Build Coastguard Worker        load_add_store_8x8 x0, x7, shiftbits=4
2523*c0909341SAndroid Build Coastguard Worker.endif
        // Next inner threshold; x0 goes back up eight rows and on to the
        // next tile (16 bytes). Repeat while w3 >= threshold.
2524*c0909341SAndroid Build Coastguard Worker        ldrh            w11, [x12], #4
2525*c0909341SAndroid Build Coastguard Worker        sub             x0,  x0,  x1, lsl #3
2526*c0909341SAndroid Build Coastguard Worker        add             x0,  x0,  #16
2527*c0909341SAndroid Build Coastguard Worker        cmp             w3,  w11
2528*c0909341SAndroid Build Coastguard Worker        b.ge            2b
2529*c0909341SAndroid Build Coastguard Worker
        // End of a tile row: stop when w3 is below the next outer threshold.
2530*c0909341SAndroid Build Coastguard Worker        ldrh            w11, [x13], #4
2531*c0909341SAndroid Build Coastguard Worker        cmp             w3,  w11
2532*c0909341SAndroid Build Coastguard Worker        b.lt            9f
2533*c0909341SAndroid Build Coastguard Worker
        // Rewind x0 by the 2*w9 bytes stepped in this tile row and move down
        // eight rows; rewind x2 by x8*w9 and step 8 coefficients forward.
2534*c0909341SAndroid Build Coastguard Worker        sub             x0,  x0,  w9, uxtw #1
2535*c0909341SAndroid Build Coastguard Worker        add             x0,  x0,  x1, lsl #3
2536*c0909341SAndroid Build Coastguard Worker        msub            x2,  x8,  x9,  x2
2537*c0909341SAndroid Build Coastguard Worker        add             x2,  x2,  #4*8
2538*c0909341SAndroid Build Coastguard Worker        b               1b
2539*c0909341SAndroid Build Coastguard Worker9:
2540*c0909341SAndroid Build Coastguard Worker        ret
2541*c0909341SAndroid Build Coastguard Workerendfunc
2542*c0909341SAndroid Build Coastguard Worker.endm
2543*c0909341SAndroid Build Coastguard Worker
// 16x32: inner loop uses eob_16x32_shortside, outer loop uses eob_16x32.
// 32x16: inner loop uses eob_16x32, outer loop uses eob_16x32_shortside.
2544*c0909341SAndroid Build Coastguard Workerdef_identity_1632 16, 32, _shortside,
2545*c0909341SAndroid Build Coastguard Workerdef_identity_1632 32, 16, , _shortside
2546*c0909341SAndroid Build Coastguard Worker
// def_identity_832 w, h
//   Emits inv_txfm_add_identity_identity_<w>x<h>_16bpc_neon for the 8x32 and
//   32x8 sizes. A single loop handles one 8x8 tile per iteration: load and
//   clear the 32-bit coefficients, narrow to 16 bit, transpose, then call
//   load_add_store_8x8 (defined elsewhere).
//
//   Registers of the generated function:
//     x0  - first argument of load_add_store_8x8 (presumably the destination)
//     x1  - stride used to step x0 back by eight rows in the 32x8 case
//     x2  - coefficient buffer; every value read is overwritten with zero
//     w3  - compared against eob_8x32 thresholds to stop early
//     x8  - byte step between the eight loads of a tile (4*h)
//     w12 - threshold for the current tile, x13 - threshold pointer
2547*c0909341SAndroid Build Coastguard Worker.macro def_identity_832 w, h
2548*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_identity_identity_\w\()x\h\()_16bpc_neon, export=1
        // v0/v1: zeros used to clear the coefficients after reading them.
2549*c0909341SAndroid Build Coastguard Worker        movi            v0.4s,  #0
2550*c0909341SAndroid Build Coastguard Worker        movi            v1.4s,  #0
2551*c0909341SAndroid Build Coastguard Worker        // Working on 8x8 blocks, read every other entry from eob_8x32
2552*c0909341SAndroid Build Coastguard Worker        movrel          x13, eob_8x32, 2
2553*c0909341SAndroid Build Coastguard Worker
        // Writing w8 clears the upper half of x8, which is used as the
        // 64-bit post-increment below.
2554*c0909341SAndroid Build Coastguard Worker        mov             w8,  #4*\h
2555*c0909341SAndroid Build Coastguard Worker1:
2556*c0909341SAndroid Build Coastguard Worker        // Working on 8x8 blocks, read every other entry from eob_8x32
2557*c0909341SAndroid Build Coastguard Worker        ldrh            w12, [x13], #4
2558*c0909341SAndroid Build Coastguard Worker        ld1             {v16.4s, v17.4s}, [x2]
2559*c0909341SAndroid Build Coastguard Worker        st1             {v0.4s, v1.4s},   [x2], x8
2560*c0909341SAndroid Build Coastguard Worker        ld1             {v18.4s, v19.4s}, [x2]
2561*c0909341SAndroid Build Coastguard Worker        st1             {v0.4s, v1.4s},   [x2], x8
2562*c0909341SAndroid Build Coastguard Worker        ld1             {v20.4s, v21.4s}, [x2]
2563*c0909341SAndroid Build Coastguard Worker        st1             {v0.4s, v1.4s},   [x2], x8
2564*c0909341SAndroid Build Coastguard Worker        ld1             {v22.4s, v23.4s}, [x2]
2565*c0909341SAndroid Build Coastguard Worker        st1             {v0.4s, v1.4s},   [x2], x8
2566*c0909341SAndroid Build Coastguard Worker        ld1             {v24.4s, v25.4s}, [x2]
2567*c0909341SAndroid Build Coastguard Worker        st1             {v0.4s, v1.4s},   [x2], x8
2568*c0909341SAndroid Build Coastguard Worker        ld1             {v26.4s, v27.4s}, [x2]
2569*c0909341SAndroid Build Coastguard Worker        st1             {v0.4s, v1.4s},   [x2], x8
2570*c0909341SAndroid Build Coastguard Worker        ld1             {v28.4s, v29.4s}, [x2]
2571*c0909341SAndroid Build Coastguard Worker        st1             {v0.4s, v1.4s},   [x2], x8
2572*c0909341SAndroid Build Coastguard Worker        ld1             {v30.4s, v31.4s}, [x2]
2573*c0909341SAndroid Build Coastguard Worker        st1             {v0.4s, v1.4s},   [x2], x8
2574*c0909341SAndroid Build Coastguard Worker
        // 8x32 narrows with a rounding right shift by 1 (sqrshrn); 32x8
        // narrows without shifting (sqxtn). Both saturate to 16 bit.
2575*c0909341SAndroid Build Coastguard Worker.if \w == 8
2576*c0909341SAndroid Build Coastguard Worker        sqrshrn         v16.4h,  v16.4s,  #1
2577*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v16.8h,  v17.4s,  #1
2578*c0909341SAndroid Build Coastguard Worker        sqrshrn         v17.4h,  v18.4s,  #1
2579*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v17.8h,  v19.4s,  #1
2580*c0909341SAndroid Build Coastguard Worker        sqrshrn         v18.4h,  v20.4s,  #1
2581*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v18.8h,  v21.4s,  #1
2582*c0909341SAndroid Build Coastguard Worker        sqrshrn         v19.4h,  v22.4s,  #1
2583*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v19.8h,  v23.4s,  #1
2584*c0909341SAndroid Build Coastguard Worker        sqrshrn         v20.4h,  v24.4s,  #1
2585*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v20.8h,  v25.4s,  #1
2586*c0909341SAndroid Build Coastguard Worker        sqrshrn         v21.4h,  v26.4s,  #1
2587*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v21.8h,  v27.4s,  #1
2588*c0909341SAndroid Build Coastguard Worker        sqrshrn         v22.4h,  v28.4s,  #1
2589*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v22.8h,  v29.4s,  #1
2590*c0909341SAndroid Build Coastguard Worker        sqrshrn         v23.4h,  v30.4s,  #1
2591*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v23.8h,  v31.4s,  #1
2592*c0909341SAndroid Build Coastguard Worker.else
2593*c0909341SAndroid Build Coastguard Worker        sqxtn           v16.4h,  v16.4s
2594*c0909341SAndroid Build Coastguard Worker        sqxtn2          v16.8h,  v17.4s
2595*c0909341SAndroid Build Coastguard Worker        sqxtn           v17.4h,  v18.4s
2596*c0909341SAndroid Build Coastguard Worker        sqxtn2          v17.8h,  v19.4s
2597*c0909341SAndroid Build Coastguard Worker        sqxtn           v18.4h,  v20.4s
2598*c0909341SAndroid Build Coastguard Worker        sqxtn2          v18.8h,  v21.4s
2599*c0909341SAndroid Build Coastguard Worker        sqxtn           v19.4h,  v22.4s
2600*c0909341SAndroid Build Coastguard Worker        sqxtn2          v19.8h,  v23.4s
2601*c0909341SAndroid Build Coastguard Worker        sqxtn           v20.4h,  v24.4s
2602*c0909341SAndroid Build Coastguard Worker        sqxtn2          v20.8h,  v25.4s
2603*c0909341SAndroid Build Coastguard Worker        sqxtn           v21.4h,  v26.4s
2604*c0909341SAndroid Build Coastguard Worker        sqxtn2          v21.8h,  v27.4s
2605*c0909341SAndroid Build Coastguard Worker        sqxtn           v22.4h,  v28.4s
2606*c0909341SAndroid Build Coastguard Worker        sqxtn2          v22.8h,  v29.4s
2607*c0909341SAndroid Build Coastguard Worker        sqxtn           v23.4h,  v30.4s
2608*c0909341SAndroid Build Coastguard Worker        sqxtn2          v23.8h,  v31.4s
2609*c0909341SAndroid Build Coastguard Worker.endif
2610*c0909341SAndroid Build Coastguard Worker
2611*c0909341SAndroid Build Coastguard Worker        transpose_8x8h  v16, v17, v18, v19, v20, v21, v22, v23, v4, v5
2612*c0909341SAndroid Build Coastguard Worker
2613*c0909341SAndroid Build Coastguard Worker
        // The flags set here are consumed by the b.lt after
        // load_add_store_8x8, so this relies on that macro's expansion not
        // modifying the condition flags.
2614*c0909341SAndroid Build Coastguard Worker        cmp             w3,  w12
2615*c0909341SAndroid Build Coastguard Worker.if \w == 8
2616*c0909341SAndroid Build Coastguard Worker        load_add_store_8x8 x0, x7, shiftbits=2
2617*c0909341SAndroid Build Coastguard Worker.else
2618*c0909341SAndroid Build Coastguard Worker        load_add_store_8x8 x0, x7, shiftbits=3
2619*c0909341SAndroid Build Coastguard Worker.endif
2620*c0909341SAndroid Build Coastguard Worker
2621*c0909341SAndroid Build Coastguard Worker        b.lt            9f
        // 8x32: x0 is left where the macro put it; x2 is rewound by the
        //       eight strides just read and moved 8 coefficients forward.
        // 32x8: x2 simply continues; x0 goes back up eight rows and 16 bytes
        //       to the right.
2622*c0909341SAndroid Build Coastguard Worker.if \w == 8
2623*c0909341SAndroid Build Coastguard Worker        sub             x2,  x2,  x8, lsl #3
2624*c0909341SAndroid Build Coastguard Worker        add             x2,  x2,  #4*8
2625*c0909341SAndroid Build Coastguard Worker.else
2626*c0909341SAndroid Build Coastguard Worker        sub             x0,  x0,  x1, lsl #3
2627*c0909341SAndroid Build Coastguard Worker        add             x0,  x0,  #2*8
2628*c0909341SAndroid Build Coastguard Worker.endif
2629*c0909341SAndroid Build Coastguard Worker        b               1b
2630*c0909341SAndroid Build Coastguard Worker
2631*c0909341SAndroid Build Coastguard Worker9:
2632*c0909341SAndroid Build Coastguard Worker        ret
2633*c0909341SAndroid Build Coastguard Workerendfunc
2634*c0909341SAndroid Build Coastguard Worker.endm
2635*c0909341SAndroid Build Coastguard Worker
2636*c0909341SAndroid Build Coastguard Workerdef_identity_832 8, 32
2637*c0909341SAndroid Build Coastguard Workerdef_identity_832 32, 8
2638*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_dct_dct_32x32_16bpc_neon
//
// Two-pass 32x32 transform using a 2048-byte (32*32 * 2 bytes) scratch
// buffer on the stack. The first pass calls inv_txfm_horz_dct_32x4_neon for
// up to eight groups of four; groups beyond the point where w3 falls below
// the eob_32x32 threshold are not transformed and their scratch area is
// zero-filled instead. The second pass calls
// inv_txfm_add_vert_dct_8x32_neon for four strips of eight columns.
//
// Registers as used below:
//   x0  - base for the strip pointers passed to the second pass in x6
//   x2  - coefficient buffer, base for the x7 argument of the first pass
//   w3  - compared against the eob_32x32 thresholds
//   x15 - copy of the return address; the function returns with "ret x15"
// NOTE(review): idct_dc is defined elsewhere; presumably it handles the
// DC-only case and may return early - confirm.
2639*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_dct_dct_32x32_16bpc_neon, export=1
2640*c0909341SAndroid Build Coastguard Worker        idct_dc         32,  32,  2
2641*c0909341SAndroid Build Coastguard Worker
2642*c0909341SAndroid Build Coastguard Worker        mov             x15, x30
2643*c0909341SAndroid Build Coastguard Worker        sub             sp,  sp,  #2048
2644*c0909341SAndroid Build Coastguard Worker        movrel          x13, eob_32x32
2645*c0909341SAndroid Build Coastguard Worker        ldrh            w12, [x13], #2
2646*c0909341SAndroid Build Coastguard Worker
        // First pass, unrolled per group of four. x6 = scratch output for
        // the group, x7 = input for the group, x8 = input stride (32*4).
        // For every group after the first, w8 is preloaded with the number
        // of rows left to zero-fill before the threshold test; all unrolled
        // copies branch to the single label 1 below.
2647*c0909341SAndroid Build Coastguard Worker.irp i, 0, 4, 8, 12, 16, 20, 24, 28
2648*c0909341SAndroid Build Coastguard Worker        add             x6,  sp,  #(\i*32*2)
2649*c0909341SAndroid Build Coastguard Worker.if \i > 0
2650*c0909341SAndroid Build Coastguard Worker        mov             w8,  #(32 - \i)
2651*c0909341SAndroid Build Coastguard Worker        cmp             w3,  w12
2652*c0909341SAndroid Build Coastguard Worker        b.lt            1f
2653*c0909341SAndroid Build Coastguard Worker.if \i < 28
2654*c0909341SAndroid Build Coastguard Worker        ldrh            w12, [x13], #2
2655*c0909341SAndroid Build Coastguard Worker.endif
2656*c0909341SAndroid Build Coastguard Worker.endif
2657*c0909341SAndroid Build Coastguard Worker        add             x7,  x2,  #(\i*4)
2658*c0909341SAndroid Build Coastguard Worker        mov             x8,  #32*4
2659*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_horz_dct_32x4_neon
2660*c0909341SAndroid Build Coastguard Worker.endr
2661*c0909341SAndroid Build Coastguard Worker        b               3f
2662*c0909341SAndroid Build Coastguard Worker
        // Zero-fill the rest of the scratch buffer starting at x6: each
        // iteration writes 4 * 64 = 256 bytes (four rows of 32 16-bit
        // values) and takes 4 off the remaining row count in w8.
2663*c0909341SAndroid Build Coastguard Worker1:
2664*c0909341SAndroid Build Coastguard Worker        movi            v4.8h,  #0
2665*c0909341SAndroid Build Coastguard Worker        movi            v5.8h,  #0
2666*c0909341SAndroid Build Coastguard Worker        movi            v6.8h,  #0
2667*c0909341SAndroid Build Coastguard Worker        movi            v7.8h,  #0
2668*c0909341SAndroid Build Coastguard Worker2:
2669*c0909341SAndroid Build Coastguard Worker        subs            w8,  w8,  #4
2670*c0909341SAndroid Build Coastguard Worker.rept 4
2671*c0909341SAndroid Build Coastguard Worker        st1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x6], #64
2672*c0909341SAndroid Build Coastguard Worker.endr
2673*c0909341SAndroid Build Coastguard Worker        b.gt            2b
2674*c0909341SAndroid Build Coastguard Worker
        // Second pass: four strips of eight columns. x6 = destination strip
        // (x0 + 2 bytes per column), x7 = matching scratch column,
        // x8 = scratch row stride (32 * 2 bytes).
2675*c0909341SAndroid Build Coastguard Worker3:
2676*c0909341SAndroid Build Coastguard Worker.irp i, 0, 8, 16, 24
2677*c0909341SAndroid Build Coastguard Worker        add             x6,  x0,  #(\i*2)
2678*c0909341SAndroid Build Coastguard Worker        add             x7,  sp,  #(\i*2)
2679*c0909341SAndroid Build Coastguard Worker        mov             x8,  #32*2
2680*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_add_vert_dct_8x32_neon
2681*c0909341SAndroid Build Coastguard Worker.endr
2682*c0909341SAndroid Build Coastguard Worker
2683*c0909341SAndroid Build Coastguard Worker        add             sp,  sp,  #2048
2684*c0909341SAndroid Build Coastguard Worker        ret             x15
2685*c0909341SAndroid Build Coastguard Workerendfunc
2686*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_dct_dct_16x32_16bpc_neon
//
// Two-pass 16x32 transform using a 1024-byte (16*32 * 2 bytes) scratch
// buffer on the stack. The first pass calls inv_txfm_horz_scale_16x4_neon
// for up to eight groups of four; groups beyond the point where w3 falls
// below the eob_16x32 threshold are zero-filled instead. The second pass
// calls inv_txfm_add_vert_dct_8x32_neon for two strips of eight columns.
//
// Registers as used below:
//   x0  - base for the strip pointers passed to the second pass in x6
//   x2  - coefficient buffer, base for the x7 argument of the first pass
//   w3  - compared against the eob_16x32 thresholds
//   x4  - address of inv_dct_4s_x16_neon (presumably the transform that
//         inv_txfm_horz_scale_16x4_neon calls indirectly - confirm)
//   x15 - copy of the return address; the function returns with "ret x15"
// NOTE(review): idct_dc is defined elsewhere; presumably it handles the
// DC-only case and may return early - confirm.
2687*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_dct_dct_16x32_16bpc_neon, export=1
2688*c0909341SAndroid Build Coastguard Worker        idct_dc         16,  32,  1
2689*c0909341SAndroid Build Coastguard Worker
2690*c0909341SAndroid Build Coastguard Worker        mov             x15, x30
2691*c0909341SAndroid Build Coastguard Worker        sub             sp,  sp,  #1024
2692*c0909341SAndroid Build Coastguard Worker        movrel          x13, eob_16x32
2693*c0909341SAndroid Build Coastguard Worker        ldrh            w12, [x13], #2
2694*c0909341SAndroid Build Coastguard Worker        adr             x4,  inv_dct_4s_x16_neon
2695*c0909341SAndroid Build Coastguard Worker
        // First pass, unrolled per group of four. x6 = scratch output for
        // the group (16 values * 2 bytes per row), x7 = input for the group,
        // x8 = input stride (4*32). w8 is preloaded with the number of rows
        // left to zero-fill before the threshold test.
2696*c0909341SAndroid Build Coastguard Worker.irp i, 0, 4, 8, 12, 16, 20, 24, 28
2697*c0909341SAndroid Build Coastguard Worker        add             x6,  sp,  #(\i*16*2)
2698*c0909341SAndroid Build Coastguard Worker        add             x7,  x2,  #(\i*4)
2699*c0909341SAndroid Build Coastguard Worker.if \i > 0
2700*c0909341SAndroid Build Coastguard Worker        mov             w8,  #(32 - \i)
2701*c0909341SAndroid Build Coastguard Worker        cmp             w3,  w12
2702*c0909341SAndroid Build Coastguard Worker        b.lt            1f
2703*c0909341SAndroid Build Coastguard Worker.if \i < 28
2704*c0909341SAndroid Build Coastguard Worker        ldrh            w12, [x13], #2
2705*c0909341SAndroid Build Coastguard Worker.endif
2706*c0909341SAndroid Build Coastguard Worker.endif
2707*c0909341SAndroid Build Coastguard Worker        mov             x8,  #4*32
2708*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_horz_scale_16x4_neon
2709*c0909341SAndroid Build Coastguard Worker.endr
2710*c0909341SAndroid Build Coastguard Worker        b               3f
2711*c0909341SAndroid Build Coastguard Worker
        // Zero-fill the rest of the scratch buffer starting at x6: each
        // iteration writes 2 * 64 = 128 bytes (four rows of 16 16-bit
        // values) and takes 4 off the remaining row count in w8.
2712*c0909341SAndroid Build Coastguard Worker1:
2713*c0909341SAndroid Build Coastguard Worker        movi            v4.8h,  #0
2714*c0909341SAndroid Build Coastguard Worker        movi            v5.8h,  #0
2715*c0909341SAndroid Build Coastguard Worker        movi            v6.8h,  #0
2716*c0909341SAndroid Build Coastguard Worker        movi            v7.8h,  #0
2717*c0909341SAndroid Build Coastguard Worker2:
2718*c0909341SAndroid Build Coastguard Worker        subs            w8,  w8,  #4
2719*c0909341SAndroid Build Coastguard Worker.rept 2
2720*c0909341SAndroid Build Coastguard Worker        st1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x6], #64
2721*c0909341SAndroid Build Coastguard Worker.endr
2722*c0909341SAndroid Build Coastguard Worker        b.gt            2b
2723*c0909341SAndroid Build Coastguard Worker
        // Second pass: two strips of eight columns. x6 = destination strip,
        // x7 = matching scratch column, x8 = scratch row stride (16 * 2).
2724*c0909341SAndroid Build Coastguard Worker3:
2725*c0909341SAndroid Build Coastguard Worker.irp i, 0, 8
2726*c0909341SAndroid Build Coastguard Worker        add             x6,  x0,  #(\i*2)
2727*c0909341SAndroid Build Coastguard Worker        add             x7,  sp,  #(\i*2)
2728*c0909341SAndroid Build Coastguard Worker        mov             x8,  #16*2
2729*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_add_vert_dct_8x32_neon
2730*c0909341SAndroid Build Coastguard Worker.endr
2731*c0909341SAndroid Build Coastguard Worker
2732*c0909341SAndroid Build Coastguard Worker        add             sp,  sp,  #1024
2733*c0909341SAndroid Build Coastguard Worker        ret             x15
2734*c0909341SAndroid Build Coastguard Workerendfunc
2735*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_dct_dct_32x16_16bpc_neon (exported)
//
// Two-pass 32x16 inverse transform entry point (DCT in both directions and
// 16 bpc, going by the symbol name).
//
// Registers read directly in this body:
//   x0  base of the x6 pointers handed to the second pass
//   x2  base of the x7 pointers handed to the first pass
//   w3  compared against halfword thresholds read from eob_16x32
// NOTE(review): presumably x0 = dst, x2 = coeff, w3 = eob -- confirm against
// the C prototype. Other argument registers may be consumed by the callees.
//
// Pass 1: up to four calls to inv_txfm_horz_scale_dct_32x4_neon, each given
//         an x6 that is 256 bytes further into a 1024-byte stack buffer. As
//         soon as w3 is below the current threshold the remaining part of
//         the buffer is zero-filled instead.
// Pass 2: four calls to inv_txfm_add_vert_8x16_neon, at byte offsets
//         0, 16, 32 and 48 into both x0 and the stack buffer.
//
// x30 is kept in x15 across the bl calls. x13 walks the threshold table and
// w12 holds the current threshold.
2736*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_dct_dct_32x16_16bpc_neon, export=1
2737*c0909341SAndroid Build Coastguard Worker        idct_dc         32,  16,  1  // macro defined outside this view, presumably the DC-only shortcut -- confirm
2738*c0909341SAndroid Build Coastguard Worker
2739*c0909341SAndroid Build Coastguard Worker        mov             x15, x30  // the bl calls below overwrite x30
2740*c0909341SAndroid Build Coastguard Worker        sub             sp,  sp,  #1024  // scratch buffer shared by both passes
2741*c0909341SAndroid Build Coastguard Worker
2742*c0909341SAndroid Build Coastguard Worker        movrel          x13, eob_16x32
2743*c0909341SAndroid Build Coastguard Worker        movrel          x5,  X(inv_dct_8h_x16_neon)  // not read in this body, presumably used by inv_txfm_add_vert_8x16_neon -- confirm
2744*c0909341SAndroid Build Coastguard Worker        ldrh            w12, [x13], #2  // first threshold
2745*c0909341SAndroid Build Coastguard Worker
2746*c0909341SAndroid Build Coastguard Worker.irp i, 0, 4, 8, 12
2747*c0909341SAndroid Build Coastguard Worker        add             x6,  sp,  #(\i*32*2)  // output position in the scratch buffer
2748*c0909341SAndroid Build Coastguard Worker        add             x7,  x2,  #(\i*4)  // input position
2749*c0909341SAndroid Build Coastguard Worker.if \i > 0
2750*c0909341SAndroid Build Coastguard Worker        mov             w8,  #(16 - \i)  // count consumed by the zero-fill loop at 2: if we branch
2751*c0909341SAndroid Build Coastguard Worker        cmp             w3,  w12
2752*c0909341SAndroid Build Coastguard Worker        b.lt            1f  // w3 below threshold: skip the remaining first-pass calls
2753*c0909341SAndroid Build Coastguard Worker.if \i < 12
2754*c0909341SAndroid Build Coastguard Worker        ldrh            w12, [x13], #2  // threshold for the next iteration
2755*c0909341SAndroid Build Coastguard Worker.endif
2756*c0909341SAndroid Build Coastguard Worker.endif
2757*c0909341SAndroid Build Coastguard Worker        mov             x8,  #4*16  // overwrites the w8 count above, presumably the input stride in bytes -- confirm
2758*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_horz_scale_dct_32x4_neon
2759*c0909341SAndroid Build Coastguard Worker.endr
2760*c0909341SAndroid Build Coastguard Worker        b               3f  // all four first-pass calls were made, nothing to zero
2761*c0909341SAndroid Build Coastguard Worker
2762*c0909341SAndroid Build Coastguard Worker1:
2763*c0909341SAndroid Build Coastguard Worker        movi            v4.8h,  #0
2764*c0909341SAndroid Build Coastguard Worker        movi            v5.8h,  #0
2765*c0909341SAndroid Build Coastguard Worker        movi            v6.8h,  #0
2766*c0909341SAndroid Build Coastguard Worker        movi            v7.8h,  #0
2767*c0909341SAndroid Build Coastguard Worker2:
2768*c0909341SAndroid Build Coastguard Worker        subs            w8,  w8,  #4  // flags are consumed by the b.gt below, st1 leaves them alone
2769*c0909341SAndroid Build Coastguard Worker.rept 4
2770*c0909341SAndroid Build Coastguard Worker        st1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x6], #64  // 4 x 64 = 256 zero bytes per loop pass
2771*c0909341SAndroid Build Coastguard Worker.endr
2772*c0909341SAndroid Build Coastguard Worker        b.gt            2b  // (16 - i) / 4 passes, which reaches the end of the 1024-byte buffer
2773*c0909341SAndroid Build Coastguard Worker
2774*c0909341SAndroid Build Coastguard Worker3:
2775*c0909341SAndroid Build Coastguard Worker.irp i, 0, 8, 16, 24
2776*c0909341SAndroid Build Coastguard Worker        add             x6,  x0,  #(\i*2)
2777*c0909341SAndroid Build Coastguard Worker        add             x7,  sp,  #(\i*2)
2778*c0909341SAndroid Build Coastguard Worker        mov             x8,  #32*2  // 64, matches the 64-byte scratch spacing used for x6 in pass 1
2779*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_add_vert_8x16_neon
2780*c0909341SAndroid Build Coastguard Worker.endr
2781*c0909341SAndroid Build Coastguard Worker
2782*c0909341SAndroid Build Coastguard Worker        add             sp,  sp,  #1024
2783*c0909341SAndroid Build Coastguard Worker        ret             x15  // return through the saved link register
2784*c0909341SAndroid Build Coastguard Workerendfunc
2785*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_dct_dct_8x32_16bpc_neon (exported)
//
// Two-pass 8x32 inverse transform entry point (DCT in both directions and
// 16 bpc, going by the symbol name).
//
// Registers read directly in this body:
//   x0  copied to x6 for the second pass
//   x2  start of the input, every vector read from it is overwritten with zero
//   w3  compared against halfword thresholds read from eob_8x32
// NOTE(review): presumably x0 = dst, x2 = coeff, w3 = eob -- confirm against
// the C prototype.
//
// Pass 1 (loop at the first 1:): each pass loads eight 4s vectors spaced 128
//   bytes apart, runs inv_dct_4s_x8_neon, narrows the results to 16 bit with
//   a rounding shift by 2, transposes and appends 64 bytes to a 512-byte
//   stack buffer. The loop repeats while w3 >= the threshold just loaded.
// Zero fill (loop at 2:): the part of the stack buffer not produced by
//   pass 1 is cleared, 64 bytes per 4 counts of w9.
// Pass 2: a single call to inv_txfm_add_vert_dct_8x32_neon.
//
// NOTE(review): v28 is set to zero once and used as the zero source in both
// loops, so inv_dct_4s_x8_neon and transpose_4x8h must leave it untouched --
// confirm. The first loop has no check on w9 and so relies on the contents
// of eob_8x32 (not visible here) to terminate.
2786*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_dct_dct_8x32_16bpc_neon, export=1
2787*c0909341SAndroid Build Coastguard Worker        idct_dc         8,   32, 2  // macro defined outside this view, presumably the DC-only shortcut -- confirm
2788*c0909341SAndroid Build Coastguard Worker
2789*c0909341SAndroid Build Coastguard Worker        mov             x15, x30  // the bl calls below overwrite x30
2790*c0909341SAndroid Build Coastguard Worker        sub             sp,  sp,  #512  // scratch buffer between the two passes
2791*c0909341SAndroid Build Coastguard Worker
2792*c0909341SAndroid Build Coastguard Worker        movrel          x13, eob_8x32
2793*c0909341SAndroid Build Coastguard Worker
2794*c0909341SAndroid Build Coastguard Worker        movi            v28.4s,  #0  // zero vector stored over each input vector after it is read
2795*c0909341SAndroid Build Coastguard Worker        mov             x8,  #4*32  // 128-byte step between the eight loads of one pass
2796*c0909341SAndroid Build Coastguard Worker        mov             w9,  #32  // counts down by 4 per pass, whatever is left gets zero-filled
2797*c0909341SAndroid Build Coastguard Worker        mov             x6,  sp  // output cursor
2798*c0909341SAndroid Build Coastguard Worker        mov             x7,  x2  // input cursor
2799*c0909341SAndroid Build Coastguard Worker1:
2800*c0909341SAndroid Build Coastguard Worker.irp i, 16, 17, 18, 19, 20, 21, 22, 23
2801*c0909341SAndroid Build Coastguard Worker        ld1             {v\i\().4s}, [x7]
2802*c0909341SAndroid Build Coastguard Worker        st1             {v28.4s}, [x7], x8  // clear the 16 bytes just read, then step by x8
2803*c0909341SAndroid Build Coastguard Worker.endr
2804*c0909341SAndroid Build Coastguard Worker        ldrh            w12, [x13], #2  // threshold that decides whether another pass runs
2805*c0909341SAndroid Build Coastguard Worker        sub             w9,  w9,  #4
2806*c0909341SAndroid Build Coastguard Worker        sub             x7,  x7,  x8, lsl #3  // undo the eight x8 steps taken above
2807*c0909341SAndroid Build Coastguard Worker        add             x7,  x7,  #4*4  // next pass starts 16 bytes after this one
2808*c0909341SAndroid Build Coastguard Worker
2809*c0909341SAndroid Build Coastguard Worker        bl              inv_dct_4s_x8_neon
2810*c0909341SAndroid Build Coastguard Worker        // narrow 32 -> 16 bit, saturating, rounding shift right by 2: v16-v19 fill the low halves, v20-v23 the high halves
2811*c0909341SAndroid Build Coastguard Worker        sqrshrn         v16.4h,  v16.4s,  #2
2812*c0909341SAndroid Build Coastguard Worker        sqrshrn         v17.4h,  v17.4s,  #2
2813*c0909341SAndroid Build Coastguard Worker        sqrshrn         v18.4h,  v18.4s,  #2
2814*c0909341SAndroid Build Coastguard Worker        sqrshrn         v19.4h,  v19.4s,  #2
2815*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v16.8h,  v20.4s,  #2
2816*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v17.8h,  v21.4s,  #2
2817*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v18.8h,  v22.4s,  #2
2818*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v19.8h,  v23.4s,  #2
2819*c0909341SAndroid Build Coastguard Worker
2820*c0909341SAndroid Build Coastguard Worker        transpose_4x8h  v16, v17, v18, v19, v2,  v3,  v4,  v5
2821*c0909341SAndroid Build Coastguard Worker
2822*c0909341SAndroid Build Coastguard Worker        cmp             w3,  w12  // flags are consumed by the b.ge below, the st1 in between leaves them alone
2823*c0909341SAndroid Build Coastguard Worker        st1             {v16.8h, v17.8h, v18.8h, v19.8h}, [x6], #64
2824*c0909341SAndroid Build Coastguard Worker
2825*c0909341SAndroid Build Coastguard Worker        b.ge            1b  // w3 >= threshold: run another pass
2826*c0909341SAndroid Build Coastguard Worker        cbz             w9,  3f  // count exhausted, nothing left to zero
2827*c0909341SAndroid Build Coastguard Worker
2828*c0909341SAndroid Build Coastguard Worker        movi            v29.8h,  #0
2829*c0909341SAndroid Build Coastguard Worker        movi            v30.8h,  #0
2830*c0909341SAndroid Build Coastguard Worker        movi            v31.8h,  #0
2831*c0909341SAndroid Build Coastguard Worker2:
2832*c0909341SAndroid Build Coastguard Worker        subs            w9,  w9,  #4
2833*c0909341SAndroid Build Coastguard Worker        st1             {v28.8h,v29.8h,v30.8h,v31.8h}, [x6], #64  // v28 is expected to still hold the zero set at entry
2834*c0909341SAndroid Build Coastguard Worker        b.gt            2b
2835*c0909341SAndroid Build Coastguard Worker
2836*c0909341SAndroid Build Coastguard Worker3:
2837*c0909341SAndroid Build Coastguard Worker        mov             x6,  x0
2838*c0909341SAndroid Build Coastguard Worker        mov             x7,  sp
2839*c0909341SAndroid Build Coastguard Worker        mov             x8,  #8*2  // 16, presumably the scratch row stride in bytes -- confirm
2840*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_add_vert_dct_8x32_neon
2841*c0909341SAndroid Build Coastguard Worker
2842*c0909341SAndroid Build Coastguard Worker        add             sp,  sp,  #512
2843*c0909341SAndroid Build Coastguard Worker        ret             x15  // return through the saved link register
2844*c0909341SAndroid Build Coastguard Workerendfunc
2845*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_dct_dct_32x8_16bpc_neon (exported)
//
// Two-pass 32x8 inverse transform entry point (DCT in both directions and
// 16 bpc, going by the symbol name).
//
// Registers read directly in this body:
//   x0  base of the x6 pointer used by the second pass
//   x2  base of the x7 pointer handed to the first pass
//   w3  compared against the constant 10 to decide whether the second
//       first-pass call is needed
// NOTE(review): presumably x0 = dst, x2 = coeff, w3 = eob -- confirm against
// the C prototype.
//
// Pass 1: one or two calls to inv_txfm_horz_dct_32x4_neon with x6 at offset
//         0 and 256 of a 512-byte stack buffer. When w3 < 10 the second call
//         is replaced by 256 bytes of zeros.
// Pass 2: a loop of four passes. Each loads eight 8h vectors from the stack
//         buffer (64 bytes apart), calls inv_dct_8h_x8_neon and then expands
//         load_add_store_8x8 on x6 and x7.
//
// The numeric label 1: is defined twice: "1f" inside the .irp resolves to
// the zero-fill block, "1b" at the bottom resolves to the pass-2 loop head.
2846*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_dct_dct_32x8_16bpc_neon, export=1
2847*c0909341SAndroid Build Coastguard Worker        idct_dc         32,  8,   2  // macro defined outside this view, presumably the DC-only shortcut -- confirm
2848*c0909341SAndroid Build Coastguard Worker
2849*c0909341SAndroid Build Coastguard Worker        mov             x15, x30  // the bl calls below overwrite x30
2850*c0909341SAndroid Build Coastguard Worker        sub             sp,  sp,  #512  // scratch buffer between the two passes
2851*c0909341SAndroid Build Coastguard Worker
2852*c0909341SAndroid Build Coastguard Worker.irp i, 0, 4
2853*c0909341SAndroid Build Coastguard Worker        add             x6,  sp,  #(\i*32*2)  // offset 0 or 256 in the scratch buffer
2854*c0909341SAndroid Build Coastguard Worker        add             x7,  x2,  #(\i*4)
2855*c0909341SAndroid Build Coastguard Worker.if \i > 0
2856*c0909341SAndroid Build Coastguard Worker        cmp             w3,  #10
2857*c0909341SAndroid Build Coastguard Worker        b.lt            1f  // w3 < 10: zero the second half instead of transforming it
2858*c0909341SAndroid Build Coastguard Worker.endif
2859*c0909341SAndroid Build Coastguard Worker        mov             x8,  #8*4  // 32, presumably the input stride in bytes -- confirm
2860*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_horz_dct_32x4_neon
2861*c0909341SAndroid Build Coastguard Worker.endr
2862*c0909341SAndroid Build Coastguard Worker        b               2f
2863*c0909341SAndroid Build Coastguard Worker
2864*c0909341SAndroid Build Coastguard Worker1:
2865*c0909341SAndroid Build Coastguard Worker        movi            v4.8h,   #0
2866*c0909341SAndroid Build Coastguard Worker        movi            v5.8h,   #0
2867*c0909341SAndroid Build Coastguard Worker        movi            v6.8h,   #0
2868*c0909341SAndroid Build Coastguard Worker        movi            v7.8h,   #0
2869*c0909341SAndroid Build Coastguard Worker.rept 4
2870*c0909341SAndroid Build Coastguard Worker        st1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x6], #64  // 4 x 64 = 256 zero bytes starting at sp + 256
2871*c0909341SAndroid Build Coastguard Worker.endr
2872*c0909341SAndroid Build Coastguard Worker
2873*c0909341SAndroid Build Coastguard Worker2:
2874*c0909341SAndroid Build Coastguard Worker        mov             x8,  #2*32  // 64-byte step between the eight loads below
2875*c0909341SAndroid Build Coastguard Worker        mov             w9,  #0  // loop counter, the 32-bit write also clears the upper half of x9
2876*c0909341SAndroid Build Coastguard Worker1:
2877*c0909341SAndroid Build Coastguard Worker        add             x6,  x0,  x9, lsl #1
2878*c0909341SAndroid Build Coastguard Worker        add             x7,  sp,  x9, lsl #1 // #(\i*2)
2879*c0909341SAndroid Build Coastguard Worker
2880*c0909341SAndroid Build Coastguard Worker.irp i, 16, 17, 18, 19, 20, 21, 22, 23
2881*c0909341SAndroid Build Coastguard Worker        ld1             {v\i\().8h}, [x7], x8
2882*c0909341SAndroid Build Coastguard Worker.endr
2883*c0909341SAndroid Build Coastguard Worker        add             w9,  w9,  #8  // advance by 8 per pass, four passes until 32
2884*c0909341SAndroid Build Coastguard Worker
2885*c0909341SAndroid Build Coastguard Worker        bl              X(inv_dct_8h_x8_neon)
2886*c0909341SAndroid Build Coastguard Worker
2887*c0909341SAndroid Build Coastguard Worker        cmp             w9,  #32  // flags are consumed by the b.lt below, so load_add_store_8x8 must not modify them
2888*c0909341SAndroid Build Coastguard Worker
2889*c0909341SAndroid Build Coastguard Worker        load_add_store_8x8 x6, x7
2890*c0909341SAndroid Build Coastguard Worker
2891*c0909341SAndroid Build Coastguard Worker        b.lt            1b
2892*c0909341SAndroid Build Coastguard Worker
2893*c0909341SAndroid Build Coastguard Worker        add             sp,  sp,  #512
2894*c0909341SAndroid Build Coastguard Worker        ret             x15  // return through the saved link register
2895*c0909341SAndroid Build Coastguard Workerendfunc
2896*c0909341SAndroid Build Coastguard Worker
// inv_dct64_step1_neon
//
// First step of the 64-point inverse DCT (going by the symbol name) for one
// group of four input vectors. The in/t comments below list which inputs
// produce which intermediate terms for each of the four groups.
//
// In:   v16-v19  four input vectors, 4s lanes
//       x17      pointer to constants. 8 words are loaded first, then 4 more,
//                so x17 is post-incremented by 48 bytes in total
//       x6       output pointer, post-incremented by 128 bytes
//       v4, v5   bounds passed to smax_4s and smin_4s respectively. A comment
//                in def_dct64_func names them the row_clip min/max constants
// Out:  v16-v23 stored contiguously at the incoming x6
// Overwrites v0-v2, v6, v7 and v16-v31.
// smin_4s, smax_4s, mul_mla and mul_mls are macros defined outside this view.
2897*c0909341SAndroid Build Coastguard Workerfunction inv_dct64_step1_neon
2898*c0909341SAndroid Build Coastguard Worker        // in1/31/17/15 -> t32a/33/34a/35/60/61a/62/63a
2899*c0909341SAndroid Build Coastguard Worker        // in7/25/23/ 9 -> t56a/57/58a/59/36/37a/38/39a
2900*c0909341SAndroid Build Coastguard Worker        // in5/27/21/11 -> t40a/41/42a/43/52/53a/54/55a
2901*c0909341SAndroid Build Coastguard Worker        // in3/29/19/13 -> t48a/49/50a/51/44/45a/46/47a
2902*c0909341SAndroid Build Coastguard Worker
2903*c0909341SAndroid Build Coastguard Worker        ld1             {v0.4s, v1.4s}, [x17], #32  // eight multipliers for the sqrdmulh block
2904*c0909341SAndroid Build Coastguard Worker        // each input is multiplied by two constants, the second product goes to v20-v23 so the input stays intact for the first
2905*c0909341SAndroid Build Coastguard Worker        sqrdmulh        v23.4s,  v16.4s,  v0.s[1]       // t63a
2906*c0909341SAndroid Build Coastguard Worker        sqrdmulh        v16.4s,  v16.4s,  v0.s[0]       // t32a
2907*c0909341SAndroid Build Coastguard Worker        sqrdmulh        v22.4s,  v17.4s,  v0.s[2]       // t62a
2908*c0909341SAndroid Build Coastguard Worker        sqrdmulh        v17.4s,  v17.4s,  v0.s[3]       // t33a
2909*c0909341SAndroid Build Coastguard Worker        sqrdmulh        v21.4s,  v18.4s,  v1.s[1]       // t61a
2910*c0909341SAndroid Build Coastguard Worker        sqrdmulh        v18.4s,  v18.4s,  v1.s[0]       // t34a
2911*c0909341SAndroid Build Coastguard Worker        sqrdmulh        v20.4s,  v19.4s,  v1.s[2]       // t60a
2912*c0909341SAndroid Build Coastguard Worker        sqrdmulh        v19.4s,  v19.4s,  v1.s[3]       // t35a
2913*c0909341SAndroid Build Coastguard Worker
2914*c0909341SAndroid Build Coastguard Worker        ld1             {v0.4s}, [x17], #16  // four constants for the mul_mla / mul_mls rotations below
2915*c0909341SAndroid Build Coastguard Worker
2916*c0909341SAndroid Build Coastguard Worker        sqadd           v24.4s,  v16.4s,  v17.4s        // t32
2917*c0909341SAndroid Build Coastguard Worker        sqsub           v25.4s,  v16.4s,  v17.4s        // t33
2918*c0909341SAndroid Build Coastguard Worker        sqsub           v26.4s,  v19.4s,  v18.4s        // t34
2919*c0909341SAndroid Build Coastguard Worker        sqadd           v27.4s,  v19.4s,  v18.4s        // t35
2920*c0909341SAndroid Build Coastguard Worker        sqadd           v28.4s,  v20.4s,  v21.4s        // t60
2921*c0909341SAndroid Build Coastguard Worker        sqsub           v29.4s,  v20.4s,  v21.4s        // t61
2922*c0909341SAndroid Build Coastguard Worker        sqsub           v30.4s,  v23.4s,  v22.4s        // t62
2923*c0909341SAndroid Build Coastguard Worker        sqadd           v31.4s,  v23.4s,  v22.4s        // t63
2924*c0909341SAndroid Build Coastguard Worker        // clamp the eight butterfly results against v5 and then v4
2925*c0909341SAndroid Build Coastguard Worker.irp r, v24, v25, v26, v27, v28, v29, v30, v31
2926*c0909341SAndroid Build Coastguard Worker        smin_4s         \r, \r, v5
2927*c0909341SAndroid Build Coastguard Worker.endr
2928*c0909341SAndroid Build Coastguard Worker.irp r, v24, v25, v26, v27, v28, v29, v30, v31
2929*c0909341SAndroid Build Coastguard Worker        smax_4s         \r, \r, v4
2930*c0909341SAndroid Build Coastguard Worker.endr
2931*c0909341SAndroid Build Coastguard Worker        // rotations on (t61, t34) and (t62, t33) with v0.s[0] and v0.s[1], each product is rounded down by 12 bits via srshr
2932*c0909341SAndroid Build Coastguard Worker        mul_mla         v2,  v29, v26, v0.s[0], v0.s[1] // -> t34a
2933*c0909341SAndroid Build Coastguard Worker        mul_mls         v7,  v29, v26, v0.s[1], v0.s[0] // -> t61a
2934*c0909341SAndroid Build Coastguard Worker        neg             v2.4s,   v2.4s                  // t34a
2935*c0909341SAndroid Build Coastguard Worker        mul_mls         v6,  v30, v25, v0.s[1], v0.s[0] // -> t33a
2936*c0909341SAndroid Build Coastguard Worker        srshr           v26.4s, v2.4s,  #12             // t34a
2937*c0909341SAndroid Build Coastguard Worker        mul_mla         v2,  v30, v25, v0.s[0], v0.s[1] // -> t62a
2938*c0909341SAndroid Build Coastguard Worker        srshr           v29.4s, v7.4s,  #12             // t61a
2939*c0909341SAndroid Build Coastguard Worker        srshr           v25.4s, v6.4s,  #12             // t33a
2940*c0909341SAndroid Build Coastguard Worker        srshr           v30.4s, v2.4s,  #12             // t62a
2941*c0909341SAndroid Build Coastguard Worker
2942*c0909341SAndroid Build Coastguard Worker        sqadd           v16.4s,  v24.4s,  v27.4s        // t32a
2943*c0909341SAndroid Build Coastguard Worker        sqsub           v19.4s,  v24.4s,  v27.4s        // t35a
2944*c0909341SAndroid Build Coastguard Worker        sqadd           v17.4s,  v25.4s,  v26.4s        // t33
2945*c0909341SAndroid Build Coastguard Worker        sqsub           v18.4s,  v25.4s,  v26.4s        // t34
2946*c0909341SAndroid Build Coastguard Worker        sqsub           v20.4s,  v31.4s,  v28.4s        // t60a
2947*c0909341SAndroid Build Coastguard Worker        sqadd           v23.4s,  v31.4s,  v28.4s        // t63a
2948*c0909341SAndroid Build Coastguard Worker        sqsub           v21.4s,  v30.4s,  v29.4s        // t61
2949*c0909341SAndroid Build Coastguard Worker        sqadd           v22.4s,  v30.4s,  v29.4s        // t62
2950*c0909341SAndroid Build Coastguard Worker        // second clamp, same bounds, on the new butterfly results
2951*c0909341SAndroid Build Coastguard Worker.irp r, v16, v19, v17, v18, v20, v23, v21, v22
2952*c0909341SAndroid Build Coastguard Worker        smin_4s         \r, \r, v5
2953*c0909341SAndroid Build Coastguard Worker.endr
2954*c0909341SAndroid Build Coastguard Worker.irp r, v16, v19, v17, v18, v20, v23, v21, v22
2955*c0909341SAndroid Build Coastguard Worker        smax_4s         \r, \r, v4
2956*c0909341SAndroid Build Coastguard Worker.endr
2957*c0909341SAndroid Build Coastguard Worker        // rotations on (t61, t34) and (t60a, t35a) with v0.s[2] and v0.s[3]
2958*c0909341SAndroid Build Coastguard Worker        mul_mla         v2,  v21, v18, v0.s[2], v0.s[3] // -> t61a
2959*c0909341SAndroid Build Coastguard Worker        mul_mls         v7,  v21, v18, v0.s[3], v0.s[2] // -> t34a
2960*c0909341SAndroid Build Coastguard Worker        mul_mla         v6,  v20, v19, v0.s[2], v0.s[3] // -> t60
2961*c0909341SAndroid Build Coastguard Worker        srshr           v21.4s, v2.4s,  #12             // t61a
2962*c0909341SAndroid Build Coastguard Worker        srshr           v18.4s, v7.4s,  #12             // t34a
2963*c0909341SAndroid Build Coastguard Worker        mul_mls         v2,  v20, v19, v0.s[3], v0.s[2] // -> t35
2964*c0909341SAndroid Build Coastguard Worker        srshr           v20.4s, v6.4s,  #12             // t60
2965*c0909341SAndroid Build Coastguard Worker        srshr           v19.4s, v2.4s,  #12             // t35
2966*c0909341SAndroid Build Coastguard Worker        // write the eight results, 128 bytes in total, and advance x6 past them
2967*c0909341SAndroid Build Coastguard Worker        st1             {v16.4s, v17.4s, v18.4s, v19.4s}, [x6], #64
2968*c0909341SAndroid Build Coastguard Worker        st1             {v20.4s, v21.4s, v22.4s, v23.4s}, [x6], #64
2969*c0909341SAndroid Build Coastguard Worker
2970*c0909341SAndroid Build Coastguard Worker        ret
2971*c0909341SAndroid Build Coastguard Workerendfunc
2972*c0909341SAndroid Build Coastguard Worker
// inv_dct64_step2_neon
//
// Second step of the 64-point inverse DCT (going by the symbol name). Works
// in place on memory addressed through two cursors.
//
// In:   x6      low cursor, moves up by 16 bytes per pass
//       x9      high cursor, moves down by 16 bytes per pass
//       v4, v5  bounds passed to smax_4s and smin_4s respectively
// Each pass reads eight 16-byte vectors, at byte offsets 0, 128, 256 and 384
// from both x6 and x9, and writes eight results back to those same eight
// slots. The loop repeats while x6 < x9 (signed compare).
// NOTE(review): x6 and x9 presumably point into the buffer filled by
// inv_dct64_step1_neon -- confirm at the call site.
//
// Overwrites x16, v0, v2, v6, v7 and v16-v31. Only v0.s[0], v0.s[2] and
// v0.s[3] of the idct_coeffs vector are used here.
// smin_4s, smax_4s, mul_mla and mul_mls are macros defined outside this view.
2973*c0909341SAndroid Build Coastguard Workerfunction inv_dct64_step2_neon
2974*c0909341SAndroid Build Coastguard Worker        movrel          x16, idct_coeffs
2975*c0909341SAndroid Build Coastguard Worker        ld1             {v0.4s}, [x16]  // loaded once, stays constant for every loop pass
2976*c0909341SAndroid Build Coastguard Worker1:
2977*c0909341SAndroid Build Coastguard Worker        // t32a/33/34a/35/60/61a/62/63a
2978*c0909341SAndroid Build Coastguard Worker        // t56a/57/58a/59/36/37a/38/39a
2979*c0909341SAndroid Build Coastguard Worker        // t40a/41/42a/43/52/53a/54/55a
2980*c0909341SAndroid Build Coastguard Worker        // t48a/49/50a/51/44/45a/46/47a
2981*c0909341SAndroid Build Coastguard Worker        ldr             q16, [x6, #4*4*0]  // t32a
2982*c0909341SAndroid Build Coastguard Worker        ldr             q17, [x9, #4*4*8]  // t39a
2983*c0909341SAndroid Build Coastguard Worker        ldr             q18, [x9, #4*4*0]  // t63a
2984*c0909341SAndroid Build Coastguard Worker        ldr             q19, [x6, #4*4*8]  // t56a
2985*c0909341SAndroid Build Coastguard Worker        ldr             q20, [x6, #4*4*16] // t40a
2986*c0909341SAndroid Build Coastguard Worker        ldr             q21, [x9, #4*4*24] // t47a
2987*c0909341SAndroid Build Coastguard Worker        ldr             q22, [x9, #4*4*16] // t55a
2988*c0909341SAndroid Build Coastguard Worker        ldr             q23, [x6, #4*4*24] // t48a
2989*c0909341SAndroid Build Coastguard Worker
2990*c0909341SAndroid Build Coastguard Worker        sqadd           v24.4s,  v16.4s, v17.4s         // t32
2991*c0909341SAndroid Build Coastguard Worker        sqsub           v25.4s,  v16.4s, v17.4s         // t39
2992*c0909341SAndroid Build Coastguard Worker        sqadd           v26.4s,  v18.4s, v19.4s         // t63
2993*c0909341SAndroid Build Coastguard Worker        sqsub           v27.4s,  v18.4s, v19.4s         // t56
2994*c0909341SAndroid Build Coastguard Worker        sqsub           v28.4s,  v21.4s, v20.4s         // t40
2995*c0909341SAndroid Build Coastguard Worker        sqadd           v29.4s,  v21.4s, v20.4s         // t47
2996*c0909341SAndroid Build Coastguard Worker        sqadd           v30.4s,  v23.4s, v22.4s         // t48
2997*c0909341SAndroid Build Coastguard Worker        sqsub           v31.4s,  v23.4s, v22.4s         // t55
2998*c0909341SAndroid Build Coastguard Worker        // clamp the eight butterfly results against v5 and then v4
2999*c0909341SAndroid Build Coastguard Worker.irp r, v24, v25, v26, v27, v28, v29, v30, v31
3000*c0909341SAndroid Build Coastguard Worker        smin_4s         \r, \r, v5
3001*c0909341SAndroid Build Coastguard Worker.endr
3002*c0909341SAndroid Build Coastguard Worker.irp r, v24, v25, v26, v27, v28, v29, v30, v31
3003*c0909341SAndroid Build Coastguard Worker        smax_4s         \r, \r, v4
3004*c0909341SAndroid Build Coastguard Worker.endr
3005*c0909341SAndroid Build Coastguard Worker        // rotations with v0.s[2] and v0.s[3], each product is rounded down by 12 bits via srshr
3006*c0909341SAndroid Build Coastguard Worker        mul_mla         v2,  v27, v25, v0.s[3], v0.s[2] // -> t56a
3007*c0909341SAndroid Build Coastguard Worker        mul_mls         v7,  v27, v25, v0.s[2], v0.s[3] // -> t39a
3008*c0909341SAndroid Build Coastguard Worker        mul_mla         v6,  v31, v28, v0.s[3], v0.s[2] // -> t40a
3009*c0909341SAndroid Build Coastguard Worker        srshr           v25.4s, v2.4s,  #12             // t56a
3010*c0909341SAndroid Build Coastguard Worker        srshr           v27.4s, v7.4s,  #12             // t39a
3011*c0909341SAndroid Build Coastguard Worker        neg             v6.4s,   v6.4s                  // t40a
3012*c0909341SAndroid Build Coastguard Worker        mul_mls         v2,  v31, v28, v0.s[2], v0.s[3] // -> t55a
3013*c0909341SAndroid Build Coastguard Worker        srshr           v31.4s, v6.4s,  #12             // t40a
3014*c0909341SAndroid Build Coastguard Worker        srshr           v28.4s, v2.4s,  #12             // t55a
3015*c0909341SAndroid Build Coastguard Worker
3016*c0909341SAndroid Build Coastguard Worker        sqadd           v16.4s,  v24.4s,  v29.4s        // t32a
3017*c0909341SAndroid Build Coastguard Worker        sqsub           v19.4s,  v24.4s,  v29.4s        // t47a
3018*c0909341SAndroid Build Coastguard Worker        sqadd           v17.4s,  v27.4s,  v31.4s        // t39
3019*c0909341SAndroid Build Coastguard Worker        sqsub           v18.4s,  v27.4s,  v31.4s        // t40
3020*c0909341SAndroid Build Coastguard Worker        sqsub           v20.4s,  v26.4s,  v30.4s        // t48a
3021*c0909341SAndroid Build Coastguard Worker        sqadd           v23.4s,  v26.4s,  v30.4s        // t63a
3022*c0909341SAndroid Build Coastguard Worker        sqsub           v21.4s,  v25.4s,  v28.4s        // t55
3023*c0909341SAndroid Build Coastguard Worker        sqadd           v22.4s,  v25.4s,  v28.4s        // t56
3024*c0909341SAndroid Build Coastguard Worker        // second clamp, same bounds, on the new butterfly results
3025*c0909341SAndroid Build Coastguard Worker.irp r, v16, v19, v17, v18, v20, v23, v21, v22
3026*c0909341SAndroid Build Coastguard Worker        smin_4s         \r, \r, v5
3027*c0909341SAndroid Build Coastguard Worker.endr
3028*c0909341SAndroid Build Coastguard Worker.irp r, v16, v19, v17, v18, v20, v23, v21, v22
3029*c0909341SAndroid Build Coastguard Worker        smax_4s         \r, \r, v4
3030*c0909341SAndroid Build Coastguard Worker.endr
3031*c0909341SAndroid Build Coastguard Worker        // both coefficients are v0.s[0] here, so each pair becomes a scaled difference and a scaled sum
3032*c0909341SAndroid Build Coastguard Worker        mul_mls         v2,  v21, v18, v0.s[0], v0.s[0] // -> t40a
3033*c0909341SAndroid Build Coastguard Worker        mul_mla         v7,  v21, v18, v0.s[0], v0.s[0] // -> t55a
3034*c0909341SAndroid Build Coastguard Worker        mul_mls         v6,  v20, v19, v0.s[0], v0.s[0] // -> t47
3035*c0909341SAndroid Build Coastguard Worker        srshr           v18.4s, v2.4s,  #12             // t40a
3036*c0909341SAndroid Build Coastguard Worker        srshr           v21.4s, v7.4s,  #12             // t55a
3037*c0909341SAndroid Build Coastguard Worker        mul_mla         v2,  v20, v19, v0.s[0], v0.s[0] // -> t48
3038*c0909341SAndroid Build Coastguard Worker        srshr           v19.4s, v6.4s,  #12             // t47
3039*c0909341SAndroid Build Coastguard Worker        srshr           v20.4s, v2.4s,  #12             // t48
3040*c0909341SAndroid Build Coastguard Worker        // store back to the same eight slots that were loaded, in a different arrangement
3041*c0909341SAndroid Build Coastguard Worker        str             q16, [x6, #4*4*0]  // t32a
3042*c0909341SAndroid Build Coastguard Worker        str             q17, [x9, #4*4*0]  // t39
3043*c0909341SAndroid Build Coastguard Worker        str             q18, [x6, #4*4*8]  // t40a
3044*c0909341SAndroid Build Coastguard Worker        str             q19, [x9, #4*4*8]  // t47
3045*c0909341SAndroid Build Coastguard Worker        str             q20, [x6, #4*4*16] // t48
3046*c0909341SAndroid Build Coastguard Worker        str             q21, [x9, #4*4*16] // t55a
3047*c0909341SAndroid Build Coastguard Worker        str             q22, [x6, #4*4*24] // t56
3048*c0909341SAndroid Build Coastguard Worker        str             q23, [x9, #4*4*24] // t63a
3049*c0909341SAndroid Build Coastguard Worker
3050*c0909341SAndroid Build Coastguard Worker        add             x6,  x6,  #4*4  // cursors move towards each other, one vector per pass
3051*c0909341SAndroid Build Coastguard Worker        sub             x9,  x9,  #4*4
3052*c0909341SAndroid Build Coastguard Worker        cmp             x6,  x9
3053*c0909341SAndroid Build Coastguard Worker        b.lt            1b  // stop once the cursors meet or cross
3054*c0909341SAndroid Build Coastguard Worker        ret
3055*c0909341SAndroid Build Coastguard Workerendfunc
3056*c0909341SAndroid Build Coastguard Worker
// load8 src, strd, zero, clear
//
// Loads v16-v23 (4s lanes) from src, advancing src by strd after each one.
//   src    address register, updated by the post-index forms below
//   strd   post-index step applied after each vector
//   zero   vector operand stored over each location right after it is read,
//          only used when clear is non-zero
//   clear  assemble-time flag: non-zero selects the load-then-overwrite form,
//          zero selects a plain post-indexed load
// After expansion src has advanced by eight steps of strd.
3057*c0909341SAndroid Build Coastguard Worker.macro load8 src, strd, zero, clear
3058*c0909341SAndroid Build Coastguard Worker.irp i, v16.4s, v17.4s, v18.4s, v19.4s, v20.4s, v21.4s, v22.4s, v23.4s
3059*c0909341SAndroid Build Coastguard Worker.if \clear
3060*c0909341SAndroid Build Coastguard Worker        ld1             {\i}, [\src]
3061*c0909341SAndroid Build Coastguard Worker        st1             {\zero}, [\src], \strd
3062*c0909341SAndroid Build Coastguard Worker.else
3063*c0909341SAndroid Build Coastguard Worker        ld1             {\i}, [\src], \strd
3064*c0909341SAndroid Build Coastguard Worker.endif
3065*c0909341SAndroid Build Coastguard Worker.endr
3066*c0909341SAndroid Build Coastguard Worker.endm
3067*c0909341SAndroid Build Coastguard Worker
// store16 dst
//
// Stores v16-v31 (4s lanes) back to back through dst, post-incrementing dst
// by 16 bytes per vector, 256 bytes in total.
3068*c0909341SAndroid Build Coastguard Worker.macro store16 dst
3069*c0909341SAndroid Build Coastguard Worker.irp i, v16.4s, v17.4s, v18.4s, v19.4s, v20.4s, v21.4s, v22.4s, v23.4s, v24.4s, v25.4s, v26.4s, v27.4s, v28.4s, v29.4s, v30.4s, v31.4s
3070*c0909341SAndroid Build Coastguard Worker        st1             {\i}, [\dst], #16
3071*c0909341SAndroid Build Coastguard Worker.endr
3072*c0909341SAndroid Build Coastguard Worker.endm
3073*c0909341SAndroid Build Coastguard Worker
// clear_upper8
//
// Sets v24-v31 to zero. In def_dct64_func it is expanded right after load8
// has filled v16-v23 and before the call to inv_dct_4s_x16_neon.
3074*c0909341SAndroid Build Coastguard Worker.macro clear_upper8
3075*c0909341SAndroid Build Coastguard Worker.irp i, v24.4s, v25.4s, v26.4s, v27.4s, v28.4s, v29.4s, v30.4s, v31.4s
3076*c0909341SAndroid Build Coastguard Worker        movi            \i,  #0
3077*c0909341SAndroid Build Coastguard Worker.endr
3078*c0909341SAndroid Build Coastguard Worker.endm
3079*c0909341SAndroid Build Coastguard Worker
// movi_if reg, val, cond
//
// Emits "movi reg, val" only when the assemble-time expression cond is
// non-zero. Emits nothing otherwise.
3080*c0909341SAndroid Build Coastguard Worker.macro movi_if reg, val, cond
3081*c0909341SAndroid Build Coastguard Worker.if \cond
3082*c0909341SAndroid Build Coastguard Worker        movi            \reg, \val
3083*c0909341SAndroid Build Coastguard Worker.endif
3084*c0909341SAndroid Build Coastguard Worker.endm
3085*c0909341SAndroid Build Coastguard Worker
// movz16dup_if reg, gpr, val, cond
//
// When the assemble-time expression cond is non-zero: loads val shifted left
// by 16 into the scratch general register gpr (movz clears the other bits),
// then broadcasts gpr into every lane of the vector operand reg.
// Emits nothing when cond is zero. gpr is overwritten when code is emitted.
3086*c0909341SAndroid Build Coastguard Worker.macro movz16dup_if reg, gpr, val, cond
3087*c0909341SAndroid Build Coastguard Worker.if \cond
3088*c0909341SAndroid Build Coastguard Worker        movz            \gpr, \val, lsl #16
3089*c0909341SAndroid Build Coastguard Worker        dup             \reg, \gpr
3090*c0909341SAndroid Build Coastguard Worker.endif
3091*c0909341SAndroid Build Coastguard Worker.endm
3092*c0909341SAndroid Build Coastguard Worker
// st1_if regs, dst, cond
//
// Emits "st1 regs, dst" only when the assemble-time expression cond is
// non-zero. regs and dst are pasted verbatim as the two st1 operands.
3093*c0909341SAndroid Build Coastguard Worker.macro st1_if regs, dst, cond
3094*c0909341SAndroid Build Coastguard Worker.if \cond
3095*c0909341SAndroid Build Coastguard Worker        st1             \regs, \dst
3096*c0909341SAndroid Build Coastguard Worker.endif
3097*c0909341SAndroid Build Coastguard Worker.endm
3098*c0909341SAndroid Build Coastguard Worker
// str_if reg, dst, cond
//
// Emits "str reg, dst" only when the assemble-time expression cond is
// non-zero. Emits nothing otherwise.
3099*c0909341SAndroid Build Coastguard Worker.macro str_if reg, dst, cond
3100*c0909341SAndroid Build Coastguard Worker.if \cond
3101*c0909341SAndroid Build Coastguard Worker        str             \reg, \dst
3102*c0909341SAndroid Build Coastguard Worker.endif
3103*c0909341SAndroid Build Coastguard Worker.endm
3104*c0909341SAndroid Build Coastguard Worker
// stroff_if reg, dst, dstoff, cond
//
// Emits "str reg, dst, dstoff" only when the assemble-time expression cond
// is non-zero. dst and dstoff are pasted as two comma-separated operands.
// NOTE(review): the split presumably exists so an address containing a comma
// can be passed through two macro arguments -- confirm at the call sites.
3105*c0909341SAndroid Build Coastguard Worker.macro stroff_if reg, dst, dstoff, cond
3106*c0909341SAndroid Build Coastguard Worker.if \cond
3107*c0909341SAndroid Build Coastguard Worker        str             \reg, \dst, \dstoff
3108*c0909341SAndroid Build Coastguard Worker.endif
3109*c0909341SAndroid Build Coastguard Worker.endm
3110*c0909341SAndroid Build Coastguard Worker
// scale_if cond, c, r0, r1, r2, r3, r4, r5, r6, r7
//
// Expands scale_input (defined outside this view) with the .4s arrangement,
// the constant operand c and the eight registers r0-r7, but only when the
// assemble-time expression cond is non-zero. Note that cond comes first
// here, unlike the other *_if helpers where it is the last argument.
3111*c0909341SAndroid Build Coastguard Worker.macro scale_if cond, c, r0, r1, r2, r3, r4, r5, r6, r7
3112*c0909341SAndroid Build Coastguard Worker.if \cond
3113*c0909341SAndroid Build Coastguard Worker        scale_input     .4s, \c, \r0, \r1, \r2, \r3, \r4, \r5, \r6, \r7
3114*c0909341SAndroid Build Coastguard Worker.endif
3115*c0909341SAndroid Build Coastguard Worker.endm
3116*c0909341SAndroid Build Coastguard Worker
// def_dct64_func suffix, clear=0, scale=0
//
// Defines the function inv_txfm_dct<suffix>_4s_x64_neon. The clear and scale
// arguments are forwarded as the condition of the *_if helper macros, so they
// select at assembly time whether each input row is overwritten with zero
// after it has been read, and whether scale_input is applied with 2896*8.
//
// Register usage visible in the body:
//   x7   pointer to the input rows (read; also written when clear is set)
//   x8   input stride as passed by the caller; multiplied by 4 on entry
//   sp   base of the scratch area the results are written to (copied to x6)
//   x14  copy of the return address; the function returns with ret x14
//   x9, x10, x11, x17, v0-v7 and v16-v31 are overwritten; w16 is handed to
//   movz16dup_if (presumably as a scratch register - confirm in that macro).
//
// NOTE(review): load8, clear_upper8, store16, movz16dup_if, movi_if, st1_if
// and the inv_dct* routines are defined outside this part of the file;
// remarks about their effect below are assumptions to be confirmed there.
3117*c0909341SAndroid Build Coastguard Worker.macro def_dct64_func suffix, clear=0, scale=0
3118*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_dct\suffix\()_4s_x64_neon
3119*c0909341SAndroid Build Coastguard Worker        mov             x14, x30
3120*c0909341SAndroid Build Coastguard Worker        mov             x6,  sp
3121*c0909341SAndroid Build Coastguard Worker        lsl             x8,  x8,  #2
3122*c0909341SAndroid Build Coastguard Worker
        // First group of inputs, fed to the 16-point DCT. After load8 the
        // pointer is rewound by 8*x8 and then moved forward by x8/2 so that
        // the next load8 reads a different set of rows.
3123*c0909341SAndroid Build Coastguard Worker        movz16dup_if    v0.2s, w16, #2896*8, \scale
3124*c0909341SAndroid Build Coastguard Worker        movi_if         v7.4s,  #0, \clear
3125*c0909341SAndroid Build Coastguard Worker        load8           x7,  x8,  v7.4s, \clear
3126*c0909341SAndroid Build Coastguard Worker        clear_upper8
3127*c0909341SAndroid Build Coastguard Worker        sub             x7,  x7,  x8, lsl #3
3128*c0909341SAndroid Build Coastguard Worker        add             x7,  x7,  x8, lsr #1
3129*c0909341SAndroid Build Coastguard Worker        scale_if        \scale, v0.s[0], v16, v17, v18, v19, v20, v21, v22, v23
3130*c0909341SAndroid Build Coastguard Worker
3131*c0909341SAndroid Build Coastguard Worker        bl              inv_dct_4s_x16_neon
3132*c0909341SAndroid Build Coastguard Worker
3133*c0909341SAndroid Build Coastguard Worker        // idct_16 leaves the row_clip_max/min constants in v5 and v4
3134*c0909341SAndroid Build Coastguard Worker.irp r, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31
3135*c0909341SAndroid Build Coastguard Worker        smin_4s         \r, \r, v5
3136*c0909341SAndroid Build Coastguard Worker.endr
3137*c0909341SAndroid Build Coastguard Worker.irp r, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31
3138*c0909341SAndroid Build Coastguard Worker        smax_4s         \r, \r, v4
3139*c0909341SAndroid Build Coastguard Worker.endr
3140*c0909341SAndroid Build Coastguard Worker
        // Park the clamped 16-point result in the scratch area. The pointer
        // arithmetic after the next call (sub x6, x6, #16*16) assumes that
        // store16 advanced x6 by 16 vectors - confirm in store16.
3141*c0909341SAndroid Build Coastguard Worker        store16         x6
3142*c0909341SAndroid Build Coastguard Worker
        // Second group of inputs, fed to the odd half of the 32-point DCT.
        // x8 is halved here and stays halved for the rest of the function.
3143*c0909341SAndroid Build Coastguard Worker        movz16dup_if    v0.2s, w16, #2896*8, \scale
3144*c0909341SAndroid Build Coastguard Worker        movi_if         v7.8h,  #0, \clear
3145*c0909341SAndroid Build Coastguard Worker        load8           x7,  x8,  v7.4s, \clear
3146*c0909341SAndroid Build Coastguard Worker        clear_upper8
3147*c0909341SAndroid Build Coastguard Worker        sub             x7,  x7,  x8, lsl #3
3148*c0909341SAndroid Build Coastguard Worker        lsr             x8,  x8,  #1
3149*c0909341SAndroid Build Coastguard Worker        sub             x7,  x7,  x8, lsr #1
3150*c0909341SAndroid Build Coastguard Worker        scale_if        \scale, v0.s[0], v16, v17, v18, v19, v20, v21, v22, v23
3151*c0909341SAndroid Build Coastguard Worker
3152*c0909341SAndroid Build Coastguard Worker        bl              inv_dct32_odd_4s_x16_neon
3153*c0909341SAndroid Build Coastguard Worker
        // x10 addresses the last 16-byte slot of the 16 vectors that follow
        // the current x6; x6 itself is moved back by 16 vectors. x9 = -16
        // makes the stores through x10 walk downwards.
3154*c0909341SAndroid Build Coastguard Worker        add             x10, x6,  #16*15
3155*c0909341SAndroid Build Coastguard Worker        sub             x6,  x6,  #16*16
3156*c0909341SAndroid Build Coastguard Worker
3157*c0909341SAndroid Build Coastguard Worker        mov             x9,  #-16
3158*c0909341SAndroid Build Coastguard Worker
3159*c0909341SAndroid Build Coastguard Worker        movi            v1.4s,  #1, msl #16 // row_clip_max = ~(~bdmax << 7), 0x1ffff
3160*c0909341SAndroid Build Coastguard Worker        mvni            v0.4s,  #1, msl #16 // row_clip_min = (~bdmax << 7), 0xfffe0000
3161*c0909341SAndroid Build Coastguard Worker
        // Local helper: reads four stored vectors at x6, forms the saturating
        // sum and difference with the four register arguments, clamps both to
        // [v0, v1], writes the sums back over the values just read (x6 ends
        // up 4 vectors further) and writes the differences through x10,
        // which steps down by 16 bytes per store.
3162*c0909341SAndroid Build Coastguard Worker.macro store_addsub r0, r1, r2, r3
3163*c0909341SAndroid Build Coastguard Worker        ld1             {v2.4s}, [x6], #16
3164*c0909341SAndroid Build Coastguard Worker        ld1             {v3.4s}, [x6], #16
3165*c0909341SAndroid Build Coastguard Worker        sqadd           v6.4s,  v2.4s,  \r0
3166*c0909341SAndroid Build Coastguard Worker        sqsub           \r0,    v2.4s,  \r0
3167*c0909341SAndroid Build Coastguard Worker        ld1             {v4.4s}, [x6], #16
3168*c0909341SAndroid Build Coastguard Worker        sqadd           v7.4s,  v3.4s,  \r1
3169*c0909341SAndroid Build Coastguard Worker        sqsub           \r1,    v3.4s,  \r1
3170*c0909341SAndroid Build Coastguard Worker        smin            v6.4s,  v6.4s,  v1.4s
3171*c0909341SAndroid Build Coastguard Worker        smin            \r0,    \r0,    v1.4s
3172*c0909341SAndroid Build Coastguard Worker        ld1             {v5.4s}, [x6], #16
3173*c0909341SAndroid Build Coastguard Worker        sqadd           v2.4s,  v4.4s,  \r2
3174*c0909341SAndroid Build Coastguard Worker        sub             x6,  x6,  #16*4
3175*c0909341SAndroid Build Coastguard Worker        smax            v6.4s,  v6.4s,  v0.4s
3176*c0909341SAndroid Build Coastguard Worker        smax            \r0,    \r0,    v0.4s
3177*c0909341SAndroid Build Coastguard Worker        sqsub           \r2,    v4.4s,  \r2
3178*c0909341SAndroid Build Coastguard Worker        smin            v7.4s,  v7.4s,  v1.4s
3179*c0909341SAndroid Build Coastguard Worker        smin            \r1,    \r1,    v1.4s
3180*c0909341SAndroid Build Coastguard Worker        st1             {v6.4s}, [x6], #16
3181*c0909341SAndroid Build Coastguard Worker        st1             {\r0},   [x10], x9
3182*c0909341SAndroid Build Coastguard Worker        smin            v2.4s,  v2.4s,  v1.4s
3183*c0909341SAndroid Build Coastguard Worker        smin            \r2,    \r2,    v1.4s
3184*c0909341SAndroid Build Coastguard Worker        smax            v7.4s,  v7.4s,  v0.4s
3185*c0909341SAndroid Build Coastguard Worker        smax            \r1,    \r1,    v0.4s
3186*c0909341SAndroid Build Coastguard Worker        sqadd           v3.4s,  v5.4s,  \r3
3187*c0909341SAndroid Build Coastguard Worker        sqsub           \r3,    v5.4s,  \r3
3188*c0909341SAndroid Build Coastguard Worker        smax            v2.4s,  v2.4s,  v0.4s
3189*c0909341SAndroid Build Coastguard Worker        smax            \r2,    \r2,    v0.4s
3190*c0909341SAndroid Build Coastguard Worker        smin            v3.4s,  v3.4s,  v1.4s
3191*c0909341SAndroid Build Coastguard Worker        smin            \r3,    \r3,    v1.4s
3192*c0909341SAndroid Build Coastguard Worker        st1             {v7.4s}, [x6], #16
3193*c0909341SAndroid Build Coastguard Worker        st1             {\r1},   [x10], x9
3194*c0909341SAndroid Build Coastguard Worker        smax            v3.4s,  v3.4s,  v0.4s
3195*c0909341SAndroid Build Coastguard Worker        smax            \r3,    \r3,    v0.4s
3196*c0909341SAndroid Build Coastguard Worker        st1             {v2.4s}, [x6], #16
3197*c0909341SAndroid Build Coastguard Worker        st1             {\r2},   [x10], x9
3198*c0909341SAndroid Build Coastguard Worker        st1             {v3.4s}, [x6], #16
3199*c0909341SAndroid Build Coastguard Worker        st1             {\r3},   [x10], x9
3200*c0909341SAndroid Build Coastguard Worker.endm
3201*c0909341SAndroid Build Coastguard Worker        store_addsub    v31.4s, v30.4s, v29.4s, v28.4s
3202*c0909341SAndroid Build Coastguard Worker        store_addsub    v27.4s, v26.4s, v25.4s, v24.4s
3203*c0909341SAndroid Build Coastguard Worker        store_addsub    v23.4s, v22.4s, v21.4s, v20.4s
3204*c0909341SAndroid Build Coastguard Worker        store_addsub    v19.4s, v18.4s, v17.4s, v16.4s
3205*c0909341SAndroid Build Coastguard Worker.purgem store_addsub
3206*c0909341SAndroid Build Coastguard Worker
        // Skip x6 past the 16 difference vectors written through x10 above.
3207*c0909341SAndroid Build Coastguard Worker        add             x6,  x6,  #4*4*16
3208*c0909341SAndroid Build Coastguard Worker
        // Four rounds follow. Each one loads four input rows into v16-v19,
        // optionally zeroes those rows in memory, optionally scales them and
        // calls inv_dct64_step1_neon. The trailing comments name the input
        // row and its offset from x7 in units of the (halved) x8.
3209*c0909341SAndroid Build Coastguard Worker        movrel          x17, idct64_coeffs
3210*c0909341SAndroid Build Coastguard Worker        movi            v5.4s,  #1, msl #16  // row_clip_max = ~(~bdmax << 7), 0x1ffff
3211*c0909341SAndroid Build Coastguard Worker        mvni            v4.4s,  #1, msl #16  // row_clip_min = (~bdmax << 7), 0xfffe0000
3212*c0909341SAndroid Build Coastguard Worker        movz16dup_if    v0.2s, w16, #2896*8, \scale
3213*c0909341SAndroid Build Coastguard Worker        movi_if         v7.4s,  #0, \clear
3214*c0909341SAndroid Build Coastguard Worker        add             x9,  x7,  x8, lsl #4 // offset 16
3215*c0909341SAndroid Build Coastguard Worker        add             x10, x7,  x8, lsl #3 // offset 8
3216*c0909341SAndroid Build Coastguard Worker        sub             x9,  x9,  x8         // offset 15
3217*c0909341SAndroid Build Coastguard Worker        sub             x11, x10, x8         // offset 7
3218*c0909341SAndroid Build Coastguard Worker        ld1             {v16.4s}, [x7]  // in1  (offset 0)
3219*c0909341SAndroid Build Coastguard Worker        ld1             {v17.4s}, [x9]  // in31 (offset 15)
3220*c0909341SAndroid Build Coastguard Worker        ld1             {v18.4s}, [x10] // in17 (offset 8)
3221*c0909341SAndroid Build Coastguard Worker        ld1             {v19.4s}, [x11] // in15 (offset 7)
3222*c0909341SAndroid Build Coastguard Worker        st1_if          {v7.4s}, [x7],  \clear
3223*c0909341SAndroid Build Coastguard Worker        st1_if          {v7.4s}, [x9],  \clear
3224*c0909341SAndroid Build Coastguard Worker        st1_if          {v7.4s}, [x10], \clear
3225*c0909341SAndroid Build Coastguard Worker        st1_if          {v7.4s}, [x11], \clear
3226*c0909341SAndroid Build Coastguard Worker        scale_if        \scale, v0.s[0], v16, v17, v18, v19
3227*c0909341SAndroid Build Coastguard Worker        bl              inv_dct64_step1_neon
3228*c0909341SAndroid Build Coastguard Worker        movz16dup_if    v0.2s, w16, #2896*8, \scale
3229*c0909341SAndroid Build Coastguard Worker        movi_if         v7.4s,  #0, \clear
3230*c0909341SAndroid Build Coastguard Worker        add             x7,  x7,  x8, lsl #2 // offset 4
3231*c0909341SAndroid Build Coastguard Worker        sub             x9,  x9,  x8, lsl #2 // offset 11
3232*c0909341SAndroid Build Coastguard Worker        sub             x10, x7,  x8         // offset 3
3233*c0909341SAndroid Build Coastguard Worker        add             x11, x9,  x8         // offset 12
3234*c0909341SAndroid Build Coastguard Worker        ld1             {v16.4s}, [x10] // in7  (offset 3)
3235*c0909341SAndroid Build Coastguard Worker        ld1             {v17.4s}, [x11] // in25 (offset 12)
3236*c0909341SAndroid Build Coastguard Worker        ld1             {v18.4s}, [x9]  // in23 (offset 11)
3237*c0909341SAndroid Build Coastguard Worker        ld1             {v19.4s}, [x7]  // in9  (offset 4)
3238*c0909341SAndroid Build Coastguard Worker        st1_if          {v7.4s}, [x7],  \clear
3239*c0909341SAndroid Build Coastguard Worker        st1_if          {v7.4s}, [x9],  \clear
3240*c0909341SAndroid Build Coastguard Worker        st1_if          {v7.4s}, [x10], \clear
3241*c0909341SAndroid Build Coastguard Worker        st1_if          {v7.4s}, [x11], \clear
3242*c0909341SAndroid Build Coastguard Worker        scale_if        \scale, v0.s[0], v16, v17, v18, v19
3243*c0909341SAndroid Build Coastguard Worker        bl              inv_dct64_step1_neon
3244*c0909341SAndroid Build Coastguard Worker        movz16dup_if    v0.2s, w16, #2896*8, \scale
3245*c0909341SAndroid Build Coastguard Worker        movi_if         v7.4s,  #0, \clear
3246*c0909341SAndroid Build Coastguard Worker        sub             x10, x10, x8, lsl #1 // offset 1
3247*c0909341SAndroid Build Coastguard Worker        sub             x9,  x9,  x8, lsl #1 // offset 9
3248*c0909341SAndroid Build Coastguard Worker        add             x7,  x7,  x8         // offset 5
3249*c0909341SAndroid Build Coastguard Worker        add             x11, x11, x8         // offset 13
        // The last two rounds share the same four base pointers: this round
        // uses two of them with an extra +x8 register offset, the final
        // round uses the other combination, so no pointer updates are needed
        // in between.
3250*c0909341SAndroid Build Coastguard Worker        ldr             q16, [x10, x8] // in5  (offset 2)
3251*c0909341SAndroid Build Coastguard Worker        ldr             q17, [x11]     // in27 (offset 13)
3252*c0909341SAndroid Build Coastguard Worker        ldr             q18, [x9,  x8] // in21 (offset 10)
3253*c0909341SAndroid Build Coastguard Worker        ldr             q19, [x7]      // in11 (offset 5)
3254*c0909341SAndroid Build Coastguard Worker        stroff_if       q7,  [x10, x8], \clear
3255*c0909341SAndroid Build Coastguard Worker        str_if          q7,  [x11],     \clear
3256*c0909341SAndroid Build Coastguard Worker        stroff_if       q7,  [x9,  x8], \clear
3257*c0909341SAndroid Build Coastguard Worker        str_if          q7,  [x7],      \clear
3258*c0909341SAndroid Build Coastguard Worker        scale_if        \scale, v0.s[0], v16, v17, v18, v19
3259*c0909341SAndroid Build Coastguard Worker        bl              inv_dct64_step1_neon
3260*c0909341SAndroid Build Coastguard Worker        movz16dup_if    v0.2s, w16, #2896*8, \scale
3261*c0909341SAndroid Build Coastguard Worker        movi_if         v7.4s,  #0, \clear
3262*c0909341SAndroid Build Coastguard Worker        ldr             q16, [x10]     // in3  (offset 1)
3263*c0909341SAndroid Build Coastguard Worker        ldr             q17, [x11, x8] // in29 (offset 14)
3264*c0909341SAndroid Build Coastguard Worker        ldr             q18, [x9]      // in19 (offset 9)
3265*c0909341SAndroid Build Coastguard Worker        ldr             q19, [x7,  x8] // in13 (offset 6)
3266*c0909341SAndroid Build Coastguard Worker        str_if          q7,  [x10],     \clear
3267*c0909341SAndroid Build Coastguard Worker        stroff_if       q7,  [x11, x8], \clear
3268*c0909341SAndroid Build Coastguard Worker        str_if          q7,  [x9],      \clear
3269*c0909341SAndroid Build Coastguard Worker        stroff_if       q7,  [x7,  x8], \clear
3270*c0909341SAndroid Build Coastguard Worker        scale_if        \scale, v0.s[0], v16, v17, v18, v19
3271*c0909341SAndroid Build Coastguard Worker        bl              inv_dct64_step1_neon
3272*c0909341SAndroid Build Coastguard Worker
        // Move x6 back by 32 vectors and point x9 at the 8th of them before
        // the second step. NOTE(review): this presumes the four step1 calls
        // advanced x6 by 32 vectors in total - confirm in inv_dct64_step1_neon.
3273*c0909341SAndroid Build Coastguard Worker        sub             x6,  x6,  #4*4*32
3274*c0909341SAndroid Build Coastguard Worker        add             x9,  x6,  #4*4*7
3275*c0909341SAndroid Build Coastguard Worker
3276*c0909341SAndroid Build Coastguard Worker        bl              inv_dct64_step2_neon
3277*c0909341SAndroid Build Coastguard Worker
3278*c0909341SAndroid Build Coastguard Worker        ret             x14
3279*c0909341SAndroid Build Coastguard Workerendfunc
3280*c0909341SAndroid Build Coastguard Worker.endm
3281*c0909341SAndroid Build Coastguard Worker
// Instantiate the two variants called further down in this file:
//   inv_txfm_dct_clear_4s_x64_neon        zeroes the input rows after reading
//   inv_txfm_dct_clear_scale_4s_x64_neon  additionally applies the 2896*8 scaling
3282*c0909341SAndroid Build Coastguard Workerdef_dct64_func _clear, clear=1
3283*c0909341SAndroid Build Coastguard Workerdef_dct64_func _clear_scale, clear=1, scale=1
3284*c0909341SAndroid Build Coastguard Worker
3285*c0909341SAndroid Build Coastguard Worker
// inv_txfm_horz_dct_64x4_neon
//
// Final add/sub stage of the horizontal 64-point pass. Reads 64 vectors of
// 4 x 32-bit values from the scratch area at sp (front half ascending through
// x7, back half descending through x8), transposes them in 4x4 groups,
// combines front and back, shifts, narrows to 16 bit and writes four output
// rows through x6.
//   x6   output pointer; consecutive rows are 2*64 bytes apart (x10)
//   w12  shift operand for srshl, broadcast to v7 (callers in this file pass
//        -2 or -1, i.e. a rounding right shift)
//   x14  copy of the return address; returns with ret x14
// Overwrites x6-x11, v0-v5, v7 and v16-v31. transpose_4x4s is defined outside
// this part of the file.
3286*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_horz_dct_64x4_neon
3287*c0909341SAndroid Build Coastguard Worker        mov             x14, x30
3288*c0909341SAndroid Build Coastguard Worker
3289*c0909341SAndroid Build Coastguard Worker        mov             x7,  sp
3290*c0909341SAndroid Build Coastguard Worker        add             x8,  sp,  #4*4*(64 - 4)
        // x9 starts 56 halfwords into the row, so the mirrored results fill
        // the end of each output row first and move towards the middle.
3291*c0909341SAndroid Build Coastguard Worker        add             x9,  x6,  #2*56
3292*c0909341SAndroid Build Coastguard Worker        mov             x10, #2*64
3293*c0909341SAndroid Build Coastguard Worker        mov             x11, #-4*4*4
3294*c0909341SAndroid Build Coastguard Worker
3295*c0909341SAndroid Build Coastguard Worker        dup             v7.4s,  w12
3296*c0909341SAndroid Build Coastguard Worker1:
        // Per iteration: 8 vectors from the front (v16-v23) and 8 from the
        // back (v28-v31 first, then v24-v27). The back groups are passed to
        // the transpose in reversed register order.
3297*c0909341SAndroid Build Coastguard Worker        ld1             {v16.4s, v17.4s, v18.4s, v19.4s}, [x7], #64
3298*c0909341SAndroid Build Coastguard Worker        ld1             {v28.4s, v29.4s, v30.4s, v31.4s}, [x8], x11
3299*c0909341SAndroid Build Coastguard Worker        ld1             {v20.4s, v21.4s, v22.4s, v23.4s}, [x7], #64
3300*c0909341SAndroid Build Coastguard Worker        ld1             {v24.4s, v25.4s, v26.4s, v27.4s}, [x8], x11
3301*c0909341SAndroid Build Coastguard Worker        transpose_4x4s  v16, v17, v18, v19, v2,  v3,  v4,  v5
3302*c0909341SAndroid Build Coastguard Worker        transpose_4x4s  v20, v21, v22, v23, v2,  v3,  v4,  v5
3303*c0909341SAndroid Build Coastguard Worker        transpose_4x4s  v31, v30, v29, v28, v2,  v3,  v4,  v5
3304*c0909341SAndroid Build Coastguard Worker        transpose_4x4s  v27, v26, v25, v24, v2,  v3,  v4,  v5
3305*c0909341SAndroid Build Coastguard Worker
        // Local helper producing one output row segment on each side:
        //   v0 = narrow(src0 + src1) : narrow(src2 + src3)  -> stored at x6
        //   v3 = narrow(src2 - src3) : narrow(src0 - src1), then rev64
        //        reverses the halfwords inside each 64-bit half -> stored at x9
        // All adds/subs saturate; srshl applies the shift held in v7.
        // Both pointers advance by one row (x10) per invocation.
3306*c0909341SAndroid Build Coastguard Worker.macro store_addsub src0, src1, src2, src3
3307*c0909341SAndroid Build Coastguard Worker        sqsub           v1.4s,   \src0,   \src1
3308*c0909341SAndroid Build Coastguard Worker        sqadd           v0.4s,   \src0,   \src1
3309*c0909341SAndroid Build Coastguard Worker        sqsub           v3.4s,   \src2,   \src3
3310*c0909341SAndroid Build Coastguard Worker        srshl           v1.4s,   v1.4s,   v7.4s
3311*c0909341SAndroid Build Coastguard Worker        sqadd           v2.4s,   \src2,   \src3
3312*c0909341SAndroid Build Coastguard Worker        srshl           v3.4s,   v3.4s,   v7.4s
3313*c0909341SAndroid Build Coastguard Worker        srshl           v0.4s,   v0.4s,   v7.4s
3314*c0909341SAndroid Build Coastguard Worker        srshl           v2.4s,   v2.4s,   v7.4s
3315*c0909341SAndroid Build Coastguard Worker        sqxtn           v3.4h,   v3.4s
3316*c0909341SAndroid Build Coastguard Worker        sqxtn2          v3.8h,   v1.4s
3317*c0909341SAndroid Build Coastguard Worker        sqxtn           v0.4h,   v0.4s
3318*c0909341SAndroid Build Coastguard Worker        sqxtn2          v0.8h,   v2.4s
3319*c0909341SAndroid Build Coastguard Worker        rev64           v3.8h,   v3.8h
3320*c0909341SAndroid Build Coastguard Worker        st1             {v0.8h},  [x6], x10
3321*c0909341SAndroid Build Coastguard Worker        st1             {v3.8h},  [x9], x10
3322*c0909341SAndroid Build Coastguard Worker.endm
3323*c0909341SAndroid Build Coastguard Worker        store_addsub    v16.4s,  v31.4s,  v20.4s,  v27.4s
3324*c0909341SAndroid Build Coastguard Worker        store_addsub    v17.4s,  v30.4s,  v21.4s,  v26.4s
3325*c0909341SAndroid Build Coastguard Worker        store_addsub    v18.4s,  v29.4s,  v22.4s,  v25.4s
3326*c0909341SAndroid Build Coastguard Worker        store_addsub    v19.4s,  v28.4s,  v23.4s,  v24.4s
3327*c0909341SAndroid Build Coastguard Worker.purgem store_addsub
        // Go back up the four rows just written, then step 8 halfwords to
        // the right on the front side and 8 to the left on the mirrored side.
3328*c0909341SAndroid Build Coastguard Worker        sub             x6,  x6,  x10, lsl #2
3329*c0909341SAndroid Build Coastguard Worker        sub             x9,  x9,  x10, lsl #2
3330*c0909341SAndroid Build Coastguard Worker        add             x6,  x6,  #16
3331*c0909341SAndroid Build Coastguard Worker        sub             x9,  x9,  #16
3332*c0909341SAndroid Build Coastguard Worker
        // Loop until the ascending and descending read pointers have crossed
        // (four iterations for the 64-vector scratch area).
3333*c0909341SAndroid Build Coastguard Worker        cmp             x7,  x8
3334*c0909341SAndroid Build Coastguard Worker        b.lt            1b
3335*c0909341SAndroid Build Coastguard Worker        ret             x14
3336*c0909341SAndroid Build Coastguard Workerendfunc
3337*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_vert_dct_8x64_neon
//
// Final add/sub stage of the vertical 64-point pass, added to the
// destination. Reads 64 vectors of 8 x 16-bit values from the scratch area
// at sp (front half ascending through x7, back half descending through x8),
// and accumulates them into an 8-pixel wide, 64-row strip: sums go to rows
// walking down from the top, differences to rows walking up from the bottom.
//   x6   pointer to the first destination row of the strip
//   x1   destination stride in bytes
//   x14  copy of the return address; returns with ret x14
// The value of x8 on entry is not used: x8 is set up below as the descending
// read pointer, so no scaling of the incoming x8 is performed here.
// Overwrites x6-x11, v0-v5, v7 and v16-v31.
3338*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_vert_dct_8x64_neon
3339*c0909341SAndroid Build Coastguard Worker        mov             x14, x30
3341*c0909341SAndroid Build Coastguard Worker
3342*c0909341SAndroid Build Coastguard Worker        mov             x7,  sp
3343*c0909341SAndroid Build Coastguard Worker        add             x8,  sp,  #2*8*(64 - 4)
        // x9 = x6 + 63 * stride (last row of the strip); x10 = -stride so the
        // bottom pointer walks upwards.
3344*c0909341SAndroid Build Coastguard Worker        add             x9,  x6,  x1, lsl #6
3345*c0909341SAndroid Build Coastguard Worker        sub             x9,  x9,  x1
3346*c0909341SAndroid Build Coastguard Worker        neg             x10, x1
3347*c0909341SAndroid Build Coastguard Worker        mov             x11, #-2*8*4
3348*c0909341SAndroid Build Coastguard Worker
3349*c0909341SAndroid Build Coastguard Worker1:
3350*c0909341SAndroid Build Coastguard Worker        ld1             {v16.8h, v17.8h, v18.8h, v19.8h}, [x7], #64
3351*c0909341SAndroid Build Coastguard Worker        ld1             {v28.8h, v29.8h, v30.8h, v31.8h}, [x8], x11
3352*c0909341SAndroid Build Coastguard Worker        ld1             {v20.8h, v21.8h, v22.8h, v23.8h}, [x7], #64
3353*c0909341SAndroid Build Coastguard Worker        ld1             {v24.8h, v25.8h, v26.8h, v27.8h}, [x8], x11
3354*c0909341SAndroid Build Coastguard Worker
3355*c0909341SAndroid Build Coastguard Worker        mvni            v7.8h,   #0xfc, lsl #8 // 0x3ff
        // Local helper handling two rows at the top (x6) and two at the
        // bottom (x9). The destination rows are loaded first and the pointers
        // rewound, then:
        //   top    += round((src0 + src1) >> 4), round((src2 + src3) >> 4)
        //   bottom += round((src0 - src1) >> 4), round((src2 - src3) >> 4)
        // usqadd accumulates the signed residual into the unsigned pixel with
        // saturation; smin then limits the result to 0x3ff.
3356*c0909341SAndroid Build Coastguard Worker.macro add_dest_addsub src0, src1, src2, src3
3357*c0909341SAndroid Build Coastguard Worker        ld1             {v0.8h}, [x6], x1
3358*c0909341SAndroid Build Coastguard Worker        ld1             {v1.8h}, [x9], x10
3359*c0909341SAndroid Build Coastguard Worker        sqadd           v4.8h,   \src0,   \src1
3360*c0909341SAndroid Build Coastguard Worker        ld1             {v2.8h}, [x6]
3361*c0909341SAndroid Build Coastguard Worker        sqsub           \src0,   \src0,   \src1
3362*c0909341SAndroid Build Coastguard Worker        ld1             {v3.8h}, [x9]
3363*c0909341SAndroid Build Coastguard Worker        sqadd           v5.8h,   \src2,   \src3
3364*c0909341SAndroid Build Coastguard Worker        sqsub           \src2,   \src2,   \src3
3365*c0909341SAndroid Build Coastguard Worker        sub             x6,  x6,  x1
3366*c0909341SAndroid Build Coastguard Worker        sub             x9,  x9,  x10
3367*c0909341SAndroid Build Coastguard Worker        srshr           v4.8h,   v4.8h,   #4
3368*c0909341SAndroid Build Coastguard Worker        srshr           v5.8h,   v5.8h,   #4
3369*c0909341SAndroid Build Coastguard Worker        srshr           \src0,   \src0,   #4
3370*c0909341SAndroid Build Coastguard Worker        usqadd          v0.8h,   v4.8h
3371*c0909341SAndroid Build Coastguard Worker        srshr           \src2,   \src2,   #4
3372*c0909341SAndroid Build Coastguard Worker        usqadd          v1.8h,   \src0
3373*c0909341SAndroid Build Coastguard Worker        usqadd          v2.8h,   v5.8h
3374*c0909341SAndroid Build Coastguard Worker        smin            v0.8h,   v0.8h,   v7.8h
3375*c0909341SAndroid Build Coastguard Worker        usqadd          v3.8h,   \src2
3376*c0909341SAndroid Build Coastguard Worker        smin            v1.8h,   v1.8h,   v7.8h
3377*c0909341SAndroid Build Coastguard Worker        st1             {v0.8h}, [x6], x1
3378*c0909341SAndroid Build Coastguard Worker        smin            v2.8h,   v2.8h,   v7.8h
3379*c0909341SAndroid Build Coastguard Worker        st1             {v1.8h}, [x9], x10
3380*c0909341SAndroid Build Coastguard Worker        smin            v3.8h,   v3.8h,   v7.8h
3381*c0909341SAndroid Build Coastguard Worker        st1             {v2.8h}, [x6], x1
3382*c0909341SAndroid Build Coastguard Worker        st1             {v3.8h}, [x9], x10
3383*c0909341SAndroid Build Coastguard Worker.endm
3384*c0909341SAndroid Build Coastguard Worker        add_dest_addsub v16.8h,  v31.8h,  v17.8h,  v30.8h
3385*c0909341SAndroid Build Coastguard Worker        add_dest_addsub v18.8h,  v29.8h,  v19.8h,  v28.8h
3386*c0909341SAndroid Build Coastguard Worker        add_dest_addsub v20.8h,  v27.8h,  v21.8h,  v26.8h
3387*c0909341SAndroid Build Coastguard Worker        add_dest_addsub v22.8h,  v25.8h,  v23.8h,  v24.8h
3388*c0909341SAndroid Build Coastguard Worker.purgem add_dest_addsub
        // Loop until the ascending and descending read pointers have crossed
        // (four iterations of 16 rows each).
3389*c0909341SAndroid Build Coastguard Worker        cmp             x7,  x8
3390*c0909341SAndroid Build Coastguard Worker        b.lt            1b
3391*c0909341SAndroid Build Coastguard Worker
3392*c0909341SAndroid Build Coastguard Worker        ret             x14
3393*c0909341SAndroid Build Coastguard Workerendfunc
3394*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_dct_dct_64x64_16bpc_neon (exported)
//
// Two-pass 64x64 inverse DCT that adds the result to the destination.
// Registers as used by the visible code:
//   x0   destination pointer (column strips are addressed as x0 + i*2)
//   x1   destination stride (consumed by inv_txfm_add_vert_dct_8x64_neon)
//   x2   coefficient buffer (rows are addressed as x2 + i*4, stride 32*4)
//   w3   compared against thresholds read from eob_32x32 to stop the first
//        pass early (presumably the end-of-block position - confirm)
//   x15  copy of the return address; returns with ret x15
// NOTE(review): idct_dc and sub_sp are macros defined outside this part of
// the file; idct_dc presumably handles the DC-only case - confirm there.
3395*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_dct_dct_64x64_16bpc_neon, export=1
3396*c0909341SAndroid Build Coastguard Worker        idct_dc         64,  64,  2
3397*c0909341SAndroid Build Coastguard Worker
3398*c0909341SAndroid Build Coastguard Worker        mov             x15, x30
3399*c0909341SAndroid Build Coastguard Worker
        // Stack layout: 64*4*4 bytes of per-call scratch at sp (used by the
        // 4s_x64 and horz helpers), followed by a 64*32*2 byte buffer at x5
        // that holds the 16-bit output of the first pass.
3400*c0909341SAndroid Build Coastguard Worker        sub_sp          64*32*2+64*4*4
3401*c0909341SAndroid Build Coastguard Worker        add             x5,  sp, #64*4*4
3402*c0909341SAndroid Build Coastguard Worker
3403*c0909341SAndroid Build Coastguard Worker        movrel          x13, eob_32x32
3404*c0909341SAndroid Build Coastguard Worker
        // First pass, four rows per step. From the second step on, w8 is
        // preloaded with the number of rows still missing and the step is
        // skipped (branch to 1f) when w3 is below the threshold that the
        // previous step loaded into w12.
3405*c0909341SAndroid Build Coastguard Worker.irp i, 0, 4, 8, 12, 16, 20, 24, 28
3406*c0909341SAndroid Build Coastguard Worker        add             x6,  x5,  #(\i*64*2)
3407*c0909341SAndroid Build Coastguard Worker.if \i > 0
3408*c0909341SAndroid Build Coastguard Worker        mov             w8,  #(32 - \i)
3409*c0909341SAndroid Build Coastguard Worker        cmp             w3,  w12
3410*c0909341SAndroid Build Coastguard Worker        b.lt            1f
3411*c0909341SAndroid Build Coastguard Worker.endif
3412*c0909341SAndroid Build Coastguard Worker        add             x7,  x2,  #(\i*4)
3413*c0909341SAndroid Build Coastguard Worker        mov             x8,  #32*4
3414*c0909341SAndroid Build Coastguard Worker        mov             x12, #-2 // shift
3415*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_dct_clear_4s_x64_neon
        // x6 was modified by the call above, so it is recomputed before the
        // horizontal store stage.
3416*c0909341SAndroid Build Coastguard Worker        add             x6,  x5,  #(\i*64*2)
3417*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_horz_dct_64x4_neon
3418*c0909341SAndroid Build Coastguard Worker.if \i < 28
        // w12 held the shift during the calls; reload it with the next
        // threshold from the table.
3419*c0909341SAndroid Build Coastguard Worker        ldrh            w12, [x13], #2
3420*c0909341SAndroid Build Coastguard Worker.endif
3421*c0909341SAndroid Build Coastguard Worker.endr
3422*c0909341SAndroid Build Coastguard Worker        b               3f
3423*c0909341SAndroid Build Coastguard Worker
        // Early exit of the first pass: zero-fill the remaining rows of the
        // intermediate buffer starting at x6. Each trip through the loop
        // writes 4 * 64 bytes (two rows of 64 halfwords) and lowers w8 by 2.
3424*c0909341SAndroid Build Coastguard Worker1:
3425*c0909341SAndroid Build Coastguard Worker        movi            v4.8h,  #0
3426*c0909341SAndroid Build Coastguard Worker        movi            v5.8h,  #0
3427*c0909341SAndroid Build Coastguard Worker        movi            v6.8h,  #0
3428*c0909341SAndroid Build Coastguard Worker        movi            v7.8h,  #0
3429*c0909341SAndroid Build Coastguard Worker2:
3430*c0909341SAndroid Build Coastguard Worker        subs            w8,  w8,  #2
3431*c0909341SAndroid Build Coastguard Worker.rept 4
3432*c0909341SAndroid Build Coastguard Worker        st1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x6], #64
3433*c0909341SAndroid Build Coastguard Worker.endr
3434*c0909341SAndroid Build Coastguard Worker        b.gt            2b
3435*c0909341SAndroid Build Coastguard Worker
        // Second pass, eight columns per step: run the 64-point column
        // transform on the intermediate buffer (row stride 64*2 bytes), then
        // add the result to the matching 8-pixel wide strip of the
        // destination.
3436*c0909341SAndroid Build Coastguard Worker3:
3437*c0909341SAndroid Build Coastguard Worker.irp i, 0, 8, 16, 24, 32, 40, 48, 56
3438*c0909341SAndroid Build Coastguard Worker        add             x7,  x5,  #(\i*2)
3439*c0909341SAndroid Build Coastguard Worker        mov             x8,  #64*2
3440*c0909341SAndroid Build Coastguard Worker        bl              X(inv_txfm_dct_8h_x64_neon)
3441*c0909341SAndroid Build Coastguard Worker        add             x6,  x0,  #(\i*2)
3442*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_add_vert_dct_8x64_neon
3443*c0909341SAndroid Build Coastguard Worker.endr
3444*c0909341SAndroid Build Coastguard Worker
        // Release the stack area: x5 + 64*32*2 is the address just past the
        // intermediate buffer.
3445*c0909341SAndroid Build Coastguard Worker        add             sp,  x5,  #64*32*2
3446*c0909341SAndroid Build Coastguard Worker        ret             x15
3447*c0909341SAndroid Build Coastguard Workerendfunc
3448*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_dct_dct_64x32_16bpc_neon (exported)
//
// Two-pass 64x32 inverse DCT that adds the result to the destination. The
// first pass is the same as in the 64x64 function except that it calls the
// scaling variant of the 64-point row transform and uses a shift of -1; the
// second pass uses the 32-row vertical routine.
// Registers as used by the visible code:
//   x0   destination pointer (column strips are addressed as x0 + i*2)
//   x2   coefficient buffer (rows are addressed as x2 + i*4, stride 32*4)
//   w3   compared against thresholds read from eob_32x32 to stop the first
//        pass early (presumably the end-of-block position - confirm)
//   x15  copy of the return address; returns with ret x15
// NOTE(review): idct_dc, sub_sp and inv_txfm_add_vert_dct_8x32_neon are
// defined outside this part of the file.
3449*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_dct_dct_64x32_16bpc_neon, export=1
3450*c0909341SAndroid Build Coastguard Worker        idct_dc         64,  32,  1
3451*c0909341SAndroid Build Coastguard Worker
3452*c0909341SAndroid Build Coastguard Worker        mov             x15, x30
3453*c0909341SAndroid Build Coastguard Worker
        // Stack layout: 64*4*4 bytes of per-call scratch at sp, followed by
        // a 64*32*2 byte buffer at x5 for the 16-bit first-pass output.
3454*c0909341SAndroid Build Coastguard Worker        sub_sp          64*32*2+64*4*4
3455*c0909341SAndroid Build Coastguard Worker        add             x5,  sp, #64*4*4
3456*c0909341SAndroid Build Coastguard Worker
3457*c0909341SAndroid Build Coastguard Worker        movrel          x13, eob_32x32
3458*c0909341SAndroid Build Coastguard Worker
        // First pass, four rows per step. From the second step on, w8 is
        // preloaded with the number of rows still missing and the step is
        // skipped (branch to 1f) when w3 is below the threshold that the
        // previous step loaded into w12.
3459*c0909341SAndroid Build Coastguard Worker.irp i, 0, 4, 8, 12, 16, 20, 24, 28
3460*c0909341SAndroid Build Coastguard Worker        add             x6,  x5,  #(\i*64*2)
3461*c0909341SAndroid Build Coastguard Worker.if \i > 0
3462*c0909341SAndroid Build Coastguard Worker        mov             w8,  #(32 - \i)
3463*c0909341SAndroid Build Coastguard Worker        cmp             w3,  w12
3464*c0909341SAndroid Build Coastguard Worker        b.lt            1f
3465*c0909341SAndroid Build Coastguard Worker.endif
3466*c0909341SAndroid Build Coastguard Worker        add             x7,  x2,  #(\i*4)
3467*c0909341SAndroid Build Coastguard Worker        mov             x8,  #32*4
3468*c0909341SAndroid Build Coastguard Worker        mov             x12, #-1 // shift
3469*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_dct_clear_scale_4s_x64_neon
        // x6 was modified by the call above, so it is recomputed before the
        // horizontal store stage.
3470*c0909341SAndroid Build Coastguard Worker        add             x6,  x5,  #(\i*64*2)
3471*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_horz_dct_64x4_neon
3472*c0909341SAndroid Build Coastguard Worker.if \i < 28
        // w12 held the shift during the calls; reload it with the next
        // threshold from the table.
3473*c0909341SAndroid Build Coastguard Worker        ldrh            w12, [x13], #2
3474*c0909341SAndroid Build Coastguard Worker.endif
3475*c0909341SAndroid Build Coastguard Worker.endr
3476*c0909341SAndroid Build Coastguard Worker        b               3f
3477*c0909341SAndroid Build Coastguard Worker
        // Early exit of the first pass: zero-fill the remaining rows of the
        // intermediate buffer starting at x6. Each trip through the loop
        // writes 4 * 64 bytes (two rows of 64 halfwords) and lowers w8 by 2.
3478*c0909341SAndroid Build Coastguard Worker1:
3479*c0909341SAndroid Build Coastguard Worker        movi            v4.8h,  #0
3480*c0909341SAndroid Build Coastguard Worker        movi            v5.8h,  #0
3481*c0909341SAndroid Build Coastguard Worker        movi            v6.8h,  #0
3482*c0909341SAndroid Build Coastguard Worker        movi            v7.8h,  #0
3483*c0909341SAndroid Build Coastguard Worker2:
3484*c0909341SAndroid Build Coastguard Worker        subs            w8,  w8,  #2
3485*c0909341SAndroid Build Coastguard Worker.rept 4
3486*c0909341SAndroid Build Coastguard Worker        st1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x6], #64
3487*c0909341SAndroid Build Coastguard Worker.endr
3488*c0909341SAndroid Build Coastguard Worker        b.gt            2b
3489*c0909341SAndroid Build Coastguard Worker
        // Second pass, eight columns per step: x6 = destination strip,
        // x7 = matching column of the intermediate buffer, x8 = its row
        // stride (64*2 bytes).
3490*c0909341SAndroid Build Coastguard Worker3:
3491*c0909341SAndroid Build Coastguard Worker.irp i, 0, 8, 16, 24, 32, 40, 48, 56
3492*c0909341SAndroid Build Coastguard Worker        add             x6,  x0,  #(\i*2)
3493*c0909341SAndroid Build Coastguard Worker        add             x7,  x5,  #(\i*2)
3494*c0909341SAndroid Build Coastguard Worker        mov             x8,  #64*2
3495*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_add_vert_dct_8x32_neon
3496*c0909341SAndroid Build Coastguard Worker.endr
3497*c0909341SAndroid Build Coastguard Worker
        // Release the stack area: x5 + 64*32*2 is the address just past the
        // intermediate buffer.
3498*c0909341SAndroid Build Coastguard Worker        add             sp,  x5,  #64*32*2
3499*c0909341SAndroid Build Coastguard Worker        ret             x15
3500*c0909341SAndroid Build Coastguard Workerendfunc
3501*c0909341SAndroid Build Coastguard Worker
// Inverse DCT/DCT transform and add for a 32x64 block, 16 bpc (going by the
// name). Register roles as used below; the C-level meaning of the incoming
// arguments is not visible in this part of the file - TODO confirm against
// the prototype:
//   x0       base of the area handed to the final add pass
//   x2       base of the coefficient reads in the first pass
//   w3       compared against the eob_32x32 thresholds (presumably the eob)
//   x5       32*32*2-byte intermediate buffer on the stack
//   x13/w12  cursor into eob_32x32 / current threshold
//   x15      return address, kept here because the bl calls overwrite x30
3502*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_dct_dct_32x64_16bpc_neon, export=1
        // idct_dc is a macro defined outside this chunk; presumably it handles
        // the DC-only case and returns early - TODO confirm.
3503*c0909341SAndroid Build Coastguard Worker        idct_dc         32,  64,  1
3504*c0909341SAndroid Build Coastguard Worker
3505*c0909341SAndroid Build Coastguard Worker        mov             x15, x30
3506*c0909341SAndroid Build Coastguard Worker
        // Frame layout: 64*8*2 bytes at sp, followed by the 32*32*2-byte
        // intermediate buffer that x5 points to.
        // NOTE(review): the low 64*8*2 bytes look like scratch for the
        // 64-point helpers called in the second pass - confirm.
3507*c0909341SAndroid Build Coastguard Worker        sub_sp          32*32*2+64*8*2
3508*c0909341SAndroid Build Coastguard Worker        add             x5,  sp, #64*8*2
3509*c0909341SAndroid Build Coastguard Worker
        // Load the first threshold; x13 is post-incremented past it.
3510*c0909341SAndroid Build Coastguard Worker        movrel          x13, eob_32x32
3511*c0909341SAndroid Build Coastguard Worker        ldrh            w12, [x13], #2
3512*c0909341SAndroid Build Coastguard Worker
        // First pass, unrolled per group i = 0, 4, ..., 28. x6 = x5 + i*32*2
        // is the position in the intermediate buffer for this group. Every
        // group except the first is skipped, together with all later ones,
        // when w3 is below the current threshold; w8 = 32 - i then tells the
        // zero-fill loop at 1: how much of the buffer is still unwritten.
3513*c0909341SAndroid Build Coastguard Worker.irp i, 0, 4, 8, 12, 16, 20, 24, 28
3514*c0909341SAndroid Build Coastguard Worker        add             x6,  x5,  #(\i*32*2)
3515*c0909341SAndroid Build Coastguard Worker.if \i > 0
3516*c0909341SAndroid Build Coastguard Worker        mov             w8,  #(32 - \i)
3517*c0909341SAndroid Build Coastguard Worker        cmp             w3,  w12
3518*c0909341SAndroid Build Coastguard Worker        b.lt            1f
        // No comparison follows the last group, so do not fetch a threshold
        // for it (same guard as in inv_txfm_add_dct_dct_16x64_16bpc_neon).
.if \i < 28
3519*c0909341SAndroid Build Coastguard Worker        ldrh            w12, [x13], #2
.endif
3520*c0909341SAndroid Build Coastguard Worker.endif
        // x7 = read position for this group, x8 = 32*4 (presumably the
        // byte stride used by the helper - confirm against its definition).
3521*c0909341SAndroid Build Coastguard Worker        add             x7,  x2,  #(\i*4)
3522*c0909341SAndroid Build Coastguard Worker        mov             x8,  #32*4
3523*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_horz_scale_dct_32x4_neon
3524*c0909341SAndroid Build Coastguard Worker.endr
3525*c0909341SAndroid Build Coastguard Worker        b               3f
3526*c0909341SAndroid Build Coastguard Worker
        // Early-out path: clear the rest of the intermediate buffer.
3527*c0909341SAndroid Build Coastguard Worker1:
3528*c0909341SAndroid Build Coastguard Worker        movi            v4.8h,  #0
3529*c0909341SAndroid Build Coastguard Worker        movi            v5.8h,  #0
3530*c0909341SAndroid Build Coastguard Worker        movi            v6.8h,  #0
3531*c0909341SAndroid Build Coastguard Worker        movi            v7.8h,  #0
        // Each iteration stores 4*64 bytes and takes 4 off w8, so in total
        // (32 - i)*64 bytes are zeroed: from x6 up to the end of the
        // 32*32*2-byte buffer.
3532*c0909341SAndroid Build Coastguard Worker2:
3533*c0909341SAndroid Build Coastguard Worker        subs            w8,  w8,  #4
3534*c0909341SAndroid Build Coastguard Worker.rept 4
3535*c0909341SAndroid Build Coastguard Worker        st1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x6], #64
3536*c0909341SAndroid Build Coastguard Worker.endr
3537*c0909341SAndroid Build Coastguard Worker        b.gt            2b
3538*c0909341SAndroid Build Coastguard Worker
        // Second pass, once per group of 8 columns (i = 0, 8, 16, 24):
        // x7 = x5 + i*2 selects the columns in the intermediate buffer with
        // x8 = 32*2 as the row pitch in bytes, then x6 = x0 + i*2 is the
        // matching position for the add pass.
3539*c0909341SAndroid Build Coastguard Worker3:
3540*c0909341SAndroid Build Coastguard Worker.irp i, 0, 8, 16, 24
3541*c0909341SAndroid Build Coastguard Worker        add             x7,  x5,  #(\i*2)
3542*c0909341SAndroid Build Coastguard Worker        mov             x8,  #32*2
3543*c0909341SAndroid Build Coastguard Worker        bl              X(inv_txfm_dct_8h_x64_neon)
3544*c0909341SAndroid Build Coastguard Worker        add             x6,  x0,  #(\i*2)
3545*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_add_vert_dct_8x64_neon
3546*c0909341SAndroid Build Coastguard Worker.endr
3547*c0909341SAndroid Build Coastguard Worker
        // x5 + 32*32*2 is sp + 64*8*2 + 32*32*2, i.e. the amount passed to
        // sub_sp above; return through the saved link register.
3548*c0909341SAndroid Build Coastguard Worker        add             sp,  x5,  #32*32*2
3549*c0909341SAndroid Build Coastguard Worker        ret             x15
3550*c0909341SAndroid Build Coastguard Workerendfunc
3551*c0909341SAndroid Build Coastguard Worker
// Inverse DCT/DCT transform and add for a 64x16 block, 16 bpc (going by the
// name). Register roles as used below; the C-level meaning of the incoming
// arguments is not visible in this part of the file - TODO confirm against
// the prototype:
//   x0       base of the area handed to the final add pass
//   x2       base of the coefficient reads in the first pass
//   w3       compared against the eob_16x32 thresholds (presumably the eob)
//   x4       64*16*2-byte intermediate buffer on the stack
//   x13/w12  cursor into eob_16x32 / current threshold
//   x15      return address, kept here because the bl calls overwrite x30
3552*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_dct_dct_64x16_16bpc_neon, export=1
        // idct_dc is a macro defined outside this chunk; presumably it handles
        // the DC-only case and returns early - TODO confirm.
3553*c0909341SAndroid Build Coastguard Worker        idct_dc         64,  16,  2
3554*c0909341SAndroid Build Coastguard Worker
3555*c0909341SAndroid Build Coastguard Worker        mov             x15, x30
3556*c0909341SAndroid Build Coastguard Worker
        // Frame layout: 64*4*4 bytes at sp, followed by the 64*16*2-byte
        // intermediate buffer that x4 points to.
        // NOTE(review): the low 64*4*4 bytes look like scratch shared by the
        // two helpers called per group below - confirm.
3557*c0909341SAndroid Build Coastguard Worker        sub_sp          64*16*2+64*4*4
3558*c0909341SAndroid Build Coastguard Worker        add             x4,  sp, #64*4*4
3559*c0909341SAndroid Build Coastguard Worker
        // No threshold is loaded up front: the first group has no check, and
        // x12 is needed as the shift argument before that anyway.
3560*c0909341SAndroid Build Coastguard Worker        movrel          x13, eob_16x32
3561*c0909341SAndroid Build Coastguard Worker
        // First pass, unrolled per group i = 0, 4, 8, 12. x6 = x4 + i*64*2 is
        // the position in the intermediate buffer for this group. Every group
        // except the first is skipped, together with all later ones, when w3
        // is below the threshold loaded at the end of the previous group;
        // w8 = 16 - i then tells the zero-fill loop at 1: how much is left.
3562*c0909341SAndroid Build Coastguard Worker.irp i, 0, 4, 8, 12
3563*c0909341SAndroid Build Coastguard Worker        add             x6,  x4,  #(\i*64*2)
3564*c0909341SAndroid Build Coastguard Worker.if \i > 0
3565*c0909341SAndroid Build Coastguard Worker        mov             w8,  #(16 - \i)
3566*c0909341SAndroid Build Coastguard Worker        cmp             w3,  w12
3567*c0909341SAndroid Build Coastguard Worker        b.lt            1f
3568*c0909341SAndroid Build Coastguard Worker.endif
        // x7 = read position for this group, x8 = 16*4 (presumably the byte
        // stride used by the helper - confirm against its definition).
        // x12 is overwritten with the shift here, which is why the next
        // threshold can only be fetched into w12 after the calls.
3569*c0909341SAndroid Build Coastguard Worker        add             x7,  x2,  #(\i*4)
3570*c0909341SAndroid Build Coastguard Worker        mov             x8,  #16*4
3571*c0909341SAndroid Build Coastguard Worker        mov             x12, #-2 // shift
3572*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_dct_clear_4s_x64_neon
        // x6 is set again before the second helper (presumably the first
        // call does not preserve it - confirm).
3573*c0909341SAndroid Build Coastguard Worker        add             x6,  x4,  #(\i*64*2)
3574*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_horz_dct_64x4_neon
        // Fetch the threshold for the next group; none follows i = 12.
3575*c0909341SAndroid Build Coastguard Worker.if \i < 12
3576*c0909341SAndroid Build Coastguard Worker        ldrh            w12, [x13], #2
3577*c0909341SAndroid Build Coastguard Worker.endif
3578*c0909341SAndroid Build Coastguard Worker.endr
3579*c0909341SAndroid Build Coastguard Worker        b               3f
3580*c0909341SAndroid Build Coastguard Worker
        // Early-out path: clear the rest of the intermediate buffer.
3581*c0909341SAndroid Build Coastguard Worker1:
3582*c0909341SAndroid Build Coastguard Worker        movi            v4.8h,  #0
3583*c0909341SAndroid Build Coastguard Worker        movi            v5.8h,  #0
3584*c0909341SAndroid Build Coastguard Worker        movi            v6.8h,  #0
3585*c0909341SAndroid Build Coastguard Worker        movi            v7.8h,  #0
        // Each iteration stores 4*64 bytes and takes 2 off w8, so in total
        // (16 - i)*128 bytes are zeroed: from x6 up to the end of the
        // 64*16*2-byte buffer.
3586*c0909341SAndroid Build Coastguard Worker2:
3587*c0909341SAndroid Build Coastguard Worker        subs            w8,  w8,  #2
3588*c0909341SAndroid Build Coastguard Worker.rept 4
3589*c0909341SAndroid Build Coastguard Worker        st1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x6], #64
3590*c0909341SAndroid Build Coastguard Worker.endr
3591*c0909341SAndroid Build Coastguard Worker        b.gt            2b
3592*c0909341SAndroid Build Coastguard Worker
        // Second pass, once per group of 8 columns (i = 0, 8, ..., 56):
        // x6 = x0 + i*2 and x7 = x4 + i*2 select the same columns in both
        // areas, x8 = 64*2 is the row pitch of the intermediate buffer in
        // bytes. x5 holds the address of the 16-point DCT routine, presumably
        // called indirectly by inv_txfm_add_vert_8x16_neon - confirm.
3593*c0909341SAndroid Build Coastguard Worker3:
3594*c0909341SAndroid Build Coastguard Worker        movrel          x5,  X(inv_dct_8h_x16_neon)
3595*c0909341SAndroid Build Coastguard Worker.irp i, 0, 8, 16, 24, 32, 40, 48, 56
3596*c0909341SAndroid Build Coastguard Worker        add             x6,  x0,  #(\i*2)
3597*c0909341SAndroid Build Coastguard Worker        add             x7,  x4,  #(\i*2)
3598*c0909341SAndroid Build Coastguard Worker        mov             x8,  #64*2
3599*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_add_vert_8x16_neon
3600*c0909341SAndroid Build Coastguard Worker.endr
3601*c0909341SAndroid Build Coastguard Worker
        // x4 + 64*16*2 is sp + 64*4*4 + 64*16*2, i.e. the amount passed to
        // sub_sp above; return through the saved link register.
3602*c0909341SAndroid Build Coastguard Worker        add             sp,  x4,  #64*16*2
3603*c0909341SAndroid Build Coastguard Worker        ret             x15
3604*c0909341SAndroid Build Coastguard Workerendfunc
3605*c0909341SAndroid Build Coastguard Worker
// Inverse DCT/DCT transform and add for a 16x64 block, 16 bpc (going by the
// name). Register roles as used below; the C-level meaning of the incoming
// arguments is not visible in this part of the file - TODO confirm against
// the prototype:
//   x0       base of the area handed to the final add pass
//   x2       base of the coefficient reads in the first pass
//   w3       compared against the eob_16x32 thresholds (presumably the eob)
//   x4       address of inv_dct_4s_x16_neon
//   x5       16*32*2-byte intermediate buffer on the stack
//   x13/w12  cursor into eob_16x32 / current threshold
//   x15      return address, kept here because the bl calls overwrite x30
3606*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_dct_dct_16x64_16bpc_neon, export=1
        // idct_dc is a macro defined outside this chunk; presumably it handles
        // the DC-only case and returns early - TODO confirm.
3607*c0909341SAndroid Build Coastguard Worker        idct_dc         16,  64,  2
3608*c0909341SAndroid Build Coastguard Worker
3609*c0909341SAndroid Build Coastguard Worker        mov             x15, x30
3610*c0909341SAndroid Build Coastguard Worker
        // Frame layout: 64*8*2 bytes at sp, followed by the 16*32*2-byte
        // intermediate buffer that x5 points to.
        // NOTE(review): the low 64*8*2 bytes look like scratch for the
        // 64-point helpers called in the second pass - confirm.
3611*c0909341SAndroid Build Coastguard Worker        sub_sp          16*32*2+64*8*2
3612*c0909341SAndroid Build Coastguard Worker        add             x5,  sp, #64*8*2
3613*c0909341SAndroid Build Coastguard Worker
        // Load the first threshold; x13 is post-incremented past it.
3614*c0909341SAndroid Build Coastguard Worker        movrel          x13, eob_16x32
3615*c0909341SAndroid Build Coastguard Worker        ldrh            w12, [x13], #2
3616*c0909341SAndroid Build Coastguard Worker
        // x4 = address of the 16-point routine, presumably called indirectly
        // by inv_txfm_horz_16x4_neon - confirm against its definition.
3617*c0909341SAndroid Build Coastguard Worker        adr             x4,  inv_dct_4s_x16_neon
        // First pass, unrolled per group i = 0, 4, ..., 28. x6 = x5 + i*16*2
        // is the position in the intermediate buffer for this group. Every
        // group except the first is skipped, together with all later ones,
        // when w3 is below the current threshold; w8 = 32 - i then tells the
        // zero-fill loop at 1: how much of the buffer is still unwritten.
3618*c0909341SAndroid Build Coastguard Worker.irp i, 0, 4, 8, 12, 16, 20, 24, 28
3619*c0909341SAndroid Build Coastguard Worker        add             x6,  x5,  #(\i*16*2)
3620*c0909341SAndroid Build Coastguard Worker.if \i > 0
3621*c0909341SAndroid Build Coastguard Worker        mov             w8,  #(32 - \i)
3622*c0909341SAndroid Build Coastguard Worker        cmp             w3,  w12
3623*c0909341SAndroid Build Coastguard Worker        b.lt            1f
        // Fetch the threshold for the next group; no comparison follows the
        // last group, so nothing is loaded for i = 28.
3624*c0909341SAndroid Build Coastguard Worker.if \i < 28
3625*c0909341SAndroid Build Coastguard Worker        ldrh            w12, [x13], #2
3626*c0909341SAndroid Build Coastguard Worker.endif
3627*c0909341SAndroid Build Coastguard Worker.endif
        // x7 = read position for this group, x8 = 32*4 (presumably the
        // byte stride used by the helper - confirm against its definition).
3628*c0909341SAndroid Build Coastguard Worker        add             x7,  x2,  #(\i*4)
3629*c0909341SAndroid Build Coastguard Worker        mov             x8,  #32*4
3630*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_horz_16x4_neon
3631*c0909341SAndroid Build Coastguard Worker.endr
3632*c0909341SAndroid Build Coastguard Worker        b               3f
3633*c0909341SAndroid Build Coastguard Worker
        // Early-out path: clear the rest of the intermediate buffer.
3634*c0909341SAndroid Build Coastguard Worker1:
3635*c0909341SAndroid Build Coastguard Worker        movi            v4.8h,  #0
3636*c0909341SAndroid Build Coastguard Worker        movi            v5.8h,  #0
3637*c0909341SAndroid Build Coastguard Worker        movi            v6.8h,  #0
3638*c0909341SAndroid Build Coastguard Worker        movi            v7.8h,  #0
        // Each iteration stores 2*64 bytes and takes 4 off w8, so in total
        // (32 - i)*32 bytes are zeroed: from x6 up to the end of the
        // 16*32*2-byte buffer.
3639*c0909341SAndroid Build Coastguard Worker2:
3640*c0909341SAndroid Build Coastguard Worker        subs            w8,  w8,  #4
3641*c0909341SAndroid Build Coastguard Worker.rept 2
3642*c0909341SAndroid Build Coastguard Worker        st1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x6], #64
3643*c0909341SAndroid Build Coastguard Worker.endr
3644*c0909341SAndroid Build Coastguard Worker        b.gt            2b
3645*c0909341SAndroid Build Coastguard Worker
        // Second pass, once per group of 8 columns (i = 0, 8): x7 = x5 + i*2
        // selects the columns in the intermediate buffer with x8 = 16*2 as
        // the row pitch in bytes, then x6 = x0 + i*2 is the matching position
        // for the add pass.
3646*c0909341SAndroid Build Coastguard Worker3:
3647*c0909341SAndroid Build Coastguard Worker.irp i, 0, 8
3648*c0909341SAndroid Build Coastguard Worker        add             x7,  x5,  #(\i*2)
3649*c0909341SAndroid Build Coastguard Worker        mov             x8,  #16*2
3650*c0909341SAndroid Build Coastguard Worker        bl              X(inv_txfm_dct_8h_x64_neon)
3651*c0909341SAndroid Build Coastguard Worker        add             x6,  x0,  #(\i*2)
3652*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_add_vert_dct_8x64_neon
3653*c0909341SAndroid Build Coastguard Worker.endr
3654*c0909341SAndroid Build Coastguard Worker
        // x5 + 16*32*2 is sp + 64*8*2 + 16*32*2, i.e. the amount passed to
        // sub_sp above; return through the saved link register.
3655*c0909341SAndroid Build Coastguard Worker        add             sp,  x5,  #16*32*2
3656*c0909341SAndroid Build Coastguard Worker        ret             x15
3657*c0909341SAndroid Build Coastguard Workerendfunc
3658