xref: /aosp_15_r20/external/libdav1d/src/arm/32/itx16.S (revision c09093415860a1c2373dacd84c4fde00c507cdfd)
1*c0909341SAndroid Build Coastguard Worker/******************************************************************************
2*c0909341SAndroid Build Coastguard Worker * Copyright © 2018, VideoLAN and dav1d authors
3*c0909341SAndroid Build Coastguard Worker * Copyright © 2020, Martin Storsjo
4*c0909341SAndroid Build Coastguard Worker * All rights reserved.
5*c0909341SAndroid Build Coastguard Worker *
6*c0909341SAndroid Build Coastguard Worker * Redistribution and use in source and binary forms, with or without
7*c0909341SAndroid Build Coastguard Worker * modification, are permitted provided that the following conditions are met:
8*c0909341SAndroid Build Coastguard Worker *
9*c0909341SAndroid Build Coastguard Worker * 1. Redistributions of source code must retain the above copyright notice, this
10*c0909341SAndroid Build Coastguard Worker *    list of conditions and the following disclaimer.
11*c0909341SAndroid Build Coastguard Worker *
12*c0909341SAndroid Build Coastguard Worker * 2. Redistributions in binary form must reproduce the above copyright notice,
13*c0909341SAndroid Build Coastguard Worker *    this list of conditions and the following disclaimer in the documentation
14*c0909341SAndroid Build Coastguard Worker *    and/or other materials provided with the distribution.
15*c0909341SAndroid Build Coastguard Worker *
16*c0909341SAndroid Build Coastguard Worker * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17*c0909341SAndroid Build Coastguard Worker * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18*c0909341SAndroid Build Coastguard Worker * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19*c0909341SAndroid Build Coastguard Worker * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20*c0909341SAndroid Build Coastguard Worker * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21*c0909341SAndroid Build Coastguard Worker * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22*c0909341SAndroid Build Coastguard Worker * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23*c0909341SAndroid Build Coastguard Worker * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24*c0909341SAndroid Build Coastguard Worker * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25*c0909341SAndroid Build Coastguard Worker * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26*c0909341SAndroid Build Coastguard Worker *****************************************************************************/
27*c0909341SAndroid Build Coastguard Worker
28*c0909341SAndroid Build Coastguard Worker#include "src/arm/asm.S"
29*c0909341SAndroid Build Coastguard Worker#include "util.S"
30*c0909341SAndroid Build Coastguard Worker
31*c0909341SAndroid Build Coastguard Worker// The exported functions in this file have got the following signature:
32*c0909341SAndroid Build Coastguard Worker// void itxfm_add(pixel *dst, ptrdiff_t dst_stride, coef *coeff, int eob);
33*c0909341SAndroid Build Coastguard Worker
34*c0909341SAndroid Build Coastguard Worker// Most of the functions use the following register layout:
35*c0909341SAndroid Build Coastguard Worker// r0-r3   external parameters
36*c0909341SAndroid Build Coastguard Worker// r4      function pointer to first transform
37*c0909341SAndroid Build Coastguard Worker// r5      function pointer to second transform
38*c0909341SAndroid Build Coastguard Worker// r6      output parameter for helper function
39*c0909341SAndroid Build Coastguard Worker// r7      input parameter for helper function
40*c0909341SAndroid Build Coastguard Worker// r8      input stride for helper function
41*c0909341SAndroid Build Coastguard Worker// r9      scratch variable for helper functions
42*c0909341SAndroid Build Coastguard Worker// r10-r11 pointer to list of eob thresholds, eob threshold value,
43*c0909341SAndroid Build Coastguard Worker//         scratch variables within helper functions (backed up)
44*c0909341SAndroid Build Coastguard Worker
// The SIMD registers most often use the following layout:
// d0-d3   multiplication coefficients
// d4-d7   scratch registers
// d8-d15  unused in some transforms, used for scratch registers in others
// d16-d31 inputs/outputs of transforms
50*c0909341SAndroid Build Coastguard Worker
51*c0909341SAndroid Build Coastguard Worker// Potential further optimizations, that are left unimplemented for now:
52*c0909341SAndroid Build Coastguard Worker// - Trying to keep multiplication coefficients in registers across multiple
53*c0909341SAndroid Build Coastguard Worker//   transform functions. (The register layout is designed to potentially
54*c0909341SAndroid Build Coastguard Worker//   allow this.)
55*c0909341SAndroid Build Coastguard Worker// - Use a simplified version of the transforms themselves for cases where
56*c0909341SAndroid Build Coastguard Worker//   we know a significant number of inputs are zero. E.g. if the eob value
57*c0909341SAndroid Build Coastguard Worker//   indicates only a quarter of input values are set, for idct16 and up,
58*c0909341SAndroid Build Coastguard Worker//   a significant amount of calculation can be skipped, at the cost of more
59*c0909341SAndroid Build Coastguard Worker//   code duplication and special casing.
60*c0909341SAndroid Build Coastguard Worker
// mov_const reg, val
// Load the 32 bit constant val into the core register reg.
//  - Thumb builds (CONFIG_THUMB set): a single wide mov.w with val as the
//    immediate, for constants that Thumb can express in one instruction.
//  - ARM builds: one movw+movt pair, i.e. two instructions; movw supplies
//    bits 0-15 of val and movt supplies bits 16-31.
.macro mov_const reg, val
#if !CONFIG_THUMB
        movw            \reg, #((\val) & 0xffff)
        movt            \reg, #(((\val) >> 16) & 0xffff)
#else
        mov.w           \reg, #\val
#endif
.endm
71*c0909341SAndroid Build Coastguard Worker
// Constant table for the inverse DCTs: 32 words, four per row, grouped by
// the transform size named in front of each group.
// Word 1 holds 2896*8*(1<<16), the same pre-scaled form of 2896 that idct_dc
// builds with mov_const and feeds to vqrdmulh.s32; all other words are plain.
const idct_coeffs, align=4
        // idct4  (words 0-3)
        .int            2896, 2896*8*(1<<16), 1567, 3784
        // idct8  (words 4-7)
        .int            799, 4017, 3406, 2276
        // idct16 (words 8-15)
        .int            401, 4076, 3166, 2598
        .int            1931, 3612, 3920, 1189
        // idct32 (words 16-31)
        .int            201, 4091, 3035, 2751
        .int            1751, 3703, 3857, 1380
        .int            995, 3973, 3513, 2106
        .int            2440, 3290, 4052, 601
endconst
86*c0909341SAndroid Build Coastguard Worker
// Constant table with four groups of twelve words each.
// In every group the first two rows (eight words) are stored pre-scaled as
// c*8*(1<<16), and the third row (four words) holds plain values.
// NOTE(review): by its name this table serves the 64 point inverse DCT; the
// code that reads it is outside this part of the file.
const idct64_coeffs, align=4
        // group 0
        .int            101*8*(1<<16), 4095*8*(1<<16), 2967*8*(1<<16), -2824*8*(1<<16)
        .int            1660*8*(1<<16), 3745*8*(1<<16), 3822*8*(1<<16), -1474*8*(1<<16)
        .int            4076, 401, 4017, 799

        // group 1
        .int            4036*8*(1<<16), -700*8*(1<<16), 2359*8*(1<<16), 3349*8*(1<<16)
        .int            3461*8*(1<<16), -2191*8*(1<<16), 897*8*(1<<16), 3996*8*(1<<16)
        .int            -3166, -2598, -799, -4017

        // group 2
        .int            501*8*(1<<16), 4065*8*(1<<16), 3229*8*(1<<16), -2520*8*(1<<16)
        .int            2019*8*(1<<16), 3564*8*(1<<16), 3948*8*(1<<16), -1092*8*(1<<16)
        .int            3612, 1931, 2276, 3406

        // group 3
        .int            4085*8*(1<<16), -301*8*(1<<16), 2675*8*(1<<16), 3102*8*(1<<16)
        .int            3659*8*(1<<16), -1842*8*(1<<16), 1285*8*(1<<16), 3889*8*(1<<16)
        .int            -3920, -1189, -3406, -2276
endconst
104*c0909341SAndroid Build Coastguard Worker
// Four word constant table.
// NOTE(review): by its name this feeds the 4 point inverse ADST; the code
// that reads it is outside this part of the file.
const iadst4_coeffs, align=4
        .int            1321, 3803, 2482, 3344
endconst
108*c0909341SAndroid Build Coastguard Worker
// Twelve word constant table: two rows of table specific values followed by
// one row that repeats the idct4 row of idct_coeffs, except that word 1 is 0
// here instead of the pre-scaled 2896.
// NOTE(review): by its name this feeds the 8 point inverse ADST; the code
// that reads it is outside this part of the file.
const iadst8_coeffs, align=4
        .int            4076, 401, 3612, 1931
        .int            2598, 3166, 1189, 3920
        // idct_coeffs
        .int            2896, 0, 1567, 3784
endconst
115*c0909341SAndroid Build Coastguard Worker
// Sixteen word constant table, four words per row.
// NOTE(review): by its name this feeds the 16 point inverse ADST; the code
// that reads it is outside this part of the file.
const iadst16_coeffs, align=4
        .int            4091, 201, 3973, 995
        .int            3703, 1751, 3290, 2440
        .int            2751, 3035, 2106, 3513
        .int            1380, 3857, 601, 4052
endconst
122*c0909341SAndroid Build Coastguard Worker
// vmul_vmla d0, s0, s1, c0, c1
// Per 32 bit lane: d0 = s0 * c0 + s1 * c1.
// d0 is written by the multiply before s1 and c1 are read by the accumulate,
// so d0 must be a different register from s1 and c1.
.macro vmul_vmla d0, s0, s1, c0, c1
        vmul.i32        \d0, \s0, \c0           // d0  = s0 * c0
        vmla.i32        \d0, \s1, \c1           // d0 += s1 * c1
.endm
127*c0909341SAndroid Build Coastguard Worker
// vmul_vmls d0, s0, s1, c0, c1
// Per 32 bit lane: d0 = s0 * c0 - s1 * c1.
// d0 is written by the multiply before s1 and c1 are read by the subtract,
// so d0 must be a different register from s1 and c1.
.macro vmul_vmls d0, s0, s1, c0, c1
        vmul.i32        \d0, \s0, \c0           // d0  = s0 * c0
        vmls.i32        \d0, \s1, \c1           // d0 -= s1 * c1
.endm
132*c0909341SAndroid Build Coastguard Worker
// scale_input c, r0, r1 [, r2, r3 [, r4, r5, r6, r7]]
// Multiply each listed vector register in place by the scalar operand c using
// vqrdmulh.s32 (saturating, rounding, doubling multiply returning the high
// half). The names r0-r7 are macro parameters that receive SIMD registers,
// not the core registers of the same name.
// Registers are handled in fixed groups:
//   r0, r1           always
//   r2, r3           only when r2 is given
//   r4, r5, r6, r7   only when r4 is given
// The parameter list is fully comma separated, like every other macro here.
.macro scale_input c, r0, r1, r2, r3, r4, r5, r6, r7
        vqrdmulh.s32    \r0, \r0, \c
        vqrdmulh.s32    \r1, \r1, \c
.ifnb \r2
        vqrdmulh.s32    \r2, \r2, \c
        vqrdmulh.s32    \r3, \r3, \c
.endif
.ifnb \r4
        vqrdmulh.s32    \r4, \r4, \c
        vqrdmulh.s32    \r5, \r5, \c
        vqrdmulh.s32    \r6, \r6, \c
        vqrdmulh.s32    \r7, \r7, \c
.endif
.endm
147*c0909341SAndroid Build Coastguard Worker
// load_add_store load, shift, addsrc, adddst, max, min, store, dst, src, shiftbits=4
// One step of a software pipelined "add residual to pixels" sequence for
// rows of eight 16 bit pixels (one q register per row). Each stage is emitted
// only when its argument is non-blank, always in this order:
//   load    q register filled from [src], src advanced by r1
//   shift   q register rounded and shifted right by shiftbits
//   addsrc  adddst += addsrc, saturating
//   max     q register clamped from below by q6
//   min     q register clamped from above by q7
//   store   q register written to [dst], dst advanced by r1
// Different stages of one invocation normally name different rows; see the
// load_add_store_8x8 and load_add_store_8x4 tables.
// Expects r1 = row stride and q6/q7 = clamp bounds, set up by the caller.
.macro load_add_store load, shift, addsrc, adddst, max, min, store, dst, src, shiftbits=4
.ifnb \load
        vld1.16         {\load},  [\src, :128], r1
.endif
.ifnb \shift
        vrshr.s16       \shift,  \shift,  #\shiftbits
.endif
.ifnb \addsrc
        vqadd.s16       \adddst, \adddst, \addsrc
.endif
.ifnb \max
        vmax.s16        \max, \max, q6
.endif
.ifnb \min
        vmin.s16        \min, \min, q7
.endif
.ifnb \store
        vst1.16         {\store},  [\dst, :128], r1
.endif
.endm
// load_add_store_8x8 dst, src, shiftbits=4
// Add the eight residual rows held in q8-q15 to eight rows of eight 16 bit
// pixels at dst, clamp each result to [0, 0x3ff] and write it back.
// src is overwritten with a copy of dst and used as the read pointer; both
// pointers advance by r1 per row. Clobbers q0-q7 (q0-q5 hold loaded pixel
// rows, q6/q7 the clamp bounds) and leaves the written rows in q8-q15.
// The table below is a pipeline: each line starts a new row and moves the
// earlier rows one stage further, so the row order must not be changed.
.macro load_add_store_8x8 dst, src, shiftbits=4
        mov             \src, \dst
        vmov.i16        q6,  #0                 // lower clamp bound
        vmvn.i16        q7,  #0xfc00 // 0x3ff   // upper clamp bound
        //              load shift add: src dst max  min  store
        load_add_store  q0,  q8,    ,    ,    ,    ,    ,  \dst, \src, \shiftbits
        load_add_store  q1,  q9,    ,    ,    ,    ,    ,  \dst, \src, \shiftbits
        load_add_store  q2,  q10, q0,  q8,    ,    ,    ,  \dst, \src, \shiftbits
        load_add_store  q3,  q11, q1,  q9,  q8,    ,    ,  \dst, \src, \shiftbits
        load_add_store  q4,  q12, q2,  q10, q9,  q8,    ,  \dst, \src, \shiftbits
        load_add_store  q5,  q13, q3,  q11, q10, q9,  q8,  \dst, \src, \shiftbits
        load_add_store  q0,  q14, q4,  q12, q11, q10, q9,  \dst, \src, \shiftbits
        load_add_store  q1,  q15, q5,  q13, q12, q11, q10, \dst, \src, \shiftbits
        load_add_store    ,     , q0,  q14, q13, q12, q11, \dst, \src, \shiftbits
        load_add_store    ,     , q1,  q15, q14, q13, q12, \dst, \src, \shiftbits
        load_add_store    ,     ,   ,     , q15, q14, q13, \dst, \src, \shiftbits
        load_add_store    ,     ,   ,     ,    , q15, q14, \dst, \src, \shiftbits
        load_add_store    ,     ,   ,     ,    ,    , q15, \dst, \src, \shiftbits
.endm
// load_add_store_8x4 dst, src, shiftbits=4
// Add the four residual rows held in q8-q11 to four rows of eight 16 bit
// pixels at dst, clamp each result to [0, 0x3ff] and write it back.
// src is overwritten with a copy of dst and used as the read pointer; both
// pointers advance by r1 per row. Clobbers q0-q3 (loaded pixel rows) and
// q6/q7 (clamp bounds), and leaves the written rows in q8-q11.
// The table below is a pipeline: each line starts a new row and moves the
// earlier rows one stage further, so the row order must not be changed.
.macro load_add_store_8x4 dst, src, shiftbits=4
        mov             \src, \dst
        vmov.i16        q6,  #0                 // lower clamp bound
        vmvn.i16        q7,  #0xfc00 // 0x3ff   // upper clamp bound
        //              load shift add: src dst max  min  store
        load_add_store  q0,  q8,    ,    ,    ,    ,    ,  \dst, \src, \shiftbits
        load_add_store  q1,  q9,    ,    ,    ,    ,    ,  \dst, \src, \shiftbits
        load_add_store  q2,  q10, q0,  q8,    ,    ,    ,  \dst, \src, \shiftbits
        load_add_store  q3,  q11, q1,  q9,  q8,    ,    ,  \dst, \src, \shiftbits
        load_add_store    ,     , q2,  q10, q9,  q8,    ,  \dst, \src, \shiftbits
        load_add_store    ,     , q3,  q11, q10, q9,  q8,  \dst, \src, \shiftbits
        load_add_store    ,     ,   ,     , q11, q10, q9,  \dst, \src, \shiftbits
        load_add_store    ,     ,   ,     ,    , q11, q10, \dst, \src, \shiftbits
        load_add_store    ,     ,   ,     ,    ,    , q11, \dst, \src, \shiftbits
.endm
// load_add_store4 load1, load2, shift, addsrc, adddst, max, min, store1, store2, dst, src, shiftbits=4
// One step of a software pipelined "add residual to pixels" sequence for
// rows of four 16 bit pixels, two rows per q register (one row per d half).
// Each stage is emitted only when its argument is non-blank, in this order:
//   load1   d register filled from [src], src advanced by r1
//   shift   q register rounded and shifted right by shiftbits
//   load2   d register filled from [src], src advanced by r1
//   addsrc  adddst += addsrc, saturating
//   max     q register clamped from below by q6
//   store1  d register written to [dst], dst advanced by r1
//   min     q register clamped from above by q7
//   store2  d register written to [dst], dst advanced by r1
// store1 is issued before min, so the stored pair must belong to a row pair
// whose min was done by an earlier invocation (as in the 4xN tables).
// Expects r1 = row stride and q6/q7 = clamp bounds, set up by the caller.
.macro load_add_store4 load1, load2, shift, addsrc, adddst, max, min, store1, store2, dst, src, shiftbits=4
.ifnb \load1
        vld1.16         {\load1},  [\src, :64], r1
.endif
.ifnb \shift
        vrshr.s16       \shift,  \shift,  #\shiftbits
.endif
.ifnb \load2
        vld1.16         {\load2},  [\src, :64], r1
.endif
.ifnb \addsrc
        vqadd.s16       \adddst, \adddst, \addsrc
.endif
.ifnb \max
        vmax.s16        \max, \max, q6
.endif
.ifnb \store1
        vst1.16         {\store1},  [\dst, :64], r1
.endif
.ifnb \min
        vmin.s16        \min, \min, q7
.endif
.ifnb \store2
        vst1.16         {\store2},  [\dst, :64], r1
.endif
.endm
// load_add_store_4x16 dst, src
// Add the sixteen residual rows held in q8-q15 (two rows of four per q
// register) to sixteen rows of four 16 bit pixels at dst, clamp each result
// to [0, 0x3ff] and write it back. The residual is rounded and shifted right
// by the load_add_store4 default of 4 bits.
// src is overwritten with a copy of dst and used as the read pointer; both
// pointers advance by r1 per row. Clobbers q0-q7 (q0-q5 hold loaded pixel
// rows, q6/q7 the clamp bounds) and leaves the written rows in q8-q15.
// The table below is a pipeline: each line starts a new row pair and moves
// the earlier pairs one stage further, so the row order must not be changed.
.macro load_add_store_4x16 dst, src
        mov             \src, \dst              // copied once; nothing below needs it repeated
        vmov.i16        q6,  #0                 // lower clamp bound
        vmvn.i16        q7,  #0xfc00 // 0x3ff   // upper clamp bound
        load_add_store4 d0,  d1,  q8,    ,    ,    ,    ,     ,    , \dst, \src
        load_add_store4 d2,  d3,  q9,    ,    ,    ,    ,     ,    , \dst, \src
        load_add_store4 d4,  d5,  q10, q0,  q8,    ,    ,     ,    , \dst, \src
        load_add_store4 d6,  d7,  q11, q1,  q9,  q8,    ,     ,    , \dst, \src
        load_add_store4 d8,  d9,  q12, q2,  q10, q9,  q8,     ,    , \dst, \src
        load_add_store4 d10, d11, q13, q3,  q11, q10, q9,  d16, d17, \dst, \src
        load_add_store4 d0,  d1,  q14, q4,  q12, q11, q10, d18, d19, \dst, \src
        load_add_store4 d2,  d3,  q15, q5,  q13, q12, q11, d20, d21, \dst, \src
        load_add_store4   ,    ,    ,  q0,  q14, q13, q12, d22, d23, \dst, \src
        load_add_store4   ,    ,    ,  q1,  q15, q14, q13, d24, d25, \dst, \src
        load_add_store4   ,    ,    ,    ,   ,   q15, q14, d26, d27, \dst, \src
        load_add_store4   ,    ,    ,    ,   ,     ,  q15, d28, d29, \dst, \src
        load_add_store4   ,    ,    ,    ,   ,     ,     , d30, d31, \dst, \src
.endm
// load_add_store_4x8 dst, src, shiftbits=4
// Add the eight residual rows held in q8-q11 (two rows of four per q
// register) to eight rows of four 16 bit pixels at dst, clamp each result to
// [0, 0x3ff] and write it back. The residual is rounded and shifted right by
// shiftbits first.
// src is overwritten with a copy of dst and used as the read pointer; both
// pointers advance by r1 per row. Clobbers q0-q3 (loaded pixel rows) and
// q6/q7 (clamp bounds), and leaves the written rows in q8-q11.
// The table below is a pipeline: each line starts a new row pair and moves
// the earlier pairs one stage further, so the row order must not be changed.
.macro load_add_store_4x8 dst, src, shiftbits=4
        mov             \src, \dst              // copied once; nothing below needs it repeated
        vmov.i16        q6,  #0                 // lower clamp bound
        vmvn.i16        q7,  #0xfc00 // 0x3ff   // upper clamp bound
        load_add_store4 d0,  d1,  q8,    ,    ,    ,    ,     ,    , \dst, \src, \shiftbits
        load_add_store4 d2,  d3,  q9,    ,    ,    ,    ,     ,    , \dst, \src, \shiftbits
        load_add_store4 d4,  d5,  q10, q0,  q8,    ,    ,     ,    , \dst, \src, \shiftbits
        load_add_store4 d6,  d7,  q11, q1,  q9,  q8,    ,     ,    , \dst, \src, \shiftbits
        load_add_store4   ,    ,     , q2,  q10, q9,  q8,     ,    , \dst, \src, \shiftbits
        load_add_store4   ,    ,     , q3,  q11, q10, q9,  d16, d17, \dst, \src, \shiftbits
        load_add_store4   ,    ,     ,   ,     , q11, q10, d18, d19, \dst, \src, \shiftbits
        load_add_store4   ,    ,     ,   ,     ,    , q11, d20, d21, \dst, \src, \shiftbits
        load_add_store4   ,    ,     ,   ,     ,    ,    , d22, d23, \dst, \src, \shiftbits
.endm
// load_add_store_4x4 dst, src, shiftbits=4
// Add the four residual rows held in q8-q9 (two rows of four per q register)
// to four rows of four 16 bit pixels at dst, clamp each result to [0, 0x3ff]
// and write it back. The residual is rounded and shifted right by shiftbits
// first.
// src is overwritten with a copy of dst and used as the read pointer; both
// pointers advance by r1 per row. Clobbers q0-q1 (loaded pixel rows) and
// q6/q7 (clamp bounds), and leaves the written rows in q8-q9.
// The table below is a pipeline: each line moves the two row pairs one stage
// further, so the row order must not be changed.
.macro load_add_store_4x4 dst, src, shiftbits=4
        mov             \src, \dst              // copied once; nothing below needs it repeated
        vmov.i16        q6,  #0                 // lower clamp bound
        vmvn.i16        q7,  #0xfc00 // 0x3ff   // upper clamp bound
        load_add_store4 d0,  d1,  q8,   ,   ,   ,   ,    ,    , \dst, \src, \shiftbits
        load_add_store4 d2,  d3,  q9, q0, q8,   ,   ,    ,    , \dst, \src, \shiftbits
        load_add_store4   ,    ,    , q1, q9, q8,   ,    ,    , \dst, \src, \shiftbits
        load_add_store4   ,    ,    ,   ,   , q9, q8,    ,    , \dst, \src, \shiftbits
        load_add_store4   ,    ,    ,   ,   ,   , q9, d16, d17, \dst, \src, \shiftbits
        load_add_store4   ,    ,    ,   ,   ,   ,   , d18, d19, \dst, \src, \shiftbits
.endm
272*c0909341SAndroid Build Coastguard Worker
// idct_dc w, h, shift
// DC-only shortcut for a w x h transform, using the external register layout
// described at the top of the file (r0 = dst, r1 = dst_stride, r2 = coeff,
// r3 = eob).
// If r3 is nonzero nothing is done and execution continues right after the
// macro (local label 1). If r3 is zero, the first coefficient is read and
// cleared in memory, scaled, and control branches to idct_dc_w<w>_neon with
//   q12 = scaled DC value in all eight 16 bit lanes
//   q14 = 0
//   r3  = h
// Clobbers r12, d0, q12, q13 and q14 on that path.
.macro idct_dc w, h, shift
        cmp             r3,  #0
        bne             1f
        vmov.i16        q14, #0
        mov_const       r12, 2896*8*(1<<16)
        // Fetch the 32 bit DC coefficient into every lane of q12.
        vld1.32         {d24[], d25[]}, [r2, :32]
        vdup.32         d0,  r12
        vqrdmulh.s32    q13, q12, d0[0]
        // Write a zero word (lane 0 of q14) back over the coefficient.
        vst1.32         {d28[0]}, [r2, :32]
.if (\w == 2*\h) || (2*\w == \h)
        // Blocks with a 2:1 or 1:2 aspect ratio get a second scaling pass.
        vqrdmulh.s32    q13, q13, d0[0]
.endif
        // Narrow to 16 bit with saturation; both halves of q12 get the same
        // four lanes, so all eight lanes end up equal.
.if \shift > 0
        vqrshrn.s32     d24, q13, #\shift
        vqrshrn.s32     d25, q13, #\shift
.else
        vqmovn.s32      d24, q13
        vqmovn.s32      d25, q13
.endif
        // 16 bit lane 1 of d0 is the upper half of the 32 bit constant.
        vqrdmulh.s16    q12, q12, d0[1]
        mov             r3,  #\h
        vrshr.s16       q12, q12, #4
        b               idct_dc_w\w\()_neon
1:
.endm
298*c0909341SAndroid Build Coastguard Worker
// Add one value to every pixel of a block that is four 16 bit pixels wide,
// clamping each result to [q14, 0x3ff].
// Entered by a branch from the idct_dc macro with:
//   r0 = dst, r1 = row stride, r3 = number of rows,
//   q12 = value to add (all lanes), q14 = lower clamp bound (0).
// Handles four rows per pass; the loop body runs at least once and repeats
// while the remaining row count is still positive. Clobbers q0, q1 and q15.
function idct_dc_w4_neon
        vmvn.i16        q15, #0xfc00 // 0x3ff
1:
        // One row per d register: q0 = rows 0-1, q1 = rows 2-3.
.irp row, d0, d1, d2, d3
        vld1.16         {\row}, [r0, :64], r1
.endr
        subs            r3,  r3,  #4            // sets the flags used by bgt below
        vqadd.s16       q0,  q0,  q12
        sub             r0,  r0,  r1, lsl #2    // step r0 back to the first of the four rows
        vqadd.s16       q1,  q1,  q12
        vmax.s16        q0,  q0,  q14
        vmax.s16        q1,  q1,  q14
        vmin.s16        q0,  q0,  q15
        vst1.16         {d0}, [r0, :64], r1
        vmin.s16        q1,  q1,  q15
.irp row, d1, d2, d3
        vst1.16         {\row}, [r0, :64], r1
.endr
        bgt             1b
        bx              lr
endfunc
321*c0909341SAndroid Build Coastguard Worker
// Add one value to every pixel of a block that is eight 16 bit pixels wide,
// clamping each result to [q14, 0x3ff].
// Entered by a branch from the idct_dc macro with:
//   r0 = dst, r1 = row stride, r3 = number of rows,
//   q12 = value to add (all lanes), q14 = lower clamp bound (0).
// Handles four rows per pass (one q register per row); the loop body runs at
// least once and repeats while the remaining row count is still positive.
// Clobbers q0-q3 and q15.
function idct_dc_w8_neon
        vmvn.i16        q15, #0xfc00 // 0x3ff
1:
        vld1.16         {q0}, [r0, :128], r1
        subs            r3,  r3,  #4            // sets the flags used by bgt below
        vld1.16         {q1}, [r0, :128], r1
        vqadd.s16       q0,  q0,  q12
        vld1.16         {q2}, [r0, :128], r1
        vqadd.s16       q1,  q1,  q12
        vld1.16         {q3}, [r0, :128], r1
        vqadd.s16       q2,  q2,  q12
        vqadd.s16       q3,  q3,  q12
        sub             r0,  r0,  r1, lsl #2    // step r0 back to the first of the four rows
.irp px, q0, q1, q2, q3
        vmax.s16        \px, \px, q14
.endr
        vmin.s16        q0,  q0,  q15
        vmin.s16        q1,  q1,  q15
        vst1.16         {q0}, [r0, :128], r1
        vmin.s16        q2,  q2,  q15
        vst1.16         {q1}, [r0, :128], r1
        vmin.s16        q3,  q3,  q15
        vst1.16         {q2}, [r0, :128], r1
        vst1.16         {q3}, [r0, :128], r1
        bgt             1b
        bx              lr
endfunc
350*c0909341SAndroid Build Coastguard Worker
// Add one value to every pixel of a block that is sixteen 16 bit pixels wide,
// clamping each result to [q14, 0x3ff].
// Entered by a branch from the idct_dc macro with:
//   r0 = dst, r1 = row stride, r3 = number of rows,
//   q12 = value to add (all lanes), q14 = lower clamp bound (0).
// Handles two rows per pass (two q registers per row); the loop body runs at
// least once and repeats while the remaining row count is still positive.
// Clobbers q0-q3 and q15.
function idct_dc_w16_neon
        vmvn.i16        q15, #0xfc00 // 0x3ff
1:
        vld1.16         {q0, q1}, [r0, :128], r1
        subs            r3,  r3,  #2            // sets the flags used by bgt below
        vld1.16         {q2, q3}, [r0, :128], r1
.irp px, q0, q1, q2, q3
        vqadd.s16       \px, \px, q12
.endr
        sub             r0,  r0,  r1, lsl #1    // step r0 back to the first of the two rows
.irp px, q0, q1, q2, q3
        vmax.s16        \px, \px, q14
.endr
.irp px, q0, q1, q2
        vmin.s16        \px, \px, q15
.endr
        vst1.16         {q0, q1}, [r0, :128], r1
        vmin.s16        q3,  q3,  q15
        vst1.16         {q2, q3}, [r0, :128], r1
        bgt             1b
        bx              lr
endfunc
375*c0909341SAndroid Build Coastguard Worker
// idct_dc_w32_neon: add the vector held in q12 to rows of thirty-two 16-bit
// values (64 bytes) and clamp the result, one row per loop iteration.
//   r0  = pointer to the first row (accessed with a :128 alignment hint)
//   r1  = byte offset between consecutive rows (modified, see below)
//   r3  = remaining row count (decremented by 1 per iteration)
//   q12 = value added to every row, q14 = lower clamp bound; both are set up
//         outside this function - presumably the splatted DC value and
//         zero; confirm against the caller.
// The upper clamp bound is the constant 0x3ff built in q15.
// Clobbers q0-q3, q15, r0, r1, r3 and the condition flags.
376*c0909341SAndroid Build Coastguard Workerfunction idct_dc_w32_neon
        // The first store of each row advances r0 by 32 bytes through
        // writeback, so the final post-increment only has to add the rest.
377*c0909341SAndroid Build Coastguard Worker        sub             r1,  r1,  #32
378*c0909341SAndroid Build Coastguard Worker        vmvn.i16        q15, #0xfc00 // 0x3ff
379*c0909341SAndroid Build Coastguard Worker1:
380*c0909341SAndroid Build Coastguard Worker        vld1.16         {q0, q1}, [r0, :128]!
        // Sets the flags consumed by the bgt at the bottom of the loop.
381*c0909341SAndroid Build Coastguard Worker        subs            r3,  r3,  #1
382*c0909341SAndroid Build Coastguard Worker        vld1.16         {q2, q3}, [r0, :128]
383*c0909341SAndroid Build Coastguard Worker        vqadd.s16       q0,  q0,  q12
384*c0909341SAndroid Build Coastguard Worker        vqadd.s16       q1,  q1,  q12
385*c0909341SAndroid Build Coastguard Worker        vqadd.s16       q2,  q2,  q12
386*c0909341SAndroid Build Coastguard Worker        vqadd.s16       q3,  q3,  q12
        // Rewind r0 to the start of the row that was just loaded.
387*c0909341SAndroid Build Coastguard Worker        sub             r0,  r0,  #32
        // Clamp: max against q14, then min against q15.
388*c0909341SAndroid Build Coastguard Worker        vmax.s16        q0,  q0,  q14
389*c0909341SAndroid Build Coastguard Worker        vmax.s16        q1,  q1,  q14
390*c0909341SAndroid Build Coastguard Worker        vmax.s16        q2,  q2,  q14
391*c0909341SAndroid Build Coastguard Worker        vmax.s16        q3,  q3,  q14
392*c0909341SAndroid Build Coastguard Worker        vmin.s16        q0,  q0,  q15
393*c0909341SAndroid Build Coastguard Worker        vmin.s16        q1,  q1,  q15
394*c0909341SAndroid Build Coastguard Worker        vmin.s16        q2,  q2,  q15
395*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1}, [r0, :128]!
396*c0909341SAndroid Build Coastguard Worker        vmin.s16        q3,  q3,  q15
        // r1 was reduced by 32 above, so this lands on the next row start.
397*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3}, [r0, :128], r1
398*c0909341SAndroid Build Coastguard Worker        bgt             1b
399*c0909341SAndroid Build Coastguard Worker        bx              lr
400*c0909341SAndroid Build Coastguard Workerendfunc
401*c0909341SAndroid Build Coastguard Worker
// idct_dc_w64_neon: add the vector held in q12 to rows of sixty-four 16-bit
// values (128 bytes) and clamp the result, one row per loop iteration.
//   r0  = pointer to the first row (accessed with a :128 alignment hint)
//   r1  = byte offset between consecutive rows (modified, see below)
//   r3  = remaining row count (decremented by 1 per iteration)
//   q12 = value added to every row, q14 = lower clamp bound; both are set up
//         outside this function - presumably the splatted DC value and
//         zero; confirm against the caller.
// The upper clamp bound is the constant 0x3ff built in q15.
// Clobbers q0-q3, q8-q11, q15, r0, r1, r3 and the condition flags.
402*c0909341SAndroid Build Coastguard Workerfunction idct_dc_w64_neon
        // Three of the four stores per row advance r0 by 32 bytes each via
        // writeback, so the final post-increment only has to add the rest.
403*c0909341SAndroid Build Coastguard Worker        sub             r1,  r1,  #96
404*c0909341SAndroid Build Coastguard Worker        vmvn.i16        q15, #0xfc00 // 0x3ff
405*c0909341SAndroid Build Coastguard Worker1:
406*c0909341SAndroid Build Coastguard Worker        vld1.16         {q0,  q1},  [r0, :128]!
        // Sets the flags consumed by the bgt at the bottom of the loop.
407*c0909341SAndroid Build Coastguard Worker        subs            r3,  r3,  #1
408*c0909341SAndroid Build Coastguard Worker        vld1.16         {q2,  q3},  [r0, :128]!
409*c0909341SAndroid Build Coastguard Worker        vqadd.s16       q0,  q0,  q12
410*c0909341SAndroid Build Coastguard Worker        vld1.16         {q8,  q9},  [r0, :128]!
411*c0909341SAndroid Build Coastguard Worker        vqadd.s16       q1,  q1,  q12
412*c0909341SAndroid Build Coastguard Worker        vld1.16         {q10, q11}, [r0, :128]
413*c0909341SAndroid Build Coastguard Worker        vqadd.s16       q2,  q2,  q12
414*c0909341SAndroid Build Coastguard Worker        vqadd.s16       q3,  q3,  q12
415*c0909341SAndroid Build Coastguard Worker        vqadd.s16       q8,  q8,  q12
416*c0909341SAndroid Build Coastguard Worker        vqadd.s16       q9,  q9,  q12
417*c0909341SAndroid Build Coastguard Worker        vqadd.s16       q10, q10, q12
418*c0909341SAndroid Build Coastguard Worker        vqadd.s16       q11, q11, q12
        // Rewind r0 to the start of the row that was just loaded.
419*c0909341SAndroid Build Coastguard Worker        sub             r0,  r0,  #96
        // Clamp: max against q14, then min against q15.
420*c0909341SAndroid Build Coastguard Worker        vmax.s16        q0,  q0,  q14
421*c0909341SAndroid Build Coastguard Worker        vmax.s16        q1,  q1,  q14
422*c0909341SAndroid Build Coastguard Worker        vmax.s16        q2,  q2,  q14
423*c0909341SAndroid Build Coastguard Worker        vmax.s16        q3,  q3,  q14
424*c0909341SAndroid Build Coastguard Worker        vmax.s16        q8,  q8,  q14
425*c0909341SAndroid Build Coastguard Worker        vmax.s16        q9,  q9,  q14
426*c0909341SAndroid Build Coastguard Worker        vmax.s16        q10, q10, q14
427*c0909341SAndroid Build Coastguard Worker        vmax.s16        q11, q11, q14
428*c0909341SAndroid Build Coastguard Worker        vmin.s16        q0,  q0,  q15
429*c0909341SAndroid Build Coastguard Worker        vmin.s16        q1,  q1,  q15
430*c0909341SAndroid Build Coastguard Worker        vmin.s16        q2,  q2,  q15
431*c0909341SAndroid Build Coastguard Worker        vmin.s16        q3,  q3,  q15
432*c0909341SAndroid Build Coastguard Worker        vmin.s16        q8,  q8,  q15
433*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0,  q1},  [r0, :128]!
434*c0909341SAndroid Build Coastguard Worker        vmin.s16        q9,  q9,  q15
435*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2,  q3},  [r0, :128]!
436*c0909341SAndroid Build Coastguard Worker        vmin.s16        q10, q10, q15
437*c0909341SAndroid Build Coastguard Worker        vst1.16         {q8,  q9},  [r0, :128]!
438*c0909341SAndroid Build Coastguard Worker        vmin.s16        q11, q11, q15
        // r1 was reduced by 96 above, so this lands on the next row start.
439*c0909341SAndroid Build Coastguard Worker        vst1.16         {q10, q11}, [r0, :128], r1
440*c0909341SAndroid Build Coastguard Worker        bgt             1b
441*c0909341SAndroid Build Coastguard Worker        bx              lr
442*c0909341SAndroid Build Coastguard Workerendfunc
443*c0909341SAndroid Build Coastguard Worker
// iwht4: in-place 4-point butterfly on the 32-bit lanes of q8-q11 (going by
// the name, the inverse Walsh-Hadamard transform). With in0..in3 denoting
// the values of q8..q11 on entry, the results are written back to q8..q11.
// Uses q12 and q13 as scratch registers; all arithmetic is non-saturating.
444*c0909341SAndroid Build Coastguard Worker.macro iwht4
445*c0909341SAndroid Build Coastguard Worker        vadd.i32        q8,  q8,  q9   // a = in0 + in1
446*c0909341SAndroid Build Coastguard Worker        vsub.i32        q13, q10, q11  // b = in2 - in3
447*c0909341SAndroid Build Coastguard Worker        vsub.i32        q12, q8,  q13  // a - b
448*c0909341SAndroid Build Coastguard Worker        vshr.s32        q12, q12, #1   // e = (a - b) >> 1 (arithmetic shift)
449*c0909341SAndroid Build Coastguard Worker        vsub.i32        q10, q12, q9   // q10 = e - in1
450*c0909341SAndroid Build Coastguard Worker        vsub.i32        q9,  q12, q11  // q9  = e - in3
451*c0909341SAndroid Build Coastguard Worker        vadd.i32        q11, q13, q10  // q11 = b + q10
452*c0909341SAndroid Build Coastguard Worker        vsub.i32        q8,  q8,  q9   // q8  = a - q9
453*c0909341SAndroid Build Coastguard Worker.endm
454*c0909341SAndroid Build Coastguard Worker
// idct_4s_x4: 4-point butterfly on four q registers of 32-bit lanes, results
// written back to the same four arguments (going by the name, a 4-point
// inverse DCT).
// Expects the coefficients in d0/d1; the function inv_dct_4s_x4_neon loads
// them from idct_coeffs before invoking this macro.
// vmul_vmla / vmul_vmls are macros defined elsewhere in the file - presumably
// dst = a * c0 + b * c1 and dst = a * c0 - b * c1; confirm at the definition.
// Uses q2-q5 as scratch registers.
455*c0909341SAndroid Build Coastguard Worker.macro idct_4s_x4 r0, r1, r2, r3
456*c0909341SAndroid Build Coastguard Worker        vmul_vmla       q4,  \r1, \r3, d1[1], d1[0]
457*c0909341SAndroid Build Coastguard Worker        vmul_vmla       q2,  \r0, \r2, d0[0], d0[0]
458*c0909341SAndroid Build Coastguard Worker        vmul_vmls       q3,  \r1, \r3, d1[0], d1[1]
459*c0909341SAndroid Build Coastguard Worker        vmul_vmls       q5,  \r0, \r2, d0[0], d0[0]
        // Rounding shift right by 12 of each product sum.
460*c0909341SAndroid Build Coastguard Worker        vrshr.s32       q4,  q4,  #12
461*c0909341SAndroid Build Coastguard Worker        vrshr.s32       q2,  q2,  #12
462*c0909341SAndroid Build Coastguard Worker        vrshr.s32       q3,  q3,  #12
463*c0909341SAndroid Build Coastguard Worker        vrshr.s32       q5,  q5,  #12
        // Final saturating butterflies; all inputs have been consumed above,
        // so overwriting the argument registers here is safe.
464*c0909341SAndroid Build Coastguard Worker        vqadd.s32       \r0, q2,  q4
465*c0909341SAndroid Build Coastguard Worker        vqsub.s32       \r3, q2,  q4
466*c0909341SAndroid Build Coastguard Worker        vqadd.s32       \r1, q5,  q3
467*c0909341SAndroid Build Coastguard Worker        vqsub.s32       \r2, q5,  q3
468*c0909341SAndroid Build Coastguard Worker.endm
469*c0909341SAndroid Build Coastguard Worker
// idct_2s_x4: same instruction sequence as idct_4s_x4, but with d registers
// (two 32-bit lanes each) as arguments and scratch.
// Expects the coefficients in d0/d1.
// vmul_vmla / vmul_vmls are macros defined elsewhere in the file - presumably
// dst = a * c0 + b * c1 and dst = a * c0 - b * c1; confirm at the definition.
// Uses d4-d7 as scratch registers.
470*c0909341SAndroid Build Coastguard Worker.macro idct_2s_x4 r0, r1, r2, r3
471*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d6,  \r1, \r3, d1[1], d1[0]
472*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d4,  \r0, \r2, d0[0], d0[0]
473*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d5,  \r1, \r3, d1[0], d1[1]
474*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d7,  \r0, \r2, d0[0], d0[0]
        // Rounding shift right by 12 of each product sum.
475*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d6,  d6,  #12
476*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d4,  d4,  #12
477*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d5,  d5,  #12
478*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d7,  d7,  #12
        // Final saturating butterflies into the argument registers.
479*c0909341SAndroid Build Coastguard Worker        vqadd.s32       \r0, d4,  d6
480*c0909341SAndroid Build Coastguard Worker        vqsub.s32       \r3, d4,  d6
481*c0909341SAndroid Build Coastguard Worker        vqadd.s32       \r1, d7,  d5
482*c0909341SAndroid Build Coastguard Worker        vqsub.s32       \r2, d7,  d5
483*c0909341SAndroid Build Coastguard Worker.endm
484*c0909341SAndroid Build Coastguard Worker
// inv_dct_4s_x4_neon: load 16 bytes of coefficients from idct_coeffs into
// d0/d1 and run idct_4s_x4 in place on q8-q11.
// Clobbers r12, q0 and the scratch registers of idct_4s_x4 (q2-q5).
485*c0909341SAndroid Build Coastguard Workerfunction inv_dct_4s_x4_neon
486*c0909341SAndroid Build Coastguard Worker        movrel_local    r12, idct_coeffs
487*c0909341SAndroid Build Coastguard Worker        vld1.32         {d0, d1}, [r12, :128]
488*c0909341SAndroid Build Coastguard Worker        idct_4s_x4      q8,  q9,  q10, q11
489*c0909341SAndroid Build Coastguard Worker        bx              lr
490*c0909341SAndroid Build Coastguard Workerendfunc
491*c0909341SAndroid Build Coastguard Worker
// iadst_4x4: 4-point transform (going by the name, an inverse ADST) on the
// 32-bit lanes of q8-q11.
// Inputs are always in0..in3 = q8..q11; the four results are written to the
// q registers passed as o0..o3. Every read of q8-q11 happens before the
// first write to an output register, so o0..o3 may be any permutation of
// q8-q11.
// Loads c0..c3 = d0[0], d0[1], d1[0], d1[1] from iadst4_coeffs.
// Clobbers r12 and q0-q4. All arithmetic before the final rounding shift is
// non-saturating.
492*c0909341SAndroid Build Coastguard Worker.macro iadst_4x4 o0, o1, o2, o3
493*c0909341SAndroid Build Coastguard Worker        movrel_local    r12, iadst4_coeffs
494*c0909341SAndroid Build Coastguard Worker        vld1.32         {d0, d1}, [r12, :128]
495*c0909341SAndroid Build Coastguard Worker
496*c0909341SAndroid Build Coastguard Worker        vsub.i32        q1,  q8,  q10   // in0 - in2
497*c0909341SAndroid Build Coastguard Worker        vmul.i32        q2,  q8,  d0[0]
498*c0909341SAndroid Build Coastguard Worker        vmla.i32        q2,  q10, d0[1]
499*c0909341SAndroid Build Coastguard Worker        vmla.i32        q2,  q11, d1[0] // q2 = in0*c0 + in2*c1 + in3*c2
500*c0909341SAndroid Build Coastguard Worker        vmul.i32        q4,  q9,  d1[1] // q4 = in1*c3
501*c0909341SAndroid Build Coastguard Worker        vadd.i32        q1,  q1,  q11   // q1 = in0 - in2 + in3
502*c0909341SAndroid Build Coastguard Worker        vmul.i32        q3,  q8,  d1[0]
503*c0909341SAndroid Build Coastguard Worker        vmls.i32        q3,  q10, d0[0]
504*c0909341SAndroid Build Coastguard Worker        vmls.i32        q3,  q11, d0[1] // q3 = in0*c2 - in2*c0 - in3*c1
505*c0909341SAndroid Build Coastguard Worker
506*c0909341SAndroid Build Coastguard Worker        vadd.i32        \o3, q2,  q3
507*c0909341SAndroid Build Coastguard Worker        vmul.i32        \o2, q1,  d1[1] // o2 = (in0 - in2 + in3) * c3
508*c0909341SAndroid Build Coastguard Worker        vadd.i32        \o0, q2,  q4    // o0 = q2 + q4
509*c0909341SAndroid Build Coastguard Worker        vadd.i32        \o1, q3,  q4    // o1 = q3 + q4
510*c0909341SAndroid Build Coastguard Worker        vsub.i32        \o3, \o3, q4    // o3 = q2 + q3 - q4
511*c0909341SAndroid Build Coastguard Worker
        // Rounding shift right by 12 of all four outputs.
512*c0909341SAndroid Build Coastguard Worker        vrshr.s32       \o0, \o0, #12
513*c0909341SAndroid Build Coastguard Worker        vrshr.s32       \o2, \o2, #12
514*c0909341SAndroid Build Coastguard Worker        vrshr.s32       \o1, \o1, #12
515*c0909341SAndroid Build Coastguard Worker        vrshr.s32       \o3, \o3, #12
516*c0909341SAndroid Build Coastguard Worker.endm
517*c0909341SAndroid Build Coastguard Worker
// inv_adst_4s_x4_neon: run iadst_4x4 on q8-q11 with the outputs written in
// natural order (o0..o3 = q8..q11).
// Clobbers r12 and q0-q4 (through the macro).
518*c0909341SAndroid Build Coastguard Workerfunction inv_adst_4s_x4_neon
519*c0909341SAndroid Build Coastguard Worker        iadst_4x4       q8,  q9,  q10, q11
520*c0909341SAndroid Build Coastguard Worker        bx              lr
521*c0909341SAndroid Build Coastguard Workerendfunc
522*c0909341SAndroid Build Coastguard Worker
// inv_flipadst_4s_x4_neon: run iadst_4x4 on q8-q11 with the outputs written
// in reversed order (o0..o3 = q11..q8), i.e. the same transform with its
// result flipped.
// Clobbers r12 and q0-q4 (through the macro).
523*c0909341SAndroid Build Coastguard Workerfunction inv_flipadst_4s_x4_neon
524*c0909341SAndroid Build Coastguard Worker        iadst_4x4       q11, q10, q9,  q8
525*c0909341SAndroid Build Coastguard Worker        bx              lr
526*c0909341SAndroid Build Coastguard Workerendfunc
527*c0909341SAndroid Build Coastguard Worker
// inv_identity_4s_x4_neon: scale the 32-bit lanes of q8-q11 in place by
// 5793/4096 (about 1.414), computed as x + x * (5793 - 4096) / 4096.
// Clobbers r12, d0 and q1-q4.
528*c0909341SAndroid Build Coastguard Workerfunction inv_identity_4s_x4_neon
        // r12 = ((5793-4096)*8) << 16: movt writes the top halfword, the low
        // halfword was zeroed by the mov.
529*c0909341SAndroid Build Coastguard Worker        mov             r12, #0
530*c0909341SAndroid Build Coastguard Worker        movt            r12, #(5793-4096)*8
531*c0909341SAndroid Build Coastguard Worker        vdup.32         d0,  r12
        // vqrdmulh.s32 yields round(2 * x * c / 2^32); with the constant
        // above that is x * (5793 - 4096) / 4096.
532*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s32    q1,  q8,  d0[0]
533*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s32    q2,  q9,  d0[0]
534*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s32    q3,  q10, d0[0]
535*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s32    q4,  q11, d0[0]
        // Add the fractional part back onto the input (saturating).
536*c0909341SAndroid Build Coastguard Worker        vqadd.s32       q8,  q8,  q1
537*c0909341SAndroid Build Coastguard Worker        vqadd.s32       q9,  q9,  q2
538*c0909341SAndroid Build Coastguard Worker        vqadd.s32       q10, q10, q3
539*c0909341SAndroid Build Coastguard Worker        vqadd.s32       q11, q11, q4
540*c0909341SAndroid Build Coastguard Worker        bx              lr
541*c0909341SAndroid Build Coastguard Workerendfunc
542*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_wht_wht_4x4_16bpc_neon (exported): 4x4 transform built from
// two iwht4 passes, with the result added to a 4x4 block of 16-bit values.
//   r0 = destination pointer, four rows of 8 bytes (:64 alignment hint)
//   r1 = byte offset between destination rows
//   r2 = pointer to sixteen 32-bit coefficients (:128 alignment hint); the
//        buffer is overwritten with zeros
//   first stack argument = upper clamp bound, read at L(itx_4x4_end)
// Does not return by itself: it branches to L(itx_4x4_end) inside
// inv_txfm_add_4x4_neon, which performs the add, clamp, store and the
// matching vpop/pop.
543*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_wht_wht_4x4_16bpc_neon, export=1
        // This exact push/vpush pair (12 + 32 bytes) is what the stack offset
        // used at L(itx_4x4_end) assumes.
544*c0909341SAndroid Build Coastguard Worker        push            {r4-r5,lr}
545*c0909341SAndroid Build Coastguard Worker        vpush           {q4-q5}
        // q14 doubles as the zero source for clearing the coefficients and
        // as the lower clamp bound at L(itx_4x4_end).
546*c0909341SAndroid Build Coastguard Worker        vmov.i16        q14, #0
547*c0909341SAndroid Build Coastguard Worker        vmov.i16        q15, #0
        // Load the first 32 bytes of coefficients, zero them in memory and
        // advance r2 to the second half.
548*c0909341SAndroid Build Coastguard Worker        vld1.32         {q8,  q9},  [r2, :128]
549*c0909341SAndroid Build Coastguard Worker        vst1.32         {q14, q15}, [r2, :128]!
        // All coefficients are arithmetically shifted right by 2 before the
        // first pass.
550*c0909341SAndroid Build Coastguard Worker        vshr.s32        q8,  q8,  #2
551*c0909341SAndroid Build Coastguard Worker        vld1.32         {q10, q11}, [r2, :128]
552*c0909341SAndroid Build Coastguard Worker        vshr.s32        q9,  q9,  #2
553*c0909341SAndroid Build Coastguard Worker        vshr.s32        q10, q10, #2
554*c0909341SAndroid Build Coastguard Worker        vshr.s32        q11, q11, #2
555*c0909341SAndroid Build Coastguard Worker
        // First pass. iwht4 only uses q12/q13 as scratch, so q14/q15 stay 0.
556*c0909341SAndroid Build Coastguard Worker        iwht4
557*c0909341SAndroid Build Coastguard Worker
        // Zero the second 32 bytes of the coefficient buffer.
558*c0909341SAndroid Build Coastguard Worker        vst1.32         {q14, q15}, [r2, :128]
        // transpose_4x4s is a macro defined elsewhere in the file.
559*c0909341SAndroid Build Coastguard Worker        transpose_4x4s  q8,  q9,  q10, q11, d16, d17, d18, d19, d20, d21, d22, d23
560*c0909341SAndroid Build Coastguard Worker
        // Second pass.
561*c0909341SAndroid Build Coastguard Worker        iwht4
562*c0909341SAndroid Build Coastguard Worker
        // Load the four destination rows into d0-d3 while narrowing the
        // results to saturated 16-bit values in d16-d19. The narrowing runs
        // in ascending order so that each destination d register only
        // overlaps a q register that has already been read.
563*c0909341SAndroid Build Coastguard Worker        vld1.16         {d0}, [r0, :64], r1
564*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d16, q8
565*c0909341SAndroid Build Coastguard Worker        vld1.16         {d1}, [r0, :64], r1
566*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d17, q9
567*c0909341SAndroid Build Coastguard Worker        vld1.16         {d2}, [r0, :64], r1
568*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d18, q10
569*c0909341SAndroid Build Coastguard Worker        vld1.16         {d3}, [r0, :64], r1
570*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d19, q11
571*c0909341SAndroid Build Coastguard Worker
572*c0909341SAndroid Build Coastguard Worker        b               L(itx_4x4_end)
573*c0909341SAndroid Build Coastguard Workerendfunc
574*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_4x4_neon: shared body of the 4x4 transforms generated by
// def_fn_4x4. It is entered with a plain branch after the exported entry
// point has already executed push {r4-r5,lr} and vpush {q4-q5}; the pops at
// the end of this function undo those pushes and return to the original
// caller.
//   r0 = destination pointer, four rows of 8 bytes (:64 alignment hint)
//   r1 = byte offset between destination rows
//   r2 = pointer to sixteen 32-bit coefficients; overwritten with zeros
//   r4 = address of the first-pass routine (works on 32-bit lanes, q8-q11)
//   r5 = address of the second-pass routine (called after narrowing to
//        16 bits and transposing)
// q14 is zeroed here and used as the lower clamp bound at L(itx_4x4_end), so
// the routines called through r4/r5 must leave it intact (the r5 routines
// are defined outside this file section - confirm there).
575*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_4x4_neon
576*c0909341SAndroid Build Coastguard Worker        vmov.i16        q14, #0
577*c0909341SAndroid Build Coastguard Worker        vmov.i16        q15, #0
        // Load all 64 bytes of coefficients and clear them in memory.
578*c0909341SAndroid Build Coastguard Worker        vld1.32         {q8,  q9},  [r2, :128]
579*c0909341SAndroid Build Coastguard Worker        vst1.16         {q14, q15}, [r2, :128]!
580*c0909341SAndroid Build Coastguard Worker        vld1.32         {q10, q11}, [r2, :128]
581*c0909341SAndroid Build Coastguard Worker        vst1.16         {q14, q15}, [r2, :128]
582*c0909341SAndroid Build Coastguard Worker
        // First pass; blx overwrites lr, which is fine because the entry
        // point saved it on the stack.
583*c0909341SAndroid Build Coastguard Worker        blx             r4
584*c0909341SAndroid Build Coastguard Worker
        // Narrow to saturated 16-bit values (q8/q9 now hold all 16 values)
        // and transpose; transpose_4x4h is a macro defined elsewhere.
585*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d16, q8
586*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d17, q9
587*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d18, q10
588*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d19, q11
589*c0909341SAndroid Build Coastguard Worker        transpose_4x4h  q8,  q9,  d16, d17, d18, d19
590*c0909341SAndroid Build Coastguard Worker
        // Second pass.
591*c0909341SAndroid Build Coastguard Worker        blx             r5
592*c0909341SAndroid Build Coastguard Worker
        // Load the four destination rows into d0-d3, interleaved with the
        // final rounding shift right by 4 of the transform output.
593*c0909341SAndroid Build Coastguard Worker        vld1.16         {d0}, [r0, :64], r1
594*c0909341SAndroid Build Coastguard Worker        vld1.16         {d1}, [r0, :64], r1
595*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q8,  q8,  #4
596*c0909341SAndroid Build Coastguard Worker        vld1.16         {d2}, [r0, :64], r1
597*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q9,  q9,  #4
598*c0909341SAndroid Build Coastguard Worker        vld1.16         {d3}, [r0, :64], r1
599*c0909341SAndroid Build Coastguard Worker
        // Common tail, also branched to from the wht_wht function and from
        // the DC-only path in def_fn_4x4. On entry: q8/q9 = residual rows
        // 0-3, q0/q1 = destination rows 0-3, q14 = 0, r0 = four rows past
        // the start of the block.
600*c0909341SAndroid Build Coastguard WorkerL(itx_4x4_end):
601*c0909341SAndroid Build Coastguard Worker        // read bitdepth_max from the callers stack
        // 44 = 12 bytes from push {r4-r5,lr} + 32 bytes from vpush {q4-q5},
        // i.e. the first stack slot above what the entry point pushed.
602*c0909341SAndroid Build Coastguard Worker        ldr             r4,  [sp, #44]
603*c0909341SAndroid Build Coastguard Worker        vdup.i16        q15, r4
        // Rewind r0 by the four rows consumed by the loads.
604*c0909341SAndroid Build Coastguard Worker        sub             r0,  r0,  r1, lsl #2
605*c0909341SAndroid Build Coastguard Worker        vqadd.s16       q8,  q8,  q0
606*c0909341SAndroid Build Coastguard Worker        vqadd.s16       q9,  q9,  q1
        // Clamp to [0, bitdepth_max].
607*c0909341SAndroid Build Coastguard Worker        vmax.s16        q8,  q8,  q14
608*c0909341SAndroid Build Coastguard Worker        vmax.s16        q9,  q9,  q14
609*c0909341SAndroid Build Coastguard Worker        vmin.s16        q8,  q8,  q15
610*c0909341SAndroid Build Coastguard Worker        vmin.s16        q9,  q9,  q15
611*c0909341SAndroid Build Coastguard Worker        vst1.16         {d16}, [r0, :64], r1
612*c0909341SAndroid Build Coastguard Worker        vst1.16         {d17}, [r0, :64], r1
613*c0909341SAndroid Build Coastguard Worker        vst1.16         {d18}, [r0, :64], r1
614*c0909341SAndroid Build Coastguard Worker        vst1.16         {d19}, [r0, :64], r1
615*c0909341SAndroid Build Coastguard Worker
        // Undo the pushes done by the exported entry point and return.
616*c0909341SAndroid Build Coastguard Worker        vpop            {q4-q5}
617*c0909341SAndroid Build Coastguard Worker        pop             {r4-r5,pc}
618*c0909341SAndroid Build Coastguard Workerendfunc
619*c0909341SAndroid Build Coastguard Worker
// def_fn_4x4 txfm1, txfm2: emit the exported entry point
// inv_txfm_add_<txfm1>_<txfm2>_4x4_16bpc_neon.
// The entry point saves r4-r5, lr and q4-q5, selects the first-pass routine
// inv_<txfm1>_4s_x4_neon (r4) and the second-pass routine
// inv_<txfm2>_4h_x4_neon (r5, an X() symbol defined outside this file
// section), then branches to inv_txfm_add_4x4_neon, which does the work and
// the matching pops.
// For dct_dct only, a shortcut is emitted that is taken when r3 == 0 (r3 is
// presumably the eob argument - confirm against the C prototype): only the
// first coefficient is read and the same value is added to all 16 pixels.
620*c0909341SAndroid Build Coastguard Worker.macro def_fn_4x4 txfm1, txfm2
621*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_\txfm1\()_\txfm2\()_4x4_16bpc_neon, export=1
        // This exact push/vpush pair (12 + 32 bytes) is what the stack offset
        // used at L(itx_4x4_end) assumes.
622*c0909341SAndroid Build Coastguard Worker        push            {r4-r5,lr}
623*c0909341SAndroid Build Coastguard Worker        vpush           {q4-q5}
624*c0909341SAndroid Build Coastguard Worker
625*c0909341SAndroid Build Coastguard Worker.ifc \txfm1\()_\txfm2, dct_dct
626*c0909341SAndroid Build Coastguard Worker        cmp             r3,  #0
627*c0909341SAndroid Build Coastguard Worker        bne             1f
        // q14 = 0: zero source for clearing the coefficient and lower clamp
        // bound at L(itx_4x4_end).
628*c0909341SAndroid Build Coastguard Worker        vmov.i16        q14, #0
629*c0909341SAndroid Build Coastguard Worker        mov_const       r12, 2896*8*(1<<16)
        // Load the first coefficient replicated into all four lanes of q8,
        // then clear it in memory.
630*c0909341SAndroid Build Coastguard Worker        vld1.32         {d16[], d17[]},  [r2, :32]
631*c0909341SAndroid Build Coastguard Worker        vdup.32         d4,  r12
632*c0909341SAndroid Build Coastguard Worker        vst1.32         {d28[0]}, [r2, :32]
        // First scaling by 2896/4096 on the 32-bit value.
633*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s32    q8,  q8,  d4[0]
634*c0909341SAndroid Build Coastguard Worker        vld1.16         {d0}, [r0, :64], r1
        // Narrow to 16 bits; q10 holds the value in all eight lanes.
635*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d20, q8
636*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d21, q8
637*c0909341SAndroid Build Coastguard Worker        vld1.16         {d1}, [r0, :64], r1
        // Second scaling by 2896/4096: viewed as 16-bit lanes, d4[1] is the
        // high halfword of the constant, i.e. 2896*8.
638*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s16    q10, q10, d4[1]
639*c0909341SAndroid Build Coastguard Worker        vld1.16         {d2}, [r0, :64], r1
        // Final rounding shift right by 4, duplicated into q8 and q9 as
        // expected by L(itx_4x4_end).
640*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q8,  q10, #4
641*c0909341SAndroid Build Coastguard Worker        vld1.16         {d3}, [r0, :64], r1
642*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q9,  q10, #4
643*c0909341SAndroid Build Coastguard Worker        b               L(itx_4x4_end)
644*c0909341SAndroid Build Coastguard Worker1:
645*c0909341SAndroid Build Coastguard Worker.endif
        // General path: pass the two 1-D routines to the shared body.
646*c0909341SAndroid Build Coastguard Worker        movrel_local    r4,  inv_\txfm1\()_4s_x4_neon
647*c0909341SAndroid Build Coastguard Worker        movrel          r5,  X(inv_\txfm2\()_4h_x4_neon)
648*c0909341SAndroid Build Coastguard Worker        b               inv_txfm_add_4x4_neon
649*c0909341SAndroid Build Coastguard Workerendfunc
650*c0909341SAndroid Build Coastguard Worker.endm
651*c0909341SAndroid Build Coastguard Worker
// Instantiate the exported 4x4 entry points for all sixteen combinations of
// dct, adst, flipadst and identity as first and second transform. Only the
// dct, dct instance contains the DC-only shortcut.
652*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 dct, dct
653*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 identity, identity
654*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 dct, adst
655*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 dct, flipadst
656*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 dct, identity
657*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 adst, dct
658*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 adst, adst
659*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 adst, flipadst
660*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 flipadst, dct
661*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 flipadst, adst
662*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 flipadst, flipadst
663*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 identity, dct
664*c0909341SAndroid Build Coastguard Worker
665*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 adst, identity
666*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 flipadst, identity
667*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 identity, adst
668*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 identity, flipadst
669*c0909341SAndroid Build Coastguard Worker
// idct_4s_x8: 8-point butterfly network on eight q registers of 32-bit
// lanes, results written back in place (going by the name, an 8-point
// inverse DCT).
// The even arguments (r0, r2, r4, r6) go through idct_4s_x4; the odd ones
// (r1, r3, r5, r7) are processed here and combined with the even half in the
// final butterflies.
// Expects the coefficients in q0/q1 (d0[0], d1, d2 and d3 are used); the
// function inv_dct_4s_x8_neon loads them from idct_coeffs.
// Intermediate values are clamped to [-0x20000, 0x1ffff].
// Uses q2-q7 as scratch registers. q4-q7 are callee-saved under the AAPCS,
// so code using this macro is responsible for preserving them.
670*c0909341SAndroid Build Coastguard Worker.macro idct_4s_x8 r0, r1, r2, r3, r4, r5, r6, r7
671*c0909341SAndroid Build Coastguard Worker        idct_4s_x4      \r0, \r2, \r4, \r6
672*c0909341SAndroid Build Coastguard Worker
        // The clip constants are built after idct_4s_x4 because that macro
        // uses q4/q5 as scratch.
673*c0909341SAndroid Build Coastguard Worker        vmov.i32        q5,  #0x1ffff // row_clip_max = ~(~bdmax << 7), 0x1ffff
674*c0909341SAndroid Build Coastguard Worker        vmvn.i32        q4,  #0x1ffff // row_clip_min = (~bdmax << 7), 0xfffe0000
675*c0909341SAndroid Build Coastguard Worker.irp r, \r0, \r2, \r4, \r6
676*c0909341SAndroid Build Coastguard Worker        vmin.s32        \r,  \r,  q5
677*c0909341SAndroid Build Coastguard Worker.endr
678*c0909341SAndroid Build Coastguard Worker.irp r, \r0, \r2, \r4, \r6
679*c0909341SAndroid Build Coastguard Worker        vmax.s32        \r,  \r,  q4
680*c0909341SAndroid Build Coastguard Worker.endr
681*c0909341SAndroid Build Coastguard Worker
682*c0909341SAndroid Build Coastguard Worker        vmul_vmls       q2,  \r1, \r7, d2[0], d2[1] // -> t4a
683*c0909341SAndroid Build Coastguard Worker        vmul_vmla       q3,  \r1, \r7, d2[1], d2[0] // -> t7a
684*c0909341SAndroid Build Coastguard Worker        vmul_vmls       q6,  \r5, \r3, d3[0], d3[1] // -> t5a
685*c0909341SAndroid Build Coastguard Worker        vmul_vmla       q7,  \r5, \r3, d3[1], d3[0] // -> t6a
686*c0909341SAndroid Build Coastguard Worker        vrshr.s32       \r1, q2,  #12               // t4a
687*c0909341SAndroid Build Coastguard Worker        vrshr.s32       \r7, q3,  #12               // t7a
688*c0909341SAndroid Build Coastguard Worker        vrshr.s32       \r3, q6,  #12               // t5a
689*c0909341SAndroid Build Coastguard Worker        vrshr.s32       \r5, q7,  #12               // t6a
690*c0909341SAndroid Build Coastguard Worker
691*c0909341SAndroid Build Coastguard Worker        vqadd.s32       q2,  \r1, \r3               // t4
692*c0909341SAndroid Build Coastguard Worker        vqsub.s32       \r1, \r1, \r3               // t5a
693*c0909341SAndroid Build Coastguard Worker        vqadd.s32       q3,  \r7, \r5               // t7
694*c0909341SAndroid Build Coastguard Worker        vqsub.s32       \r3, \r7, \r5               // t6a
695*c0909341SAndroid Build Coastguard Worker
696*c0909341SAndroid Build Coastguard Worker.irp r, q2, \r1, q3, \r3
697*c0909341SAndroid Build Coastguard Worker        vmin.s32        \r,  \r,  q5
698*c0909341SAndroid Build Coastguard Worker.endr
699*c0909341SAndroid Build Coastguard Worker.irp r, q2, \r1, q3, \r3
700*c0909341SAndroid Build Coastguard Worker        vmax.s32        \r,  \r,  q4
701*c0909341SAndroid Build Coastguard Worker.endr
702*c0909341SAndroid Build Coastguard Worker
703*c0909341SAndroid Build Coastguard Worker        vmul_vmls       q7,  \r3, \r1, d0[0], d0[0] // -> t5
704*c0909341SAndroid Build Coastguard Worker        vmul_vmla       q6,  \r3, \r1, d0[0], d0[0] // -> t6
705*c0909341SAndroid Build Coastguard Worker        vrshr.s32       q7,  q7,  #12               // t5
        // t6 lands in q5, overwriting row_clip_max, which is not used again
        // in this macro.
706*c0909341SAndroid Build Coastguard Worker        vrshr.s32       q5,  q6,  #12               // t6
707*c0909341SAndroid Build Coastguard Worker
        // out6 is parked in q6 because the argument r6 is still needed as an
        // input for out3/out4; it is moved into place by the final vmov.
708*c0909341SAndroid Build Coastguard Worker        vqsub.s32       \r7, \r0, q3  // out7
709*c0909341SAndroid Build Coastguard Worker        vqadd.s32       \r0, \r0, q3  // out0
710*c0909341SAndroid Build Coastguard Worker        vqadd.s32       \r1, \r2, q5  // out1
711*c0909341SAndroid Build Coastguard Worker        vqsub.s32       q6,  \r2, q5  // out6
712*c0909341SAndroid Build Coastguard Worker        vqadd.s32       \r2, \r4, q7  // out2
713*c0909341SAndroid Build Coastguard Worker        vqsub.s32       \r5, \r4, q7  // out5
714*c0909341SAndroid Build Coastguard Worker        vqadd.s32       \r3, \r6, q2  // out3
715*c0909341SAndroid Build Coastguard Worker        vqsub.s32       \r4, \r6, q2  // out4
716*c0909341SAndroid Build Coastguard Worker        vmov            \r6, q6       // out6
717*c0909341SAndroid Build Coastguard Worker.endm
718*c0909341SAndroid Build Coastguard Worker
// idct_2s_x8: same butterfly network as idct_4s_x8, but with d registers
// (two 32-bit lanes each) as arguments and scratch.
// The even arguments (r0, r2, r4, r6) go through idct_2s_x4; the odd ones
// are processed here and combined in the final butterflies.
// Expects the coefficients in q0/q1 (d0[0], d1, d2 and d3 are used).
// Intermediate values are clamped to [-0x20000, 0x1ffff].
// Uses d4-d9 as scratch registers. d8/d9 are callee-saved under the AAPCS,
// so code using this macro is responsible for preserving them.
719*c0909341SAndroid Build Coastguard Worker.macro idct_2s_x8 r0, r1, r2, r3, r4, r5, r6, r7
720*c0909341SAndroid Build Coastguard Worker        idct_2s_x4      \r0, \r2, \r4, \r6
721*c0909341SAndroid Build Coastguard Worker
722*c0909341SAndroid Build Coastguard Worker        vmov.i32        d9,  #0x1ffff // row_clip_max = ~(~bdmax << 7), 0x1ffff
723*c0909341SAndroid Build Coastguard Worker        vmvn.i32        d8,  #0x1ffff // row_clip_min = (~bdmax << 7), 0xfffe0000
724*c0909341SAndroid Build Coastguard Worker.irp r, \r0, \r2, \r4, \r6
725*c0909341SAndroid Build Coastguard Worker        vmin.s32        \r,  \r,  d9
726*c0909341SAndroid Build Coastguard Worker.endr
727*c0909341SAndroid Build Coastguard Worker.irp r, \r0, \r2, \r4, \r6
728*c0909341SAndroid Build Coastguard Worker        vmax.s32        \r,  \r,  d8
729*c0909341SAndroid Build Coastguard Worker.endr
730*c0909341SAndroid Build Coastguard Worker
731*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d4,  \r1, \r7, d2[0], d2[1] // -> t4a
732*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d5,  \r1, \r7, d2[1], d2[0] // -> t7a
733*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d6,  \r5, \r3, d3[0], d3[1] // -> t5a
734*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d7,  \r5, \r3, d3[1], d3[0] // -> t6a
735*c0909341SAndroid Build Coastguard Worker        vrshr.s32       \r1, d4,  #12               // t4a
736*c0909341SAndroid Build Coastguard Worker        vrshr.s32       \r7, d5,  #12               // t7a
737*c0909341SAndroid Build Coastguard Worker        vrshr.s32       \r3, d6,  #12               // t5a
738*c0909341SAndroid Build Coastguard Worker        vrshr.s32       \r5, d7,  #12               // t6a
739*c0909341SAndroid Build Coastguard Worker
740*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d4,  \r1, \r3               // t4
741*c0909341SAndroid Build Coastguard Worker        vqsub.s32       \r1, \r1, \r3               // t5a
742*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d5,  \r7, \r5               // t7
743*c0909341SAndroid Build Coastguard Worker        vqsub.s32       \r3, \r7, \r5               // t6a
744*c0909341SAndroid Build Coastguard Worker
745*c0909341SAndroid Build Coastguard Worker.irp r, d4, \r1, d5, \r3
746*c0909341SAndroid Build Coastguard Worker        vmin.s32        \r,  \r,  d9
747*c0909341SAndroid Build Coastguard Worker.endr
748*c0909341SAndroid Build Coastguard Worker.irp r, d4, \r1, d5, \r3
749*c0909341SAndroid Build Coastguard Worker        vmax.s32        \r,  \r,  d8
750*c0909341SAndroid Build Coastguard Worker.endr
751*c0909341SAndroid Build Coastguard Worker
752*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d6,  \r3, \r1, d0[0], d0[0] // -> t5
753*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d7,  \r3, \r1, d0[0], d0[0] // -> t6
754*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d6,  d6,  #12               // t5
755*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d7,  d7,  #12               // t6
756*c0909341SAndroid Build Coastguard Worker
        // out6 is parked in d7 (replacing t6 after its last use) because the
        // argument r6 is still needed as an input for out3/out4; it is moved
        // into place by the final vmov.
757*c0909341SAndroid Build Coastguard Worker        vqsub.s32       \r7, \r0, d5  // out7
758*c0909341SAndroid Build Coastguard Worker        vqadd.s32       \r0, \r0, d5  // out0
759*c0909341SAndroid Build Coastguard Worker        vqadd.s32       \r1, \r2, d7  // out1
760*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d7,  \r2, d7  // out6
761*c0909341SAndroid Build Coastguard Worker        vqadd.s32       \r2, \r4, d6  // out2
762*c0909341SAndroid Build Coastguard Worker        vqsub.s32       \r5, \r4, d6  // out5
763*c0909341SAndroid Build Coastguard Worker        vqadd.s32       \r3, \r6, d4  // out3
764*c0909341SAndroid Build Coastguard Worker        vqsub.s32       \r4, \r6, d4  // out4
765*c0909341SAndroid Build Coastguard Worker        vmov            \r6, d7       // out6
766*c0909341SAndroid Build Coastguard Worker.endm
767*c0909341SAndroid Build Coastguard Worker
// inv_dct_4s_x8_neon: load 32 bytes of coefficients from idct_coeffs into
// q0/q1 and run idct_4s_x8 in place on q8-q15.
// Clobbers r12, q0-q1 and the scratch registers of idct_4s_x8 (q2-q7); it
// does not save q4-q7 itself.
768*c0909341SAndroid Build Coastguard Workerfunction inv_dct_4s_x8_neon
769*c0909341SAndroid Build Coastguard Worker        movrel_local    r12, idct_coeffs
770*c0909341SAndroid Build Coastguard Worker        vld1.32         {q0, q1}, [r12, :128]
771*c0909341SAndroid Build Coastguard Worker        idct_4s_x8      q8,  q9,  q10, q11, q12, q13, q14, q15
772*c0909341SAndroid Build Coastguard Worker        bx              lr
773*c0909341SAndroid Build Coastguard Workerendfunc
774*c0909341SAndroid Build Coastguard Worker
// iadst_4s_x8 r0, r1, r2, r3, r4, r5, r6, r7
// 8-input inverse ADST stage (going by the macro and coefficient table
// names), computed on signed 32-bit lanes of q registers.
//
// Inputs:  always read from q8-q15; the input registers are hard-coded in
//          the body and do not depend on the macro arguments.
// Outputs: out0..out7 are written to the registers named by the macro
//          arguments r0..r7. The two users visible in this file pass
//          q8..q15 in forward order (adst) and in reverse order (flipadst);
//          the trailing comments on the last stage list which physical
//          register each output lands in for those two orderings.
// Scratch: r12 (coefficient pointer), q0-q1 (coefficients), q2-q7
//          (temporaries), and q10/q12/q14 which are reused for the clip
//          constants and products once their input values are consumed.
// Intermediate sums are clamped to [0xfffe0000, 0x1ffff] (see the
// row_clip_min / row_clip_max comments below).
// vmul_vmla / vmul_vmls are helper macros defined elsewhere in the file.
775*c0909341SAndroid Build Coastguard Worker.macro iadst_4s_x8 r0, r1, r2, r3, r4, r5, r6, r7
776*c0909341SAndroid Build Coastguard Worker        movrel_local    r12, iadst8_coeffs
        // Load the first 32 bytes of coefficients; the writeback form
        // advances r12 so the later load picks up the following 16 bytes.
777*c0909341SAndroid Build Coastguard Worker        vld1.32         {q0, q1}, [r12, :128]!
778*c0909341SAndroid Build Coastguard Worker
        // First stage: the input pairs (q15,q8), (q13,q10), (q11,q12) and
        // (q9,q14) each go through a vmul_vmla/vmul_vmls pair with two
        // coefficients, and every product is rounded with a 12 bit right
        // shift. Multiplies and shifts are interleaved, and each shift
        // overwrites an input register that is no longer needed.
779*c0909341SAndroid Build Coastguard Worker        vmul_vmla       q2,  q15, q8,  d0[0], d0[1]
780*c0909341SAndroid Build Coastguard Worker        vmul_vmls       q3,  q15, q8,  d0[1], d0[0]
781*c0909341SAndroid Build Coastguard Worker        vmul_vmla       q4,  q13, q10, d1[0], d1[1]
782*c0909341SAndroid Build Coastguard Worker        vrshr.s32       q8,  q2,  #12 // t0a
783*c0909341SAndroid Build Coastguard Worker        vrshr.s32       q15, q3,  #12 // t1a
784*c0909341SAndroid Build Coastguard Worker        vmul_vmls       q5,  q13, q10, d1[1], d1[0]
785*c0909341SAndroid Build Coastguard Worker        vmul_vmla       q6,  q11, q12, d2[0], d2[1]
786*c0909341SAndroid Build Coastguard Worker        vrshr.s32       q10, q4,  #12 // t2a
787*c0909341SAndroid Build Coastguard Worker        vrshr.s32       q13, q5,  #12 // t3a
788*c0909341SAndroid Build Coastguard Worker        vmul_vmls       q7,  q11, q12, d2[1], d2[0]
789*c0909341SAndroid Build Coastguard Worker        vmul_vmla       q2,  q9,  q14, d3[0], d3[1]
790*c0909341SAndroid Build Coastguard Worker        vrshr.s32       q12, q6,  #12 // t4a
791*c0909341SAndroid Build Coastguard Worker        vrshr.s32       q11, q7,  #12 // t5a
792*c0909341SAndroid Build Coastguard Worker        vmul_vmls       q3,  q9,  q14, d3[1], d3[0]
793*c0909341SAndroid Build Coastguard Worker        vrshr.s32       q14, q2,  #12 // t6a
794*c0909341SAndroid Build Coastguard Worker        vrshr.s32       q9,  q3,  #12 // t7a
795*c0909341SAndroid Build Coastguard Worker
        // Next 16 bytes of the coefficient table (r12 was advanced above).
796*c0909341SAndroid Build Coastguard Worker        vld1.32         {q0}, [r12]
797*c0909341SAndroid Build Coastguard Worker
        // Saturating butterflies. q12 and q14 are overwritten with the clip
        // constants right after their last use as t4a / t6a operands.
798*c0909341SAndroid Build Coastguard Worker        vqadd.s32       q2,  q8,  q12 // t0
799*c0909341SAndroid Build Coastguard Worker        vqsub.s32       q3,  q8,  q12 // t4
800*c0909341SAndroid Build Coastguard Worker        vmov.i32        q12, #0x1ffff // row_clip_max = ~(~bdmax << 7), 0x1ffff
801*c0909341SAndroid Build Coastguard Worker        vqadd.s32       q4,  q15, q11 // t1
802*c0909341SAndroid Build Coastguard Worker        vqsub.s32       q5,  q15, q11 // t5
803*c0909341SAndroid Build Coastguard Worker        vqadd.s32       q6,  q10, q14 // t2
804*c0909341SAndroid Build Coastguard Worker        vqsub.s32       q7,  q10, q14 // t6
805*c0909341SAndroid Build Coastguard Worker        vmvn.i32        q14, #0x1ffff // row_clip_min = (~bdmax << 7), 0xfffe0000
806*c0909341SAndroid Build Coastguard Worker        vqadd.s32       q10, q13, q9  // t3
807*c0909341SAndroid Build Coastguard Worker        vqsub.s32       q11, q13, q9  // t7
808*c0909341SAndroid Build Coastguard Worker
        // Clamp all eight intermediates to [row_clip_min, row_clip_max].
809*c0909341SAndroid Build Coastguard Worker.irp r, q2, q3, q4, q5, q6, q7, q10, q11
810*c0909341SAndroid Build Coastguard Worker        vmin.s32        \r,  \r,  q12
811*c0909341SAndroid Build Coastguard Worker.endr
812*c0909341SAndroid Build Coastguard Worker.irp r, q2, q3, q4, q5, q6, q7, q10, q11
813*c0909341SAndroid Build Coastguard Worker        vmax.s32        \r,  \r,  q14
814*c0909341SAndroid Build Coastguard Worker.endr
815*c0909341SAndroid Build Coastguard Worker
        // Second multiply stage on (t4,t5) and (t7,t6) using d1[0]/d1[1].
        // Note that q14 (row_clip_min) is overwritten by a product here.
816*c0909341SAndroid Build Coastguard Worker        vmul_vmla       q8,  q3,  q5,  d1[1], d1[0]
817*c0909341SAndroid Build Coastguard Worker        vmul_vmls       q13, q3,  q5,  d1[0], d1[1]
818*c0909341SAndroid Build Coastguard Worker        vmul_vmls       q14, q11, q7,  d1[1], d1[0]
819*c0909341SAndroid Build Coastguard Worker
820*c0909341SAndroid Build Coastguard Worker        vrshr.s32       q3,  q8,  #12 // t4a
821*c0909341SAndroid Build Coastguard Worker        vrshr.s32       q5,  q13, #12 // t5a
822*c0909341SAndroid Build Coastguard Worker
823*c0909341SAndroid Build Coastguard Worker        vmul_vmla       q8,  q11, q7,  d1[0], d1[1]
824*c0909341SAndroid Build Coastguard Worker
825*c0909341SAndroid Build Coastguard Worker        vrshr.s32       q7,  q14, #12 // t6a
826*c0909341SAndroid Build Coastguard Worker        vrshr.s32       q11, q8,  #12 // t7a
827*c0909341SAndroid Build Coastguard Worker
        // From here on the macro arguments are written: out0 and out7 come
        // straight from saturating adds; the matching subtractions stay in
        // scratch registers for the final stage.
828*c0909341SAndroid Build Coastguard Worker        vqadd.s32       \r0, q2,  q6  // out0
829*c0909341SAndroid Build Coastguard Worker        vqsub.s32       q2,  q2,  q6  // t2
830*c0909341SAndroid Build Coastguard Worker        vqadd.s32       \r7, q4,  q10 // out7
831*c0909341SAndroid Build Coastguard Worker        vqsub.s32       q4,  q4,  q10 // t3
832*c0909341SAndroid Build Coastguard Worker
        // q14 no longer holds the lower clip bound (it was reused for a
        // product above), so the bound is rebuilt in q10, whose t3 value
        // has just been consumed.
833*c0909341SAndroid Build Coastguard Worker        vmvn.i32        q10, #0x1ffff // row_clip_min = (~bdmax << 7), 0xfffe0000
834*c0909341SAndroid Build Coastguard Worker
835*c0909341SAndroid Build Coastguard Worker        vqadd.s32       \r1, q3,  q7  // out1
836*c0909341SAndroid Build Coastguard Worker        vqsub.s32       q3,  q3,  q7  // t6
837*c0909341SAndroid Build Coastguard Worker        vqadd.s32       \r6, q5,  q11 // out6
838*c0909341SAndroid Build Coastguard Worker        vqsub.s32       q5,  q5,  q11 // t7
839*c0909341SAndroid Build Coastguard Worker
840*c0909341SAndroid Build Coastguard Worker        // Not clipping the output registers, as they will be downshifted and
841*c0909341SAndroid Build Coastguard Worker        // narrowed afterwards anyway.
842*c0909341SAndroid Build Coastguard Worker.irp r, q2, q4, q3, q5
843*c0909341SAndroid Build Coastguard Worker        vmin.s32        \r,  \r,  q12
844*c0909341SAndroid Build Coastguard Worker.endr
845*c0909341SAndroid Build Coastguard Worker.irp r, q2, q4, q3, q5
846*c0909341SAndroid Build Coastguard Worker        vmax.s32        \r,  \r,  q10
847*c0909341SAndroid Build Coastguard Worker.endr
848*c0909341SAndroid Build Coastguard Worker
        // out7 and out1 are produced with inverted sign (saturating negate).
849*c0909341SAndroid Build Coastguard Worker        vqneg.s32       \r7, \r7      // out7
850*c0909341SAndroid Build Coastguard Worker        vqneg.s32       \r1, \r1      // out1
851*c0909341SAndroid Build Coastguard Worker
        // Final stage: sum/difference of (t2,t3) and (t6,t7), both scaled by
        // the single coefficient d0[0], rounded by 12 bits. q10 and q12 stop
        // holding the clip constants here. out3 and out5 go through q2/q3 so
        // that they can be negated into their destination registers last.
852*c0909341SAndroid Build Coastguard Worker        vmul_vmla       q10, q2,  q4,  d0[0], d0[0] // -> out3 (q11 or q12)
853*c0909341SAndroid Build Coastguard Worker        vmul_vmls       q6,  q2,  q4,  d0[0], d0[0] // -> out4 (q12 or q11)
854*c0909341SAndroid Build Coastguard Worker        vmul_vmls       q12, q3,  q5,  d0[0], d0[0] // -> out5 (q13 or q10)
855*c0909341SAndroid Build Coastguard Worker        vrshr.s32       q2,  q10, #12 // out3
856*c0909341SAndroid Build Coastguard Worker        vmul_vmla       q10, q3,  q5,  d0[0], d0[0] // -> out2 (q10 or q13)
857*c0909341SAndroid Build Coastguard Worker        vrshr.s32       q3,  q12, #12 // out5
858*c0909341SAndroid Build Coastguard Worker        vrshr.s32       \r2, q10, #12 // out2 (q10 or q13)
859*c0909341SAndroid Build Coastguard Worker        vrshr.s32       \r4, q6,  #12 // out4 (q12 or q11)
860*c0909341SAndroid Build Coastguard Worker
861*c0909341SAndroid Build Coastguard Worker        vqneg.s32       \r3, q2       // out3
862*c0909341SAndroid Build Coastguard Worker        vqneg.s32       \r5, q3       // out5
863*c0909341SAndroid Build Coastguard Worker.endm
864*c0909341SAndroid Build Coastguard Worker
// inv_adst_4s_x8_neon
// Expands iadst_4s_x8 with the outputs in natural order: out0 goes to q8,
// out1 to q9, ... out7 to q15. Inputs are taken from q8-q15.
// Clobbers r12 and q0-q7 (see iadst_4s_x8); returns with bx lr.
865*c0909341SAndroid Build Coastguard Workerfunction inv_adst_4s_x8_neon
866*c0909341SAndroid Build Coastguard Worker        iadst_4s_x8     q8,  q9,  q10, q11, q12, q13, q14, q15
867*c0909341SAndroid Build Coastguard Worker        bx              lr
868*c0909341SAndroid Build Coastguard Workerendfunc
869*c0909341SAndroid Build Coastguard Worker
// inv_flipadst_4s_x8_neon
// Same computation as inv_adst_4s_x8_neon, but the destination registers
// are passed in reverse order, so out0 lands in q15 and out7 in q8.
// Inputs are still taken from q8-q15 in their normal order, because the
// macro reads its inputs from fixed registers.
// Clobbers r12 and q0-q7 (see iadst_4s_x8); returns with bx lr.
870*c0909341SAndroid Build Coastguard Workerfunction inv_flipadst_4s_x8_neon
871*c0909341SAndroid Build Coastguard Worker        iadst_4s_x8     q15, q14, q13, q12, q11, q10, q9,  q8
872*c0909341SAndroid Build Coastguard Worker        bx              lr
873*c0909341SAndroid Build Coastguard Workerendfunc
874*c0909341SAndroid Build Coastguard Worker
// inv_identity_4s_x8_neon
// Doubles every signed 32-bit lane of q8-q15 in place using a saturating
// left shift by one bit. No other registers are touched; returns with
// bx lr.
875*c0909341SAndroid Build Coastguard Workerfunction inv_identity_4s_x8_neon
876*c0909341SAndroid Build Coastguard Worker        vqshl.s32       q8,  q8,  #1
877*c0909341SAndroid Build Coastguard Worker        vqshl.s32       q9,  q9,  #1
878*c0909341SAndroid Build Coastguard Worker        vqshl.s32       q10, q10, #1
879*c0909341SAndroid Build Coastguard Worker        vqshl.s32       q11, q11, #1
880*c0909341SAndroid Build Coastguard Worker        vqshl.s32       q12, q12, #1
881*c0909341SAndroid Build Coastguard Worker        vqshl.s32       q13, q13, #1
882*c0909341SAndroid Build Coastguard Worker        vqshl.s32       q14, q14, #1
883*c0909341SAndroid Build Coastguard Worker        vqshl.s32       q15, q15, #1
884*c0909341SAndroid Build Coastguard Worker        bx              lr
885*c0909341SAndroid Build Coastguard Workerendfunc
886*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_8x8_neon
// Shared body for the 8x8 entry points generated by def_fn_8x8. It is
// reached with a plain branch after the entry point has pushed
// {r4-r5,r7,r10,lr} and {q4-q7}, so the epilogue here pops that frame and
// returns to the original caller.
//
// Register contract on entry (as set up by def_fn_8x8):
//   r2  = pointer to the 32-bit coefficient buffer; it is read and zeroed
//   r3  = value compared against r10 (presumably the eob - confirm)
//   r4  = first-pass function, called on 32-bit lanes in q8-q15
//   r5  = second-pass function, called on 16-bit data in q8-q15
//   r10 = threshold below which the second group of coefficients is
//         treated as all zero
//   r0  = handed to load_add_store_8x8 (presumably the destination
//         pointer - confirm against that macro)
// r7 is set to the 32 byte stride used to walk the coefficient buffer.
887*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_8x8_neon
888*c0909341SAndroid Build Coastguard Worker        vmov.i32        q0,  #0
889*c0909341SAndroid Build Coastguard Worker        mov             r7,  #8*4
        // Load 8 vectors of 4 coefficients, 32 bytes apart, clearing each
        // location right after it has been read.
890*c0909341SAndroid Build Coastguard Worker.irp i, q8, q9, q10, q11, q12, q13, q14, q15
891*c0909341SAndroid Build Coastguard Worker        vld1.32         {\i},  [r2, :128]
892*c0909341SAndroid Build Coastguard Worker        vst1.32         {q0},  [r2, :128], r7
893*c0909341SAndroid Build Coastguard Worker.endr
894*c0909341SAndroid Build Coastguard Worker
        // First pass on the first group of coefficients.
895*c0909341SAndroid Build Coastguard Worker        blx             r4
896*c0909341SAndroid Build Coastguard Worker
        // Round by 1 bit and narrow to saturated 16 bit, packing q8..q15
        // into q8-q11 as the pairs (q8,q12), (q9,q13), (q10,q14), (q11,q15).
897*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d16, q8,  #1
898*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d17, q12, #1
899*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d18, q9,  #1
900*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d19, q13, #1
901*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d20, q10, #1
902*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d21, q14, #1
903*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d22, q11, #1
904*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d23, q15, #1
905*c0909341SAndroid Build Coastguard Worker
        // The flags set by this compare are consumed by the blt after the
        // transpose macro, which therefore must leave the flags untouched.
906*c0909341SAndroid Build Coastguard Worker        cmp             r3,  r10
907*c0909341SAndroid Build Coastguard Worker        transpose_4x8h  q8,  q9,  q10, q11
908*c0909341SAndroid Build Coastguard Worker
        // r3 < r10: skip the second group and use zeros instead.
909*c0909341SAndroid Build Coastguard Worker        blt             1f
910*c0909341SAndroid Build Coastguard Worker
        // Rewind r2 by the 8 strides consumed above and keep the first
        // result on the stack while q8-q15 are reused.
911*c0909341SAndroid Build Coastguard Worker        sub             r2,  r2,  r7, lsl #3
912*c0909341SAndroid Build Coastguard Worker        vpush           {q8-q11}
913*c0909341SAndroid Build Coastguard Worker
        // The second group starts 16 bytes (4 coefficients) further in.
        // q0 is set to zero again before it is used for clearing.
914*c0909341SAndroid Build Coastguard Worker        add             r2,  r2,  #16
915*c0909341SAndroid Build Coastguard Worker        vmov.i32        q0,  #0
916*c0909341SAndroid Build Coastguard Worker.irp i, q8, q9, q10, q11, q12, q13, q14, q15
917*c0909341SAndroid Build Coastguard Worker        vld1.32         {\i},  [r2, :128]
918*c0909341SAndroid Build Coastguard Worker        vst1.32         {q0},  [r2, :128], r7
919*c0909341SAndroid Build Coastguard Worker.endr
920*c0909341SAndroid Build Coastguard Worker
921*c0909341SAndroid Build Coastguard Worker        blx             r4
922*c0909341SAndroid Build Coastguard Worker
        // Same narrowing as above, but packed into q12-q15 (d24-d31).
        // The destinations alias the sources q12-q15, so the stores run
        // from d31 downwards: each of q12-q15 is read no later than the
        // instruction that first overwrites one of its halves.
923*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d31, q15, #1
924*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d30, q11, #1
925*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d29, q14, #1
926*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d28, q10, #1
927*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d27, q13, #1
928*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d26, q9,  #1
929*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d25, q12, #1
930*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d24, q8,  #1
        // Bring back the first result saved before the second pass call.
931*c0909341SAndroid Build Coastguard Worker        vpop            {q8-q11}
932*c0909341SAndroid Build Coastguard Worker
933*c0909341SAndroid Build Coastguard Worker        transpose_4x8h  q12, q13, q14, q15
934*c0909341SAndroid Build Coastguard Worker
935*c0909341SAndroid Build Coastguard Worker        b               2f
936*c0909341SAndroid Build Coastguard Worker
937*c0909341SAndroid Build Coastguard Worker1:
        // Second group skipped: its 16-bit result is all zeros.
938*c0909341SAndroid Build Coastguard Worker        vmov.i16        q12, #0
939*c0909341SAndroid Build Coastguard Worker        vmov.i16        q13, #0
940*c0909341SAndroid Build Coastguard Worker        vmov.i16        q14, #0
941*c0909341SAndroid Build Coastguard Worker        vmov.i16        q15, #0
942*c0909341SAndroid Build Coastguard Worker
943*c0909341SAndroid Build Coastguard Worker2:
        // Second pass on the 16-bit data in q8-q15.
944*c0909341SAndroid Build Coastguard Worker        blx             r5
945*c0909341SAndroid Build Coastguard Worker
        // load_add_store_8x8 is defined elsewhere; after it, undo the
        // vpush/push done by the def_fn_8x8 entry point and return.
946*c0909341SAndroid Build Coastguard Worker        load_add_store_8x8 r0, r7
947*c0909341SAndroid Build Coastguard Worker        vpop            {q4-q7}
948*c0909341SAndroid Build Coastguard Worker        pop             {r4-r5,r7,r10,pc}
949*c0909341SAndroid Build Coastguard Workerendfunc
950*c0909341SAndroid Build Coastguard Worker
// def_fn_8x8 txfm1, txfm2, eob_half
// Emits the exported entry point
//   inv_txfm_add_<txfm1>_<txfm2>_8x8_16bpc_neon
// which only sets up registers and then branches to the shared
// inv_txfm_add_8x8_neon body:
//   txfm1    selects the first-pass routine inv_<txfm1>_4s_x8_neon (r4)
//   txfm2    selects the second-pass routine inv_<txfm2>_8h_x8_neon (r5),
//            referenced through X() as an external symbol
//   eob_half is placed in r10; the shared body compares r3 against it to
//            decide whether the second group of coefficients is processed
// For the dct_dct combination the idct_dc macro (defined elsewhere) is
// expanded first, ahead of the register saves.
// The push/vpush here is undone by the epilogue of inv_txfm_add_8x8_neon.
951*c0909341SAndroid Build Coastguard Worker.macro def_fn_8x8 txfm1, txfm2, eob_half
952*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_\txfm1\()_\txfm2\()_8x8_16bpc_neon, export=1
953*c0909341SAndroid Build Coastguard Worker.ifc \txfm1\()_\txfm2, dct_dct
954*c0909341SAndroid Build Coastguard Worker        idct_dc         8,   8,   1
955*c0909341SAndroid Build Coastguard Worker.endif
956*c0909341SAndroid Build Coastguard Worker        push            {r4-r5,r7,r10,lr}
957*c0909341SAndroid Build Coastguard Worker        vpush           {q4-q7}
958*c0909341SAndroid Build Coastguard Worker        mov             r10, #\eob_half
959*c0909341SAndroid Build Coastguard Worker        movrel_local    r4,  inv_\txfm1\()_4s_x8_neon
960*c0909341SAndroid Build Coastguard Worker        movrel          r5,  X(inv_\txfm2\()_8h_x8_neon)
961*c0909341SAndroid Build Coastguard Worker        b               inv_txfm_add_8x8_neon
962*c0909341SAndroid Build Coastguard Workerendfunc
963*c0909341SAndroid Build Coastguard Worker.endm
964*c0909341SAndroid Build Coastguard Worker
// Instantiate the 8x8 entry points: one per (first pass, second pass)
// combination of dct, adst, flipadst and identity. The third argument is
// the eob_half threshold loaded into r10 by def_fn_8x8.
965*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 dct, dct, 10
966*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 identity, identity, 10
967*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 dct, adst, 10
968*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 dct, flipadst, 10
969*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 dct, identity, 4
970*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 adst, dct, 10
971*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 adst, adst, 10
972*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 adst, flipadst, 10
973*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 flipadst, dct, 10
974*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 flipadst, adst, 10
975*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 flipadst, flipadst, 10
976*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 identity, dct, 4
977*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 adst, identity, 4
978*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 flipadst, identity, 4
979*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 identity, adst, 4
980*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 identity, flipadst, 4
981*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_8x4_neon
// Shared body for the 8x4 entry points generated by def_fn_48. It is
// reached with a plain branch after the entry point has pushed
// {r4-r5,r7,r10,lr} and {q4-q7}; the epilogue here pops that frame.
//
// On entry: r2 = coefficient buffer (128 bytes are read and zeroed),
// r4 = first-pass function, r5 = second-pass function, r0 = argument for
// load_add_store_8x4 (presumably the destination pointer - confirm).
// Unlike the 8x8 and 4x8 bodies there is no comparison of r3 against r10:
// all coefficients are always processed in a single first-pass call.
// r7 is not initialised in this function before it is handed to
// load_add_store_8x4 (presumably that macro only uses it as scratch -
// confirm against its definition).
982*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_8x4_neon
        // Constant for scale_input (defined elsewhere). 2896/4096 is roughly
        // 1/sqrt(2), so this is presumably the usual scaling for rectangular
        // transforms - confirm against scale_input.
983*c0909341SAndroid Build Coastguard Worker        mov_const       r12, 2896*8*(1<<16)
984*c0909341SAndroid Build Coastguard Worker        vmov.i32        q0,  #0
985*c0909341SAndroid Build Coastguard Worker        vmov.i32        q1,  #0
        // Read the buffer contiguously, 32 bytes at a time, into q8-q15 and
        // clear it behind the reads (the store form with writeback advances
        // r2). The constant is broadcast into d4 in between.
986*c0909341SAndroid Build Coastguard Worker        vld1.16         {q8,  q9},  [r2, :128]
987*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0,  q1},  [r2, :128]!
988*c0909341SAndroid Build Coastguard Worker        vdup.32         d4,  r12
989*c0909341SAndroid Build Coastguard Worker        vld1.16         {q10, q11}, [r2, :128]
990*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0,  q1},  [r2, :128]!
991*c0909341SAndroid Build Coastguard Worker        vld1.16         {q12, q13}, [r2, :128]
992*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0,  q1},  [r2, :128]!
993*c0909341SAndroid Build Coastguard Worker        vld1.16         {q14, q15}, [r2, :128]
994*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0,  q1},  [r2, :128]!
995*c0909341SAndroid Build Coastguard Worker
996*c0909341SAndroid Build Coastguard Worker        scale_input     d4[0], q8,  q9, q10, q11, q12, q13, q14, q15
997*c0909341SAndroid Build Coastguard Worker
        // First pass on the 32-bit lanes in q8-q15.
998*c0909341SAndroid Build Coastguard Worker        blx             r4
999*c0909341SAndroid Build Coastguard Worker
        // Narrow to saturated 16 bit without any shift, packing the eight
        // vectors into d16-d23 (q8-q11).
1000*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d16, q8
1001*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d17, q9
1002*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d18, q10
1003*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d19, q11
1004*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d20, q12
1005*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d21, q13
1006*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d22, q14
1007*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d23, q15
        // Two 4x4 transposes followed by swaps of 64-bit halves to arrange
        // the data for the second pass (the exact resulting layout depends
        // on transpose_4x4h, which is defined elsewhere).
1008*c0909341SAndroid Build Coastguard Worker        transpose_4x4h  q8,  q9,  d16, d17, d18, d19
1009*c0909341SAndroid Build Coastguard Worker        transpose_4x4h  q10, q11, d20, d21, d22, d23
1010*c0909341SAndroid Build Coastguard Worker        vswp            d17, d20
1011*c0909341SAndroid Build Coastguard Worker        vswp            d19, d21
1012*c0909341SAndroid Build Coastguard Worker        vswp            d18, d20
1013*c0909341SAndroid Build Coastguard Worker        vswp            d21, d22
1014*c0909341SAndroid Build Coastguard Worker
        // Second pass on the 16-bit data.
1015*c0909341SAndroid Build Coastguard Worker        blx             r5
1016*c0909341SAndroid Build Coastguard Worker
        // load_add_store_8x4 is defined elsewhere; after it, undo the
        // vpush/push done by the def_fn_48 entry point and return.
1017*c0909341SAndroid Build Coastguard Worker        load_add_store_8x4 r0, r7
1018*c0909341SAndroid Build Coastguard Worker        vpop            {q4-q7}
1019*c0909341SAndroid Build Coastguard Worker        pop             {r4-r5,r7,r10,pc}
1020*c0909341SAndroid Build Coastguard Workerendfunc
1021*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_4x8_neon
// Shared body for the 4x8 entry points generated by def_fn_48. It is
// reached with a plain branch after the entry point has pushed
// {r4-r5,r7,r10,lr} and {q4-q7}; the epilogue here pops that frame.
//
// On entry: r2 = coefficient buffer (read and zeroed), r3 = value compared
// against r10 (presumably the eob - confirm), r4 = first-pass function,
// r5 = second-pass function, r10 = threshold set by def_fn_48,
// r0 = argument for load_add_store_4x8 (presumably the destination
// pointer - confirm).
//
// The coefficients are handled as two groups of four vectors, read with a
// 32 byte stride. The group at byte offset 16 is processed first, and only
// when r3 >= r10; its narrowed result is parked in q12-q13 while the group
// at offset 0 is processed, and is moved to q10-q11 before the second pass.
1022*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_4x8_neon
1023*c0909341SAndroid Build Coastguard Worker        mov_const       r12, 2896*8*(1<<16)
1024*c0909341SAndroid Build Coastguard Worker        vmov.i32        q0,  #0
        // The mov below has no s suffix, so the flags from this compare are
        // still intact when the blt is reached.
1025*c0909341SAndroid Build Coastguard Worker        cmp             r3,  r10
1026*c0909341SAndroid Build Coastguard Worker        mov             r7,  #32
1027*c0909341SAndroid Build Coastguard Worker        blt             1f
1028*c0909341SAndroid Build Coastguard Worker
        // Group at offset 16: load four vectors, clearing the buffer behind
        // the reads, then scale them with the constant broadcast into d2.
1029*c0909341SAndroid Build Coastguard Worker        add             r2,  r2,  #16
1030*c0909341SAndroid Build Coastguard Worker        vdup.32         d2,  r12
1031*c0909341SAndroid Build Coastguard Worker.irp i, q8, q9, q10, q11
1032*c0909341SAndroid Build Coastguard Worker        vld1.32         {\i},  [r2, :128]
1033*c0909341SAndroid Build Coastguard Worker        vst1.32         {q0},  [r2, :128], r7
1034*c0909341SAndroid Build Coastguard Worker.endr
1035*c0909341SAndroid Build Coastguard Worker
1036*c0909341SAndroid Build Coastguard Worker        scale_input     d2[0], q8,  q9, q10, q11
        // Rewind r2 by the four strides consumed above.
1037*c0909341SAndroid Build Coastguard Worker        sub             r2,  r2,  r7, lsl #2
1038*c0909341SAndroid Build Coastguard Worker
1039*c0909341SAndroid Build Coastguard Worker        blx             r4
1040*c0909341SAndroid Build Coastguard Worker
        // Undo the 16 byte offset so r2 points at the start of the buffer.
1041*c0909341SAndroid Build Coastguard Worker        sub             r2,  r2,  #16
1042*c0909341SAndroid Build Coastguard Worker
        // Narrow to saturated 16 bit into q12-q13 and transpose.
1043*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d24, q8
1044*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d25, q9
1045*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d26, q10
1046*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d27, q11
1047*c0909341SAndroid Build Coastguard Worker        transpose_4x4h  q12, q13, d24, d25, d26, d27
1048*c0909341SAndroid Build Coastguard Worker
1049*c0909341SAndroid Build Coastguard Worker        b               2f
1050*c0909341SAndroid Build Coastguard Worker
1051*c0909341SAndroid Build Coastguard Worker1:
        // r3 < r10: the group at offset 16 is skipped and treated as zero.
1052*c0909341SAndroid Build Coastguard Worker        vmov.i16        q12, #0
1053*c0909341SAndroid Build Coastguard Worker        vmov.i16        q13, #0
1054*c0909341SAndroid Build Coastguard Worker
1055*c0909341SAndroid Build Coastguard Worker2:
        // Both paths meet here. The constant, the zero vector and d2 are
        // set up again: on the skip path d2 was never written, and on the
        // other path the first-pass call sits between the earlier setup and
        // this point.
1056*c0909341SAndroid Build Coastguard Worker        mov_const       r12, 2896*8*(1<<16)
1057*c0909341SAndroid Build Coastguard Worker        vmov.i32        q0,  #0
1058*c0909341SAndroid Build Coastguard Worker        vdup.32         d2,  r12
        // Group at offset 0: load, clear, scale and run the first pass.
1059*c0909341SAndroid Build Coastguard Worker.irp i, q8, q9, q10, q11
1060*c0909341SAndroid Build Coastguard Worker        vld1.32         {\i},  [r2, :128]
1061*c0909341SAndroid Build Coastguard Worker        vst1.32         {q0},  [r2, :128], r7
1062*c0909341SAndroid Build Coastguard Worker.endr
1063*c0909341SAndroid Build Coastguard Worker        scale_input     d2[0], q8,  q9, q10, q11
1064*c0909341SAndroid Build Coastguard Worker        blx             r4
1065*c0909341SAndroid Build Coastguard Worker
        // Narrow to saturated 16 bit into q8-q9 and transpose.
1066*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d16, q8
1067*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d17, q9
1068*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d18, q10
1069*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d19, q11
1070*c0909341SAndroid Build Coastguard Worker        transpose_4x4h  q8,  q9,  d16, d17, d18, d19
1071*c0909341SAndroid Build Coastguard Worker
        // Place the parked result of the other group (or zeros) in q10-q11.
1072*c0909341SAndroid Build Coastguard Worker        vmov            q10, q12
1073*c0909341SAndroid Build Coastguard Worker        vmov            q11, q13
1074*c0909341SAndroid Build Coastguard Worker
        // Second pass on the 16-bit data in q8-q11.
1075*c0909341SAndroid Build Coastguard Worker        blx             r5
1076*c0909341SAndroid Build Coastguard Worker
        // load_add_store_4x8 is defined elsewhere; after it, undo the
        // vpush/push done by the def_fn_48 entry point and return.
1077*c0909341SAndroid Build Coastguard Worker        load_add_store_4x8 r0, r7
1078*c0909341SAndroid Build Coastguard Worker        vpop            {q4-q7}
1079*c0909341SAndroid Build Coastguard Worker        pop             {r4-r5,r7,r10,pc}
1080*c0909341SAndroid Build Coastguard Workerendfunc
1081*c0909341SAndroid Build Coastguard Worker
// def_fn_48 w, h, txfm1, txfm2, eob_half
// Emits the exported entry point
//   inv_txfm_add_<txfm1>_<txfm2>_<w>x<h>_16bpc_neon
// which sets up registers and branches to inv_txfm_add_<w>x<h>_neon:
//   r4  = first-pass routine inv_<txfm1>_4s_x<w>_neon (local symbol)
//   r5  = second-pass routine inv_<txfm2>_<w>h_x<h>_neon, referenced
//         through X() as an external symbol
//   r10 = eob_half, loaded only when w == 4; the 8x4 body does not read
//         r10, so eob_half is unused for w == 8
// For the dct_dct combination the idct_dc macro (defined elsewhere) is
// expanded first, ahead of the register saves.
// The push/vpush here is undone by the epilogue of the shared body.
1082*c0909341SAndroid Build Coastguard Worker.macro def_fn_48 w, h, txfm1, txfm2, eob_half
1083*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_\txfm1\()_\txfm2\()_\w\()x\h\()_16bpc_neon, export=1
1084*c0909341SAndroid Build Coastguard Worker.ifc \txfm1\()_\txfm2, dct_dct
1085*c0909341SAndroid Build Coastguard Worker        idct_dc         \w,  \h,  0
1086*c0909341SAndroid Build Coastguard Worker.endif
1087*c0909341SAndroid Build Coastguard Worker        push            {r4-r5,r7,r10,lr}
1088*c0909341SAndroid Build Coastguard Worker        vpush           {q4-q7}
1089*c0909341SAndroid Build Coastguard Worker        movrel_local    r4,  inv_\txfm1\()_4s_x\w\()_neon
1090*c0909341SAndroid Build Coastguard Worker.if \w == 4
1091*c0909341SAndroid Build Coastguard Worker        mov             r10, #\eob_half
1092*c0909341SAndroid Build Coastguard Worker.endif
1093*c0909341SAndroid Build Coastguard Worker        movrel          r5,  X(inv_\txfm2\()_\w\()h_x\h\()_neon)
1094*c0909341SAndroid Build Coastguard Worker        b               inv_txfm_add_\w\()x\h\()_neon
1095*c0909341SAndroid Build Coastguard Workerendfunc
1096*c0909341SAndroid Build Coastguard Worker.endm
1097*c0909341SAndroid Build Coastguard Worker
// def_fns_48 w, h
// Expands def_fn_48 once for each of the 16 (first pass, second pass)
// combinations of dct, adst, flipadst and identity at the given block size.
// The last argument of each line is the eob_half threshold, which
// def_fn_48 only uses when w == 4.
1098*c0909341SAndroid Build Coastguard Worker.macro def_fns_48 w, h
1099*c0909341SAndroid Build Coastguard Workerdef_fn_48 \w, \h, dct, dct, 13
1100*c0909341SAndroid Build Coastguard Workerdef_fn_48 \w, \h, identity, identity, 13
1101*c0909341SAndroid Build Coastguard Workerdef_fn_48 \w, \h, dct, adst, 13
1102*c0909341SAndroid Build Coastguard Workerdef_fn_48 \w, \h, dct, flipadst, 13
1103*c0909341SAndroid Build Coastguard Workerdef_fn_48 \w, \h, dct, identity, 4
1104*c0909341SAndroid Build Coastguard Workerdef_fn_48 \w, \h, adst, dct, 13
1105*c0909341SAndroid Build Coastguard Workerdef_fn_48 \w, \h, adst, adst, 13
1106*c0909341SAndroid Build Coastguard Workerdef_fn_48 \w, \h, adst, flipadst, 13
1107*c0909341SAndroid Build Coastguard Workerdef_fn_48 \w, \h, flipadst, dct, 13
1108*c0909341SAndroid Build Coastguard Workerdef_fn_48 \w, \h, flipadst, adst, 13
1109*c0909341SAndroid Build Coastguard Workerdef_fn_48 \w, \h, flipadst, flipadst, 13
1110*c0909341SAndroid Build Coastguard Workerdef_fn_48 \w, \h, identity, dct, 16
1111*c0909341SAndroid Build Coastguard Workerdef_fn_48 \w, \h, adst, identity, 4
1112*c0909341SAndroid Build Coastguard Workerdef_fn_48 \w, \h, flipadst, identity, 4
1113*c0909341SAndroid Build Coastguard Workerdef_fn_48 \w, \h, identity, adst, 16
1114*c0909341SAndroid Build Coastguard Workerdef_fn_48 \w, \h, identity, flipadst, 16
1115*c0909341SAndroid Build Coastguard Worker.endm
1116*c0909341SAndroid Build Coastguard Worker
// Generate the full set of entry points for the 4x8 and 8x4 block sizes.
1117*c0909341SAndroid Build Coastguard Workerdef_fns_48 4, 8
1118*c0909341SAndroid Build Coastguard Workerdef_fns_48 8, 4
1119*c0909341SAndroid Build Coastguard Worker
1120*c0909341SAndroid Build Coastguard Workerfunction inv_dct_2s_x16_neon
1121*c0909341SAndroid Build Coastguard Worker        movrel_local    r12, idct_coeffs
1122*c0909341SAndroid Build Coastguard Worker        vld1.32         {q0, q1}, [r12, :128]!
1123*c0909341SAndroid Build Coastguard Worker
1124*c0909341SAndroid Build Coastguard Worker        idct_2s_x8      d16, d18, d20, d22, d24, d26, d28, d30
1125*c0909341SAndroid Build Coastguard Worker
1126*c0909341SAndroid Build Coastguard Worker        // idct_8 leaves the row_clip_max/min constants in d9 and d8
1127*c0909341SAndroid Build Coastguard Worker.irp r, d16, d18, d20, d22, d24, d26, d28, d30
1128*c0909341SAndroid Build Coastguard Worker        vmin.s32        \r,  \r,  d9
1129*c0909341SAndroid Build Coastguard Worker.endr
1130*c0909341SAndroid Build Coastguard Worker.irp r, d16, d18, d20, d22, d24, d26, d28, d30
1131*c0909341SAndroid Build Coastguard Worker        vmax.s32        \r,  \r,  d8
1132*c0909341SAndroid Build Coastguard Worker.endr
1133*c0909341SAndroid Build Coastguard Worker
1134*c0909341SAndroid Build Coastguard Worker        vld1.32         {q0, q1}, [r12, :128]
1135*c0909341SAndroid Build Coastguard Worker        sub             r12, r12, #32
1136*c0909341SAndroid Build Coastguard Worker
1137*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d4,  d17, d31, d0[0], d0[1]  // -> t8a
1138*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d5,  d17, d31, d0[1], d0[0]  // -> t15a
1139*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d6,  d25, d23, d1[0], d1[1]  // -> t9a
1140*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d17, d4,  #12                // t8a
1141*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d31, d5,  #12                // t15a
1142*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d4,  d25, d23, d1[1], d1[0]  // -> t14a
1143*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d5,  d21, d27, d2[0], d2[1]  // -> t10a
1144*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d23, d6,  #12                // t9a
1145*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d25, d4,  #12                // t14a
1146*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d6,  d21, d27, d2[1], d2[0]  // -> t13a
1147*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d4,  d29, d19, d3[0], d3[1]  // -> t11a
1148*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d21, d5,  #12                // t10a
1149*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d27, d6,  #12                // t13a
1150*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d5,  d29, d19, d3[1], d3[0]  // -> t12a
1151*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d19, d4,  #12                // t11a
1152*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d29, d5,  #12                // t12a
1153*c0909341SAndroid Build Coastguard Worker
1154*c0909341SAndroid Build Coastguard Worker        vld1.32         {q0}, [r12, :128]
1155*c0909341SAndroid Build Coastguard Worker
1156*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d4,  d17, d23  // t9
1157*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d17, d17, d23  // t8
1158*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d5,  d31, d25  // t14
1159*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d31, d31, d25  // t15
1160*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d23, d19, d21  // t10
1161*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d19, d19, d21  // t11
1162*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d25, d29, d27  // t12
1163*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d29, d29, d27  // t13
1164*c0909341SAndroid Build Coastguard Worker
1165*c0909341SAndroid Build Coastguard Worker.irp r, d4, d17, d5, d31, d23, d19, d25, d29
1166*c0909341SAndroid Build Coastguard Worker        vmin.s32        \r,  \r,  d9
1167*c0909341SAndroid Build Coastguard Worker.endr
1168*c0909341SAndroid Build Coastguard Worker.irp r, d4, d17, d5, d31, d23, d19, d25, d29
1169*c0909341SAndroid Build Coastguard Worker        vmax.s32        \r,  \r,  d8
1170*c0909341SAndroid Build Coastguard Worker.endr
1171*c0909341SAndroid Build Coastguard Worker
1172*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d6,  d5,  d4,  d1[0], d1[1]  // -> t9a
1173*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d7,  d5,  d4,  d1[1], d1[0]  // -> t14a
1174*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d21, d6,  #12                // t9a
1175*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d27, d7,  #12                // t14a
1176*c0909341SAndroid Build Coastguard Worker
1177*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d6,  d29, d23, d1[0], d1[1]  // -> t13a
1178*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d7,  d29, d23, d1[1], d1[0]  // -> t10a
1179*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d29, d6,  #12                // t13a
1180*c0909341SAndroid Build Coastguard Worker        vneg.s32        d7,  d7
1181*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d23, d7,  #12                // t10a
1182*c0909341SAndroid Build Coastguard Worker
1183*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d4,  d17, d19  // t11a
1184*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d17, d17, d19  // t8a
1185*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d5,  d31, d25  // t12a
1186*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d31, d31, d25  // t15a
1187*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d19, d21, d23  // t9
1188*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d21, d21, d23  // t10
1189*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d25, d27, d29  // t13
1190*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d27, d27, d29  // t14
1191*c0909341SAndroid Build Coastguard Worker
1192*c0909341SAndroid Build Coastguard Worker.irp r, d4, d17, d5, d31, d19, d21, d25, d27
1193*c0909341SAndroid Build Coastguard Worker        vmin.s32        \r,  \r,  d9
1194*c0909341SAndroid Build Coastguard Worker.endr
1195*c0909341SAndroid Build Coastguard Worker.irp r, d4, d17, d5, d31, d19, d21, d25, d27
1196*c0909341SAndroid Build Coastguard Worker        vmax.s32        \r,  \r,  d8
1197*c0909341SAndroid Build Coastguard Worker.endr
1198*c0909341SAndroid Build Coastguard Worker
1199*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d6,  d5,  d4,  d0[0], d0[0]  // -> t11
1200*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d7,  d5,  d4,  d0[0], d0[0]  // -> t12
1201*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d4,  d25, d21, d0[0], d0[0]  // -> t10a
1202*c0909341SAndroid Build Coastguard Worker
1203*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d6,  d6,  #12  // t11
1204*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d7,  d7,  #12  // t12
1205*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d5,  d25, d21, d0[0], d0[0]  // -> t13a
1206*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d4,  d4,  #12  // t10a
1207*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d5,  d5,  #12  // t13a
1208*c0909341SAndroid Build Coastguard Worker
1209*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d8,  d16, d31  // out0
1210*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d31, d16, d31  // out15
1211*c0909341SAndroid Build Coastguard Worker        vmov            d16, d8
1212*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d23, d30, d17  // out7
1213*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d9,  d30, d17  // out8
1214*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d17, d18, d27  // out1
1215*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d30, d18, d27  // out14
1216*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d18, d20, d5   // out2
1217*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d29, d20, d5   // out13
1218*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d5,  d28, d19  // out6
1219*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d25, d28, d19  // out9
1220*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d19, d22, d7   // out3
1221*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d28, d22, d7   // out12
1222*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d20, d24, d6   // out4
1223*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d27, d24, d6   // out11
1224*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d21, d26, d4   // out5
1225*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d26, d26, d4   // out10
1226*c0909341SAndroid Build Coastguard Worker        vmov            d24, d9
1227*c0909341SAndroid Build Coastguard Worker        vmov            d22, d5
1228*c0909341SAndroid Build Coastguard Worker
1229*c0909341SAndroid Build Coastguard Worker        bx              lr
1230*c0909341SAndroid Build Coastguard Workerendfunc
1231*c0909341SAndroid Build Coastguard Worker
// iadst_16: 16-point inverse ADST over d16-d31, processed with .s32 operations
// on d registers (two 32-bit lanes per register).
//   o0-o15: d registers that receive out0-out15. The two expansions that follow
//           this macro pass d16-d31 in ascending or in descending order.
// Reads iadst16_coeffs (two 32-byte loads) and then idct_coeffs through r12.
// Clobbers r12, d0-d8, d10 and d11 (d2/d3 are reused as temporaries once the
// coefficients they held are no longer needed).
// Intermediate values are clamped to [~0x1ffff, 0x1ffff] (held in d10/d11).
// NOTE(review): vmul_vmla/vmul_vmls are defined outside this chunk; presumably
// d = s0*c0 + s1*c1 and d = s0*c0 - s1*c1 respectively - confirm.
1232*c0909341SAndroid Build Coastguard Worker.macro iadst_16 o0, o1, o2, o3, o4, o5, o6, o7, o8, o9, o10, o11, o12, o13, o14, o15
1233*c0909341SAndroid Build Coastguard Worker        movrel_local    r12, iadst16_coeffs
1234*c0909341SAndroid Build Coastguard Worker        vld1.32         {q0, q1}, [r12, :128]!
1235*c0909341SAndroid Build Coastguard Worker
        // First stage: pairs (d31,d16), (d29,d18), (d27,d20), (d25,d22) are
        // combined with the first eight coefficients; every product is
        // rounded down by 12 bits.
1236*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d4,  d31, d16, d0[0], d0[1] // -> t0
1237*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d6,  d31, d16, d0[1], d0[0] // -> t1
1238*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d8,  d29, d18, d1[0], d1[1] // -> t2
1239*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d16, d4,  #12               // t0
1240*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d31, d6,  #12               // t1
1241*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d4,  d29, d18, d1[1], d1[0] // -> t3
1242*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d6,  d27, d20, d2[0], d2[1] // -> t4
1243*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d18, d8,  #12               // t2
1244*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d29, d4,  #12               // t3
1245*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d8,  d27, d20, d2[1], d2[0] // -> t5
1246*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d4,  d25, d22, d3[0], d3[1] // -> t6
1247*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d20, d6,  #12               // t4
1248*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d27, d8,  #12               // t5
1249*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d6,  d25, d22, d3[1], d3[0] // -> t7
        // Load the next 32 bytes of iadst16_coeffs (r12 was post-incremented by
        // the first load), then repoint r12 at idct_coeffs for the later load.
1250*c0909341SAndroid Build Coastguard Worker        vld1.32         {q0, q1}, [r12, :128]
1251*c0909341SAndroid Build Coastguard Worker        movrel_local    r12, idct_coeffs
1252*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d8,  d23, d24, d0[0], d0[1] // -> t8
1253*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d22, d4,  #12               // t6
1254*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d25, d6,  #12               // t7
1255*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d4,  d23, d24, d0[1], d0[0] // -> t9
1256*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d6,  d21, d26, d1[0], d1[1] // -> t10
1257*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d23, d8,  #12               // t8
1258*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d24, d4,  #12               // t9
1259*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d8,  d21, d26, d1[1], d1[0] // -> t11
1260*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d4,  d19, d28, d2[0], d2[1] // -> t12
1261*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d21, d6,  #12               // t10
1262*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d26, d8,  #12               // t11
1263*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d6,  d19, d28, d2[1], d2[0] // -> t13
1264*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d8,  d17, d30, d3[0], d3[1] // -> t14
1265*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d19, d4,  #12               // t12
1266*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d28, d6,  #12               // t13
1267*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d4,  d17, d30, d3[1], d3[0] // -> t15
1268*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d17, d8,  #12               // t14
1269*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d30, d4,  #12               // t15
1270*c0909341SAndroid Build Coastguard Worker
        // From here on q0/q1 hold the first 32 bytes of idct_coeffs.
1271*c0909341SAndroid Build Coastguard Worker        vld1.32         {q0, q1}, [r12, :128]
1272*c0909341SAndroid Build Coastguard Worker
1273*c0909341SAndroid Build Coastguard Worker        vmov.i32        d11, #0x1ffff // row_clip_max = ~(~bdmax << 7), 0x1ffff
1274*c0909341SAndroid Build Coastguard Worker        vmvn.i32        d10, #0x1ffff // row_clip_min = (~bdmax << 7), 0xfffe0000
1275*c0909341SAndroid Build Coastguard Worker
        // Saturating butterflies between t0-t7 and t8-t15.
1276*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d5,  d16, d23 // t8a
1277*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d16, d16, d23 // t0a
1278*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d7,  d31, d24 // t9a
1279*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d31, d31, d24 // t1a
1280*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d23, d18, d21 // t2a
1281*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d18, d18, d21 // t10a
1282*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d24, d29, d26 // t3a
1283*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d29, d29, d26 // t11a
1284*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d21, d20, d19 // t4a
1285*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d20, d20, d19 // t12a
1286*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d26, d27, d28 // t5a
1287*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d27, d27, d28 // t13a
1288*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d19, d22, d17 // t6a
1289*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d22, d22, d17 // t14a
1290*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d28, d25, d30 // t7a
1291*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d25, d25, d30 // t15a
1292*c0909341SAndroid Build Coastguard Worker
        // Clamp all sixteen butterfly results to [d10, d11].
1293*c0909341SAndroid Build Coastguard Worker.irp r, d5, d16, d7, d31, d23, d18, d24, d29, d21, d20, d26, d27, d19, d22, d28, d25
1294*c0909341SAndroid Build Coastguard Worker        vmin.s32        \r,  \r,  d11
1295*c0909341SAndroid Build Coastguard Worker.endr
1296*c0909341SAndroid Build Coastguard Worker.irp r, d5, d16, d7, d31, d23, d18, d24, d29, d21, d20, d26, d27, d19, d22, d28, d25
1297*c0909341SAndroid Build Coastguard Worker        vmax.s32        \r,  \r,  d10
1298*c0909341SAndroid Build Coastguard Worker.endr
1299*c0909341SAndroid Build Coastguard Worker
        // Rotate the t8a-t15a half using coefficient lanes d2[0..1] and d3[0..1].
1300*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d4,  d5,  d7,  d2[1], d2[0] // -> t8
1301*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d6,  d5,  d7,  d2[0], d2[1] // -> t9
1302*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d8,  d18, d29, d3[1], d3[0] // -> t10
1303*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d17, d4,  #12               // t8
1304*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d30, d6,  #12               // t9
1305*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d4,  d18, d29, d3[0], d3[1] // -> t11
1306*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d6,  d27, d20, d2[1], d2[0] // -> t12
1307*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d18, d8,  #12               // t10
1308*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d29, d4,  #12               // t11
1309*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d8,  d27, d20, d2[0], d2[1] // -> t13
1310*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d4,  d25, d22, d3[1], d3[0] // -> t14
1311*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d27, d6,  #12               // t12
1312*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d20, d8,  #12               // t13
1313*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d6,  d25, d22, d3[0], d3[1] // -> t15
1314*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d25, d4,  #12               // t14
1315*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d22, d6,  #12               // t15
1316*c0909341SAndroid Build Coastguard Worker
        // The d2/d3 coefficient lanes are not read again below, so d2 and d3
        // are reused as temporaries from this point.
1317*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d2,  d16, d21 // t4
1318*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d16, d16, d21 // t0
1319*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d3,  d31, d26 // t5
1320*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d31, d31, d26 // t1
1321*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d21, d23, d19 // t2
1322*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d23, d23, d19 // t6
1323*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d26, d24, d28 // t3
1324*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d24, d24, d28 // t7
1325*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d19, d17, d27 // t8a
1326*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d17, d17, d27 // t12a
1327*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d28, d30, d20 // t9a
1328*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d30, d30, d20 // t13a
1329*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d27, d18, d25 // t10a
1330*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d18, d18, d25 // t14a
1331*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d20, d29, d22 // t11a
1332*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d29, d29, d22 // t15a
1333*c0909341SAndroid Build Coastguard Worker
        // Clamp all sixteen results of this butterfly stage as well.
1334*c0909341SAndroid Build Coastguard Worker.irp r, d2, d16, d3, d31, d21, d23, d26, d24, d19, d17, d28, d30, d27, d18, d20, d29
1335*c0909341SAndroid Build Coastguard Worker        vmin.s32        \r,  \r,  d11
1336*c0909341SAndroid Build Coastguard Worker.endr
1337*c0909341SAndroid Build Coastguard Worker.irp r, d2, d16, d3, d31, d21, d23, d26, d24, d19, d17, d28, d30, d27, d18, d20, d29
1338*c0909341SAndroid Build Coastguard Worker        vmax.s32        \r,  \r,  d10
1339*c0909341SAndroid Build Coastguard Worker.endr
1340*c0909341SAndroid Build Coastguard Worker
        // Rotations that only use coefficient lanes d1[0] and d1[1].
1341*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d4,  d2,  d3,  d1[1], d1[0] // -> t4a
1342*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d6,  d2,  d3,  d1[0], d1[1] // -> t5a
1343*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d8,  d24, d23, d1[1], d1[0] // -> t6a
1344*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d22, d4,  #12               // t4a
1345*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d25, d6,  #12               // t5a
1346*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d4,  d24, d23, d1[0], d1[1] // -> t7a
1347*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d6,  d17, d30, d1[1], d1[0] // -> t12
1348*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d24, d8,  #12               // t6a
1349*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d23, d4,  #12               // t7a
1350*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d8,  d17, d30, d1[0], d1[1] // -> t13
1351*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d4,  d29, d18, d1[1], d1[0] // -> t14
1352*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d17, d6,  #12               // t12
1353*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d6,  d29, d18, d1[0], d1[1] // -> t15
1354*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d29, d8,  #12               // t13
1355*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d30, d4,  #12               // t14
1356*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d18, d6,  #12               // t15
1357*c0909341SAndroid Build Coastguard Worker
        // Final butterflies. When o0 is not d16 (the descending-order expansion
        // passes d31), o0 is still needed as an input to the next two
        // instructions, so out0 is built in d4 and moved into place afterwards.
1358*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d2,  d16, d21 // t2a
1359*c0909341SAndroid Build Coastguard Worker.ifc \o0, d16
1360*c0909341SAndroid Build Coastguard Worker        vqadd.s32       \o0, d16, d21 // out0
1361*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d21, d31, d26 // t3a
1362*c0909341SAndroid Build Coastguard Worker        vqadd.s32       \o15,d31, d26 // out15
1363*c0909341SAndroid Build Coastguard Worker.else
1364*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d4,  d16, d21 // out0
1365*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d21, d31, d26 // t3a
1366*c0909341SAndroid Build Coastguard Worker        vqadd.s32       \o15,d31, d26 // out15
1367*c0909341SAndroid Build Coastguard Worker        vmov            \o0, d4
1368*c0909341SAndroid Build Coastguard Worker.endif
1369*c0909341SAndroid Build Coastguard Worker
1370*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d3,  d29, d18 // t15a
1371*c0909341SAndroid Build Coastguard Worker        vqadd.s32       \o13,d29, d18 // out13
1372*c0909341SAndroid Build Coastguard Worker        vqadd.s32       \o2, d17, d30 // out2
1373*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d26, d17, d30 // t14a
1374*c0909341SAndroid Build Coastguard Worker
1375*c0909341SAndroid Build Coastguard Worker        vqadd.s32       \o1, d19, d27 // out1
1376*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d27, d19, d27 // t10
1377*c0909341SAndroid Build Coastguard Worker        vqadd.s32       \o14,d28, d20 // out14
1378*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d20, d28, d20 // t11
1379*c0909341SAndroid Build Coastguard Worker
1380*c0909341SAndroid Build Coastguard Worker        vqadd.s32       \o3, d22, d24 // out3
1381*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d22, d22, d24 // t6
1382*c0909341SAndroid Build Coastguard Worker        vqadd.s32       \o12,d25, d23 // out12
1383*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d23, d25, d23 // t7
1384*c0909341SAndroid Build Coastguard Worker
1385*c0909341SAndroid Build Coastguard Worker        // Not clipping the output registers, as they will be downshifted and
1386*c0909341SAndroid Build Coastguard Worker        // narrowed afterwards anyway.
        // Only the eight intermediates that feed the multiplications below
        // (t2a, t3a, t15a, t14a, t10, t11, t6, t7) are clamped here.
1387*c0909341SAndroid Build Coastguard Worker.irp r, d2, d21, d3, d26, d27, d20, d22, d23
1388*c0909341SAndroid Build Coastguard Worker        vmin.s32        \r,  \r,  d11
1389*c0909341SAndroid Build Coastguard Worker.endr
1390*c0909341SAndroid Build Coastguard Worker.irp r, d2, d21, d3, d26, d27, d20, d22, d23
1391*c0909341SAndroid Build Coastguard Worker        vmax.s32        \r,  \r,  d10
1392*c0909341SAndroid Build Coastguard Worker.endr
1393*c0909341SAndroid Build Coastguard Worker
        // Sign flips (saturating negation) for the odd outputs finished above.
1394*c0909341SAndroid Build Coastguard Worker        vqneg.s32       \o15, \o15    // out15
1395*c0909341SAndroid Build Coastguard Worker        vqneg.s32       \o13,\o13     // out13
1396*c0909341SAndroid Build Coastguard Worker        vqneg.s32       \o1, \o1      // out1
1397*c0909341SAndroid Build Coastguard Worker        vqneg.s32       \o3, \o3      // out3
1398*c0909341SAndroid Build Coastguard Worker
        // Remaining outputs: sum/difference pairs scaled by d0[0]. The register
        // named in parentheses is the final home for the ascending or the
        // descending output order respectively.
1399*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d24, d2,  d21, d0[0], d0[0] // -> out8 (d24 or d23)
1400*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d4,  d2,  d21, d0[0], d0[0] // -> out7 (d23 or d24)
1401*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d6,  d26, d3,  d0[0], d0[0] // -> out5 (d21 or d26)
1402*c0909341SAndroid Build Coastguard Worker
1403*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d24, d24, #12 // out8
1404*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d4,  d4,  #12 // out7
1405*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d5,  d6,  #12 // out5
1406*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d8,  d26, d3,  d0[0], d0[0] // -> out10 (d26 or d21)
1407*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d2,  d22, d23, d0[0], d0[0] // -> out4 (d20 or d27)
1408*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d26, d8,  #12 // out10
1409*c0909341SAndroid Build Coastguard Worker
1410*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d8,  d22, d23, d0[0], d0[0] // -> out11 (d27 or d20)
1411*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d22, d27, d20, d0[0], d0[0] // -> out6 (d22 or d25)
1412*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d6,  d27, d20, d0[0], d0[0] // -> out9 (d25 or d22)
1413*c0909341SAndroid Build Coastguard Worker
1414*c0909341SAndroid Build Coastguard Worker        vrshr.s32       \o4, d2,  #12 // out4
1415*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d7,  d6,  #12 // out9
1416*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d6,  d8,  #12 // out11
1417*c0909341SAndroid Build Coastguard Worker        vrshr.s32       \o6, d22, #12 // out6
1418*c0909341SAndroid Build Coastguard Worker
        // out8/out10 were produced in d24/d26, which are already o8/o10 in the
        // ascending order; in the descending order (o8 == d23) move them over.
1419*c0909341SAndroid Build Coastguard Worker.ifc \o8, d23
1420*c0909341SAndroid Build Coastguard Worker        vmov            \o8, d24
1421*c0909341SAndroid Build Coastguard Worker        vmov            \o10,d26
1422*c0909341SAndroid Build Coastguard Worker.endif
1423*c0909341SAndroid Build Coastguard Worker
        // The odd outputs held in temporaries are negated while being placed.
1424*c0909341SAndroid Build Coastguard Worker        vqneg.s32       \o7, d4  // out7
1425*c0909341SAndroid Build Coastguard Worker        vqneg.s32       \o5, d5  // out5
1426*c0909341SAndroid Build Coastguard Worker        vqneg.s32       \o11,d6  // out11
1427*c0909341SAndroid Build Coastguard Worker        vqneg.s32       \o9, d7  // out9
1428*c0909341SAndroid Build Coastguard Worker.endm
1429*c0909341SAndroid Build Coastguard Worker
// inv_adst_2s_x16_neon: expands iadst_16 with out0-out15 written to d16-d31 in
// ascending register order, then returns through lr.
// Register usage is that of the iadst_16 expansion (r12 and low d registers
// are overwritten).
1430*c0909341SAndroid Build Coastguard Workerfunction inv_adst_2s_x16_neon
1431*c0909341SAndroid Build Coastguard Worker        iadst_16        d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31
1432*c0909341SAndroid Build Coastguard Worker        bx              lr
1433*c0909341SAndroid Build Coastguard Workerendfunc
1434*c0909341SAndroid Build Coastguard Worker
// inv_flipadst_2s_x16_neon: same iadst_16 expansion, but the output register
// list is reversed, so out0 lands in d31 and out15 in d16 (flipped output
// order). Returns through lr.
1435*c0909341SAndroid Build Coastguard Workerfunction inv_flipadst_2s_x16_neon
1436*c0909341SAndroid Build Coastguard Worker        iadst_16        d31, d30, d29, d28, d27, d26, d25, d24, d23, d22, d21, d20, d19, d18, d17, d16
1437*c0909341SAndroid Build Coastguard Worker        bx              lr
1438*c0909341SAndroid Build Coastguard Workerendfunc
1439*c0909341SAndroid Build Coastguard Worker
// inv_identity_2s_x16_neon: scales every 32-bit lane of q8-q15 in place.
// r12 = (2*(5793-4096)*8) << 16: mov clears the register and movt writes the
// upper halfword. The constant is broadcast to d0.
// For each register: q1 = vqrdmulh(x, d0[0]), about x*2*(5793-4096)/4096,
// then x = sat(sat(x + x) + q1), i.e. about x*2*5793/4096.
// Clobbers r12, d0 and q1; returns through lr.
1440*c0909341SAndroid Build Coastguard Workerfunction inv_identity_2s_x16_neon
1441*c0909341SAndroid Build Coastguard Worker        mov             r12, #0
1442*c0909341SAndroid Build Coastguard Worker        movt            r12, #2*(5793-4096)*8
1443*c0909341SAndroid Build Coastguard Worker        vdup.32         d0,  r12
1444*c0909341SAndroid Build Coastguard Worker.irp i, q8, q9, q10, q11, q12, q13, q14, q15
        // The product is taken from the original value before it is doubled.
1445*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s32    q1,  \i,  d0[0]
1446*c0909341SAndroid Build Coastguard Worker        vqadd.s32       \i,  \i,  \i
1447*c0909341SAndroid Build Coastguard Worker        vqadd.s32       \i,  \i,  q1
1448*c0909341SAndroid Build Coastguard Worker.endr
1449*c0909341SAndroid Build Coastguard Worker        bx              lr
1450*c0909341SAndroid Build Coastguard Workerendfunc
1451*c0909341SAndroid Build Coastguard Worker
// identity_8x4_shift1: for each of q8-q15 computes
//   x = sat(x + rshr(vqrdmulh(x, c), 1))
// on 32-bit lanes.
//   c: scalar operand handed to vqrdmulh (its value is set up by the caller,
//      which is outside this chunk).
// Clobbers q2.
1452*c0909341SAndroid Build Coastguard Worker.macro identity_8x4_shift1 c
1453*c0909341SAndroid Build Coastguard Worker.irp i, q8, q9, q10, q11, q12, q13, q14, q15
1454*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s32    q2,  \i,  \c
1455*c0909341SAndroid Build Coastguard Worker        vrshr.s32       q2,  q2,  #1
1456*c0909341SAndroid Build Coastguard Worker        vqadd.s32       \i,  \i,  q2
1457*c0909341SAndroid Build Coastguard Worker.endr
1458*c0909341SAndroid Build Coastguard Worker.endm
1459*c0909341SAndroid Build Coastguard Worker
// identity_8x4: for each of q8-q15 computes
//   x = sat(sat(x + x) + vqrdmulh(x, c))
// on 32-bit lanes; the product uses the value of x from before the doubling.
//   c: scalar operand handed to vqrdmulh (its value is set up by the caller,
//      which is outside this chunk).
// Clobbers q2.
1460*c0909341SAndroid Build Coastguard Worker.macro identity_8x4 c
1461*c0909341SAndroid Build Coastguard Worker.irp i, q8, q9, q10, q11, q12, q13, q14, q15
1462*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s32    q2,  \i,  \c
1463*c0909341SAndroid Build Coastguard Worker        vqadd.s32       \i,  \i,  \i
1464*c0909341SAndroid Build Coastguard Worker        vqadd.s32       \i,  \i,  q2
1465*c0909341SAndroid Build Coastguard Worker.endr
1466*c0909341SAndroid Build Coastguard Worker.endm
1467*c0909341SAndroid Build Coastguard Worker
// def_horz_16: defines inv_txfm_horz<suffix>_16x2_neon.
//   scale:  when nonzero, the loaded values are passed through scale_input
//           with the constant 2896*8*(1<<16) before the transform is called
//           (scale_input is defined outside this chunk).
//   shift:  rounding right shift applied while narrowing 32-bit results to
//           16 bits with saturation.
//   suffix: text inserted into the generated function name.
// Register contract of the generated function, as used below:
//   r7 = source pointer; sixteen 8-byte loads, each location is overwritten
//        with zero and r7 advances by r8 after every store
//   r8 = source stride in bytes
//   r4 = address of the transform routine, called with blx
//   r6 = destination pointer; 64 bytes are stored and r6 is advanced
// Clobbers d7, q8-q15, plus r12 and d1 when scale is set, plus whatever the
// routine behind r4 clobbers. lr is saved on the stack around the call.
1468*c0909341SAndroid Build Coastguard Worker.macro def_horz_16 scale=0, shift=2, suffix
1469*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_horz\suffix\()_16x2_neon
1470*c0909341SAndroid Build Coastguard Worker        push            {lr}
        // d7 = 0, used to clear each coefficient slot right after reading it.
1471*c0909341SAndroid Build Coastguard Worker        vmov.i32        d7,  #0
1472*c0909341SAndroid Build Coastguard Worker.if \scale
1473*c0909341SAndroid Build Coastguard Worker        mov_const       r12, 2896*8*(1<<16)
1474*c0909341SAndroid Build Coastguard Worker        vdup.32         d1,  r12
1475*c0909341SAndroid Build Coastguard Worker.endif
1476*c0909341SAndroid Build Coastguard Worker.irp i, d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31
        // Load without post-increment, then store zero with post-increment so
        // both accesses hit the same address.
1477*c0909341SAndroid Build Coastguard Worker        vld1.32         {\i}, [r7, :64]
1478*c0909341SAndroid Build Coastguard Worker        vst1.32         {d7}, [r7, :64], r8
1479*c0909341SAndroid Build Coastguard Worker.endr
1480*c0909341SAndroid Build Coastguard Worker.if \scale
1481*c0909341SAndroid Build Coastguard Worker        scale_input     d1[0], q8,  q9, q10, q11, q12, q13, q14, q15
1482*c0909341SAndroid Build Coastguard Worker.endif
1483*c0909341SAndroid Build Coastguard Worker        blx             r4
        // Narrow the sixteen 2-lane results into d16-d23 (four 16-bit lanes
        // each). Every destination is the low half of, or lies below, the q
        // register being read, so no unread input is overwritten.
1484*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d16, q8,  #\shift
1485*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d17, q9,  #\shift
1486*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d18, q10, #\shift
1487*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d19, q11, #\shift
1488*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d20, q12, #\shift
1489*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d21, q13, #\shift
1490*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d22, q14, #\shift
1491*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d23, q15, #\shift
        // The scaled variant shares the store/return tail that is emitted by
        // the unscaled variant, so it only branches there.
1492*c0909341SAndroid Build Coastguard Worker.if \scale
1493*c0909341SAndroid Build Coastguard Worker        b               L(horz_16x2_epilog)
1494*c0909341SAndroid Build Coastguard Worker.else
1495*c0909341SAndroid Build Coastguard WorkerL(horz_16x2_epilog):
        // Separate even and odd 16-bit lanes: q8/q10 collect lane 0 of every
        // output, q9/q11 lane 1 (presumably the two rows of the 16x2 slice).
1496*c0909341SAndroid Build Coastguard Worker        vuzp.16         q8,  q9
1497*c0909341SAndroid Build Coastguard Worker        vuzp.16         q10, q11
1498*c0909341SAndroid Build Coastguard Worker
        // Store order q8, q10, q9, q11: sixteen values of the first lane set,
        // then sixteen of the second.
1499*c0909341SAndroid Build Coastguard Worker.irp i, q8, q10, q9, q11
1500*c0909341SAndroid Build Coastguard Worker        vst1.16         {\i}, [r6, :128]!
1501*c0909341SAndroid Build Coastguard Worker.endr
1502*c0909341SAndroid Build Coastguard Worker
1503*c0909341SAndroid Build Coastguard Worker        pop             {pc}
1504*c0909341SAndroid Build Coastguard Worker.endif
1505*c0909341SAndroid Build Coastguard Workerendfunc
1506*c0909341SAndroid Build Coastguard Worker.endm
1507*c0909341SAndroid Build Coastguard Worker
// Two instances: inv_txfm_horz_scale_16x2_neon (scaled input, shift 1) and
// inv_txfm_horz_16x2_neon (unscaled, shift 2). The latter provides the
// L(horz_16x2_epilog) label that the former branches to.
1508*c0909341SAndroid Build Coastguard Workerdef_horz_16 scale=1, shift=1, suffix=_scale
1509*c0909341SAndroid Build Coastguard Workerdef_horz_16 scale=0, shift=2
1510*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_vert_4x16_neon: second-pass helper for one strip.
//   r7 = source pointer; sixteen 8-byte loads (four 16-bit values each) into
//        d16-d31, advancing by r8 after each load
//   r8 = source stride in bytes
//   r5 = address of the transform routine, called with blx
//   r6 = passed on to load_add_store_4x16 together with r7
// NOTE(review): load_add_store_4x16 is defined outside this chunk; presumably
// it adds the transformed values to the pixels at r6 and stores them - confirm.
// lr is saved on the stack around the call; returns by popping into pc.
1511*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_vert_4x16_neon
1512*c0909341SAndroid Build Coastguard Worker        push            {lr}
1513*c0909341SAndroid Build Coastguard Worker.irp i, d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31
1514*c0909341SAndroid Build Coastguard Worker        vld1.16         {\i}, [r7, :64], r8
1515*c0909341SAndroid Build Coastguard Worker.endr
1516*c0909341SAndroid Build Coastguard Worker        blx             r5
1517*c0909341SAndroid Build Coastguard Worker        load_add_store_4x16 r6, r7
1518*c0909341SAndroid Build Coastguard Worker        pop             {pc}
1519*c0909341SAndroid Build Coastguard Workerendfunc
1520*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_16x16_neon: shared body of the 16x16 entry points generated by
// def_fn_16x16, which branch here after pushing r4-r11, lr and q4-q7; this
// routine undoes those pushes and returns to the original caller.
// Inputs as used below:
//   r0  = base for the second pass (r6 = r0 + column offset in bytes)
//   r2  = coefficient base for the first pass (r7 = r2 + offset)
//   r3  = value compared against the thresholds (presumably eob - confirm)
//   r4  = first-pass routine, used by inv_txfm_horz_16x2_neon
//   r5  = second-pass routine, used by inv_txfm_add_vert_4x16_neon
//   r10 = pointer to a table of 16-bit thresholds
// A 512-byte stack buffer holds the intermediate 16x16 block of 16-bit values
// (32 bytes per row).
1521*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_16x16_neon
1522*c0909341SAndroid Build Coastguard Worker        sub_sp_align    512
        // r11 = first threshold; r10 advances to the next table entry.
1523*c0909341SAndroid Build Coastguard Worker        ldrh            r11, [r10], #2
        // First pass, two rows per iteration, written to the stack buffer.
1524*c0909341SAndroid Build Coastguard Worker.irp i, 0, 2, 4, 6, 8, 10, 12, 14
1525*c0909341SAndroid Build Coastguard Worker        add             r6,  sp,  #(\i*16*2)
1526*c0909341SAndroid Build Coastguard Worker.if \i > 0
        // r8 = rows still missing, consumed by the zero-fill loop if the
        // branch is taken (r3 below the current threshold); otherwise r8 is
        // overwritten with the stride below.
1527*c0909341SAndroid Build Coastguard Worker        mov             r8,  #(16 - \i)
1528*c0909341SAndroid Build Coastguard Worker        cmp             r3,  r11
1529*c0909341SAndroid Build Coastguard Worker        blt             1f
        // No further threshold is fetched in the last iteration, as nothing
        // would compare against it.
1530*c0909341SAndroid Build Coastguard Worker.if \i < 14
1531*c0909341SAndroid Build Coastguard Worker        ldrh            r11, [r10], #2
1532*c0909341SAndroid Build Coastguard Worker.endif
1533*c0909341SAndroid Build Coastguard Worker.endif
1534*c0909341SAndroid Build Coastguard Worker        add             r7,  r2,  #(\i*4)
1535*c0909341SAndroid Build Coastguard Worker        mov             r8,  #16*4
1536*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_horz_16x2_neon
1537*c0909341SAndroid Build Coastguard Worker.endr
1538*c0909341SAndroid Build Coastguard Worker        b               3f
1539*c0909341SAndroid Build Coastguard Worker1:
        // Early exit from the first pass: fill the rest of the buffer with
        // zeros, starting at r6.
1540*c0909341SAndroid Build Coastguard Worker        vmov.i16        q2,  #0
1541*c0909341SAndroid Build Coastguard Worker        vmov.i16        q3,  #0
1542*c0909341SAndroid Build Coastguard Worker2:
        // Each iteration clears 64 bytes (two 32-byte rows) and counts r8 down
        // by two; the flags from subs are still intact at bgt because the
        // stores do not modify them.
1543*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #2
1544*c0909341SAndroid Build Coastguard Worker.rept 2
1545*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3}, [r6, :128]!
1546*c0909341SAndroid Build Coastguard Worker.endr
1547*c0909341SAndroid Build Coastguard Worker        bgt             2b
1548*c0909341SAndroid Build Coastguard Worker3:
        // Second pass: four strips of four columns each; the buffer is read
        // with a 32-byte row stride.
1549*c0909341SAndroid Build Coastguard Worker.irp i, 0, 4, 8, 12
1550*c0909341SAndroid Build Coastguard Worker        add             r6,  r0,  #(\i*2)
1551*c0909341SAndroid Build Coastguard Worker        add             r7,  sp,  #(\i*2)
1552*c0909341SAndroid Build Coastguard Worker        mov             r8,  #32
1553*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_add_vert_4x16_neon
1554*c0909341SAndroid Build Coastguard Worker.endr
1555*c0909341SAndroid Build Coastguard Worker
        // Release the buffer and restore what the entry point pushed.
1556*c0909341SAndroid Build Coastguard Worker        add_sp_align    512
1557*c0909341SAndroid Build Coastguard Worker        vpop            {q4-q7}
1558*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}
1559*c0909341SAndroid Build Coastguard Workerendfunc
1560*c0909341SAndroid Build Coastguard Worker
// eob_16x16: eight 16-bit thresholds, selected by def_fn_16x16 when neither or
// both of the two transforms are identity. inv_txfm_add_16x16_neon compares r3
// against successive entries, one per pair of rows; as written there it reads
// only the first seven entries.
1561*c0909341SAndroid Build Coastguard Workerconst eob_16x16
1562*c0909341SAndroid Build Coastguard Worker        .short 3, 10, 21, 36, 55, 78, 105, 256
1563*c0909341SAndroid Build Coastguard Workerendconst
1564*c0909341SAndroid Build Coastguard Worker
// eob_16x16_identity: eight 16-bit thresholds, selected by def_fn_16x16 when
// exactly one of the two transforms is identity. Consumed the same way as
// eob_16x16 by inv_txfm_add_16x16_neon (first seven entries read there).
1565*c0909341SAndroid Build Coastguard Workerconst eob_16x16_identity
1566*c0909341SAndroid Build Coastguard Worker        .short 2, 4, 6, 8, 10, 12, 14, 256
1567*c0909341SAndroid Build Coastguard Workerendconst
1568*c0909341SAndroid Build Coastguard Worker
// def_fn_16x16 txfm1, txfm2
//
// Emits the exported entry point
// inv_txfm_add_<txfm1>_<txfm2>_16x16_16bpc_neon. The stub only prepares
// registers and then branches to the shared body inv_txfm_add_16x16_neon:
//   r4  = address of inv_<txfm1>_2s_x16_neon (local symbol)
//   r5  = address of inv_<txfm2>_4h_x16_neon (referenced through X())
//   r10 = address of the threshold table: eob_16x16 when both or neither
//         transform is identity, eob_16x16_identity when exactly one is
// r4-r11, lr and q4-q7 are pushed here and are not popped by the stub; the
// shared body is expected to restore them and return (the visible 16x4,
// 4x16, 16x8 and 8x16 bodies do exactly that).
// For dct_dct the idct_dc macro (defined elsewhere) is expanded ahead of the
// pushes; presumably it handles the DC-only case and returns early.
.macro def_fn_16x16 txfm1, txfm2
function inv_txfm_add_\txfm1\()_\txfm2\()_16x16_16bpc_neon, export=1
.ifc \txfm1\()_\txfm2, dct_dct
        idct_dc         16,  16,  2
.endif
        push            {r4-r11,lr}
        vpush           {q4-q7}
        movrel_local    r4,  inv_\txfm1\()_2s_x16_neon
        movrel          r5,  X(inv_\txfm2\()_4h_x16_neon)
.ifc \txfm1, identity
.ifc \txfm2, identity
        movrel_local    r10, eob_16x16                  // identity + identity
.else
        movrel_local    r10, eob_16x16_identity         // identity first only
.endif
.else
.ifc \txfm2, identity
        movrel_local    r10, eob_16x16_identity         // identity second only
.else
        movrel_local    r10, eob_16x16                  // no identity involved
.endif
.endif
        b               inv_txfm_add_16x16_neon         // tail branch, lr untouched
endfunc
.endm
1594*c0909341SAndroid Build Coastguard Worker
// Instantiate the 16x16 entry points, one per (txfm1, txfm2) pair listed.
def_fn_16x16 dct, dct
def_fn_16x16 identity, identity
def_fn_16x16 dct, adst
def_fn_16x16 dct, flipadst
def_fn_16x16 dct, identity
def_fn_16x16 adst, dct
def_fn_16x16 adst, adst
def_fn_16x16 adst, flipadst
def_fn_16x16 flipadst, dct
def_fn_16x16 flipadst, adst
def_fn_16x16 flipadst, flipadst
def_fn_16x16 identity, dct
1607*c0909341SAndroid Build Coastguard Worker
// Shared body of the 16x4 entry points generated by def_fn_416.
//
// Register state on entry, as prepared by the entry stub:
//   r0   base address handed to load_add_store_8x4 (presumably the
//        destination pixels); the second block is written at r0 + 16
//   r2   coefficient buffer; every value read from it is replaced by zero
//   r3   value compared against r10 (presumably the eob argument)
//   r4   function called first, on the 32-bit data loaded into q8-q15
//   r5   function called second, on the narrowed 16-bit data in q8-q11
//   r10  threshold: when r3 is below it, the groups at r2 + 8 are skipped
// The stub has already pushed r4-r11, lr and q4-q7; they are popped here.
// Both paths park exactly 64 bytes on the stack, which the two vpop
// instructions further down consume again.
function inv_txfm_add_16x4_neon
        cmp             r3,  r10
        mov             r11, #16                // byte step between loads
        blt             1f                      // mov does not alter the flags set by cmp

        // r3 >= r10: process the 8-byte groups at r2 + 8 + 16*k, k = 0..15.
        add             r6,  r2,  #8
        vmov.i32        d4,  #0
.irp i, d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31
        vld1.32         {\i}, [r6, :64]
        vst1.32         {d4}, [r6, :64], r11    // clear what was just read, then step
.endr
        blx             r4

        // Narrow to 16 bit with a rounding, saturating shift right by one.
        vqrshrn.s32     d16, q8,  #1
        vqrshrn.s32     d17, q9,  #1
        vqrshrn.s32     d18, q10, #1
        vqrshrn.s32     d19, q11, #1
        vqrshrn.s32     d20, q12, #1
        vqrshrn.s32     d21, q13, #1
        vqrshrn.s32     d22, q14, #1
        vqrshrn.s32     d23, q15, #1
        vuzp.16         q8,  q9
        mov             r6,  sp
        vuzp.16         q10, q11
        vpush           {q8-q11}                // park the 64 bytes for later

        b               2f

1:
        // Skipped: park 64 bytes of zeros instead, so that the stack has the
        // same shape on both paths.
        vmov.i16        q8,  #0
        vmov.i16        q9,  #0
        mov             r6,  sp
        vpush           {q8-q9}
        vpush           {q8-q9}

2:
        // Same sequence for the 8-byte groups at r2 + 16*k, k = 0..15.
        vmov.i32        d4,  #0
.irp i, d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31
        vld1.32         {\i}, [r2, :64]
        vst1.32         {d4}, [r2, :64], r11
.endr

        blx             r4

        vqrshrn.s32     d16, q8,  #1
        vqrshrn.s32     d17, q9,  #1
        vqrshrn.s32     d18, q10, #1
        vqrshrn.s32     d19, q11, #1
        vqrshrn.s32     d20, q12, #1
        vqrshrn.s32     d21, q13, #1
        vqrshrn.s32     d22, q14, #1
        vqrshrn.s32     d23, q15, #1
        vuzp.16         q8,  q9
        mov             r6,  sp
        vuzp.16         q10, q11

        // Keep q10/q11 of this pass in q12/q13 while q10/q11 are refilled
        // from the parked data for the first call through r5.
        vmov            q12, q10
        vmov            q13, q11

        vpop            {q10-q11}               // lower 32 parked bytes (parked q8/q9)
        blx             r5
        mov             r6,  r0
        load_add_store_8x4 r6, r7

        vpop            {q10-q11}               // upper 32 parked bytes (parked q10/q11)
        vmov            q8,  q12
        vmov            q9,  q13
        blx             r5
        add             r6,  r0,  #16
        load_add_store_8x4 r6, r7

        // Undo the pushes made by the entry stub and return to its caller.
        vpop            {q4-q7}
        pop             {r4-r11,pc}
endfunc
1682*c0909341SAndroid Build Coastguard Worker
// Shared body of the 4x16 entry points generated by def_fn_416.
//
// Register state on entry, as prepared by the entry stub:
//   r0   first argument of load_add_store_4x16 (presumably the destination)
//   r2   coefficient buffer; every value read from it is replaced by zero
//   r3   value compared against the thresholds (presumably the eob argument)
//   r4   function called on each group of four q registers of 32-bit data
//   r5   function called once on the sixteen narrowed 16-bit rows q8-q15
//   r10  pointer to a table of 16-bit thresholds; entries 2, 1 and 0 gate
//        the groups at r2 + 48, r2 + 32 and r2 + 16 respectively
// The group at r2 + 0 is always processed. A skipped group contributes
// zeros. The stub has already pushed r4-r11, lr and q4-q7; they are popped
// here.
function inv_txfm_add_4x16_neon
        ldrh            r9,  [r10, #4]

        mov             r11, #64                // byte step between loads
        cmp             r3,  r9
        ldrh            r9,  [r10, #2]          // next threshold; ldrh leaves the flags alone
        blt             1f

        // Group at r2 + 48 -> q14/q15.
        add             r6,  r2,  #48
        vmov.i32        q2,  #0
.irp i, q8,  q9,  q10, q11
        vld1.32         {\i}, [r6, :128]
        vst1.32         {q2}, [r6, :128], r11   // clear what was just read, then step
.endr
        blx             r4
        // Narrow to 16 bit with a rounding, saturating shift right by one.
        vqrshrn.s32     d28, q8,  #1
        vqrshrn.s32     d29, q9,  #1
        vqrshrn.s32     d30, q10, #1
        vqrshrn.s32     d31, q11, #1
        transpose_4x4h  q14, q15, d28, d29, d30, d31

        b               2f
1:
        vmov.i16        q14, #0
        vmov.i16        q15, #0
2:
        cmp             r3,  r9
        ldrh            r9,  [r10]              // last threshold, again without touching flags
        blt             1f

        // Group at r2 + 32 -> q12/q13.
        add             r6,  r2,  #32
        vmov.i32        q2,  #0
.irp i, q8,  q9,  q10, q11
        vld1.32         {\i}, [r6, :128]
        vst1.32         {q2}, [r6, :128], r11
.endr
        blx             r4
        vqrshrn.s32     d24, q8,  #1
        vqrshrn.s32     d25, q9,  #1
        vqrshrn.s32     d26, q10, #1
        vqrshrn.s32     d27, q11, #1
        transpose_4x4h  q12, q13, d24, d25, d26, d27

        b               2f
1:
        vmov.i16        q12, #0
        vmov.i16        q13, #0
2:
        cmp             r3,  r9
        blt             1f

        // Group at r2 + 16 -> q8/q9 for now; it ends up in q10/q11 below.
        add             r6,  r2,  #16
        vmov.i32        q2,  #0
.irp i, q8,  q9,  q10, q11
        vld1.32         {\i}, [r6, :128]
        vst1.32         {q2}, [r6, :128], r11
.endr
        blx             r4
        vqrshrn.s32     d16, q8,  #1
        vqrshrn.s32     d17, q9,  #1
        vqrshrn.s32     d18, q10, #1
        vqrshrn.s32     d19, q11, #1
        transpose_4x4h  q8,  q9,  d16, d17, d18, d19

        b               2f
1:
        vmov.i16        q8,  #0
        vmov.i16        q9,  #0
2:
        // Park the previous result on the stack because q8-q11 are needed
        // as working registers for the group at r2 + 0.
        vmov.i16        q2,  #0
        vpush           {q8-q9}
.irp i, q8,  q9,  q10, q11
        vld1.16         {\i}, [r2, :128]
        vst1.16         {q2}, [r2, :128], r11
.endr
        blx             r4
        vqrshrn.s32     d16, q8,  #1
        vqrshrn.s32     d17, q9,  #1
        vqrshrn.s32     d18, q10, #1
        vqrshrn.s32     d19, q11, #1
        transpose_4x4h  q8,  q9,  d16, d17, d18, d19
        vpop            {q10-q11}               // parked result becomes q10/q11

        blx             r5

        load_add_store_4x16 r0, r6

        // Undo the pushes made by the entry stub and return to its caller.
        vpop            {q4-q7}
        pop             {r4-r11,pc}
endfunc
1773*c0909341SAndroid Build Coastguard Worker
// Tables of 16-bit thresholds for the 4x16 transforms. def_fn_416 puts the
// address of one of them in r10; inv_txfm_add_4x16_neon compares r3 against
// entries 2, 1 and 0 (in that order) and never reads the fourth entry.

// Selected when both or neither of the two transforms is identity.
const eob_4x16
        .short 13, 29, 45, 64
endconst

// Selected when only the first transform is identity.
const eob_4x16_identity1
        .short 16, 32, 48, 64
endconst

// Selected when only the second transform is identity.
const eob_4x16_identity2
        .short 4, 8, 12, 64
endconst
1785*c0909341SAndroid Build Coastguard Worker
// def_fn_416 w, h, txfm1, txfm2, eob_16x4
//
// Emits the exported entry point
// inv_txfm_add_<txfm1>_<txfm2>_<w>x<h>_16bpc_neon. The stub pushes r4-r11,
// lr and q4-q7, prepares r4, r5 and r10, and branches to the shared body
// inv_txfm_add_<w>x<h>_neon, which pops the saved registers and returns.
//
// When w is 4:
//   r4  = address of inv_<txfm1>_4s_x<w>_neon
//   r5  = address of inv_<txfm2>_4h_x<h>_neon (referenced through X())
//   r10 = address of a threshold table: eob_4x16 when both or neither
//         transform is identity, eob_4x16_identity1 when only txfm1 is,
//         eob_4x16_identity2 when only txfm2 is
// Otherwise:
//   r4  = address of inv_<txfm1>_2s_x<w>_neon
//   r5  = address of inv_<txfm2>_8h_x<h>_neon (referenced through X())
//   r10 = the eob_16x4 argument as an immediate, not a table address
// The eob_16x4 argument is ignored when w is 4.
// For dct_dct the idct_dc macro (defined elsewhere) is expanded ahead of the
// pushes; presumably it handles the DC-only case and returns early.
.macro def_fn_416 w, h, txfm1, txfm2, eob_16x4
function inv_txfm_add_\txfm1\()_\txfm2\()_\w\()x\h\()_16bpc_neon, export=1
.ifc \txfm1\()_\txfm2, dct_dct
        idct_dc         \w,  \h,  1
.endif
        push            {r4-r11,lr}
        vpush           {q4-q7}
.if \w == 4
        movrel_local    r4,  inv_\txfm1\()_4s_x\w\()_neon
        movrel          r5,  X(inv_\txfm2\()_4h_x\h\()_neon)
.ifc \txfm1, identity
.ifc \txfm2, identity
        movrel_local    r10, eob_4x16                   // identity + identity
.else
        movrel_local    r10, eob_4x16_identity1         // identity first only
.endif
.else
.ifc \txfm2, identity
        movrel_local    r10, eob_4x16_identity2         // identity second only
.else
        movrel_local    r10, eob_4x16                   // no identity involved
.endif
.endif
.else
        mov             r10, #\eob_16x4                 // single threshold value
        movrel_local    r4,  inv_\txfm1\()_2s_x\w\()_neon
        movrel          r5,  X(inv_\txfm2\()_8h_x\h\()_neon)
.endif
        b               inv_txfm_add_\w\()x\h\()_neon   // tail branch, lr untouched
endfunc
.endm
1817*c0909341SAndroid Build Coastguard Worker
// def_fns_416 w, h
//
// Instantiates def_fn_416 for all sixteen (txfm1, txfm2) pairs of one block
// size. The trailing number is the eob_16x4 argument of def_fn_416, which
// that macro only uses when w is not 4: it is 2 for the pairs in which
// exactly one transform is identity and 3 for all other pairs.
.macro def_fns_416 w, h
def_fn_416 \w, \h, dct, dct, 3
def_fn_416 \w, \h, identity, identity, 3
def_fn_416 \w, \h, dct, adst, 3
def_fn_416 \w, \h, dct, flipadst, 3
def_fn_416 \w, \h, dct, identity, 2
def_fn_416 \w, \h, adst, dct, 3
def_fn_416 \w, \h, adst, adst, 3
def_fn_416 \w, \h, adst, flipadst, 3
def_fn_416 \w, \h, flipadst, dct, 3
def_fn_416 \w, \h, flipadst, adst, 3
def_fn_416 \w, \h, flipadst, flipadst, 3
def_fn_416 \w, \h, identity, dct, 2
def_fn_416 \w, \h, adst, identity, 2
def_fn_416 \w, \h, flipadst, identity, 2
def_fn_416 \w, \h, identity, adst, 2
def_fn_416 \w, \h, identity, flipadst, 2
.endm
1836*c0909341SAndroid Build Coastguard Worker
// Emit the sixteen 4x16 and the sixteen 16x4 entry points.
def_fns_416 4, 16
def_fns_416 16, 4
1839*c0909341SAndroid Build Coastguard Worker
// Shared body of the 16x8 entry points generated by def_fn_816.
//
// Register state on entry, as prepared by the entry stub:
//   r0   base address used for load_add_store_8x8 (presumably the
//        destination pixels); the second block is written at r0 + 16
//   r2   coefficient buffer; r2 + 0, 8, 16 and 24 are passed in r7 to
//        inv_txfm_horz_scale_16x2_neon
//   r3   value compared against the thresholds (presumably the eob argument)
//   r5   function called on each set of eight q registers of 16-bit data
//   r10  pointer to a table of 16-bit thresholds; the first three entries
//        are read, with r10 advancing by 2 each time
// r4 is not referenced in this body; presumably
// inv_txfm_horz_scale_16x2_neon calls through it (TODO confirm).
// 256 bytes of scratch are reserved on the stack. The first loop fills them
// 64 bytes per step, and the first time r3 is below the current threshold
// the rest of the scratch area is zero-filled instead.
// The stub has already pushed r4-r11, lr and q4-q7; they are popped here.
function inv_txfm_add_16x8_neon
        sub_sp_align    256
        ldrh            r11, [r10], #2

.irp i, 0, 2, 4, 6
        add             r6,  sp,  #(\i*16*2)    // output position in the scratch area
.if \i > 0
        mov             r8,  #(8 - \i)          // count for the zero-fill loop, if taken
        cmp             r3,  r11
        blt             1f
.if \i < 6
        ldrh            r11, [r10], #2          // threshold for the next step
.endif
.endif
        add             r7,  r2,  #(\i*4)
        mov             r8,  #8*4
        bl              inv_txfm_horz_scale_16x2_neon
.endr
        b               3f
1:
        vmov.i16        q2,  #0
        vmov.i16        q3,  #0
2:
        // Each round clears 64 bytes and lowers the count by two; the
        // stores do not alter the flags that bgt tests.
        subs            r8,  r8,  #2
.rept 2
        vst1.16         {q2, q3}, [r6, :128]!
.endr
        bgt             2b
3:

        // Second stage: read the scratch area in two passes, 16 bytes out
        // of every 32, starting at byte offset 0 and then 16.
.irp i, 0, 8
        add             r7,  sp,  #(\i*2)
        mov             r8,  #32
.irp j, q8, q9, q10, q11, q12, q13, q14, q15
        vld1.16         {\j}, [r7, :128], r8
.endr
        blx             r5

        add             r6,  r0,  #(\i*2)
        load_add_store_8x8 r6, r7
.endr

        // Release the scratch area, undo the pushes made by the entry stub
        // and return to its caller.
        add_sp_align    256
        vpop            {q4-q7}
        pop             {r4-r11,pc}
endfunc
1886*c0909341SAndroid Build Coastguard Worker
// Shared body of the 8x16 entry points generated by def_fn_816.
//
// Register state on entry, as prepared by the entry stub:
//   r0   base address passed in r6 to inv_txfm_add_vert_4x16_neon
//        (presumably the destination pixels); second call uses r0 + 8
//   r2   coefficient buffer; every value read from it is replaced by zero
//   r3   value compared against the thresholds (presumably the eob argument)
//   r4   function called on each set of eight q registers of 32-bit data
//   r10  pointer to a table of 16-bit thresholds; entries 1, 3 and 5 are
//        read (r10 is first advanced by 2, then by 4 per read)
// r5 is not referenced in this body; presumably
// inv_txfm_add_vert_4x16_neon calls through it (TODO confirm).
// 256 bytes of scratch are reserved on the stack. The first loop fills them
// 64 bytes per step, and the first time r3 is below the current threshold
// the rest of the scratch area is zero-filled instead.
// The stub has already pushed r4-r11, lr and q4-q7; they are popped here.
function inv_txfm_add_8x16_neon
        add             r10, r10, #2
        sub_sp_align    256
        ldrh            r11, [r10], #4

.irp i, 0, 4, 8, 12
        add             r6,  sp,  #(\i*8*2)     // output position in the scratch area
.if \i > 0
        mov             r8,  #(16 - \i)         // count for the zero-fill loop, if taken
        cmp             r3,  r11
        blt             1f
.if \i < 12
        ldrh            r11, [r10], #4          // threshold for the next step
.endif
.endif
        add             r7,  r2,  #(\i*4)
        mov             r8,  #16*4              // byte step between loads

        // Constant handed to scale_input; 2896/4096 is close to 1/sqrt(2),
        // so this is presumably the scale factor for rectangular blocks.
        mov_const       r12, 2896*8*(1<<16)
        vmov.i32        q2,  #0
        vdup.32         d0,  r12

.irp j, q8, q9, q10, q11, q12, q13, q14, q15
        vld1.32         {\j}, [r7, :128]
        vst1.32         {q2}, [r7, :128], r8    // clear what was just read, then step
.endr
        scale_input     d0[0], q8,  q9,  q10, q11, q12, q13, q14, q15
        blx             r4
        // Narrow to 16 bit with a rounding, saturating shift right by one.
        vqrshrn.s32     d16, q8,  #1
        vqrshrn.s32     d17, q9,  #1
        vqrshrn.s32     d18, q10, #1
        vqrshrn.s32     d19, q11, #1
        vqrshrn.s32     d20, q12, #1
        vqrshrn.s32     d21, q13, #1
        vqrshrn.s32     d22, q14, #1
        vqrshrn.s32     d23, q15, #1
        transpose_4x4h  q8,  q9,  d16, d17, d18, d19
        transpose_4x4h  q10, q11, d20, d21, d22, d23
        // Interleave the two transposed halves while storing 64 bytes.
.irp j, d16, d20, d17, d21, d18, d22, d19, d23
        vst1.16         {\j},  [r6, :64]!
.endr
.endr
        b               3f
1:
        vmov.i16        q2,  #0
        vmov.i16        q3,  #0
2:
        // Each round clears 64 bytes and lowers the count by four; the
        // stores do not alter the flags that bgt tests.
        subs            r8,  r8,  #4
.rept 2
        vst1.16         {q2, q3}, [r6, :128]!
.endr
        bgt             2b
3:

        // Second stage: two calls, at byte offsets 0 and 8 into both the
        // scratch area (r7) and the output (r6), with r8 = 16.
.irp i, 0, 4
        add             r6,  r0,  #(\i*2)
        add             r7,  sp,  #(\i*2)
        mov             r8,  #16
        bl              inv_txfm_add_vert_4x16_neon
.endr

        // Release the scratch area, undo the pushes made by the entry stub
        // and return to its caller.
        add_sp_align    256
        vpop            {q4-q7}
        pop             {r4-r11,pc}
endfunc
1952*c0909341SAndroid Build Coastguard Worker
// Tables of 16-bit thresholds shared by the 8x16 and 16x8 transforms.
// def_fn_816 puts the address of one of them in r10.
// inv_txfm_add_16x8_neon reads entries 0, 1 and 2;
// inv_txfm_add_8x16_neon reads entries 1, 3 and 5.

// Selected when both or neither of the two transforms is identity.
const eob_8x16
        .short 3, 10, 21, 43, 59, 75, 91, 128
endconst

// Selected when only the first transform is identity.
const eob_8x16_identity1
        .short 2, 4, 6, 64, 80, 96, 112, 128
endconst

// Selected when only the second transform is identity.
const eob_8x16_identity2
        .short 2, 4, 6, 8, 10, 12, 14, 128
endconst
1964*c0909341SAndroid Build Coastguard Worker
// def_fn_816 w, h, txfm1, txfm2
//
// Emits the exported entry point
// inv_txfm_add_<txfm1>_<txfm2>_<w>x<h>_16bpc_neon. The stub pushes r4-r11,
// lr and q4-q7, prepares r4, r5 and r10, and branches to the shared body
// inv_txfm_add_<w>x<h>_neon, which pops the saved registers and returns.
//
// When w is 8:
//   r4 = address of inv_<txfm1>_4s_x8_neon
//   r5 = address of inv_<txfm2>_4h_x16_neon (referenced through X())
// Otherwise:
//   r4 = address of inv_<txfm1>_2s_x16_neon
//   r5 = address of inv_<txfm2>_8h_x8_neon (referenced through X())
// For either width:
//   r10 = address of a threshold table: eob_8x16 when both or neither
//         transform is identity, eob_8x16_identity1 when only txfm1 is,
//         eob_8x16_identity2 when only txfm2 is
// For dct_dct the idct_dc macro (defined elsewhere) is expanded ahead of the
// pushes; presumably it handles the DC-only case and returns early.
.macro def_fn_816 w, h, txfm1, txfm2
function inv_txfm_add_\txfm1\()_\txfm2\()_\w\()x\h\()_16bpc_neon, export=1
.ifc \txfm1\()_\txfm2, dct_dct
        idct_dc         \w,  \h,  1
.endif
        push            {r4-r11,lr}
        vpush           {q4-q7}
.if \w == 8
        movrel_local    r4,  inv_\txfm1\()_4s_x8_neon
        movrel          r5,  X(inv_\txfm2\()_4h_x16_neon)
.else
        movrel_local    r4,  inv_\txfm1\()_2s_x16_neon
        movrel          r5,  X(inv_\txfm2\()_8h_x8_neon)
.endif
.ifc \txfm1, identity
.ifc \txfm2, identity
        movrel_local    r10, eob_8x16                   // identity + identity
.else
        movrel_local    r10, eob_8x16_identity1         // identity first only
.endif
.else
.ifc \txfm2, identity
        movrel_local    r10, eob_8x16_identity2         // identity second only
.else
        movrel_local    r10, eob_8x16                   // no identity involved
.endif
.endif
        b               inv_txfm_add_\w\()x\h\()_neon   // tail branch, lr untouched
endfunc
.endm
1995*c0909341SAndroid Build Coastguard Worker
// def_fns_816 w, h
//
// Instantiates def_fn_816 for all sixteen (txfm1, txfm2) pairs of one block
// size.
.macro def_fns_816 w, h
def_fn_816 \w, \h, dct, dct
def_fn_816 \w, \h, identity, identity
def_fn_816 \w, \h, dct, adst
def_fn_816 \w, \h, dct, flipadst
def_fn_816 \w, \h, dct, identity
def_fn_816 \w, \h, adst, dct
def_fn_816 \w, \h, adst, adst
def_fn_816 \w, \h, adst, flipadst
def_fn_816 \w, \h, flipadst, dct
def_fn_816 \w, \h, flipadst, adst
def_fn_816 \w, \h, flipadst, flipadst
def_fn_816 \w, \h, identity, dct
def_fn_816 \w, \h, adst, identity
def_fn_816 \w, \h, flipadst, identity
def_fn_816 \w, \h, identity, adst
def_fn_816 \w, \h, identity, flipadst
.endm
2014*c0909341SAndroid Build Coastguard Worker
// Emit the sixteen 8x16 and the sixteen 16x8 entry points.
def_fns_816 8, 16
def_fns_816 16, 8
2017*c0909341SAndroid Build Coastguard Worker
// inv_dct32_odd_2s_x16_neon
//
// Transforms the 16 vectors held in d16-d31 in place, treating each d register
// as two signed 32-bit lanes. Going by the function name and the tNN/outNN
// labels below, this is the odd half of a 32-point inverse DCT.
//
// In:   d16-d31  the 16 input vectors.
// Out:  d16-d31  the 16 results; following the trailing comments they are
//                out16, out17, out18, out19, t20, t21a, t22, t23a, t24a, t25,
//                t26a, t27, out28, out29, out30, out31 in register order.
// Writes (not preserved): r12, d0-d8, d10, d11. Anything else touched by the
//       vmul_vmls/vmul_vmla/movrel_local macros is not visible in this block.
// Leaf routine: returns with bx lr and does not use the stack.
//
// NOTE(review): vmul_vmls/vmul_vmla are defined elsewhere; presumably they
// compute dst = s0*c0 - s1*c1 and dst = s0*c0 + s1*c1 - confirm.
2018*c0909341SAndroid Build Coastguard Workerfunction inv_dct32_odd_2s_x16_neon
        // r12 = idct_coeffs plus 4*16 (presumably a byte offset - confirm against
        // movrel_local). Load 8 coefficients into q0-q1; the writeback advances
        // r12 by the 32 bytes just read.
2019*c0909341SAndroid Build Coastguard Worker        movrel_local    r12, idct_coeffs, 4*16
2020*c0909341SAndroid Build Coastguard Worker        vld1.32         {q0, q1}, [r12, :128]!
2021*c0909341SAndroid Build Coastguard Worker
        // First stage: each input pair is combined with one coefficient pair and
        // the result is rounded with a 12-bit rounding right shift. d4, d6 and d8
        // are rotated as accumulators so each multiply is issued before the
        // shifts that consume the previous accumulators.
2022*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d4,  d16, d31, d0[0], d0[1] // -> t16a
2023*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d6,  d16, d31, d0[1], d0[0] // -> t31a
2024*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d8,  d24, d23, d1[0], d1[1] // -> t17a
2025*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d16, d4,  #12               // t16a
2026*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d31, d6,  #12               // t31a
2027*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d4,  d24, d23, d1[1], d1[0] // -> t30a
2028*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d6,  d20, d27, d2[0], d2[1] // -> t18a
2029*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d24, d8,  #12               // t17a
2030*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d23, d4,  #12               // t30a
2031*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d8,  d20, d27, d2[1], d2[0] // -> t29a
2032*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d4,  d28, d19, d3[0], d3[1] // -> t19a
2033*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d20, d6,  #12               // t18a
2034*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d27, d8,  #12               // t29a
2035*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d6,  d28, d19, d3[1], d3[0] // -> t28a
        // Load the next 8 coefficients (no writeback), then rewind r12 by
        // 4*24 = 96 bytes: the 4*16 starting offset plus the 32 bytes of
        // post-increment from the first load.
2036*c0909341SAndroid Build Coastguard Worker        vld1.32         {q0, q1}, [r12, :128]
2037*c0909341SAndroid Build Coastguard Worker        sub             r12, r12, #4*24
2038*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d8,  d18, d29, d0[0], d0[1] // -> t20a
2039*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d28, d4,  #12               // t19a
2040*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d19, d6,  #12               // t28a
2041*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d4,  d18, d29, d0[1], d0[0] // -> t27a
2042*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d6,  d26, d21, d1[0], d1[1] // -> t21a
2043*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d18, d8,  #12               // t20a
2044*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d29, d4,  #12               // t27a
2045*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d8,  d26, d21, d1[1], d1[0] // -> t26a
2046*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d4,  d22, d25, d2[0], d2[1] // -> t22a
2047*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d26, d6,  #12               // t21a
2048*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d21, d8,  #12               // t26a
2049*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d6,  d22, d25, d2[1], d2[0] // -> t25a
2050*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d8,  d30, d17, d3[0], d3[1] // -> t23a
2051*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d22, d4,  #12               // t22a
2052*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d25, d6,  #12               // t25a
2053*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d4,  d30, d17, d3[1], d3[0] // -> t24a
2054*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d30, d8,  #12               // t23a
2055*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d17, d4,  #12               // t24a
2056*c0909341SAndroid Build Coastguard Worker
        // Coefficients for the remaining stages, read from the rewound r12.
2057*c0909341SAndroid Build Coastguard Worker        vld1.32         {q0, q1}, [r12, :128]
2058*c0909341SAndroid Build Coastguard Worker
        // d10/d11 are the clamp bounds applied after every add/sub stage below.
2059*c0909341SAndroid Build Coastguard Worker        vmov.i32        d11, #0x1ffff // row_clip_max = ~(~bdmax << 7), 0x1ffff
2060*c0909341SAndroid Build Coastguard Worker        vmvn.i32        d10, #0x1ffff // row_clip_min = (~bdmax << 7), 0xfffe0000
2061*c0909341SAndroid Build Coastguard Worker
        // Saturating add/sub butterflies. d5 and d7 receive two of the results so
        // that d16 and d31 stay readable for the matching add.
2062*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d5,  d16, d24 // t17
2063*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d16, d16, d24 // t16
2064*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d7,  d31, d23 // t30
2065*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d31, d31, d23 // t31
2066*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d24, d28, d20 // t18
2067*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d28, d28, d20 // t19
2068*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d23, d18, d26 // t20
2069*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d18, d18, d26 // t21
2070*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d20, d30, d22 // t22
2071*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d30, d30, d22 // t23
2072*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d26, d17, d25 // t24
2073*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d17, d17, d25 // t25
2074*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d22, d29, d21 // t26
2075*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d29, d29, d21 // t27
2076*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d25, d19, d27 // t28
2077*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d19, d19, d27 // t29
2078*c0909341SAndroid Build Coastguard Worker
        // Clamp all 16 intermediates to [d10, d11].
2079*c0909341SAndroid Build Coastguard Worker.irp r, d5, d16, d7, d31, d24, d28, d23, d18, d20, d30, d26, d17, d22, d29, d25, d19
2080*c0909341SAndroid Build Coastguard Worker        vmin.s32        \r,  \r,  d11
2081*c0909341SAndroid Build Coastguard Worker.endr
2082*c0909341SAndroid Build Coastguard Worker.irp r, d5, d16, d7, d31, d24, d28, d23, d18, d20, d30, d26, d17, d22, d29, d25, d19
2083*c0909341SAndroid Build Coastguard Worker        vmax.s32        \r,  \r,  d10
2084*c0909341SAndroid Build Coastguard Worker.endr
2085*c0909341SAndroid Build Coastguard Worker
        // Rotations using the coefficient pairs in d2 and d3. Two of the results
        // are negated with vneg before the rounding shift.
2086*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d4,  d7,  d5,  d2[0], d2[1] // -> t17a
2087*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d6,  d7,  d5,  d2[1], d2[0] // -> t30a
2088*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d8,  d19, d24, d2[1], d2[0] // -> t18a
2089*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d21, d4,  #12               // t17a
2090*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d27, d6,  #12               // t30a
2091*c0909341SAndroid Build Coastguard Worker        vneg.s32        d8,  d8                     // -> t18a
2092*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d5,  d19, d24, d2[0], d2[1] // -> t29a
2093*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d4,  d22, d18, d3[0], d3[1] // -> t21a
2094*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d19, d8,  #12               // t18a
2095*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d24, d5,  #12               // t29a
2096*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d6,  d22, d18, d3[1], d3[0] // -> t26a
2097*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d8,  d17, d20, d3[1], d3[0] // -> t22a
2098*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d22, d4,  #12               // t21a
2099*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d18, d6,  #12               // t26a
2100*c0909341SAndroid Build Coastguard Worker        vneg.s32        d8,  d8                     // -> t22a
2101*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d5,  d17, d20, d3[0], d3[1] // -> t25a
2102*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d17, d8,  #12               // t22a
2103*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d20, d5,  #12               // t25a
2104*c0909341SAndroid Build Coastguard Worker
        // From here on d2 and d3 serve as temporaries: the later multiplies only
        // read coefficient lanes from d1 and d0.
2105*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d2,  d27, d24 // t29
2106*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d27, d27, d24 // t30
2107*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d3,  d21, d19 // t18
2108*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d21, d21, d19 // t17
2109*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d24, d16, d28 // t19a
2110*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d16, d16, d28 // t16a
2111*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d19, d30, d23 // t20a
2112*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d30, d30, d23 // t23a
2113*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d28, d17, d22 // t21
2114*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d17, d17, d22 // t22
2115*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d23, d26, d29 // t24a
2116*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d26, d26, d29 // t27a
2117*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d22, d20, d18 // t25
2118*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d20, d20, d18 // t26
2119*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d29, d31, d25 // t28a
2120*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d31, d31, d25 // t31a
2121*c0909341SAndroid Build Coastguard Worker
        // Clamp all 16 intermediates to [d10, d11].
2122*c0909341SAndroid Build Coastguard Worker.irp r, d2, d27, d3, d21, d24, d16, d19, d30, d28, d17, d23, d26, d22, d20, d29, d31
2123*c0909341SAndroid Build Coastguard Worker        vmin.s32        \r,  \r,  d11
2124*c0909341SAndroid Build Coastguard Worker.endr
2125*c0909341SAndroid Build Coastguard Worker.irp r, d2, d27, d3, d21, d24, d16, d19, d30, d28, d17, d23, d26, d22, d20, d29, d31
2126*c0909341SAndroid Build Coastguard Worker        vmax.s32        \r,  \r,  d10
2127*c0909341SAndroid Build Coastguard Worker.endr
2128*c0909341SAndroid Build Coastguard Worker
        // Rotations that all use the single coefficient pair d1[0]/d1[1].
2129*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d4,  d2,  d3,  d1[0], d1[1] // -> t18a
2130*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d6,  d2,  d3,  d1[1], d1[0] // -> t29a
2131*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d8,  d29, d24, d1[0], d1[1] // -> t19
2132*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d18, d4,  #12               // t18a
2133*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d25, d6,  #12               // t29a
2134*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d5,  d29, d24, d1[1], d1[0] // -> t28
2135*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d4,  d26, d19, d1[1], d1[0] // -> t20
2136*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d29, d8,  #12               // t19
2137*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d24, d5,  #12               // t28
2138*c0909341SAndroid Build Coastguard Worker        vneg.s32        d4,  d4                     // -> t20
2139*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d6,  d26, d19, d1[0], d1[1] // -> t27
2140*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d8,  d20, d28, d1[1], d1[0] // -> t21a
2141*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d26, d4,  #12               // t20
2142*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d19, d6,  #12               // t27
2143*c0909341SAndroid Build Coastguard Worker        vneg.s32        d8,  d8                     // -> t21a
2144*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d5,  d20, d28, d1[0], d1[1] // -> t26a
2145*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d20, d8,  #12               // t21a
2146*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d28, d5,  #12               // t26a
2147*c0909341SAndroid Build Coastguard Worker
        // Last add/sub stage. out19 is formed in d4 and copied to d19 only after
        // the two instructions that still read the old d19 (t27) have run.
2148*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d2,  d16, d30 // t23
2149*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d16, d16, d30 // t16 = out16
2150*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d3,  d31, d23 // t24
2151*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d31, d31, d23 // t31 = out31
2152*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d23, d21, d17 // t22a
2153*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d17, d21, d17 // t17a = out17
2154*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d30, d27, d22 // t30a = out30
2155*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d21, d27, d22 // t25a
2156*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d27, d18, d20 // t21
2157*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d18, d18, d20 // t18 = out18
2158*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d4,  d29, d26 // t19a = out19
2159*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d26, d29, d26 // t20a
2160*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d29, d25, d28 // t29 = out29
2161*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d25, d25, d28 // t26
2162*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d28, d24, d19 // t28a = out28
2163*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d24, d24, d19 // t27a
2164*c0909341SAndroid Build Coastguard Worker        vmov            d19, d4       // out19
2165*c0909341SAndroid Build Coastguard Worker
        // Clamp all 16 values to [d10, d11]; eight of them are final outputs.
2166*c0909341SAndroid Build Coastguard Worker.irp r, d2, d16, d3, d31, d23, d17, d30, d21, d27, d18, d19, d26, d29, d25, d28, d24
2167*c0909341SAndroid Build Coastguard Worker        vmin.s32        \r,  \r,  d11
2168*c0909341SAndroid Build Coastguard Worker.endr
2169*c0909341SAndroid Build Coastguard Worker.irp r, d2, d16, d3, d31, d23, d17, d30, d21, d27, d18, d19, d26, d29, d25, d28, d24
2170*c0909341SAndroid Build Coastguard Worker        vmax.s32        \r,  \r,  d10
2171*c0909341SAndroid Build Coastguard Worker.endr
2172*c0909341SAndroid Build Coastguard Worker
        // Final rotations: both multipliers are d0[0] in every pair. t27 is parked
        // in d22 because d27 (t21) is still an input to the next pair; it is moved
        // to d27 once that pair has been issued, freeing d22 for t22.
2173*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d4,  d24, d26, d0[0], d0[0] // -> t20
2174*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d6,  d24, d26, d0[0], d0[0] // -> t27
2175*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d20, d4,  #12   // t20
2176*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d22, d6,  #12   // t27
2177*c0909341SAndroid Build Coastguard Worker
2178*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d4,  d25, d27, d0[0], d0[0] // -> t26a
2179*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d6,  d25, d27, d0[0], d0[0] // -> t21a
2180*c0909341SAndroid Build Coastguard Worker        vmov            d27, d22        // t27
2181*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d26, d4,  #12   // t26a
2182*c0909341SAndroid Build Coastguard Worker
        // d24 (t27a) was last read by the first pair above, so it is free to act
        // as the accumulator for t22 here.
2183*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d24, d21, d23, d0[0], d0[0] // -> t22
2184*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d4,  d21, d23, d0[0], d0[0] // -> t25
2185*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d21, d6,  #12   // t21a
2186*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d22, d24, #12   // t22
2187*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d25, d4,  #12   // t25
2188*c0909341SAndroid Build Coastguard Worker
2189*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d4,  d3,  d2,  d0[0], d0[0] // -> t23a
2190*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d6,  d3,  d2,  d0[0], d0[0] // -> t24a
2191*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d23, d4,  #12   // t23a
2192*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d24, d6,  #12   // t24a
2193*c0909341SAndroid Build Coastguard Worker
2194*c0909341SAndroid Build Coastguard Worker        bx              lr
2195*c0909341SAndroid Build Coastguard Workerendfunc
2196*c0909341SAndroid Build Coastguard Worker
// def_horz_32 scale, shift, suffix
//
// Emits the function inv_txfm_horz\suffix\()_dct_32x2_neon.
//   scale   when non-zero, both groups of inputs are passed through
//           scale_input with the constant 2896*8*(1<<16) before the transform.
//   shift   rounding right shift used by vqrshrn when the 32-bit results are
//           narrowed to 16 bit.
//   suffix  text inserted into the emitted function name.
//
// Register usage of the emitted function, as far as this block shows:
//   r7      source pointer. Each 64-bit vector that is read is overwritten
//           with zero. The pointer is left advanced on return.
//   r8      source stride; doubled on entry and not restored.
//   r6      destination. First used as scratch for 128 bytes of 32-bit
//           results, which are then re-read and replaced in place by 128 bytes
//           of 16-bit results. Ends 128 bytes past its entry value.
//   lr      pushed on entry, popped into pc on return.
//   r12, d0-d15, d16-d31 are overwritten here or by the called routines.
2197*c0909341SAndroid Build Coastguard Worker.macro def_horz_32 scale=0, shift=2, suffix
2198*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_horz\suffix\()_dct_32x2_neon
2199*c0909341SAndroid Build Coastguard Worker        push            {lr}
        // d7 = zero vector used to clear each input as soon as it is loaded.
2200*c0909341SAndroid Build Coastguard Worker        vmov.i32        d7,  #0
        // Step by twice the stride so this pass reads every second input vector.
2201*c0909341SAndroid Build Coastguard Worker        lsl             r8,  r8,  #1
2202*c0909341SAndroid Build Coastguard Worker.if \scale
2203*c0909341SAndroid Build Coastguard Worker        mov_const       r12, 2896*8*(1<<16)
2204*c0909341SAndroid Build Coastguard Worker        vdup.32         d0,  r12
2205*c0909341SAndroid Build Coastguard Worker.endif
2206*c0909341SAndroid Build Coastguard Worker
        // Load 16 vectors (two 32-bit lanes each) and zero their source.
2207*c0909341SAndroid Build Coastguard Worker.irp i, d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31
2208*c0909341SAndroid Build Coastguard Worker        vld1.32         {\i}, [r7, :64]
2209*c0909341SAndroid Build Coastguard Worker        vst1.32         {d7}, [r7, :64], r8
2210*c0909341SAndroid Build Coastguard Worker.endr
        // Rewind the 16 steps just taken, then advance by half the doubled stride
        // (one original stride) so the second pass reads the vectors in between.
2211*c0909341SAndroid Build Coastguard Worker        sub             r7,  r7,  r8, lsl #4
2212*c0909341SAndroid Build Coastguard Worker        add             r7,  r7,  r8, lsr #1
2213*c0909341SAndroid Build Coastguard Worker.if \scale
2214*c0909341SAndroid Build Coastguard Worker        scale_input     d0[0], q8,  q9,  q10, q11, q12, q13, q14, q15
2215*c0909341SAndroid Build Coastguard Worker.endif
2216*c0909341SAndroid Build Coastguard Worker        bl              inv_dct_2s_x16_neon
2217*c0909341SAndroid Build Coastguard Worker
2218*c0909341SAndroid Build Coastguard Worker        // idct_16 leaves the row_clip_max/min constants in d9 and d8,
2219*c0909341SAndroid Build Coastguard Worker        // but here we want to use full q registers for clipping.
2220*c0909341SAndroid Build Coastguard Worker        vmov.i32        q3,  #0x1ffff // row_clip_max = ~(~bdmax << 7), 0x1ffff
2221*c0909341SAndroid Build Coastguard Worker        vmvn.i32        q2,  #0x1ffff // row_clip_min = (~bdmax << 7), 0xfffe0000
2222*c0909341SAndroid Build Coastguard Worker.irp r, q8, q9, q10, q11, q12, q13, q14, q15
2223*c0909341SAndroid Build Coastguard Worker        vmin.s32        \r,  \r,  q3
2224*c0909341SAndroid Build Coastguard Worker.endr
2225*c0909341SAndroid Build Coastguard Worker.irp r, q8, q9, q10, q11, q12, q13, q14, q15
2226*c0909341SAndroid Build Coastguard Worker        vmax.s32        \r,  \r,  q2
2227*c0909341SAndroid Build Coastguard Worker.endr
2228*c0909341SAndroid Build Coastguard Worker
        // 2x2 transposes: afterwards each even-numbered register holds lane 0 of
        // a register pair and each odd-numbered register holds lane 1.
2229*c0909341SAndroid Build Coastguard Worker        vtrn.32         d16, d17
2230*c0909341SAndroid Build Coastguard Worker        vtrn.32         d18, d19
2231*c0909341SAndroid Build Coastguard Worker        vtrn.32         d20, d21
2232*c0909341SAndroid Build Coastguard Worker        vtrn.32         d22, d23
2233*c0909341SAndroid Build Coastguard Worker        vtrn.32         d24, d25
2234*c0909341SAndroid Build Coastguard Worker        vtrn.32         d26, d27
2235*c0909341SAndroid Build Coastguard Worker        vtrn.32         d28, d29
2236*c0909341SAndroid Build Coastguard Worker        vtrn.32         d30, d31
2237*c0909341SAndroid Build Coastguard Worker
        // store1: append four d registers (32 bytes) to the buffer at r6.
2238*c0909341SAndroid Build Coastguard Worker.macro store1 r0, r1, r2, r3
2239*c0909341SAndroid Build Coastguard Worker        vst1.16         {\r0}, [r6, :64]!
2240*c0909341SAndroid Build Coastguard Worker        vst1.16         {\r1}, [r6, :64]!
2241*c0909341SAndroid Build Coastguard Worker        vst1.16         {\r2}, [r6, :64]!
2242*c0909341SAndroid Build Coastguard Worker        vst1.16         {\r3}, [r6, :64]!
2243*c0909341SAndroid Build Coastguard Worker.endm
        // Lane-0 data (64 bytes) is written first, then lane-1 data (64 bytes).
2244*c0909341SAndroid Build Coastguard Worker        store1          d16, d18, d20, d22
2245*c0909341SAndroid Build Coastguard Worker        store1          d24, d26, d28, d30
2246*c0909341SAndroid Build Coastguard Worker        store1          d17, d19, d21, d23
2247*c0909341SAndroid Build Coastguard Worker        store1          d25, d27, d29, d31
        // Purged so the next def_horz_32 instantiation can define store1 again.
2248*c0909341SAndroid Build Coastguard Worker.purgem store1
        // Back to the start of the 128 bytes just written.
2249*c0909341SAndroid Build Coastguard Worker        sub             r6,  r6,  #64*2
2250*c0909341SAndroid Build Coastguard Worker
        // Second pass: load and zero the 16 vectors skipped by the first pass.
        // d7 is zeroed again because it is part of q3, overwritten above.
2251*c0909341SAndroid Build Coastguard Worker        vmov.i32        d7,  #0
2252*c0909341SAndroid Build Coastguard Worker.irp i, d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31
2253*c0909341SAndroid Build Coastguard Worker        vld1.32         {\i}, [r7, :64]
2254*c0909341SAndroid Build Coastguard Worker        vst1.32         {d7}, [r7, :64], r8
2255*c0909341SAndroid Build Coastguard Worker.endr
2256*c0909341SAndroid Build Coastguard Worker.if \scale
2257*c0909341SAndroid Build Coastguard Worker        // This relies on the fact that the idct also leaves the right coeff in d0[1]
2258*c0909341SAndroid Build Coastguard Worker        scale_input     d0[1], q8,  q9,  q10, q11, q12, q13, q14, q15
2259*c0909341SAndroid Build Coastguard Worker.endif
2260*c0909341SAndroid Build Coastguard Worker        bl              inv_dct32_odd_2s_x16_neon
        // Transpose with the operands swapped, so after each vtrn the
        // higher-numbered register of a pair holds lane 0 in the order
        // {higher, lower}, and the lower-numbered one holds lane 1.
2261*c0909341SAndroid Build Coastguard Worker        vtrn.32         d31, d30
2262*c0909341SAndroid Build Coastguard Worker        vtrn.32         d29, d28
2263*c0909341SAndroid Build Coastguard Worker        vtrn.32         d27, d26
2264*c0909341SAndroid Build Coastguard Worker        vtrn.32         d25, d24
2265*c0909341SAndroid Build Coastguard Worker        vtrn.32         d23, d22
2266*c0909341SAndroid Build Coastguard Worker        vtrn.32         d21, d20
2267*c0909341SAndroid Build Coastguard Worker        vtrn.32         d19, d18
2268*c0909341SAndroid Build Coastguard Worker        vtrn.32         d17, d16
        // store2: combine 64 bytes of stored first-pass results with eight
        // second-pass registers and write 64 bytes of 16-bit output in place.
        // The parameters named r6/r7 are only referenced as \r6/\r7; a bare r6
        // below is the real register.
2269*c0909341SAndroid Build Coastguard Worker.macro store2 r0, r1, r2, r3, r4, r5, r6, r7, shift
        // Reload d0-d7 from the buffer; r6 ends up back at the start of the
        // 64-byte chunk.
2270*c0909341SAndroid Build Coastguard Worker        vld1.32         {q0, q1}, [r6, :128]!
2271*c0909341SAndroid Build Coastguard Worker        vld1.32         {q2, q3}, [r6, :128]
2272*c0909341SAndroid Build Coastguard Worker        sub             r6,  r6,  #32
        // Sums stay in d0-d7. Differences are written to d15 down to d8, i.e. in
        // reverse register order.
2273*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d15, d0,  \r0
2274*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d0,  d0,  \r0
2275*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d14, d1,  \r1
2276*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d1,  d1,  \r1
2277*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d13, d2,  \r2
2278*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d2,  d2,  \r2
2279*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d12, d3,  \r3
2280*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d3,  d3,  \r3
2281*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d11, d4,  \r4
2282*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d4,  d4,  \r4
2283*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d10, d5,  \r5
2284*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d5,  d5,  \r5
2285*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d9,  d6,  \r6
2286*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d6,  d6,  \r6
2287*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d8,  d7,  \r7
2288*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d7,  d7,  \r7
        // Narrow q0-q7 to 16 bit into d0-d7 with saturating rounding shift. Each
        // destination dN is at or below the registers of the source qN, so every
        // source is still intact when it is read.
2289*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d0,  q0,  #\shift
2290*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d1,  q1,  #\shift
2291*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d2,  q2,  #\shift
2292*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d3,  q3,  #\shift
2293*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d4,  q4,  #\shift
2294*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d5,  q5,  #\shift
2295*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d6,  q6,  #\shift
2296*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d7,  q7,  #\shift
        // The differences were formed pairwise swapped within each 32-bit pair;
        // swap adjacent 16-bit elements to put them in ascending order.
2297*c0909341SAndroid Build Coastguard Worker        vrev32.16       q2,  q2
2298*c0909341SAndroid Build Coastguard Worker        vrev32.16       q3,  q3
2299*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1}, [r6, :128]!
2300*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3}, [r6, :128]!
2301*c0909341SAndroid Build Coastguard Worker.endm
2302*c0909341SAndroid Build Coastguard Worker
        // One store2 per lane: odd-numbered registers carry lane 0, even-numbered
        // registers carry lane 1 (see the transposes above).
2303*c0909341SAndroid Build Coastguard Worker        store2          d31, d29, d27, d25, d23, d21, d19, d17, \shift
2304*c0909341SAndroid Build Coastguard Worker        store2          d30, d28, d26, d24, d22, d20, d18, d16, \shift
2305*c0909341SAndroid Build Coastguard Worker.purgem store2
2306*c0909341SAndroid Build Coastguard Worker        pop             {pc}
2307*c0909341SAndroid Build Coastguard Workerendfunc
2308*c0909341SAndroid Build Coastguard Worker.endm
2309*c0909341SAndroid Build Coastguard Worker
// Emit the two variants of the horizontal helper:
//   inv_txfm_horz_dct_32x2_neon        no input scaling, final shift of 2
//   inv_txfm_horz_scale_dct_32x2_neon  input scaling,    final shift of 1
2310*c0909341SAndroid Build Coastguard Workerdef_horz_32 scale=0, shift=2
2311*c0909341SAndroid Build Coastguard Workerdef_horz_32 scale=1, shift=1, suffix=_scale
2312*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_vert_dct_4x32_neon
//
// Runs two 16-vector transforms over 16-bit data read from r7 (four 16-bit
// values per vector), combines their results into 32 rows, and adds those rows
// to the 4-pixel-wide destination at r6.
//
// Register usage, as far as this block shows:
//   r7   source pointer. The first transform's results are written back over
//        its own inputs and re-read during the combine step.
//   r8   source stride; doubled on entry and not restored.
//   r6   destination write pointer, advanced by r1 for each of the 32 rows.
//   r1   destination stride.
//   r9   overwritten with -r8 (not saved here).
//   r10  destination read pointer (saved/restored together with r11 and lr).
//   q0-q3, q6, q7 and d16-d31 are overwritten, plus whatever the two called
//        transform routines touch.
// Destination values are clamped to the range [0, 0x3ff].
2313*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_vert_dct_4x32_neon
        // r11 is saved although this block never writes it - presumably to keep
        // the stack 8-byte aligned across the calls below (TODO confirm).
2314*c0909341SAndroid Build Coastguard Worker        push            {r10-r11,lr}
        // Step by twice the stride so this pass reads every second vector.
2315*c0909341SAndroid Build Coastguard Worker        lsl             r8,  r8,  #1
2316*c0909341SAndroid Build Coastguard Worker
2317*c0909341SAndroid Build Coastguard Worker.irp i, d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31
2318*c0909341SAndroid Build Coastguard Worker        vld1.16         {\i}, [r7, :64], r8
2319*c0909341SAndroid Build Coastguard Worker.endr
        // Rewind the 16 steps just taken.
2320*c0909341SAndroid Build Coastguard Worker        sub             r7,  r7,  r8, lsl #4
2321*c0909341SAndroid Build Coastguard Worker
2322*c0909341SAndroid Build Coastguard Worker        bl              X(inv_dct_4h_x16_neon)
2323*c0909341SAndroid Build Coastguard Worker
        // Park the first transform's results over its inputs; d16-d31 are needed
        // for the second transform.
2324*c0909341SAndroid Build Coastguard Worker.irp i, d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31
2325*c0909341SAndroid Build Coastguard Worker        vst1.16         {\i}, [r7, :64], r8
2326*c0909341SAndroid Build Coastguard Worker.endr
        // Rewind, then advance by half the doubled stride (one original stride) to
        // address the vectors in between.
2327*c0909341SAndroid Build Coastguard Worker        sub             r7,  r7,  r8, lsl #4
2328*c0909341SAndroid Build Coastguard Worker        add             r7,  r7,  r8, lsr #1
2329*c0909341SAndroid Build Coastguard Worker
2330*c0909341SAndroid Build Coastguard Worker.irp i, d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31
2331*c0909341SAndroid Build Coastguard Worker        vld1.16         {\i}, [r7, :64], r8
2332*c0909341SAndroid Build Coastguard Worker.endr
        // Return r7 to the first parked result of the first transform.
2333*c0909341SAndroid Build Coastguard Worker        sub             r7,  r7,  r8, lsl #4
2334*c0909341SAndroid Build Coastguard Worker        sub             r7,  r7,  r8, lsr #1
2335*c0909341SAndroid Build Coastguard Worker        bl              X(inv_dct32_odd_4h_x16_neon)
2336*c0909341SAndroid Build Coastguard Worker
        // r9 = negative step for walking the parked results backwards,
        // r10 = destination read pointer, q6 = lower clamp (0),
        // q7 = upper clamp (0x3ff).
2337*c0909341SAndroid Build Coastguard Worker        neg             r9,  r8
2338*c0909341SAndroid Build Coastguard Worker        mov             r10, r6
2339*c0909341SAndroid Build Coastguard Worker        vmov.i16        q6,  #0
2340*c0909341SAndroid Build Coastguard Worker        vmvn.i16        q7,  #0xfc00 // 0x3ff
        // combine: produce four output rows.
        //   \r0-\r3  second-transform registers to add to / subtract from the
        //            parked first-transform rows
        //   \op      vqadd or vqsub
        //   \stride  step applied to r7 after each parked row is read
        // A bare "r1" below is the real register (destination stride); only
        // "\r1" refers to the macro argument. Loads, arithmetic and stores are
        // interleaved, and the destination is read through r10 ahead of the
        // writes through r6.
2341*c0909341SAndroid Build Coastguard Worker.macro combine r0, r1, r2, r3, op, stride
2342*c0909341SAndroid Build Coastguard Worker        vld1.16         {d4}, [r7,  :64], \stride
2343*c0909341SAndroid Build Coastguard Worker        vld1.16         {d0}, [r10, :64], r1
2344*c0909341SAndroid Build Coastguard Worker        vld1.16         {d5}, [r7,  :64],  \stride
2345*c0909341SAndroid Build Coastguard Worker        vld1.16         {d1}, [r10, :64], r1
2346*c0909341SAndroid Build Coastguard Worker        \op\().s16      d4,  d4,  \r0
2347*c0909341SAndroid Build Coastguard Worker        vld1.16         {d6}, [r7,  :64], \stride
2348*c0909341SAndroid Build Coastguard Worker        vld1.16         {d2}, [r10, :64], r1
2349*c0909341SAndroid Build Coastguard Worker        \op\().s16      d5,  d5,  \r1
2350*c0909341SAndroid Build Coastguard Worker        vld1.16         {d3}, [r10, :64], r1
        // Rounding shift right by 4 of the combined transform output.
2351*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q2,  q2,  #4
2352*c0909341SAndroid Build Coastguard Worker        \op\().s16      d6,  d6,  \r2
2353*c0909341SAndroid Build Coastguard Worker        vld1.16         {d7}, [r7,  :64], \stride
        // Add to the destination pixels, then clamp to [q6, q7].
2354*c0909341SAndroid Build Coastguard Worker        vqadd.s16       q0,  q0,  q2
2355*c0909341SAndroid Build Coastguard Worker        \op\().s16      d7,  d7,  \r3
2356*c0909341SAndroid Build Coastguard Worker        vmax.s16        q0,  q0,  q6
2357*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q3,  q3,  #4
2358*c0909341SAndroid Build Coastguard Worker        vmin.s16        q0,  q0,  q7
2359*c0909341SAndroid Build Coastguard Worker        vqadd.s16       q1,  q1,  q3
2360*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0}, [r6,  :64], r1
2361*c0909341SAndroid Build Coastguard Worker        vmax.s16        q1,  q1,  q6
2362*c0909341SAndroid Build Coastguard Worker        vst1.16         {d1}, [r6,  :64], r1
2363*c0909341SAndroid Build Coastguard Worker        vmin.s16        q1,  q1,  q7
2364*c0909341SAndroid Build Coastguard Worker        vst1.16         {d2}, [r6,  :64], r1
2365*c0909341SAndroid Build Coastguard Worker        vst1.16         {d3}, [r6,  :64], r1
2366*c0909341SAndroid Build Coastguard Worker.endm
        // Rows 0-15: parked rows walked forwards, second-transform registers
        // taken in descending order d31..d16 and added.
2367*c0909341SAndroid Build Coastguard Worker        combine         d31, d30, d29, d28, vqadd, r8
2368*c0909341SAndroid Build Coastguard Worker        combine         d27, d26, d25, d24, vqadd, r8
2369*c0909341SAndroid Build Coastguard Worker        combine         d23, d22, d21, d20, vqadd, r8
2370*c0909341SAndroid Build Coastguard Worker        combine         d19, d18, d17, d16, vqadd, r8
        // r7 is one step past the last parked row; step back onto it.
2371*c0909341SAndroid Build Coastguard Worker        sub             r7,  r7,  r8
        // Rows 16-31: parked rows walked backwards (stride r9 = -r8),
        // second-transform registers taken in ascending order d16..d31 and
        // subtracted.
2372*c0909341SAndroid Build Coastguard Worker        combine         d16, d17, d18, d19, vqsub, r9
2373*c0909341SAndroid Build Coastguard Worker        combine         d20, d21, d22, d23, vqsub, r9
2374*c0909341SAndroid Build Coastguard Worker        combine         d24, d25, d26, d27, vqsub, r9
2375*c0909341SAndroid Build Coastguard Worker        combine         d28, d29, d30, d31, vqsub, r9
2376*c0909341SAndroid Build Coastguard Worker.purgem combine
2377*c0909341SAndroid Build Coastguard Worker
2378*c0909341SAndroid Build Coastguard Worker        pop             {r10-r11,pc}
2379*c0909341SAndroid Build Coastguard Workerendfunc
2380*c0909341SAndroid Build Coastguard Worker
// Table of 16 halfword thresholds. inv_txfm_add_identity_identity_32x32_16bpc_neon
// takes pointers into this table, loads entries with ldrh and compares them
// against r3. The last entry, 1024, equals 32*32.
2381*c0909341SAndroid Build Coastguard Workerconst eob_32x32
2382*c0909341SAndroid Build Coastguard Worker        .short 3, 10, 21, 36, 55, 78, 105, 136, 171, 210, 253, 300, 351, 406, 465, 1024
2383*c0909341SAndroid Build Coastguard Workerendconst
2384*c0909341SAndroid Build Coastguard Worker
// Table of 16 halfword values; the last entry, 512, equals 16*32.
// NOTE(review): presumably thresholds for the 16x32/32x16 transforms, used
// like eob_32x32 - the users are outside this view, confirm there.
2385*c0909341SAndroid Build Coastguard Workerconst eob_16x32
2386*c0909341SAndroid Build Coastguard Worker        .short 3, 10, 21, 36, 55, 78, 105, 151, 183, 215, 247, 279, 311, 343, 375, 512
2387*c0909341SAndroid Build Coastguard Workerendconst
2388*c0909341SAndroid Build Coastguard Worker
// Table of 8 halfword values. The first seven match the first seven entries of
// eob_16x32; the last entry is 512.
// NOTE(review): presumably used when iterating over the 16-wide side - the
// users are outside this view, confirm there.
2389*c0909341SAndroid Build Coastguard Workerconst eob_16x32_shortside
2390*c0909341SAndroid Build Coastguard Worker        .short 3, 10, 21, 36, 55, 78, 105, 512
2391*c0909341SAndroid Build Coastguard Workerendconst
2392*c0909341SAndroid Build Coastguard Worker
// Table of 16 halfword values; the last entry, 256, equals 8*32.
// NOTE(review): presumably thresholds for the 8x32/32x8 transforms - the
// users are outside this view, confirm there.
2393*c0909341SAndroid Build Coastguard Workerconst eob_8x32
2394*c0909341SAndroid Build Coastguard Worker        .short 3, 10, 21, 43, 59, 75, 91, 107, 123, 139, 155, 171, 187, 203, 219, 256
2395*c0909341SAndroid Build Coastguard Workerendconst
2396*c0909341SAndroid Build Coastguard Worker
// Identity/identity 32x32 inverse transform and add, 16 bpc.
// Works on tiles of 8 vector loads at a time: coefficients are read from r2,
// zeroed in place, narrowed to 16 bit, transposed and handed to
// load_add_store_8x4 together with the destination pointer.
//
// Register use as visible in this function (argument meaning is inferred,
// TODO confirm against the C prototype):
//   r0  - destination pointer passed to load_add_store_8x4
//   r1  - destination stride in bytes (used as r1 << 2 to move 4 rows)
//   r2  - 32-bit coefficient buffer, every vector read is cleared again
//   r3  - value compared against eob_32x32 entries (presumably eob)
//   r4  - inner loop threshold pointer, advances 8 bytes per load
//   r5  - outer loop threshold pointer, advances 4 bytes per load
//   r6  - byte distance between consecutive coefficient loads (4*32)
//   r7  - second register argument of load_add_store_8x4
//   r12 - inner loop progress counter, +8 per iteration
//   lr  - scratch for the threshold that was just loaded
2397*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_identity_identity_32x32_16bpc_neon, export=1
2398*c0909341SAndroid Build Coastguard Worker        push            {r4-r7,lr}
2399*c0909341SAndroid Build Coastguard Worker        vpush           {q6-q7}
        // The trailing 2 is presumably a byte offset into the table, i.e. the
        // outer loop starts at the second entry - TODO confirm in movrel_local.
2400*c0909341SAndroid Build Coastguard Worker        movrel_local    r5,  eob_32x32, 2
2401*c0909341SAndroid Build Coastguard Worker
2402*c0909341SAndroid Build Coastguard Worker        mov             r6,  #4*32
2403*c0909341SAndroid Build Coastguard Worker1:
        // Outer loop: reset the progress counter and restart the inner
        // threshold pointer (offset argument 6, see note above).
2404*c0909341SAndroid Build Coastguard Worker        mov             r12, #0
2405*c0909341SAndroid Build Coastguard Worker        movrel_local    r4,  eob_32x32, 6
2406*c0909341SAndroid Build Coastguard Worker2:
        // Inner loop: q0 is the zero vector written back over each load.
2407*c0909341SAndroid Build Coastguard Worker        vmov.i32        q0,  #0
2408*c0909341SAndroid Build Coastguard Worker        add             r12, r12, #8
        // Load 8 vectors of four 32-bit coefficients, r6 bytes apart, clearing
        // each one in memory right after it has been read.
2409*c0909341SAndroid Build Coastguard Worker.irp i, q8, q9, q10, q11, q12, q13, q14, q15
2410*c0909341SAndroid Build Coastguard Worker        vld1.32         {\i}, [r2, :128]
2411*c0909341SAndroid Build Coastguard Worker        vst1.32         {q0}, [r2, :128], r6
2412*c0909341SAndroid Build Coastguard Worker.endr
        // Saturating narrow to 16 bit. Loads n and n+4 end up in the low and
        // high half of the same q register (q8..q11). The order matters: each
        // source q register is read before its aliasing d registers are written.
2413*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d16, q8
2414*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d17, q12
2415*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d18, q9
2416*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d19, q13
2417*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d20, q10
2418*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d21, q14
2419*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d22, q11
2420*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d23, q15
2421*c0909341SAndroid Build Coastguard Worker        transpose_4x8h  q8,  q9,  q10, q11
2422*c0909341SAndroid Build Coastguard Worker
2423*c0909341SAndroid Build Coastguard Worker        load_add_store_8x4 r0, r7, shiftbits=2
        // Fetch the next inner threshold, then step r0 back by 4 rows and
        // forward by 16 bytes. The sub/add do not set flags, so bge still sees
        // the result of the cmp.
2424*c0909341SAndroid Build Coastguard Worker        ldrh            lr,  [r4], #8
2425*c0909341SAndroid Build Coastguard Worker        sub             r0,  r0,  r1, lsl #2
2426*c0909341SAndroid Build Coastguard Worker        cmp             r3,  lr
2427*c0909341SAndroid Build Coastguard Worker        add             r0,  r0,  #2*8
2428*c0909341SAndroid Build Coastguard Worker        bge             2b
2429*c0909341SAndroid Build Coastguard Worker
        // Outer threshold check: finish when r3 is below the next entry.
2430*c0909341SAndroid Build Coastguard Worker        ldrh            lr,  [r5], #4
2431*c0909341SAndroid Build Coastguard Worker        cmp             r3,  lr
2432*c0909341SAndroid Build Coastguard Worker        blt             9f
2433*c0909341SAndroid Build Coastguard Worker
        // Undo the horizontal advance of r0 (2 bytes per counted column) and
        // move it down 4 rows. Rewind r2 by r6*r12 bytes (mls computes
        // r2 - r6*r12) and advance it by 16 bytes to the next 4 coefficients.
2434*c0909341SAndroid Build Coastguard Worker        sub             r0,  r0,  r12, lsl #1
2435*c0909341SAndroid Build Coastguard Worker        add             r0,  r0,  r1,  lsl #2
2436*c0909341SAndroid Build Coastguard Worker        mls             r2,  r6,  r12, r2
2437*c0909341SAndroid Build Coastguard Worker        add             r2,  r2,  #4*4
2438*c0909341SAndroid Build Coastguard Worker        b               1b
2439*c0909341SAndroid Build Coastguard Worker9:
2440*c0909341SAndroid Build Coastguard Worker        vpop            {q6-q7}
2441*c0909341SAndroid Build Coastguard Worker        pop             {r4-r7,pc}
2442*c0909341SAndroid Build Coastguard Workerendfunc
2443*c0909341SAndroid Build Coastguard Worker
// shift_8_regs op, shift
// Applies the instruction given as op, with the immediate given as shift,
// in place to each of q8..q15.
2444*c0909341SAndroid Build Coastguard Worker.macro shift_8_regs op, shift
2445*c0909341SAndroid Build Coastguard Worker.irp i, q8, q9, q10, q11, q12, q13, q14, q15
2446*c0909341SAndroid Build Coastguard Worker        \op             \i,  \i,  #\shift
2447*c0909341SAndroid Build Coastguard Worker.endr
2448*c0909341SAndroid Build Coastguard Worker.endm
2449*c0909341SAndroid Build Coastguard Worker
// def_identity_1632 w, h, wshort, hshort
// Emits the exported identity/identity function for a w x h block, 16 bpc.
// The loop structure is the same as in the 32x32 identity function, with an
// additional scale_input step and a size dependent identity step.
//   w, h    - block dimensions, used in the symbol name, in the load step
//             (4*h) and to select the 16x32 or 32x16 code path
//   wshort  - suffix appended to eob_16x32 for the inner loop table
//   hshort  - suffix appended to eob_16x32 for the outer loop table
// Constants: r8 is loaded into d0[0] and r9 into d0[1]. 2896/4096 is roughly
// 1/sqrt(2) and 5793/4096 is roughly sqrt(2); how scale_input and the
// identity_8x4 macros apply them is not visible here.
// Argument registers r0-r3 are used as in the 32x32 identity function
// (presumably dst, stride, coefficients, eob - TODO confirm).
2450*c0909341SAndroid Build Coastguard Worker.macro def_identity_1632 w, h, wshort, hshort
2451*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_identity_identity_\w\()x\h\()_16bpc_neon, export=1
2452*c0909341SAndroid Build Coastguard Worker        push            {r4-r9,lr}
2453*c0909341SAndroid Build Coastguard Worker        vpush           {q6-q7}
        // r9 gets a zero low half here and its high half from movt below.
2454*c0909341SAndroid Build Coastguard Worker        mov             r9,  #0
2455*c0909341SAndroid Build Coastguard Worker        mov_const       r8,  2896*8*(1<<16)
2456*c0909341SAndroid Build Coastguard Worker        movt            r9,  #2*(5793-4096)*8
        // Outer loop threshold table. The trailing 2 is presumably a byte
        // offset into the table - TODO confirm in movrel_local.
2457*c0909341SAndroid Build Coastguard Worker        movrel_local    r5,  eob_16x32\hshort, 2
2458*c0909341SAndroid Build Coastguard Worker
        // r6 = byte distance between consecutive coefficient loads.
2459*c0909341SAndroid Build Coastguard Worker        mov             r6,  #4*\h
2460*c0909341SAndroid Build Coastguard Worker1:
        // Outer loop: reset the progress counter and the inner threshold
        // pointer.
2461*c0909341SAndroid Build Coastguard Worker        mov             r12, #0
2462*c0909341SAndroid Build Coastguard Worker        movrel_local    r4,  eob_16x32\wshort, 6
2463*c0909341SAndroid Build Coastguard Worker2:
        // Rebuild the constants on every iteration: d0[0] = r8, d0[1] = r9,
        // q1 = zero vector used to clear the coefficients.
2464*c0909341SAndroid Build Coastguard Worker        vdup.i32        d0,  r8
2465*c0909341SAndroid Build Coastguard Worker        vmov.i32        q1,  #0
2466*c0909341SAndroid Build Coastguard Worker        vmov.32         d0[1], r9
2467*c0909341SAndroid Build Coastguard Worker        add             r12, r12, #8
        // Load 8 vectors of four 32-bit coefficients, r6 bytes apart, clearing
        // each one in memory right after it has been read.
2468*c0909341SAndroid Build Coastguard Worker.irp i, q8, q9, q10, q11, q12, q13, q14, q15
2469*c0909341SAndroid Build Coastguard Worker        vld1.32         {\i}, [r2, :128]
2470*c0909341SAndroid Build Coastguard Worker        vst1.32         {q1}, [r2, :128], r6
2471*c0909341SAndroid Build Coastguard Worker.endr
2472*c0909341SAndroid Build Coastguard Worker        scale_input     d0[0], q8,  q9, q10, q11, q12, q13, q14, q15
2473*c0909341SAndroid Build Coastguard Worker
2474*c0909341SAndroid Build Coastguard Worker.if \w == 16
2475*c0909341SAndroid Build Coastguard Worker        // 16x32
2476*c0909341SAndroid Build Coastguard Worker        identity_8x4_shift1 d0[1]
2477*c0909341SAndroid Build Coastguard Worker.else
2478*c0909341SAndroid Build Coastguard Worker        // 32x16
2479*c0909341SAndroid Build Coastguard Worker        shift_8_regs    vqshl.s32, 1
2480*c0909341SAndroid Build Coastguard Worker        identity_8x4    d0[1]
2481*c0909341SAndroid Build Coastguard Worker.endif
        // Saturating narrow to 16 bit. Loads n and n+4 share one q register.
        // The order matters: each source q register is read before its
        // aliasing d registers are written.
2482*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d16, q8
2483*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d17, q12
2484*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d18, q9
2485*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d19, q13
2486*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d20, q10
2487*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d21, q14
2488*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d22, q11
2489*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d23, q15
2490*c0909341SAndroid Build Coastguard Worker        transpose_4x8h  q8,  q9,  q10, q11
2491*c0909341SAndroid Build Coastguard Worker
        // The two sizes differ only in the shiftbits value passed on.
2492*c0909341SAndroid Build Coastguard Worker.if \w == 16
2493*c0909341SAndroid Build Coastguard Worker        load_add_store_8x4 r0, r7, shiftbits=2
2494*c0909341SAndroid Build Coastguard Worker.else
2495*c0909341SAndroid Build Coastguard Worker        load_add_store_8x4 r0, r7, shiftbits=4
2496*c0909341SAndroid Build Coastguard Worker.endif
        // Fetch the next inner threshold, then step r0 back by 4 rows and
        // forward by 16 bytes. The sub/add do not set flags, so bge still sees
        // the result of the cmp.
2497*c0909341SAndroid Build Coastguard Worker        ldrh            lr,  [r4], #8
2498*c0909341SAndroid Build Coastguard Worker        sub             r0,  r0,  r1, lsl #2
2499*c0909341SAndroid Build Coastguard Worker        cmp             r3,  lr
2500*c0909341SAndroid Build Coastguard Worker        add             r0,  r0,  #2*8
2501*c0909341SAndroid Build Coastguard Worker        bge             2b
2502*c0909341SAndroid Build Coastguard Worker
        // Outer threshold check: finish when r3 is below the next entry.
2503*c0909341SAndroid Build Coastguard Worker        ldrh            lr,  [r5], #4
2504*c0909341SAndroid Build Coastguard Worker        cmp             r3,  lr
2505*c0909341SAndroid Build Coastguard Worker        blt             9f
2506*c0909341SAndroid Build Coastguard Worker
        // Undo the horizontal advance of r0 and move it down 4 rows. Rewind r2
        // by r6*r12 bytes (mls computes r2 - r6*r12) and advance it by 16
        // bytes to the next 4 coefficients.
2507*c0909341SAndroid Build Coastguard Worker        sub             r0,  r0,  r12, lsl #1
2508*c0909341SAndroid Build Coastguard Worker        add             r0,  r0,  r1,  lsl #2
2509*c0909341SAndroid Build Coastguard Worker        mls             r2,  r6,  r12, r2
2510*c0909341SAndroid Build Coastguard Worker        add             r2,  r2,  #4*4
2511*c0909341SAndroid Build Coastguard Worker        b               1b
2512*c0909341SAndroid Build Coastguard Worker9:
2513*c0909341SAndroid Build Coastguard Worker        vpop            {q6-q7}
2514*c0909341SAndroid Build Coastguard Worker        pop             {r4-r9,pc}
2515*c0909341SAndroid Build Coastguard Workerendfunc
2516*c0909341SAndroid Build Coastguard Worker.endm
2517*c0909341SAndroid Build Coastguard Worker
// Instantiations: 16x32 uses eob_16x32_shortside for the inner loop and
// eob_16x32 for the outer loop; 32x16 uses them the other way round.
2518*c0909341SAndroid Build Coastguard Workerdef_identity_1632 16, 32, _shortside,
2519*c0909341SAndroid Build Coastguard Workerdef_identity_1632 32, 16, , _shortside
2520*c0909341SAndroid Build Coastguard Worker
// def_identity_832 w, h
// Emits the exported identity/identity function for a w x h block, 16 bpc,
// with one code path for w == 8 and another for every other w (instantiated
// below with 32x8 only).
// Register use as visible here (argument meaning is inferred, TODO confirm):
//   r0  - destination pointer passed to the load_add_store macros
//   r1  - destination stride in bytes
//   r2  - 32-bit coefficient buffer, cleared as it is read
//   r3  - value compared against eob_8x32 entries (presumably eob)
//   r4  - threshold pointer, advances 4 bytes per iteration
//   r5  - second register argument of the load_add_store macros
//   r12 - byte distance between coefficient loads (4*h)
//   lr  - threshold loaded for the current iteration
2521*c0909341SAndroid Build Coastguard Worker.macro def_identity_832 w, h
2522*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_identity_identity_\w\()x\h\()_16bpc_neon, export=1
2523*c0909341SAndroid Build Coastguard Worker        push            {r4-r5,lr}
2524*c0909341SAndroid Build Coastguard Worker        vpush           {q6-q7}
        // The trailing 2 is presumably a byte offset into the table - TODO
        // confirm in movrel_local.
2525*c0909341SAndroid Build Coastguard Worker        movrel_local    r4,  eob_8x32, 2
2526*c0909341SAndroid Build Coastguard Worker
2527*c0909341SAndroid Build Coastguard Worker        mov             r12, #4*\h
2528*c0909341SAndroid Build Coastguard Worker1:
        // Threshold for this iteration; it is compared further down.
2529*c0909341SAndroid Build Coastguard Worker        ldrh            lr,  [r4], #4
2530*c0909341SAndroid Build Coastguard Worker.if \w == 8
        // w == 8: load 8 vectors of four coefficients, r12 bytes apart, and
        // clear each one in memory right after reading it.
2531*c0909341SAndroid Build Coastguard Worker        vmov.i32        q0,  #0
2532*c0909341SAndroid Build Coastguard Worker.irp i, q8, q9, q10, q11, q12, q13, q14, q15
2533*c0909341SAndroid Build Coastguard Worker        vld1.32         {\i}, [r2, :128]
2534*c0909341SAndroid Build Coastguard Worker        vst1.32         {q0}, [r2, :128], r12
2535*c0909341SAndroid Build Coastguard Worker.endr
2536*c0909341SAndroid Build Coastguard Worker
        // Rounding right shift by 1 with saturating narrow to 16 bit. Loads n
        // and n+4 share one q register. The order matters: each source q
        // register is read before its aliasing d registers are written.
2537*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d16, q8,  #1
2538*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d17, q12, #1
2539*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d18, q9,  #1
2540*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d19, q13, #1
2541*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d20, q10, #1
2542*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d21, q14, #1
2543*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d22, q11, #1
2544*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d23, q15, #1
2545*c0909341SAndroid Build Coastguard Worker
2546*c0909341SAndroid Build Coastguard Worker        transpose_4x8h  q8,  q9,  q10, q11
2547*c0909341SAndroid Build Coastguard Worker
        // The blt below uses the flags from this cmp, which relies on
        // load_add_store_8x4 leaving the flags untouched (macro body not
        // visible here).
2548*c0909341SAndroid Build Coastguard Worker        cmp             r3,  lr
2549*c0909341SAndroid Build Coastguard Worker        load_add_store_8x4 r0, r5, shiftbits=2
2550*c0909341SAndroid Build Coastguard Worker        blt             9f
        // Rewind r2 by the 8 load steps and advance it by 16 bytes to the
        // next 4 coefficients.
2551*c0909341SAndroid Build Coastguard Worker        sub             r2,  r2,  r12, lsl #3
2552*c0909341SAndroid Build Coastguard Worker        add             r2,  r2,  #4*4
2553*c0909341SAndroid Build Coastguard Worker.else
        // Other widths: each load fetches 8 coefficients (two q registers),
        // r12 bytes apart, and clears them in memory. r2 is not rewound
        // afterwards.
2554*c0909341SAndroid Build Coastguard Worker        vmov.i32        q0,  #0
2555*c0909341SAndroid Build Coastguard Worker        vmov.i32        q1,  #0
2556*c0909341SAndroid Build Coastguard Worker        vld1.32         {q8,  q9},  [r2, :128]
2557*c0909341SAndroid Build Coastguard Worker        vst1.32         {q0,  q1},  [r2, :128], r12
2558*c0909341SAndroid Build Coastguard Worker        vld1.32         {q10, q11}, [r2, :128]
2559*c0909341SAndroid Build Coastguard Worker        vst1.32         {q0,  q1},  [r2, :128], r12
2560*c0909341SAndroid Build Coastguard Worker        vld1.32         {q12, q13}, [r2, :128]
2561*c0909341SAndroid Build Coastguard Worker        vst1.32         {q0,  q1},  [r2, :128], r12
2562*c0909341SAndroid Build Coastguard Worker        vld1.32         {q14, q15}, [r2, :128]
2563*c0909341SAndroid Build Coastguard Worker        vst1.32         {q0,  q1},  [r2, :128], r12
        // Saturating narrow to 16 bit. The first halves of the four loads go
        // to d16..d19 and the second halves to d20..d23. The order matters:
        // each source q register is read before its aliasing d registers are
        // written.
2564*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d16, q8
2565*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d17, q10
2566*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d20, q9
2567*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d21, q11
2568*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d18, q12
2569*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d19, q14
2570*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d22, q13
2571*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d23, q15
2572*c0909341SAndroid Build Coastguard Worker
2573*c0909341SAndroid Build Coastguard Worker        transpose_4x4h  q8,  q9,  d16, d17, d18, d19
2574*c0909341SAndroid Build Coastguard Worker        transpose_4x4h  q10, q11, d20, d21, d22, d23
2575*c0909341SAndroid Build Coastguard Worker
        // As above, blt uses the flags from this cmp across the macro.
2576*c0909341SAndroid Build Coastguard Worker        cmp             r3,  lr
2577*c0909341SAndroid Build Coastguard Worker        load_add_store_4x8 r0, r5, shiftbits=3
2578*c0909341SAndroid Build Coastguard Worker        blt             9f
        // Step r0 back by 8 rows and forward by 8 bytes.
2579*c0909341SAndroid Build Coastguard Worker        sub             r0,  r0,  r1, lsl #3
2580*c0909341SAndroid Build Coastguard Worker        add             r0,  r0,  #2*4
2581*c0909341SAndroid Build Coastguard Worker.endif
2582*c0909341SAndroid Build Coastguard Worker        b               1b
2583*c0909341SAndroid Build Coastguard Worker
2584*c0909341SAndroid Build Coastguard Worker9:
2585*c0909341SAndroid Build Coastguard Worker        vpop            {q6-q7}
2586*c0909341SAndroid Build Coastguard Worker        pop             {r4-r5,pc}
2587*c0909341SAndroid Build Coastguard Workerendfunc
2588*c0909341SAndroid Build Coastguard Worker.endm
2589*c0909341SAndroid Build Coastguard Worker
// Instantiations: 8x32 takes the w == 8 path, 32x8 takes the other path.
2590*c0909341SAndroid Build Coastguard Workerdef_identity_832 8, 32
2591*c0909341SAndroid Build Coastguard Workerdef_identity_832 32, 8
2592*c0909341SAndroid Build Coastguard Worker
// DCT/DCT 32x32 inverse transform and add, 16 bpc.
// Two passes through a 2048 byte stack buffer (32*32 16-bit values):
//   pass 1 - inv_txfm_horz_dct_32x2_neon is called once per pair of input
//            lines. As soon as r3 is below the current eob_32x32 threshold,
//            the rest of the buffer is zero filled instead.
//   pass 2 - inv_txfm_add_vert_dct_4x32_neon is called for 8 slices, each
//            4 values wide.
// r0/r2/r3 are presumably dst, coefficients and eob - TODO confirm against
// the C prototype. The helper calling convention (r6, r7, r8) is defined
// outside this view.
2593*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_dct_dct_32x32_16bpc_neon, export=1
        // idct_dc is defined elsewhere; presumably it handles the DC-only
        // case and returns early - TODO confirm.
2594*c0909341SAndroid Build Coastguard Worker        idct_dc         32,  32,  2
2595*c0909341SAndroid Build Coastguard Worker
2596*c0909341SAndroid Build Coastguard Worker        push            {r4-r11,lr}
2597*c0909341SAndroid Build Coastguard Worker        vpush           {q4-q7}
2598*c0909341SAndroid Build Coastguard Worker        sub_sp_align    2048
        // r10 walks the threshold table, r11 holds the current threshold.
2599*c0909341SAndroid Build Coastguard Worker        movrel_local    r10, eob_32x32
2600*c0909341SAndroid Build Coastguard Worker        ldrh            r11, [r10], #2
2601*c0909341SAndroid Build Coastguard Worker
        // Pass 1, unrolled per pair of lines. r6 = position in the stack buffer
        // (64 bytes per line). For i > 0, r8 is first set to the number of
        // remaining lines in case the zero fill at 1: is taken.
2602*c0909341SAndroid Build Coastguard Worker.irp i, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
2603*c0909341SAndroid Build Coastguard Worker        add             r6,  sp,  #(\i*32*2)
2604*c0909341SAndroid Build Coastguard Worker.if \i > 0
2605*c0909341SAndroid Build Coastguard Worker        mov             r8,  #(32 - \i)
2606*c0909341SAndroid Build Coastguard Worker        cmp             r3,  r11
2607*c0909341SAndroid Build Coastguard Worker        blt             1f
2608*c0909341SAndroid Build Coastguard Worker.if \i < 30
2609*c0909341SAndroid Build Coastguard Worker        ldrh            r11, [r10], #2
2610*c0909341SAndroid Build Coastguard Worker.endif
2611*c0909341SAndroid Build Coastguard Worker.endif
        // r7 = input position (4 bytes per coefficient), r8 = 32*4 passed to
        // the helper (presumably the input stride in bytes).
2612*c0909341SAndroid Build Coastguard Worker        add             r7,  r2,  #(\i*4)
2613*c0909341SAndroid Build Coastguard Worker        mov             r8,  #32*4
2614*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_horz_dct_32x2_neon
2615*c0909341SAndroid Build Coastguard Worker.endr
2616*c0909341SAndroid Build Coastguard Worker        b               3f
2617*c0909341SAndroid Build Coastguard Worker
2618*c0909341SAndroid Build Coastguard Worker1:
        // Zero fill the rest of the stack buffer: each iteration writes
        // 4 * 32 = 128 bytes (two lines of 32 16-bit values) and reduces the
        // remaining line count in r8 by 2.
2619*c0909341SAndroid Build Coastguard Worker        vmov.i16        q2,  #0
2620*c0909341SAndroid Build Coastguard Worker        vmov.i16        q3,  #0
2621*c0909341SAndroid Build Coastguard Worker2:
2622*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #2
2623*c0909341SAndroid Build Coastguard Worker.rept 4
2624*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3}, [r6, :128]!
2625*c0909341SAndroid Build Coastguard Worker.endr
2626*c0909341SAndroid Build Coastguard Worker        bgt             2b
2627*c0909341SAndroid Build Coastguard Worker
2628*c0909341SAndroid Build Coastguard Worker3:
        // Pass 2: r6 = destination offset by i pixels (2 bytes each), r7 = the
        // matching offset in the stack buffer, r8 = 32*2 passed to the helper
        // (presumably the buffer stride in bytes).
2629*c0909341SAndroid Build Coastguard Worker.irp i, 0, 4, 8, 12, 16, 20, 24, 28
2630*c0909341SAndroid Build Coastguard Worker        add             r6,  r0,  #(\i*2)
2631*c0909341SAndroid Build Coastguard Worker        add             r7,  sp,  #(\i*2)
2632*c0909341SAndroid Build Coastguard Worker        mov             r8,  #32*2
2633*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_add_vert_dct_4x32_neon
2634*c0909341SAndroid Build Coastguard Worker.endr
2635*c0909341SAndroid Build Coastguard Worker
2636*c0909341SAndroid Build Coastguard Worker        add_sp_align    2048
2637*c0909341SAndroid Build Coastguard Worker        vpop            {q4-q7}
2638*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}
2639*c0909341SAndroid Build Coastguard Workerendfunc
2640*c0909341SAndroid Build Coastguard Worker
// DCT/DCT 16x32 inverse transform and add, 16 bpc.
// Two passes through a 1024 byte stack buffer (16*32 16-bit values):
//   pass 1 - inv_txfm_horz_scale_16x2_neon is called once per pair of input
//            lines. As soon as r3 is below the current eob_16x32 threshold,
//            the rest of the buffer is zero filled instead.
//   pass 2 - inv_txfm_add_vert_dct_4x32_neon is called for 4 slices, each
//            4 values wide.
// r0/r2/r3 are presumably dst, coefficients and eob - TODO confirm against
// the C prototype. The helper calling convention (r4, r6, r7, r8) is defined
// outside this view.
2641*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_dct_dct_16x32_16bpc_neon, export=1
        // idct_dc is defined elsewhere; presumably it handles the DC-only
        // case and returns early - TODO confirm.
2642*c0909341SAndroid Build Coastguard Worker        idct_dc         16,  32,  1
2643*c0909341SAndroid Build Coastguard Worker
2644*c0909341SAndroid Build Coastguard Worker        push            {r4-r11,lr}
2645*c0909341SAndroid Build Coastguard Worker        vpush           {q4-q7}
2646*c0909341SAndroid Build Coastguard Worker        sub_sp_align    1024
        // r10 walks the threshold table, r11 holds the current threshold.
2647*c0909341SAndroid Build Coastguard Worker        movrel_local    r10, eob_16x32
2648*c0909341SAndroid Build Coastguard Worker        ldrh            r11, [r10], #2
        // r4 = address of the 16-point DCT routine, presumably called
        // indirectly by the pass 1 helper - TODO confirm.
2649*c0909341SAndroid Build Coastguard Worker        movrel_local    r4,  inv_dct_2s_x16_neon
2650*c0909341SAndroid Build Coastguard Worker
        // Pass 1, unrolled per pair of lines. r6 = position in the stack buffer
        // (32 bytes per line), r7 = input position (4 bytes per coefficient).
        // For i > 0, r8 is first set to the number of remaining lines in case
        // the zero fill at 1: is taken.
2651*c0909341SAndroid Build Coastguard Worker.irp i, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
2652*c0909341SAndroid Build Coastguard Worker        add             r6,  sp,  #(\i*16*2)
2653*c0909341SAndroid Build Coastguard Worker        add             r7,  r2,  #(\i*4)
2654*c0909341SAndroid Build Coastguard Worker.if \i > 0
2655*c0909341SAndroid Build Coastguard Worker        mov             r8,  #(32 - \i)
2656*c0909341SAndroid Build Coastguard Worker        cmp             r3,  r11
2657*c0909341SAndroid Build Coastguard Worker        blt             1f
2658*c0909341SAndroid Build Coastguard Worker.if \i < 30
2659*c0909341SAndroid Build Coastguard Worker        ldrh            r11, [r10], #2
2660*c0909341SAndroid Build Coastguard Worker.endif
2661*c0909341SAndroid Build Coastguard Worker.endif
        // r8 = 4*32 passed to the helper (presumably the input stride in
        // bytes).
2662*c0909341SAndroid Build Coastguard Worker        mov             r8,  #4*32
2663*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_horz_scale_16x2_neon
2664*c0909341SAndroid Build Coastguard Worker.endr
2665*c0909341SAndroid Build Coastguard Worker        b               3f
2666*c0909341SAndroid Build Coastguard Worker
2667*c0909341SAndroid Build Coastguard Worker1:
        // Zero fill the rest of the stack buffer: each iteration writes
        // 2 * 32 = 64 bytes (two lines of 16 16-bit values) and reduces the
        // remaining line count in r8 by 2.
2668*c0909341SAndroid Build Coastguard Worker        vmov.i16        q2,  #0
2669*c0909341SAndroid Build Coastguard Worker        vmov.i16        q3,  #0
2670*c0909341SAndroid Build Coastguard Worker2:
2671*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #2
2672*c0909341SAndroid Build Coastguard Worker.rept 2
2673*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3}, [r6, :128]!
2674*c0909341SAndroid Build Coastguard Worker.endr
2675*c0909341SAndroid Build Coastguard Worker        bgt             2b
2676*c0909341SAndroid Build Coastguard Worker
2677*c0909341SAndroid Build Coastguard Worker3:
        // Pass 2: r6 = destination offset by i pixels (2 bytes each), r7 = the
        // matching offset in the stack buffer, r8 = 16*2 passed to the helper
        // (presumably the buffer stride in bytes).
2678*c0909341SAndroid Build Coastguard Worker.irp i, 0, 4, 8, 12
2679*c0909341SAndroid Build Coastguard Worker        add             r6,  r0,  #(\i*2)
2680*c0909341SAndroid Build Coastguard Worker        add             r7,  sp,  #(\i*2)
2681*c0909341SAndroid Build Coastguard Worker        mov             r8,  #16*2
2682*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_add_vert_dct_4x32_neon
2683*c0909341SAndroid Build Coastguard Worker.endr
2684*c0909341SAndroid Build Coastguard Worker
2685*c0909341SAndroid Build Coastguard Worker        add_sp_align    1024
2686*c0909341SAndroid Build Coastguard Worker        vpop            {q4-q7}
2687*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}
2688*c0909341SAndroid Build Coastguard Workerendfunc
2689*c0909341SAndroid Build Coastguard Worker
// DCT/DCT 32x16 inverse transform and add, 16 bpc.
// Two passes through a 1024 byte stack buffer (32*16 16-bit values):
//   pass 1 - inv_txfm_horz_scale_dct_32x2_neon is called once per pair of
//            input lines. As soon as r3 is below the current eob_16x32
//            threshold, the rest of the buffer is zero filled instead.
//   pass 2 - inv_txfm_add_vert_4x16_neon is called for 8 slices, each
//            4 values wide.
// r0/r2/r3 are presumably dst, coefficients and eob - TODO confirm against
// the C prototype. The helper calling convention (r5, r6, r7, r8) is defined
// outside this view.
2690*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_dct_dct_32x16_16bpc_neon, export=1
        // idct_dc is defined elsewhere; presumably it handles the DC-only
        // case and returns early - TODO confirm.
2691*c0909341SAndroid Build Coastguard Worker        idct_dc         32,  16,  1
2692*c0909341SAndroid Build Coastguard Worker
2693*c0909341SAndroid Build Coastguard Worker        push            {r4-r11,lr}
2694*c0909341SAndroid Build Coastguard Worker        vpush           {q4-q7}
2695*c0909341SAndroid Build Coastguard Worker        sub_sp_align    1024
        // r10 walks the threshold table, r11 holds the current threshold.
2696*c0909341SAndroid Build Coastguard Worker        movrel_local    r10, eob_16x32
2697*c0909341SAndroid Build Coastguard Worker        ldrh            r11, [r10], #2
        // r5 = address of the 16-point DCT routine, presumably called
        // indirectly by the pass 2 helper - TODO confirm.
2698*c0909341SAndroid Build Coastguard Worker        movrel          r5,  X(inv_dct_4h_x16_neon)
2699*c0909341SAndroid Build Coastguard Worker
        // Pass 1, unrolled per pair of lines. r6 = position in the stack buffer
        // (64 bytes per line), r7 = input position (4 bytes per coefficient).
        // For i > 0, r8 is first set to the number of remaining lines in case
        // the zero fill at 1: is taken.
2700*c0909341SAndroid Build Coastguard Worker.irp i, 0, 2, 4, 6, 8, 10, 12, 14
2701*c0909341SAndroid Build Coastguard Worker        add             r6,  sp,  #(\i*32*2)
2702*c0909341SAndroid Build Coastguard Worker        add             r7,  r2,  #(\i*4)
2703*c0909341SAndroid Build Coastguard Worker.if \i > 0
2704*c0909341SAndroid Build Coastguard Worker        mov             r8,  #(16 - \i)
2705*c0909341SAndroid Build Coastguard Worker        cmp             r3,  r11
2706*c0909341SAndroid Build Coastguard Worker        blt             1f
2707*c0909341SAndroid Build Coastguard Worker.if \i < 14
2708*c0909341SAndroid Build Coastguard Worker        ldrh            r11, [r10], #2
2709*c0909341SAndroid Build Coastguard Worker.endif
2710*c0909341SAndroid Build Coastguard Worker.endif
        // r8 = 4*16 passed to the helper (presumably the input stride in
        // bytes).
2711*c0909341SAndroid Build Coastguard Worker        mov             r8,  #4*16
2712*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_horz_scale_dct_32x2_neon
2713*c0909341SAndroid Build Coastguard Worker.endr
2714*c0909341SAndroid Build Coastguard Worker        b               3f
2715*c0909341SAndroid Build Coastguard Worker
2716*c0909341SAndroid Build Coastguard Worker1:
        // Zero fill the rest of the stack buffer: each iteration writes
        // 4 * 32 = 128 bytes (two lines of 32 16-bit values) and reduces the
        // remaining line count in r8 by 2.
2717*c0909341SAndroid Build Coastguard Worker        vmov.i16        q2,  #0
2718*c0909341SAndroid Build Coastguard Worker        vmov.i16        q3,  #0
2719*c0909341SAndroid Build Coastguard Worker2:
2720*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #2
2721*c0909341SAndroid Build Coastguard Worker.rept 4
2722*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3}, [r6, :128]!
2723*c0909341SAndroid Build Coastguard Worker.endr
2724*c0909341SAndroid Build Coastguard Worker        bgt             2b
2725*c0909341SAndroid Build Coastguard Worker
2726*c0909341SAndroid Build Coastguard Worker3:
        // Pass 2: r6 = destination offset by i pixels (2 bytes each), r7 = the
        // matching offset in the stack buffer, r8 = 32*2 passed to the helper
        // (presumably the buffer stride in bytes).
2727*c0909341SAndroid Build Coastguard Worker.irp i, 0, 4, 8, 12, 16, 20, 24, 28
2728*c0909341SAndroid Build Coastguard Worker        add             r6,  r0,  #(\i*2)
2729*c0909341SAndroid Build Coastguard Worker        add             r7,  sp,  #(\i*2)
2730*c0909341SAndroid Build Coastguard Worker        mov             r8,  #32*2
2731*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_add_vert_4x16_neon
2732*c0909341SAndroid Build Coastguard Worker.endr
2733*c0909341SAndroid Build Coastguard Worker
2734*c0909341SAndroid Build Coastguard Worker        add_sp_align    1024
2735*c0909341SAndroid Build Coastguard Worker        vpop            {q4-q7}
2736*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}
2737*c0909341SAndroid Build Coastguard Workerendfunc
2738*c0909341SAndroid Build Coastguard Worker
// DCT/DCT 8x32 inverse transform and add, 16 bpc.
// Two passes through a 512 byte stack buffer (8*32 16-bit values):
//   pass 1 - done inline: groups of 8 vectors are loaded (and cleared),
//            transformed by inv_dct_4s_x8_neon, rounded and narrowed to
//            16 bit, transposed and stored. The loop stops once r3 is below
//            the current eob_8x32 threshold and the rest of the buffer is
//            zero filled.
//   pass 2 - inv_txfm_add_vert_dct_4x32_neon is called for 2 slices, each
//            4 values wide.
// r0/r2/r3 are presumably dst, coefficients and eob - TODO confirm against
// the C prototype.
2739*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_dct_dct_8x32_16bpc_neon, export=1
        // idct_dc is defined elsewhere; presumably it handles the DC-only
        // case and returns early - TODO confirm.
2740*c0909341SAndroid Build Coastguard Worker        idct_dc         8,   32,  2
2741*c0909341SAndroid Build Coastguard Worker
2742*c0909341SAndroid Build Coastguard Worker        push            {r4-r11,lr}
2743*c0909341SAndroid Build Coastguard Worker        vpush           {q4-q7}
2744*c0909341SAndroid Build Coastguard Worker        sub_sp_align    512
2745*c0909341SAndroid Build Coastguard Worker
        // The trailing 2 is presumably a byte offset into the table - TODO
        // confirm in movrel_local.
2746*c0909341SAndroid Build Coastguard Worker        movrel_local    r10, eob_8x32, 2
2747*c0909341SAndroid Build Coastguard Worker
        // r8 = byte distance between coefficient loads, r9 = countdown that
        // starts at 32 and drops by 4 per iteration, r6 = write position in
        // the stack buffer.
2748*c0909341SAndroid Build Coastguard Worker        mov             r8,  #4*32
2749*c0909341SAndroid Build Coastguard Worker        mov             r9,  #32
2750*c0909341SAndroid Build Coastguard Worker        mov             r6,  sp
2751*c0909341SAndroid Build Coastguard Worker1:
        // Load 8 vectors of four 32-bit coefficients, r8 bytes apart, and
        // clear each one in memory right after reading it.
2752*c0909341SAndroid Build Coastguard Worker        vmov.i32        q0,  #0
2753*c0909341SAndroid Build Coastguard Worker.irp i, q8, q9, q10, q11, q12, q13, q14, q15
2754*c0909341SAndroid Build Coastguard Worker        vld1.32         {\i}, [r2, :128]
2755*c0909341SAndroid Build Coastguard Worker        vst1.32         {q0}, [r2, :128], r8
2756*c0909341SAndroid Build Coastguard Worker.endr
        // Fetch the threshold for this iteration, rewind r2 by the 8 load
        // steps, advance it by 16 bytes and count down r9.
2757*c0909341SAndroid Build Coastguard Worker        ldrh            r11, [r10], #4
2758*c0909341SAndroid Build Coastguard Worker        sub             r2,  r2,  r8, lsl #3
2759*c0909341SAndroid Build Coastguard Worker        sub             r9,  r9,  #4
2760*c0909341SAndroid Build Coastguard Worker        add             r2,  r2,  #4*4
2761*c0909341SAndroid Build Coastguard Worker
2762*c0909341SAndroid Build Coastguard Worker        bl              inv_dct_4s_x8_neon
2763*c0909341SAndroid Build Coastguard Worker
        // Rounding right shift by 2 with saturating narrow to 16 bit. q8..q11
        // go to the low halves and q12..q15 to the high halves of q8..q11.
        // The order matters: each source q register is read before its
        // aliasing d registers are written.
2764*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d16, q8,  #2
2765*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d18, q9,  #2
2766*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d20, q10, #2
2767*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d22, q11, #2
2768*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d17, q12, #2
2769*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d19, q13, #2
2770*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d21, q14, #2
2771*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d23, q15, #2
2772*c0909341SAndroid Build Coastguard Worker
2773*c0909341SAndroid Build Coastguard Worker        transpose_4x8h  q8,  q9,  q10, q11
2774*c0909341SAndroid Build Coastguard Worker
        // Store 64 bytes to the stack buffer. The cmp sits between the two
        // stores; the NEON store does not change the flags used by bge.
2775*c0909341SAndroid Build Coastguard Worker        vst1.16         {q8,  q9},  [r6, :128]!
2776*c0909341SAndroid Build Coastguard Worker        cmp             r3,  r11
2777*c0909341SAndroid Build Coastguard Worker        vst1.16         {q10, q11}, [r6, :128]!
2778*c0909341SAndroid Build Coastguard Worker
2779*c0909341SAndroid Build Coastguard Worker        bge             1b
        // Nothing left to clear when the countdown has reached zero.
2780*c0909341SAndroid Build Coastguard Worker        cmp             r9,  #0
2781*c0909341SAndroid Build Coastguard Worker        beq             3f
2782*c0909341SAndroid Build Coastguard Worker
        // Zero fill the rest of the stack buffer: each iteration writes
        // 2 * 32 = 64 bytes and reduces r9 by 4.
2783*c0909341SAndroid Build Coastguard Worker        vmov.i16        q2,  #0
2784*c0909341SAndroid Build Coastguard Worker        vmov.i16        q3,  #0
2785*c0909341SAndroid Build Coastguard Worker2:
2786*c0909341SAndroid Build Coastguard Worker        subs            r9,  r9,  #4
2787*c0909341SAndroid Build Coastguard Worker.rept 2
2788*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3}, [r6, :128]!
2789*c0909341SAndroid Build Coastguard Worker.endr
2790*c0909341SAndroid Build Coastguard Worker        bgt             2b
2791*c0909341SAndroid Build Coastguard Worker
2792*c0909341SAndroid Build Coastguard Worker3:
        // Pass 2: r6 = destination offset by i pixels (2 bytes each), r7 = the
        // matching offset in the stack buffer, r8 = 8*2 passed to the helper
        // (presumably the buffer stride in bytes).
2793*c0909341SAndroid Build Coastguard Worker.irp i, 0, 4
2794*c0909341SAndroid Build Coastguard Worker        add             r6,  r0,  #(\i*2)
2795*c0909341SAndroid Build Coastguard Worker        add             r7,  sp,  #(\i*2)
2796*c0909341SAndroid Build Coastguard Worker        mov             r8,  #8*2
2797*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_add_vert_dct_4x32_neon
2798*c0909341SAndroid Build Coastguard Worker.endr
2799*c0909341SAndroid Build Coastguard Worker
2800*c0909341SAndroid Build Coastguard Worker        add_sp_align    512
2801*c0909341SAndroid Build Coastguard Worker        vpop            {q4-q7}
2802*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}
2803*c0909341SAndroid Build Coastguard Workerendfunc
2804*c0909341SAndroid Build Coastguard Worker
// DCT/DCT 32x8 inverse transform and add, 16 bpc.
// Two passes through a 512 byte stack buffer (32*8 16-bit values):
//   pass 1 - inv_txfm_horz_dct_32x2_neon is called once per pair of input
//            lines. As soon as r3 is below the current eob_8x32 threshold,
//            the rest of the buffer is zero filled instead.
//   pass 2 - done inline: 4 groups of 8 columns are loaded from the buffer,
//            transformed by inv_dct_8h_x8_neon and passed to
//            load_add_store_8x8.
// r0/r2/r3 are presumably dst, coefficients and eob - TODO confirm against
// the C prototype. The helper calling convention (r6, r7, r8) is defined
// outside this view.
2805*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_dct_dct_32x8_16bpc_neon, export=1
        // idct_dc is defined elsewhere; presumably it handles the DC-only
        // case and returns early - TODO confirm.
2806*c0909341SAndroid Build Coastguard Worker        idct_dc         32,  8,   2
2807*c0909341SAndroid Build Coastguard Worker
2808*c0909341SAndroid Build Coastguard Worker        push            {r4-r11,lr}
2809*c0909341SAndroid Build Coastguard Worker        vpush           {q4-q7}
        // r10 walks the threshold table, r11 holds the current threshold.
2810*c0909341SAndroid Build Coastguard Worker        movrel_local    r10, eob_8x32
2811*c0909341SAndroid Build Coastguard Worker        sub_sp_align    512
2812*c0909341SAndroid Build Coastguard Worker        ldrh            r11, [r10], #2
2813*c0909341SAndroid Build Coastguard Worker
        // Pass 1, unrolled per pair of lines. r6 = position in the stack buffer
        // (64 bytes per line), r7 = input position (4 bytes per coefficient).
        // For i > 0, r8 is first set to the number of remaining lines in case
        // the zero fill at 1: is taken (mov does not change the cmp flags).
2814*c0909341SAndroid Build Coastguard Worker.irp i, 0, 2, 4, 6
2815*c0909341SAndroid Build Coastguard Worker        add             r6,  sp,  #(\i*32*2)
2816*c0909341SAndroid Build Coastguard Worker        add             r7,  r2,  #(\i*4)
2817*c0909341SAndroid Build Coastguard Worker.if \i > 0
2818*c0909341SAndroid Build Coastguard Worker        cmp             r3,  r11
2819*c0909341SAndroid Build Coastguard Worker        mov             r8,  #(8 - \i)
2820*c0909341SAndroid Build Coastguard Worker        blt             1f
2821*c0909341SAndroid Build Coastguard Worker.if \i < 6
2822*c0909341SAndroid Build Coastguard Worker        ldrh            r11, [r10], #2
2823*c0909341SAndroid Build Coastguard Worker.endif
2824*c0909341SAndroid Build Coastguard Worker.endif
        // r8 = 8*4 passed to the helper (presumably the input stride in
        // bytes).
2825*c0909341SAndroid Build Coastguard Worker        mov             r8,  #8*4
2826*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_horz_dct_32x2_neon
2827*c0909341SAndroid Build Coastguard Worker.endr
2828*c0909341SAndroid Build Coastguard Worker        b               3f
2829*c0909341SAndroid Build Coastguard Worker
2830*c0909341SAndroid Build Coastguard Worker1:
        // Zero fill the rest of the stack buffer: each iteration writes
        // 4 * 32 = 128 bytes (two lines of 32 16-bit values) and reduces the
        // remaining line count in r8 by 2.
2831*c0909341SAndroid Build Coastguard Worker        vmov.i16        q2,  #0
2832*c0909341SAndroid Build Coastguard Worker        vmov.i16        q3,  #0
2833*c0909341SAndroid Build Coastguard Worker2:
2834*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #2
2835*c0909341SAndroid Build Coastguard Worker.rept 4
2836*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3}, [r6, :128]!
2837*c0909341SAndroid Build Coastguard Worker.endr
2838*c0909341SAndroid Build Coastguard Worker        bgt             2b
2839*c0909341SAndroid Build Coastguard Worker
2840*c0909341SAndroid Build Coastguard Worker3:
        // Pass 2: r8 = byte distance between buffer lines (2*32), r9 = column
        // counter that advances by 8 per iteration up to 32.
2841*c0909341SAndroid Build Coastguard Worker        mov             r8,  #2*32
2842*c0909341SAndroid Build Coastguard Worker        mov             r9,  #0
2843*c0909341SAndroid Build Coastguard Worker1:
2844*c0909341SAndroid Build Coastguard Worker        add             r6,  r0,  r9, lsl #1
2845*c0909341SAndroid Build Coastguard Worker        add             r7,  sp,  r9, lsl #1 // #(\i*2)
2846*c0909341SAndroid Build Coastguard Worker
        // Load 8 lines of eight 16-bit values from the stack buffer.
2847*c0909341SAndroid Build Coastguard Worker.irp i, q8, q9, q10, q11, q12, q13, q14, q15
2848*c0909341SAndroid Build Coastguard Worker        vld1.16         {\i}, [r7, :128], r8
2849*c0909341SAndroid Build Coastguard Worker.endr
2850*c0909341SAndroid Build Coastguard Worker        add             r9,  r9,  #8
2851*c0909341SAndroid Build Coastguard Worker
2852*c0909341SAndroid Build Coastguard Worker        bl              X(inv_dct_8h_x8_neon)
2853*c0909341SAndroid Build Coastguard Worker
        // The blt below uses the flags from this cmp, which relies on
        // load_add_store_8x8 leaving the flags untouched (macro body not
        // visible here).
2854*c0909341SAndroid Build Coastguard Worker        cmp             r9,  #32
2855*c0909341SAndroid Build Coastguard Worker
2856*c0909341SAndroid Build Coastguard Worker        load_add_store_8x8 r6, r7
2857*c0909341SAndroid Build Coastguard Worker
2858*c0909341SAndroid Build Coastguard Worker        blt             1b
2859*c0909341SAndroid Build Coastguard Worker
2860*c0909341SAndroid Build Coastguard Worker        add_sp_align    512
2861*c0909341SAndroid Build Coastguard Worker        vpop            {q4-q7}
2862*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}
2863*c0909341SAndroid Build Coastguard Workerendfunc
2864*c0909341SAndroid Build Coastguard Worker
2865*c0909341SAndroid Build Coastguard Workerfunction inv_dct64_step1_neon
        // One quarter of the first odd-half stage of the 64-point inverse
        // DCT, working on two 32-bit lanes per d register.
        //
        // In:   d16-d19 = four input coefficients (2 x s32 each)
        //       r12     = cursor into a coefficient table; twelve words are
        //                 read here and r12 is advanced by 48 bytes
        //       q5, q4  = upper and lower clip bounds applied after each
        //                 round of saturating butterflies
        //       r6      = 16-byte aligned output cursor
        // Out:  d16-d23 (64 bytes) stored at r6, r6 advanced by 64
        // Writes q0-q1, d4, d6, d7 and q8-q15 directly. vmul_vmla and
        // vmul_vmls are helper macros defined elsewhere in this file.
        //
        // Inputs and resulting terms for each of the four calls:
2866*c0909341SAndroid Build Coastguard Worker        // in1/31/17/15 -> t32a/33/34a/35/60/61a/62/63a
2867*c0909341SAndroid Build Coastguard Worker        // in7/25/23/ 9 -> t56a/57/58a/59/36/37a/38/39a
2868*c0909341SAndroid Build Coastguard Worker        // in5/27/21/11 -> t40a/41/42a/43/52/53a/54/55a
2869*c0909341SAndroid Build Coastguard Worker        // in3/29/19/13 -> t48a/49/50a/51/44/45a/46/47a
2870*c0909341SAndroid Build Coastguard Worker
        // Eight multipliers for this group of inputs.
2871*c0909341SAndroid Build Coastguard Worker        vld1.32         {q0, q1}, [r12, :128]!
2872*c0909341SAndroid Build Coastguard Worker
        // Each input is multiplied by two constants. The product that lands
        // in a fresh register is computed first so the in-place one can
        // overwrite the input afterwards.
2873*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s32    d23, d16, d0[1]  // t63a
2874*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s32    d16, d16, d0[0]  // t32a
2875*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s32    d22, d17, d1[0]  // t62a
2876*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s32    d17, d17, d1[1]  // t33a
2877*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s32    d21, d18, d2[1]  // t61a
2878*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s32    d18, d18, d2[0]  // t34a
2879*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s32    d20, d19, d3[0]  // t60a
2880*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s32    d19, d19, d3[1]  // t35a
2881*c0909341SAndroid Build Coastguard Worker
        // Four more constants for the two rotation rounds below.
2882*c0909341SAndroid Build Coastguard Worker        vld1.32         {q0}, [r12, :128]!
2883*c0909341SAndroid Build Coastguard Worker
2884*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d24, d16, d17    // t32
2885*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d25, d16, d17    // t33
2886*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d26, d19, d18    // t34
2887*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d27, d19, d18    // t35
2888*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d28, d20, d21    // t60
2889*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d29, d20, d21    // t61
2890*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d30, d23, d22    // t62
2891*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d31, d23, d22    // t63
2892*c0909341SAndroid Build Coastguard Worker
        // Clamp all eight results to [q4, q5].
2893*c0909341SAndroid Build Coastguard Worker.irp r, q12, q13, q14, q15
2894*c0909341SAndroid Build Coastguard Worker        vmin.s32        \r,  \r,  q5
2895*c0909341SAndroid Build Coastguard Worker.endr
2896*c0909341SAndroid Build Coastguard Worker.irp r, q12, q13, q14, q15
2897*c0909341SAndroid Build Coastguard Worker        vmax.s32        \r,  \r,  q4
2898*c0909341SAndroid Build Coastguard Worker.endr
2899*c0909341SAndroid Build Coastguard Worker
        // First rotation round, using the pair d0[0]/d0[1]. d4, d6 and d7
        // hold the unrounded sums until the #12 rounding shifts.
2900*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d4,  d29, d26, d0[0], d0[1] // -> t34a
2901*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d6,  d29, d26, d0[1], d0[0] // -> t61a
2902*c0909341SAndroid Build Coastguard Worker        vneg.s32        d4,  d4                     // t34a
2903*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d7,  d30, d25, d0[1], d0[0] // -> t33a
2904*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d26, d4,  #12               // t34a
2905*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d4,  d30, d25, d0[0], d0[1] // -> t62a
2906*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d29, d6,  #12               // t61a
2907*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d25, d7,  #12               // t33a
2908*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d30, d4,  #12               // t62a
2909*c0909341SAndroid Build Coastguard Worker
2910*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d16, d24, d27    // t32a
2911*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d19, d24, d27    // t35a
2912*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d17, d25, d26    // t33
2913*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d18, d25, d26    // t34
2914*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d20, d31, d28    // t60a
2915*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d23, d31, d28    // t63a
2916*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d21, d30, d29    // t61
2917*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d22, d30, d29    // t62
2918*c0909341SAndroid Build Coastguard Worker
        // Clamp again, now on the d16-d23 bank.
2919*c0909341SAndroid Build Coastguard Worker.irp r, q8, q9, q10, q11
2920*c0909341SAndroid Build Coastguard Worker        vmin.s32        \r,  \r,  q5
2921*c0909341SAndroid Build Coastguard Worker.endr
2922*c0909341SAndroid Build Coastguard Worker.irp r, q8, q9, q10, q11
2923*c0909341SAndroid Build Coastguard Worker        vmax.s32        \r,  \r,  q4
2924*c0909341SAndroid Build Coastguard Worker.endr
2925*c0909341SAndroid Build Coastguard Worker
        // Second rotation round, using the pair d1[0]/d1[1]. Only d18-d21
        // are rotated; d16, d17, d22 and d23 pass through unchanged.
2926*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d4,  d21, d18, d1[0], d1[1] // -> t61a
2927*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d6,  d21, d18, d1[1], d1[0] // -> t34a
2928*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d7,  d20, d19, d1[0], d1[1] // -> t60
2929*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d21, d4,  #12               // t61a
2930*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d18, d6,  #12               // t34a
2931*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d4,  d20, d19, d1[1], d1[0] // -> t35
2932*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d20, d7,  #12               // t60
2933*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d19, d4,  #12               // t35
2934*c0909341SAndroid Build Coastguard Worker
        // Emit the eight terms as one contiguous 64-byte record.
2935*c0909341SAndroid Build Coastguard Worker        vst1.32         {d16, d17, d18, d19}, [r6, :128]!
2936*c0909341SAndroid Build Coastguard Worker        vst1.32         {d20, d21, d22, d23}, [r6, :128]!
2937*c0909341SAndroid Build Coastguard Worker
2938*c0909341SAndroid Build Coastguard Worker        bx              lr
2939*c0909341SAndroid Build Coastguard Workerendfunc
2940*c0909341SAndroid Build Coastguard Worker
2941*c0909341SAndroid Build Coastguard Workerfunction inv_dct64_step2_neon
        // Second odd-half stage of the 64-point inverse DCT, performed in
        // place on the four 64-byte records written by the step1 calls.
        //
        // In:   r6     = address of the first 8-byte slot (ascending cursor)
        //       r9     = address of a later slot (descending cursor)
        //       q5, q4 = upper and lower clip bounds
        // Each iteration reads and rewrites eight slots: offsets 0, 64, 128
        // and 192 bytes from both r6 and r9. The loop ends once r6 is no
        // longer below r9.
        // Writes r6, r9, r12, the flags, q0, d4, d6, d7 and q8-q15 directly.
        // vmul_vmla and vmul_vmls are helper macros defined elsewhere.
2942*c0909341SAndroid Build Coastguard Worker        movrel_local    r12, idct_coeffs
2943*c0909341SAndroid Build Coastguard Worker        vld1.32         {q0}, [r12, :128]
2944*c0909341SAndroid Build Coastguard Worker1:
        // Layout of the four records as left by step1:
2945*c0909341SAndroid Build Coastguard Worker        // t32a/33/34a/35/60/61a/62/63a
2946*c0909341SAndroid Build Coastguard Worker        // t56a/57/58a/59/36/37a/38/39a
2947*c0909341SAndroid Build Coastguard Worker        // t40a/41/42a/43/52/53a/54/55a
2948*c0909341SAndroid Build Coastguard Worker        // t48a/49/50a/51/44/45a/46/47a
        // 4*2 is the slot size in bytes (two 32-bit lanes); *8 steps one
        // whole record.
2949*c0909341SAndroid Build Coastguard Worker        vldr            d16, [r6, #4*2*0]  // t32a
2950*c0909341SAndroid Build Coastguard Worker        vldr            d17, [r9, #4*2*8]  // t39a
2951*c0909341SAndroid Build Coastguard Worker        vldr            d18, [r9, #4*2*0]  // t63a
2952*c0909341SAndroid Build Coastguard Worker        vldr            d19, [r6, #4*2*8]  // t56a
2953*c0909341SAndroid Build Coastguard Worker        vldr            d20, [r6, #4*2*16] // t40a
2954*c0909341SAndroid Build Coastguard Worker        vldr            d21, [r9, #4*2*24] // t47a
2955*c0909341SAndroid Build Coastguard Worker        vldr            d22, [r9, #4*2*16] // t55a
2956*c0909341SAndroid Build Coastguard Worker        vldr            d23, [r6, #4*2*24] // t48a
2957*c0909341SAndroid Build Coastguard Worker
2958*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d24, d16, d17      // t32
2959*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d25, d16, d17      // t39
2960*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d26, d18, d19      // t63
2961*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d27, d18, d19      // t56
2962*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d28, d21, d20      // t40
2963*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d29, d21, d20      // t47
2964*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d30, d23, d22      // t48
2965*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d31, d23, d22      // t55
2966*c0909341SAndroid Build Coastguard Worker
        // Clamp all eight results to [q4, q5].
2967*c0909341SAndroid Build Coastguard Worker.irp r, q12, q13, q14, q15
2968*c0909341SAndroid Build Coastguard Worker        vmin.s32        \r,  \r,  q5
2969*c0909341SAndroid Build Coastguard Worker.endr
2970*c0909341SAndroid Build Coastguard Worker.irp r, q12, q13, q14, q15
2971*c0909341SAndroid Build Coastguard Worker        vmax.s32        \r,  \r,  q4
2972*c0909341SAndroid Build Coastguard Worker.endr
2973*c0909341SAndroid Build Coastguard Worker
        // Rotations with the constant pair d1[0]/d1[1]; d4, d6 and d7 hold
        // the unrounded sums until the #12 rounding shifts.
2974*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d4,  d27, d25, d1[1], d1[0] // -> t56a
2975*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d6,  d27, d25, d1[0], d1[1] // -> t39a
2976*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d7,  d31, d28, d1[1], d1[0] // -> t40a
2977*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d25, d4,  #12               // t56a
2978*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d27, d6,  #12               // t39a
2979*c0909341SAndroid Build Coastguard Worker        vneg.s32        d7,  d7                     // t40a
2980*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d4,  d31, d28, d1[0], d1[1] // -> t55a
2981*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d31, d7,  #12               // t40a
2982*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d28, d4,  #12               // t55a
2983*c0909341SAndroid Build Coastguard Worker
2984*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d16, d24, d29      // t32a
2985*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d19, d24, d29      // t47a
2986*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d17, d27, d31      // t39
2987*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d18, d27, d31      // t40
2988*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d20, d26, d30      // t48a
2989*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d23, d26, d30      // t63a
2990*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d21, d25, d28      // t55
2991*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d22, d25, d28      // t56
2992*c0909341SAndroid Build Coastguard Worker
        // Clamp again, now on the d16-d23 bank.
2993*c0909341SAndroid Build Coastguard Worker.irp r, q8, q9, q10, q11
2994*c0909341SAndroid Build Coastguard Worker        vmin.s32        \r,  \r,  q5
2995*c0909341SAndroid Build Coastguard Worker.endr
2996*c0909341SAndroid Build Coastguard Worker.irp r, q8, q9, q10, q11
2997*c0909341SAndroid Build Coastguard Worker        vmax.s32        \r,  \r,  q4
2998*c0909341SAndroid Build Coastguard Worker.endr
2999*c0909341SAndroid Build Coastguard Worker
        // Sum and difference of each pair, both scaled by the same constant
        // d0[0]. Only d18-d21 are touched; d16, d17, d22 and d23 are stored
        // as they are.
3000*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d4,  d21, d18, d0[0], d0[0] // -> t40a
3001*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d6,  d21, d18, d0[0], d0[0] // -> t55a
3002*c0909341SAndroid Build Coastguard Worker        vmul_vmls       d7,  d20, d19, d0[0], d0[0] // -> t47
3003*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d18, d4,  #12               // t40a
3004*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d21, d6,  #12               // t55a
3005*c0909341SAndroid Build Coastguard Worker        vmul_vmla       d4,  d20, d19, d0[0], d0[0] // -> t48
3006*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d19, d7,  #12               // t47
3007*c0909341SAndroid Build Coastguard Worker        vrshr.s32       d20, d4,  #12               // t48
3008*c0909341SAndroid Build Coastguard Worker
        // Write back over the same eight slots. The slot assignment differs
        // from the load order above, so after this pass the terms alternate
        // between the r6 side and the r9 side in ascending index order.
3009*c0909341SAndroid Build Coastguard Worker        vstr            d16, [r6, #4*2*0]  // t32a
3010*c0909341SAndroid Build Coastguard Worker        vstr            d17, [r9, #4*2*0]  // t39
3011*c0909341SAndroid Build Coastguard Worker        vstr            d18, [r6, #4*2*8]  // t40a
3012*c0909341SAndroid Build Coastguard Worker        vstr            d19, [r9, #4*2*8]  // t47
3013*c0909341SAndroid Build Coastguard Worker        vstr            d20, [r6, #4*2*16] // t48
3014*c0909341SAndroid Build Coastguard Worker        vstr            d21, [r9, #4*2*16] // t55a
3015*c0909341SAndroid Build Coastguard Worker        vstr            d22, [r6, #4*2*24] // t56
3016*c0909341SAndroid Build Coastguard Worker        vstr            d23, [r9, #4*2*24] // t63a
3017*c0909341SAndroid Build Coastguard Worker
        // Move the cursors one slot towards each other and repeat until they
        // meet or cross.
        // NOTE(review): blt is a signed comparison applied to two addresses.
        // It only misbehaves if the buffer straddles 0x80000000; an unsigned
        // condition would remove that corner case. Confirm before changing.
3018*c0909341SAndroid Build Coastguard Worker        add             r6,  r6,  #4*2
3019*c0909341SAndroid Build Coastguard Worker        sub             r9,  r9,  #4*2
3020*c0909341SAndroid Build Coastguard Worker        cmp             r6,  r9
3021*c0909341SAndroid Build Coastguard Worker        blt             1b
3022*c0909341SAndroid Build Coastguard Worker        bx              lr
3023*c0909341SAndroid Build Coastguard Workerendfunc
3024*c0909341SAndroid Build Coastguard Worker
3025*c0909341SAndroid Build Coastguard Worker.macro load8 src, strd, zero, clear
        // Load eight rows of two 32-bit values into d16-d23, advancing the
        // src register by strd after each row, so src ends 8*strd further on.
        // When clear is nonzero, each row is also overwritten with the
        // contents of the zero register right after it has been read.
        // src must be 8-byte aligned at every row (":64" alignment hint).
3026*c0909341SAndroid Build Coastguard Worker.irp i, d16, d17, d18, d19, d20, d21, d22, d23
3027*c0909341SAndroid Build Coastguard Worker.if \clear
        // Load without writeback; the zeroing store performs the advance.
3028*c0909341SAndroid Build Coastguard Worker        vld1.32         {\i}, [\src, :64]
3029*c0909341SAndroid Build Coastguard Worker        vst1.32         {\zero}, [\src, :64], \strd
3030*c0909341SAndroid Build Coastguard Worker.else
3031*c0909341SAndroid Build Coastguard Worker        vld1.32         {\i}, [\src, :64], \strd
3032*c0909341SAndroid Build Coastguard Worker.endif
3033*c0909341SAndroid Build Coastguard Worker.endr
3034*c0909341SAndroid Build Coastguard Worker.endm
3035*c0909341SAndroid Build Coastguard Worker
3036*c0909341SAndroid Build Coastguard Worker.macro store16 dst
        // Store q8-q15 (128 bytes) contiguously at the dst register, which
        // must be 16-byte aligned (":128" hint) and is advanced by 128.
3037*c0909341SAndroid Build Coastguard Worker        vst1.32         {q8,  q9},  [\dst, :128]!
3038*c0909341SAndroid Build Coastguard Worker        vst1.32         {q10, q11}, [\dst, :128]!
3039*c0909341SAndroid Build Coastguard Worker        vst1.32         {q12, q13}, [\dst, :128]!
3040*c0909341SAndroid Build Coastguard Worker        vst1.32         {q14, q15}, [\dst, :128]!
3041*c0909341SAndroid Build Coastguard Worker.endm
3042*c0909341SAndroid Build Coastguard Worker
3043*c0909341SAndroid Build Coastguard Worker.macro clear_upper8
        // Zero q12-q15, i.e. d24-d31: the eight d registers that follow the
        // d16-d23 bank filled by load8.
3044*c0909341SAndroid Build Coastguard Worker.irp i, q12, q13, q14, q15
3045*c0909341SAndroid Build Coastguard Worker        vmov.i32        \i,  #0
3046*c0909341SAndroid Build Coastguard Worker.endr
3047*c0909341SAndroid Build Coastguard Worker.endm
3048*c0909341SAndroid Build Coastguard Worker
3049*c0909341SAndroid Build Coastguard Worker.macro vmov_if reg, val, cond
        // Assemble "vmov.i32 reg, val" only when cond is nonzero; otherwise
        // the macro expands to nothing.
3050*c0909341SAndroid Build Coastguard Worker.if \cond
3051*c0909341SAndroid Build Coastguard Worker        vmov.i32        \reg, \val
3052*c0909341SAndroid Build Coastguard Worker.endif
3053*c0909341SAndroid Build Coastguard Worker.endm
3054*c0909341SAndroid Build Coastguard Worker
3055*c0909341SAndroid Build Coastguard Worker.macro movdup_if reg, gpr, val, cond
        // When cond is nonzero: place the constant val in the scratch core
        // register gpr and broadcast it to every 32-bit lane of reg.
        // gpr is overwritten. Expands to nothing when cond is zero.
        // mov_const is a helper macro defined outside this section.
3056*c0909341SAndroid Build Coastguard Worker.if \cond
3057*c0909341SAndroid Build Coastguard Worker        mov_const       \gpr, \val
3058*c0909341SAndroid Build Coastguard Worker        vdup.32         \reg, \gpr
3059*c0909341SAndroid Build Coastguard Worker.endif
3060*c0909341SAndroid Build Coastguard Worker.endm
3061*c0909341SAndroid Build Coastguard Worker
3062*c0909341SAndroid Build Coastguard Worker.macro vst1_if regs, dst, dstalign, cond
        // Assemble a vst1.32 store only when cond is nonzero.
        // Macro arguments are split at commas, so an address written as
        // "[r7, :64]" at the call site arrives in two pieces: "[r7" in dst
        // and ":64]" in dstalign. They are rejoined with a comma below.
3063*c0909341SAndroid Build Coastguard Worker.if \cond
3064*c0909341SAndroid Build Coastguard Worker        vst1.32         \regs, \dst, \dstalign
3065*c0909341SAndroid Build Coastguard Worker.endif
3066*c0909341SAndroid Build Coastguard Worker.endm
3067*c0909341SAndroid Build Coastguard Worker
3068*c0909341SAndroid Build Coastguard Worker.macro scale_if cond, c, r0, r1, r2, r3, r4, r5, r6, r7
        // Invoke scale_input with constant c on up to eight registers, but
        // only when cond is nonzero. Trailing register arguments that the
        // caller omits are forwarded as empty strings.
        // scale_input is a helper macro defined outside this section.
3069*c0909341SAndroid Build Coastguard Worker.if \cond
3070*c0909341SAndroid Build Coastguard Worker        scale_input     \c, \r0, \r1, \r2, \r3, \r4, \r5, \r6, \r7
3071*c0909341SAndroid Build Coastguard Worker.endif
3072*c0909341SAndroid Build Coastguard Worker.endm
3073*c0909341SAndroid Build Coastguard Worker
3074*c0909341SAndroid Build Coastguard Worker.macro def_dct64_func suffix, clear=0, scale=0
        // Emits inv_txfm_dct<suffix>_2s_x64_neon: a 64-point inverse DCT on
        // two 32-bit lanes. It reads 32 strided input rows and leaves 64
        // results of 8 bytes each in the 512 bytes that start at the value
        // sp has on entry.
        //
        // Macro arguments:
        //   suffix  text inserted into the function name
        //   clear   nonzero: every input row is overwritten with zero once
        //           it has been read
        //   scale   nonzero: inputs go through scale_input with the constant
        //           2896*8*(1<<16) before each sub-transform
        //
        // Registers on entry to the generated function:
        //   r7  address of the first input row
        //   r8  input stride; multiplied by 4 below, so presumably passed in
        //       32-bit units - TODO confirm against the callers
        //   sp  16-byte aligned output buffer, see above
        // r10, r11 and lr are saved and restored here. r6-r9, r12, the flags
        // and q0-q5 plus q8-q15 are overwritten directly; the called helpers
        // may touch more.
        // NOTE(review): q4 and q5 are written without being saved in this
        // function; presumably the callers preserve them - confirm.
3075*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_dct\suffix\()_2s_x64_neon
        // Capture the output address before the push moves sp.
3076*c0909341SAndroid Build Coastguard Worker        mov             r6,  sp
3077*c0909341SAndroid Build Coastguard Worker
3078*c0909341SAndroid Build Coastguard Worker        push            {r10-r11,lr}
3079*c0909341SAndroid Build Coastguard Worker
3080*c0909341SAndroid Build Coastguard Worker        lsl             r8,  r8,  #2
3081*c0909341SAndroid Build Coastguard Worker
        // Pass 1: eight rows at the full stride go to the 16-point DCT with
        // q12-q15 zeroed.
3082*c0909341SAndroid Build Coastguard Worker        movdup_if       d0,  r12, 2896*8*(1<<16), \scale
3083*c0909341SAndroid Build Coastguard Worker        vmov_if         d7,  #0,  \clear
3084*c0909341SAndroid Build Coastguard Worker        load8           r7,  r8,  d7,  \clear
3085*c0909341SAndroid Build Coastguard Worker        clear_upper8
        // Rewind the eight rows just read, then step half a stride forward
        // so the next pass reads the rows midway between these.
3086*c0909341SAndroid Build Coastguard Worker        sub             r7,  r7,  r8, lsl #3
3087*c0909341SAndroid Build Coastguard Worker        add             r7,  r7,  r8, lsr #1
3088*c0909341SAndroid Build Coastguard Worker        scale_if        \scale, d0[0], q8, q9, q10, q11
3089*c0909341SAndroid Build Coastguard Worker
3090*c0909341SAndroid Build Coastguard Worker        bl              inv_dct_2s_x16_neon
3091*c0909341SAndroid Build Coastguard Worker
3092*c0909341SAndroid Build Coastguard Worker        // idct_16 leaves the row_clip_max/min constants in d9 and d8,
3093*c0909341SAndroid Build Coastguard Worker        // but here we want to use full q registers for clipping.
3094*c0909341SAndroid Build Coastguard Worker        vmov.i32        q3,  #0x1ffff // row_clip_max = ~(~bdmax << 7), 0x1ffff
3095*c0909341SAndroid Build Coastguard Worker        vmvn.i32        q2,  #0x1ffff // row_clip_min = (~bdmax << 7), 0xfffe0000
3096*c0909341SAndroid Build Coastguard Worker.irp r, q8, q9, q10, q11, q12, q13, q14, q15
3097*c0909341SAndroid Build Coastguard Worker        vmin.s32        \r,  \r,  q3
3098*c0909341SAndroid Build Coastguard Worker.endr
3099*c0909341SAndroid Build Coastguard Worker.irp r, q8, q9, q10, q11, q12, q13, q14, q15
3100*c0909341SAndroid Build Coastguard Worker        vmax.s32        \r,  \r,  q2
3101*c0909341SAndroid Build Coastguard Worker.endr
3102*c0909341SAndroid Build Coastguard Worker
        // Park the 16 clipped results in output slots 0-15; r6 ends at
        // slot 16.
3103*c0909341SAndroid Build Coastguard Worker        store16         r6
3104*c0909341SAndroid Build Coastguard Worker
        // Pass 2: the eight rows midway between those of pass 1 go to the
        // odd half of the 32-point DCT.
3105*c0909341SAndroid Build Coastguard Worker        movdup_if       d0,  r12, 2896*8*(1<<16), \scale
3106*c0909341SAndroid Build Coastguard Worker        vmov_if         d7,  #0,  \clear
3107*c0909341SAndroid Build Coastguard Worker        load8           r7,  r8,  d7,  \clear
3108*c0909341SAndroid Build Coastguard Worker        clear_upper8
        // Rewind, halve the stride for pass 3, and step back half of the new
        // stride: r7 ends a quarter of the old stride past the first row.
3109*c0909341SAndroid Build Coastguard Worker        sub             r7,  r7,  r8, lsl #3
3110*c0909341SAndroid Build Coastguard Worker        lsr             r8,  r8,  #1
3111*c0909341SAndroid Build Coastguard Worker        sub             r7,  r7,  r8, lsr #1
3112*c0909341SAndroid Build Coastguard Worker        scale_if        \scale, d0[0], q8, q9, q10, q11
3113*c0909341SAndroid Build Coastguard Worker
3114*c0909341SAndroid Build Coastguard Worker        bl              inv_dct32_odd_2s_x16_neon
3115*c0909341SAndroid Build Coastguard Worker
        // r6 is at slot 16 here. Point r10 at slot 31 and rewind r6 to
        // slot 0 for the combining butterflies below.
3116*c0909341SAndroid Build Coastguard Worker        add             r10, r6,  #8*15
3117*c0909341SAndroid Build Coastguard Worker        sub             r6,  r6,  #8*16
3118*c0909341SAndroid Build Coastguard Worker
        // Post-index step that walks r10 downwards one slot per store.
3119*c0909341SAndroid Build Coastguard Worker        mov             r9,  #-8
3120*c0909341SAndroid Build Coastguard Worker
3121*c0909341SAndroid Build Coastguard Worker        vmov.i32        d1,  #0x1ffff // row_clip_max = ~(~bdmax << 7), 0x1ffff
3122*c0909341SAndroid Build Coastguard Worker        vmvn.i32        d0,  #0x1ffff // row_clip_min = (~bdmax << 7), 0xfffe0000
        // store_addsub: read four parked slots through r6 and combine each
        // with one of the registers r0-r3. The sums overwrite the same four
        // slots (r6 ascending); the differences are written through r10
        // (descending). Every value is clamped to [d0, d1] first.
        // d2-d7 are scratch. Loads, arithmetic and stores are interleaved.
3123*c0909341SAndroid Build Coastguard Worker.macro store_addsub r0, r1, r2, r3
3124*c0909341SAndroid Build Coastguard Worker        vld1.32         {d2},  [r6, :64]!
3125*c0909341SAndroid Build Coastguard Worker        vld1.32         {d3},  [r6, :64]!
3126*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d6,  d2,  \r0
3127*c0909341SAndroid Build Coastguard Worker        vqsub.s32       \r0, d2,  \r0
3128*c0909341SAndroid Build Coastguard Worker        vld1.32         {d4},  [r6, :64]!
3129*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d7,  d3,  \r1
3130*c0909341SAndroid Build Coastguard Worker        vqsub.s32       \r1, d3,  \r1
3131*c0909341SAndroid Build Coastguard Worker        vmin.s32        d6,  d6,  d1
3132*c0909341SAndroid Build Coastguard Worker        vmin.s32        \r0, \r0, d1
3133*c0909341SAndroid Build Coastguard Worker        vld1.32         {d5},  [r6, :64]!
3134*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d2,  d4,  \r2
        // All four slots are loaded; step r6 back so the sums land on them.
3135*c0909341SAndroid Build Coastguard Worker        sub             r6,  r6,  #8*4
3136*c0909341SAndroid Build Coastguard Worker        vmax.s32        d6,  d6,  d0
3137*c0909341SAndroid Build Coastguard Worker        vmax.s32        \r0, \r0, d0
3138*c0909341SAndroid Build Coastguard Worker        vqsub.s32       \r2, d4,  \r2
3139*c0909341SAndroid Build Coastguard Worker        vmin.s32        d7,  d7,  d1
3140*c0909341SAndroid Build Coastguard Worker        vmin.s32        \r1, \r1, d1
3141*c0909341SAndroid Build Coastguard Worker        vst1.32         {d6},  [r6,  :64]!
3142*c0909341SAndroid Build Coastguard Worker        vst1.32         {\r0}, [r10, :64], r9
3143*c0909341SAndroid Build Coastguard Worker        vmin.s32        d2,  d2,  d1
3144*c0909341SAndroid Build Coastguard Worker        vmin.s32        \r2, \r2, d1
3145*c0909341SAndroid Build Coastguard Worker        vmax.s32        d7,  d7,  d0
3146*c0909341SAndroid Build Coastguard Worker        vmax.s32        \r1, \r1, d0
3147*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d3,  d5,  \r3
3148*c0909341SAndroid Build Coastguard Worker        vqsub.s32       \r3, d5,  \r3
3149*c0909341SAndroid Build Coastguard Worker        vmax.s32        d2,  d2,  d0
3150*c0909341SAndroid Build Coastguard Worker        vmax.s32        \r2, \r2, d0
3151*c0909341SAndroid Build Coastguard Worker        vmin.s32        d3,  d3,  d1
3152*c0909341SAndroid Build Coastguard Worker        vmin.s32        \r3, \r3, d1
3153*c0909341SAndroid Build Coastguard Worker        vst1.32         {d7},  [r6,  :64]!
3154*c0909341SAndroid Build Coastguard Worker        vst1.32         {\r1}, [r10, :64], r9
3155*c0909341SAndroid Build Coastguard Worker        vmax.s32        d3,  d3,  d0
3156*c0909341SAndroid Build Coastguard Worker        vmax.s32        \r3, \r3, d0
3157*c0909341SAndroid Build Coastguard Worker        vst1.32         {d2},  [r6,  :64]!
3158*c0909341SAndroid Build Coastguard Worker        vst1.32         {\r2}, [r10, :64], r9
3159*c0909341SAndroid Build Coastguard Worker        vst1.32         {d3},  [r6,  :64]!
3160*c0909341SAndroid Build Coastguard Worker        vst1.32         {\r3}, [r10, :64], r9
3161*c0909341SAndroid Build Coastguard Worker.endm
        // Slot k is paired with register d(31-k): slot 0 with d31 through
        // slot 15 with d16.
3162*c0909341SAndroid Build Coastguard Worker        store_addsub    d31, d30, d29, d28
3163*c0909341SAndroid Build Coastguard Worker        store_addsub    d27, d26, d25, d24
3164*c0909341SAndroid Build Coastguard Worker        store_addsub    d23, d22, d21, d20
3165*c0909341SAndroid Build Coastguard Worker        store_addsub    d19, d18, d17, d16
3166*c0909341SAndroid Build Coastguard Worker.purgem store_addsub
3167*c0909341SAndroid Build Coastguard Worker
        // r6 is back at slot 16; skip the 16 slots filled through r10 so the
        // step1 output starts at slot 32.
3168*c0909341SAndroid Build Coastguard Worker        add             r6,  r6,  #2*4*16
3169*c0909341SAndroid Build Coastguard Worker
        // Pass 3: the 16 remaining rows, four per inv_dct64_step1_neon call.
        // The offsets in the comments count rows of the halved stride r8
        // from the value r7 has at this point. lr serves as the scratch core
        // register: it was pushed on entry and every bl rewrites it anyway.
        // r12 walks the idct64_coeffs table across the four calls.
3170*c0909341SAndroid Build Coastguard Worker        movrel_local    r12, idct64_coeffs
3171*c0909341SAndroid Build Coastguard Worker        vmov.i32        q5,  #0x1ffff        // row_clip_max = ~(~bdmax << 7), 0x1ffff
3172*c0909341SAndroid Build Coastguard Worker        vmvn.i32        q4,  #0x1ffff        // row_clip_min = (~bdmax << 7), 0xfffe0000
3173*c0909341SAndroid Build Coastguard Worker        movdup_if       d0,  lr,  2896*8*(1<<16), \scale
3174*c0909341SAndroid Build Coastguard Worker        vmov_if         d7,  #0,  \clear
3175*c0909341SAndroid Build Coastguard Worker        add             r9,  r7,  r8, lsl #4 // offset 16
3176*c0909341SAndroid Build Coastguard Worker        add             r10, r7,  r8, lsl #3 // offset 8
3177*c0909341SAndroid Build Coastguard Worker        sub             r9,  r9,  r8         // offset 15
3178*c0909341SAndroid Build Coastguard Worker        sub             r11, r10, r8         // offset 7
3179*c0909341SAndroid Build Coastguard Worker        vld1.32         {d16}, [r7,  :64]    // in1  (offset 0)
3180*c0909341SAndroid Build Coastguard Worker        vld1.32         {d17}, [r9,  :64]    // in31 (offset 15)
3181*c0909341SAndroid Build Coastguard Worker        vld1.32         {d18}, [r10, :64]    // in17 (offset 8)
3182*c0909341SAndroid Build Coastguard Worker        vld1.32         {d19}, [r11, :64]    // in15 (offset 7)
3183*c0909341SAndroid Build Coastguard Worker        vst1_if         {d7},  [r7,  :64], \clear
3184*c0909341SAndroid Build Coastguard Worker        vst1_if         {d7},  [r9,  :64], \clear
3185*c0909341SAndroid Build Coastguard Worker        vst1_if         {d7},  [r10, :64], \clear
3186*c0909341SAndroid Build Coastguard Worker        vst1_if         {d7},  [r11, :64], \clear
3187*c0909341SAndroid Build Coastguard Worker        scale_if        \scale, d0[0], q8, q9
3188*c0909341SAndroid Build Coastguard Worker        bl              inv_dct64_step1_neon
        // step1 overwrote d0 and d7, so the scale constant and the zero
        // register are rebuilt before every group.
3189*c0909341SAndroid Build Coastguard Worker        movdup_if       d0,  lr,  2896*8*(1<<16), \scale
3190*c0909341SAndroid Build Coastguard Worker        vmov_if         d7,  #0,  \clear
3191*c0909341SAndroid Build Coastguard Worker        add             r7,  r7,  r8, lsl #2 // offset 4
3192*c0909341SAndroid Build Coastguard Worker        sub             r9,  r9,  r8, lsl #2 // offset 11
3193*c0909341SAndroid Build Coastguard Worker        sub             r10, r7,  r8         // offset 3
3194*c0909341SAndroid Build Coastguard Worker        add             r11, r9,  r8         // offset 12
3195*c0909341SAndroid Build Coastguard Worker        vld1.32         {d16}, [r10, :64]    // in7  (offset 3)
3196*c0909341SAndroid Build Coastguard Worker        vld1.32         {d17}, [r11, :64]    // in25 (offset 12)
3197*c0909341SAndroid Build Coastguard Worker        vld1.32         {d18}, [r9,  :64]    // in23 (offset 11)
3198*c0909341SAndroid Build Coastguard Worker        vld1.32         {d19}, [r7,  :64]    // in9  (offset 4)
3199*c0909341SAndroid Build Coastguard Worker        vst1_if         {d7},  [r7,  :64], \clear
3200*c0909341SAndroid Build Coastguard Worker        vst1_if         {d7},  [r9,  :64], \clear
3201*c0909341SAndroid Build Coastguard Worker        vst1_if         {d7},  [r10, :64], \clear
3202*c0909341SAndroid Build Coastguard Worker        vst1_if         {d7},  [r11, :64], \clear
3203*c0909341SAndroid Build Coastguard Worker        scale_if        \scale, d0[0], q8, q9
3204*c0909341SAndroid Build Coastguard Worker        bl              inv_dct64_step1_neon
3205*c0909341SAndroid Build Coastguard Worker        movdup_if       d0,  lr,  2896*8*(1<<16), \scale
3206*c0909341SAndroid Build Coastguard Worker        vmov_if         d7,  #0,  \clear
3207*c0909341SAndroid Build Coastguard Worker        sub             r10, r10, r8, lsl #1 // offset 1
3208*c0909341SAndroid Build Coastguard Worker        sub             r9,  r9,  r8, lsl #1 // offset 9
3209*c0909341SAndroid Build Coastguard Worker        add             r10, r10, r8         // offset 2
3210*c0909341SAndroid Build Coastguard Worker        add             r9,  r9,  r8         // offset 10
3211*c0909341SAndroid Build Coastguard Worker        add             r7,  r7,  r8         // offset 5
3212*c0909341SAndroid Build Coastguard Worker        add             r11, r11, r8         // offset 13
        // The register lists in this group and the next are written without
        // braces, unlike the two groups above.
3213*c0909341SAndroid Build Coastguard Worker        vld1.32         d16, [r10, :64]      // in5  (offset 2)
3214*c0909341SAndroid Build Coastguard Worker        vld1.32         d17, [r11, :64]      // in27 (offset 13)
3215*c0909341SAndroid Build Coastguard Worker        vld1.32         d18, [r9,  :64]      // in21 (offset 10)
3216*c0909341SAndroid Build Coastguard Worker        vld1.32         d19, [r7,  :64]      // in11 (offset 5)
3217*c0909341SAndroid Build Coastguard Worker        vst1_if         d7,  [r10, :64], \clear
3218*c0909341SAndroid Build Coastguard Worker        vst1_if         d7,  [r11, :64], \clear
3219*c0909341SAndroid Build Coastguard Worker        vst1_if         d7,  [r9,  :64], \clear
3220*c0909341SAndroid Build Coastguard Worker        vst1_if         d7,  [r7,  :64], \clear
3221*c0909341SAndroid Build Coastguard Worker        scale_if        \scale, d0[0], q8, q9
3222*c0909341SAndroid Build Coastguard Worker        bl              inv_dct64_step1_neon
3223*c0909341SAndroid Build Coastguard Worker        movdup_if       d0,  lr,  2896*8*(1<<16), \scale
3224*c0909341SAndroid Build Coastguard Worker        vmov_if         d7,  #0,  \clear
3225*c0909341SAndroid Build Coastguard Worker        sub             r10, r10, r8         // offset 1
3226*c0909341SAndroid Build Coastguard Worker        sub             r9,  r9,  r8         // offset 9
3227*c0909341SAndroid Build Coastguard Worker        add             r11, r11, r8         // offset 14
3228*c0909341SAndroid Build Coastguard Worker        add             r7,  r7,  r8         // offset 6
3229*c0909341SAndroid Build Coastguard Worker        vld1.32         d16, [r10, :64]      // in3  (offset 1)
3230*c0909341SAndroid Build Coastguard Worker        vld1.32         d17, [r11, :64]      // in29 (offset 14)
3231*c0909341SAndroid Build Coastguard Worker        vld1.32         d18, [r9,  :64]      // in19 (offset 9)
3232*c0909341SAndroid Build Coastguard Worker        vld1.32         d19, [r7,  :64]      // in13 (offset 6)
3233*c0909341SAndroid Build Coastguard Worker        vst1_if         d7,  [r10, :64], \clear
3234*c0909341SAndroid Build Coastguard Worker        vst1_if         d7,  [r11, :64], \clear
3235*c0909341SAndroid Build Coastguard Worker        vst1_if         d7,  [r9,  :64], \clear
3236*c0909341SAndroid Build Coastguard Worker        vst1_if         d7,  [r7,  :64], \clear
3237*c0909341SAndroid Build Coastguard Worker        scale_if        \scale, d0[0], q8, q9
3238*c0909341SAndroid Build Coastguard Worker        bl              inv_dct64_step1_neon
3239*c0909341SAndroid Build Coastguard Worker
        // The four step1 calls advanced r6 by 4*64 bytes. Rewind it to
        // slot 32 and start r9 seven slots above; with that gap the step2
        // loop runs four iterations.
3240*c0909341SAndroid Build Coastguard Worker        sub             r6,  r6,  #2*4*32
3241*c0909341SAndroid Build Coastguard Worker        add             r9,  r6,  #2*4*7
3242*c0909341SAndroid Build Coastguard Worker
3243*c0909341SAndroid Build Coastguard Worker        bl              inv_dct64_step2_neon
3244*c0909341SAndroid Build Coastguard Worker
        // Return by popping the saved lr straight into pc.
3245*c0909341SAndroid Build Coastguard Worker        pop             {r10-r11,pc}
3246*c0909341SAndroid Build Coastguard Workerendfunc
3247*c0909341SAndroid Build Coastguard Worker.endm
3248*c0909341SAndroid Build Coastguard Worker
// Instantiate the def_dct64_func macro twice. In the part of the macro body
// visible just above, clear=1 enables the vst1_if stores of d7 to the input
// pointers and scale=1 enables the scale_if / movdup_if steps.
// NOTE(review): the generated symbols are presumably
// inv_txfm_dct_clear_2s_x64_neon and inv_txfm_dct_clear_scale_2s_x64_neon
// (the names called by the drivers below) -- confirm against the macro header.
3249*c0909341SAndroid Build Coastguard Workerdef_dct64_func _clear, clear=1
3250*c0909341SAndroid Build Coastguard Workerdef_dct64_func _clear_scale, clear=1, scale=1
3251*c0909341SAndroid Build Coastguard Worker
// inv_txfm_horz_dct_64x2_neon
//
// Final add/sub stage for two rows of 64 values: reads 32-bit lanes from the
// scratch area at the caller's sp (from both ends towards the middle),
// forms saturating sums and differences, applies a rounding shift, narrows
// to 16 bit with saturation and stores the sums at the front of each output
// row and the differences, in reversed order, at the back.
//
// In:
//   sp  = scratch area on entry (captured before the push below), 2*4*64
//         bytes are read
//   r6  = output pointer; rows are 2*64 bytes apart
//   r9  = shift count for vrshl (the callers in this file pass -2 or -1,
//         i.e. a rounding right shift)
// Clobbers: r6-r9, q0-q4, q8-q15, flags. r10/r11 are saved and restored.
3252*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_horz_dct_64x2_neon
3253*c0909341SAndroid Build Coastguard Worker        vdup.32         q4,  r9              // q4 = shift count in every lane
3254*c0909341SAndroid Build Coastguard Worker
3255*c0909341SAndroid Build Coastguard Worker        mov             r7,  sp              // r7 = front read pointer (ascending)
3256*c0909341SAndroid Build Coastguard Worker        add             r8,  sp,  #2*4*(64 - 4) // r8 = last 32-byte group (descending)
3257*c0909341SAndroid Build Coastguard Worker        add             r9,  r6,  #2*56      // r9 = last 8 halfwords of the output row
3258*c0909341SAndroid Build Coastguard Worker
3259*c0909341SAndroid Build Coastguard Worker        push            {r10-r11,lr}
3260*c0909341SAndroid Build Coastguard Worker
3261*c0909341SAndroid Build Coastguard Worker        mov             r10, #2*64           // output row stride in bytes
3262*c0909341SAndroid Build Coastguard Worker        mov             r11, #-2*4*4         // post-decrement for the back pointer
3263*c0909341SAndroid Build Coastguard Worker
3264*c0909341SAndroid Build Coastguard Worker1:
        // Per iteration: 64 bytes from the front (r7 += 64) and 64 bytes
        // from the back (r8 -= 64).
3265*c0909341SAndroid Build Coastguard Worker        vld1.32         {d16, d17, d18, d19}, [r7, :128]!
3266*c0909341SAndroid Build Coastguard Worker        vld1.32         {d28, d29, d30, d31}, [r8, :128], r11
3267*c0909341SAndroid Build Coastguard Worker        vld1.32         {d20, d21, d22, d23}, [r7, :128]!
3268*c0909341SAndroid Build Coastguard Worker        vld1.32         {d24, d25, d26, d27}, [r8, :128], r11
        // 2x2 transposes of 32-bit lanes within each register pair; for the
        // back half the operand order is swapped.
3269*c0909341SAndroid Build Coastguard Worker        vtrn.32         d16, d17
3270*c0909341SAndroid Build Coastguard Worker        vtrn.32         d18, d19
3271*c0909341SAndroid Build Coastguard Worker        vtrn.32         d20, d21
3272*c0909341SAndroid Build Coastguard Worker        vtrn.32         d22, d23
3273*c0909341SAndroid Build Coastguard Worker        vtrn.32         d31, d30
3274*c0909341SAndroid Build Coastguard Worker        vtrn.32         d29, d28
3275*c0909341SAndroid Build Coastguard Worker        vtrn.32         d27, d26
3276*c0909341SAndroid Build Coastguard Worker        vtrn.32         d25, d24
3277*c0909341SAndroid Build Coastguard Worker
        // store_addsub: one output row per invocation.
        //   q0/q1 (d0-d3) = src(even) + src(odd), in argument order
        //   q2/q3 (d4-d7) = src(even) - src(odd), in reverse argument order
        // After narrowing, q0 holds 8 sums and q3 holds 8 differences; the
        // vrev32.16 swaps the two halfwords of each pair so that the
        // differences end up fully reversed. Both stores advance by r10.
3278*c0909341SAndroid Build Coastguard Worker.macro store_addsub src0, src1, src2, src3, src4, src5, src6, src7
3279*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d7,  \src0,  \src1
3280*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d6,  \src2,  \src3
3281*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d5,  \src4,  \src5
3282*c0909341SAndroid Build Coastguard Worker        vqsub.s32       d4,  \src6,  \src7
3283*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d0,  \src0,  \src1
3284*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d1,  \src2,  \src3
3285*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d2,  \src4,  \src5
3286*c0909341SAndroid Build Coastguard Worker        vqadd.s32       d3,  \src6,  \src7
3287*c0909341SAndroid Build Coastguard Worker        vrshl.s32       q3,  q3,  q4
3288*c0909341SAndroid Build Coastguard Worker        vrshl.s32       q2,  q2,  q4
3289*c0909341SAndroid Build Coastguard Worker        vrshl.s32       q0,  q0,  q4
3290*c0909341SAndroid Build Coastguard Worker        vrshl.s32       q1,  q1,  q4
3291*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d7,  q3
3292*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d6,  q2
3293*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d0,  q0
3294*c0909341SAndroid Build Coastguard Worker        vqmovn.s32      d1,  q1
3295*c0909341SAndroid Build Coastguard Worker        vrev32.16       q3,  q3
3296*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0},  [r6, :128], r10
3297*c0909341SAndroid Build Coastguard Worker        vst1.16         {q3},  [r9, :128], r10
3298*c0909341SAndroid Build Coastguard Worker.endm
        // Even-numbered registers feed the first row, odd-numbered the second.
3299*c0909341SAndroid Build Coastguard Worker        store_addsub    d16, d31, d18, d29, d20, d27, d22, d25
3300*c0909341SAndroid Build Coastguard Worker        store_addsub    d17, d30, d19, d28, d21, d26, d23, d24
3301*c0909341SAndroid Build Coastguard Worker.purgem store_addsub
        // Undo the two row advances, then step 8 halfwords inwards from
        // both ends of the row.
3302*c0909341SAndroid Build Coastguard Worker        sub             r6,  r6,  r10, lsl #1
3303*c0909341SAndroid Build Coastguard Worker        sub             r9,  r9,  r10, lsl #1
3304*c0909341SAndroid Build Coastguard Worker        add             r6,  r6,  #16
3305*c0909341SAndroid Build Coastguard Worker        sub             r9,  r9,  #16
3306*c0909341SAndroid Build Coastguard Worker
        // Stop once the ascending and descending read pointers have crossed.
3307*c0909341SAndroid Build Coastguard Worker        cmp             r7,  r8
3308*c0909341SAndroid Build Coastguard Worker        blt             1b
3309*c0909341SAndroid Build Coastguard Worker        pop             {r10-r11,pc}
3310*c0909341SAndroid Build Coastguard Workerendfunc
3311*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_vert_dct_4x64_neon
//
// Final add/sub stage of a vertical 64-point transform for a strip that is
// four 16-bit pixels wide, added to the destination. The 64 intermediate
// rows (4 halfwords each) are read from the scratch area at the caller's sp,
// from both ends towards the middle. Row n is produced from front + back and
// row 63-n from front - back; both are rounded (>> 4), added to the
// destination with saturation, clamped to [0, 0x3ff] and stored.
// NOTE(review): 0x3ff is presumably the 10-bit pixel maximum -- confirm.
//
// In:
//   sp  = scratch area on entry (captured before the push below), 2*4*64
//         bytes are read
//   r6  = destination pointer, top row of the strip (64-bit aligned)
//   r1  = destination stride in bytes
// The value of r8 on entry is not used; r8 is set up from sp below.
// Clobbers: r6-r9, q0-q3, q6-q15, flags. r10/r11 are saved and restored.
function inv_txfm_add_vert_dct_4x64_neon
        mov             r7,  sp              // front read pointer (ascending)
        add             r8,  sp,  #2*4*(64 - 4) // last 32-byte group (descending)
        add             r9,  r6,  r1, lsl #6
        sub             r9,  r9,  r1         // r9 = r6 + 63*stride, bottom row

        push            {r10-r11,lr}

        neg             r10, r1              // stride for walking upwards from r9
        mov             r11, #-2*4*4         // post-decrement for the back pointer

        // Clamp limits; nothing in the loop writes q6/q7, so set them once.
        vmov.i16        q6,  #0
        vmvn.i16        q7,  #0xfc00 // 0x3ff

1:
        // Per iteration: 16 rows from the front (r7 += 64) and 16 rows from
        // the back (r8 -= 64).
        vld1.16         {d16, d17, d18, d19}, [r7, :128]!
        vld1.16         {d28, d29, d30, d31}, [r8, :128], r11
        vld1.16         {d20, d21, d22, d23}, [r7, :128]!
        vld1.16         {d24, d25, d26, d27}, [r8, :128], r11

        // add_dest_addsub: two rows from the top (r6, moving down) and two
        // rows from the bottom (r9, moving up) per invocation.
        //   d0/d2 = destination rows at r6, r6 + stride
        //   d1/d3 = destination rows at r9, r9 - stride
        // The first load of each pair post-increments and the pointer is
        // rewound afterwards, so the four stores walk the same rows again
        // and leave r6/r9 two rows further on.
.macro add_dest_addsub src0, src1, src2, src3
        vld1.16         {d0}, [r6, :64], r1
        vld1.16         {d1}, [r9, :64], r10
        vqadd.s16       d4,  \src0,  \src1
        vld1.16         {d2}, [r6, :64]
        vqsub.s16       d5,  \src0,  \src1
        vld1.16         {d3}, [r9, :64]
        vqadd.s16       d6,  \src2,  \src3
        vqsub.s16       d7,  \src2,  \src3
        sub             r6,  r6,  r1
        sub             r9,  r9,  r10
        vrshr.s16       q2,  q2,  #4
        vrshr.s16       q3,  q3,  #4
        vqadd.s16       q2,  q2,  q0
        vqadd.s16       q3,  q3,  q1
        vmax.s16        q2,  q2,  q6
        vmax.s16        q3,  q3,  q6
        vmin.s16        q2,  q2,  q7
        vmin.s16        q3,  q3,  q7
        vst1.16         {d4}, [r6, :64], r1
        vst1.16         {d5}, [r9, :64], r10
        vst1.16         {d6}, [r6, :64], r1
        vst1.16         {d7}, [r9, :64], r10
.endm
        add_dest_addsub d16, d31, d17, d30
        add_dest_addsub d18, d29, d19, d28
        add_dest_addsub d20, d27, d21, d26
        add_dest_addsub d22, d25, d23, d24
.purgem add_dest_addsub
        // Stop once the ascending and descending read pointers have crossed.
        cmp             r7,  r8
        blt             1b

        pop             {r10-r11,pc}
endfunc
3367*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_dct_dct_64x64_16bpc_neon (exported)
//
// Two-pass driver built from helpers defined elsewhere in this file.
// Registers as used by the code below:
//   r0 = base of the output columns (pass 2 uses r0 + column*2)
//   r2 = base of the input coefficients (pass 1 uses r2 + row*4)
//   r3 = value compared against the eob_32x32 thresholds to end pass 1 early
// NOTE(review): presumably r0/r1/r2/r3 are dst, stride, coeff and eob of the
// C prototype, and idct_dc handles the DC-only case -- neither is visible
// here, confirm against the macro and the C declaration.
//
// Stack frame: the lowest 64*4*2 bytes at sp are the scratch area that the
// 64-point helpers address relative to sp; r5 points just above it at the
// intermediate buffer of 32 rows of 64 16-bit values (64*32*2 bytes).
3368*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_dct_dct_64x64_16bpc_neon, export=1
3369*c0909341SAndroid Build Coastguard Worker        idct_dc         64,  64,  2
3370*c0909341SAndroid Build Coastguard Worker
3371*c0909341SAndroid Build Coastguard Worker        push            {r4-r11,lr}
3372*c0909341SAndroid Build Coastguard Worker        vpush           {q4-q7}
3373*c0909341SAndroid Build Coastguard Worker
3374*c0909341SAndroid Build Coastguard Worker        sub_sp_align    64*32*2+64*4*2
3375*c0909341SAndroid Build Coastguard Worker        add             r5,  sp,  #64*4*2
3376*c0909341SAndroid Build Coastguard Worker
3377*c0909341SAndroid Build Coastguard Worker        movrel_local    r10, eob_32x32
3378*c0909341SAndroid Build Coastguard Worker
        // Pass 1, unrolled over row pairs 0, 2, ..., 30. From the second
        // pair on, r3 is first compared (signed) with the threshold in r11;
        // if it is lower, branch to 1: with r6 = current buffer row and
        // r8 = number of rows still to fill. Otherwise r8 is reloaded with
        // 32*4 for the helper (NOTE(review): presumably the input stride in
        // bytes), the row pair is transformed and the next threshold is
        // fetched from the table.
3379*c0909341SAndroid Build Coastguard Worker.irp i, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
3380*c0909341SAndroid Build Coastguard Worker        add             r6,  r5,  #(\i*64*2)
3381*c0909341SAndroid Build Coastguard Worker.if \i > 0
3382*c0909341SAndroid Build Coastguard Worker        mov             r8,  #(32 - \i)
3383*c0909341SAndroid Build Coastguard Worker        cmp             r3,  r11
3384*c0909341SAndroid Build Coastguard Worker        blt             1f
3385*c0909341SAndroid Build Coastguard Worker.endif
3386*c0909341SAndroid Build Coastguard Worker        add             r7,  r2,  #(\i*4)
3387*c0909341SAndroid Build Coastguard Worker        mov             r8,  #32*4
3388*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_dct_clear_2s_x64_neon
        // r6 is recomputed because the helpers are allowed to change it.
3389*c0909341SAndroid Build Coastguard Worker        add             r6,  r5,  #(\i*64*2)
3390*c0909341SAndroid Build Coastguard Worker        mov             r9,  #-2 // shift
3391*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_horz_dct_64x2_neon
3392*c0909341SAndroid Build Coastguard Worker.if \i < 30
3393*c0909341SAndroid Build Coastguard Worker        ldrh            r11, [r10], #2
3394*c0909341SAndroid Build Coastguard Worker.endif
3395*c0909341SAndroid Build Coastguard Worker.endr
3396*c0909341SAndroid Build Coastguard Worker        b               3f
3397*c0909341SAndroid Build Coastguard Worker
        // Early exit from pass 1: zero the remaining r8 rows of the buffer.
3398*c0909341SAndroid Build Coastguard Worker1:
3399*c0909341SAndroid Build Coastguard Worker        vmov.i16        q2,  #0
3400*c0909341SAndroid Build Coastguard Worker        vmov.i16        q3,  #0
3401*c0909341SAndroid Build Coastguard Worker2:
        // 8 stores of 32 bytes = two rows of 64 halfwords per iteration.
3402*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #2
3403*c0909341SAndroid Build Coastguard Worker.rept 8
3404*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3}, [r6, :128]!
3405*c0909341SAndroid Build Coastguard Worker.endr
3406*c0909341SAndroid Build Coastguard Worker        bgt             2b
3407*c0909341SAndroid Build Coastguard Worker
        // Pass 2, unrolled over 16 strips of 4 columns: r7 = strip in the
        // buffer, r8 = buffer row stride in bytes, then r6 = strip in the
        // output for the add/store helper.
3408*c0909341SAndroid Build Coastguard Worker3:
3409*c0909341SAndroid Build Coastguard Worker.irp i, 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
3410*c0909341SAndroid Build Coastguard Worker        add             r7,  r5,  #(\i*2)
3411*c0909341SAndroid Build Coastguard Worker        mov             r8,  #64*2
3412*c0909341SAndroid Build Coastguard Worker        bl              X(inv_txfm_dct_4h_x64_neon)
3413*c0909341SAndroid Build Coastguard Worker        add             r6,  r0,  #(\i*2)
3414*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_add_vert_dct_4x64_neon
3415*c0909341SAndroid Build Coastguard Worker.endr
3416*c0909341SAndroid Build Coastguard Worker
3417*c0909341SAndroid Build Coastguard Worker        add_sp_align    64*32*2+64*4*2
3418*c0909341SAndroid Build Coastguard Worker        vpop            {q4-q7}
3419*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}
3420*c0909341SAndroid Build Coastguard Workerendfunc
3421*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_dct_dct_64x32_16bpc_neon (exported)
//
// Two-pass driver. Pass 1 uses the scaling variant of the 64-point helper
// and a shift of -1; pass 2 hands 4-column strips to
// inv_txfm_add_vert_dct_4x32_neon.
// Registers as used by the code below:
//   r0 = base of the output columns (pass 2 uses r0 + column*2)
//   r2 = base of the input coefficients (pass 1 uses r2 + row*4)
//   r3 = value compared against the eob_32x32 thresholds to end pass 1 early
// NOTE(review): presumably r0/r1/r2/r3 are dst, stride, coeff and eob of the
// C prototype, and idct_dc handles the DC-only case -- confirm.
//
// Stack frame: 64*4*2 bytes of scratch at sp, followed (at r5) by the
// intermediate buffer of 32 rows of 64 16-bit values.
3422*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_dct_dct_64x32_16bpc_neon, export=1
3423*c0909341SAndroid Build Coastguard Worker        idct_dc         64,  32,  1
3424*c0909341SAndroid Build Coastguard Worker
3425*c0909341SAndroid Build Coastguard Worker        push            {r4-r11,lr}
3426*c0909341SAndroid Build Coastguard Worker        vpush           {q4-q7}
3427*c0909341SAndroid Build Coastguard Worker
3428*c0909341SAndroid Build Coastguard Worker        sub_sp_align    64*32*2+64*4*2
3429*c0909341SAndroid Build Coastguard Worker        add             r5,  sp,  #64*4*2
3430*c0909341SAndroid Build Coastguard Worker
3431*c0909341SAndroid Build Coastguard Worker        movrel_local    r10, eob_32x32
3432*c0909341SAndroid Build Coastguard Worker
        // Pass 1, unrolled over row pairs 0, 2, ..., 30. From the second
        // pair on, branch to 1: when r3 is below the threshold in r11, with
        // r6 = current buffer row and r8 = number of rows still to fill.
        // The next threshold is fetched at the end of each step.
3433*c0909341SAndroid Build Coastguard Worker.irp i, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
3434*c0909341SAndroid Build Coastguard Worker        add             r6,  r5,  #(\i*64*2)
3435*c0909341SAndroid Build Coastguard Worker.if \i > 0
3436*c0909341SAndroid Build Coastguard Worker        mov             r8,  #(32 - \i)
3437*c0909341SAndroid Build Coastguard Worker        cmp             r3,  r11
3438*c0909341SAndroid Build Coastguard Worker        blt             1f
3439*c0909341SAndroid Build Coastguard Worker.endif
3440*c0909341SAndroid Build Coastguard Worker        add             r7,  r2,  #(\i*4)
3441*c0909341SAndroid Build Coastguard Worker        mov             r8,  #32*4
3442*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_dct_clear_scale_2s_x64_neon
        // r6 is recomputed because the helpers are allowed to change it.
3443*c0909341SAndroid Build Coastguard Worker        add             r6,  r5,  #(\i*64*2)
3444*c0909341SAndroid Build Coastguard Worker        mov             r9,  #-1 // shift
3445*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_horz_dct_64x2_neon
3446*c0909341SAndroid Build Coastguard Worker.if \i < 30
3447*c0909341SAndroid Build Coastguard Worker        ldrh            r11, [r10], #2
3448*c0909341SAndroid Build Coastguard Worker.endif
3449*c0909341SAndroid Build Coastguard Worker.endr
3450*c0909341SAndroid Build Coastguard Worker        b               3f
3451*c0909341SAndroid Build Coastguard Worker
        // Early exit from pass 1: zero the remaining r8 rows of the buffer.
3452*c0909341SAndroid Build Coastguard Worker1:
3453*c0909341SAndroid Build Coastguard Worker        vmov.i16        q2,  #0
3454*c0909341SAndroid Build Coastguard Worker        vmov.i16        q3,  #0
3455*c0909341SAndroid Build Coastguard Worker2:
        // 8 stores of 32 bytes = two rows of 64 halfwords per iteration.
3456*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #2
3457*c0909341SAndroid Build Coastguard Worker.rept 8
3458*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3}, [r6, :128]!
3459*c0909341SAndroid Build Coastguard Worker.endr
3460*c0909341SAndroid Build Coastguard Worker        bgt             2b
3461*c0909341SAndroid Build Coastguard Worker
        // Pass 2, unrolled over 16 strips of 4 columns: r6 = strip in the
        // output, r7 = strip in the buffer, r8 = buffer row stride in bytes.
3462*c0909341SAndroid Build Coastguard Worker3:
3463*c0909341SAndroid Build Coastguard Worker.irp i, 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
3464*c0909341SAndroid Build Coastguard Worker        add             r6,  r0,  #(\i*2)
3465*c0909341SAndroid Build Coastguard Worker        add             r7,  r5,  #(\i*2)
3466*c0909341SAndroid Build Coastguard Worker        mov             r8,  #64*2
3467*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_add_vert_dct_4x32_neon
3468*c0909341SAndroid Build Coastguard Worker.endr
3469*c0909341SAndroid Build Coastguard Worker
3470*c0909341SAndroid Build Coastguard Worker        add_sp_align    64*32*2+64*4*2
3471*c0909341SAndroid Build Coastguard Worker        vpop            {q4-q7}
3472*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}
3473*c0909341SAndroid Build Coastguard Workerendfunc
3474*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_dct_dct_32x64_16bpc_neon (exported)
//
// Two-pass driver. Pass 1 calls inv_txfm_horz_scale_dct_32x2_neon per row
// pair; pass 2 runs the 64-point column helper and the add/store helper on
// 8 strips of 4 columns.
// Registers as used by the code below:
//   r0 = base of the output columns (pass 2 uses r0 + column*2)
//   r2 = base of the input coefficients (pass 1 uses r2 + row*4)
//   r3 = value compared against the eob_32x32 thresholds to end pass 1 early
// NOTE(review): presumably r0/r1/r2/r3 are dst, stride, coeff and eob of the
// C prototype, and idct_dc handles the DC-only case -- confirm.
//
// Stack frame: 64*4*2 bytes of scratch at sp (used by the 64-point helpers
// in pass 2), followed (at r5) by the intermediate buffer of 32 rows of
// 32 16-bit values.
3475*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_dct_dct_32x64_16bpc_neon, export=1
3476*c0909341SAndroid Build Coastguard Worker        idct_dc         32,  64,  1
3477*c0909341SAndroid Build Coastguard Worker
3478*c0909341SAndroid Build Coastguard Worker        push            {r4-r11,lr}
3479*c0909341SAndroid Build Coastguard Worker        vpush           {q4-q7}
3480*c0909341SAndroid Build Coastguard Worker
3481*c0909341SAndroid Build Coastguard Worker        sub_sp_align    32*32*2+64*4*2
3482*c0909341SAndroid Build Coastguard Worker        add             r5,  sp,  #64*4*2
3483*c0909341SAndroid Build Coastguard Worker
        // Unlike the 64-wide drivers, the first threshold is loaded up
        // front and the next one right after each successful compare.
3484*c0909341SAndroid Build Coastguard Worker        movrel_local    r10, eob_32x32
3485*c0909341SAndroid Build Coastguard Worker        ldrh            r11, [r10], #2
3486*c0909341SAndroid Build Coastguard Worker
        // Pass 1, unrolled over row pairs 0, 2, ..., 30. From the second
        // pair on, branch to 1: when r3 is below the threshold in r11, with
        // r6 = current buffer row and r8 = number of rows still to fill.
3487*c0909341SAndroid Build Coastguard Worker.irp i, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
3488*c0909341SAndroid Build Coastguard Worker        add             r6,  r5,  #(\i*32*2)
3489*c0909341SAndroid Build Coastguard Worker.if \i > 0
3490*c0909341SAndroid Build Coastguard Worker        mov             r8,  #(32 - \i)
3491*c0909341SAndroid Build Coastguard Worker        cmp             r3,  r11
3492*c0909341SAndroid Build Coastguard Worker        blt             1f
3493*c0909341SAndroid Build Coastguard Worker.if \i < 30
3494*c0909341SAndroid Build Coastguard Worker        ldrh            r11, [r10], #2
3495*c0909341SAndroid Build Coastguard Worker.endif
3496*c0909341SAndroid Build Coastguard Worker.endif
3497*c0909341SAndroid Build Coastguard Worker        add             r7,  r2,  #(\i*4)
3498*c0909341SAndroid Build Coastguard Worker        mov             r8,  #32*4
3499*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_horz_scale_dct_32x2_neon
3500*c0909341SAndroid Build Coastguard Worker.endr
3501*c0909341SAndroid Build Coastguard Worker        b               3f
3502*c0909341SAndroid Build Coastguard Worker
        // Early exit from pass 1: zero the remaining r8 rows of the buffer.
3503*c0909341SAndroid Build Coastguard Worker1:
3504*c0909341SAndroid Build Coastguard Worker        vmov.i16        q2,  #0
3505*c0909341SAndroid Build Coastguard Worker        vmov.i16        q3,  #0
3506*c0909341SAndroid Build Coastguard Worker2:
        // 4 stores of 32 bytes = two rows of 32 halfwords per iteration.
3507*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #2
3508*c0909341SAndroid Build Coastguard Worker.rept 4
3509*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3}, [r6, :128]!
3510*c0909341SAndroid Build Coastguard Worker.endr
3511*c0909341SAndroid Build Coastguard Worker        bgt             2b
3512*c0909341SAndroid Build Coastguard Worker
        // Pass 2, unrolled over 8 strips of 4 columns: r7 = strip in the
        // buffer, r8 = buffer row stride in bytes, then r6 = strip in the
        // output for the add/store helper.
3513*c0909341SAndroid Build Coastguard Worker3:
3514*c0909341SAndroid Build Coastguard Worker.irp i, 0, 4, 8, 12, 16, 20, 24, 28
3515*c0909341SAndroid Build Coastguard Worker        add             r7,  r5,  #(\i*2)
3516*c0909341SAndroid Build Coastguard Worker        mov             r8,  #32*2
3517*c0909341SAndroid Build Coastguard Worker        bl              X(inv_txfm_dct_4h_x64_neon)
3518*c0909341SAndroid Build Coastguard Worker        add             r6,  r0,  #(\i*2)
3519*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_add_vert_dct_4x64_neon
3520*c0909341SAndroid Build Coastguard Worker.endr
3521*c0909341SAndroid Build Coastguard Worker
3522*c0909341SAndroid Build Coastguard Worker        add_sp_align    32*32*2+64*4*2
3523*c0909341SAndroid Build Coastguard Worker        vpop            {q4-q7}
3524*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}
3525*c0909341SAndroid Build Coastguard Workerendfunc
3526*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_dct_dct_64x16_16bpc_neon (exported)
//
// Two-pass driver. Pass 1 runs the 64-point row helpers on 8 row pairs;
// pass 2 hands 16 strips of 4 columns to inv_txfm_add_vert_4x16_neon.
// Registers as used by the code below:
//   r0 = base of the output columns (pass 2 uses r0 + column*2)
//   r2 = base of the input coefficients (pass 1 uses r2 + row*4)
//   r3 = value compared against the eob_16x32 thresholds to end pass 1 early
//   r4 = intermediate buffer (r5 is needed for the function pointer below)
// NOTE(review): presumably r0/r1/r2/r3 are dst, stride, coeff and eob of the
// C prototype, and idct_dc handles the DC-only case -- confirm.
//
// Stack frame: 64*4*2 bytes of scratch at sp (used by the 64-point
// helpers), followed (at r4) by the intermediate buffer of 16 rows of
// 64 16-bit values.
function inv_txfm_add_dct_dct_64x16_16bpc_neon, export=1
        idct_dc         64,  16,  2

        push            {r4-r11,lr}
        vpush           {q4-q7}

        sub_sp_align    64*16*2+64*4*2
        add             r4,  sp,  #64*4*2

        movrel_local    r10, eob_16x32

        // Pass 1, unrolled over row pairs 0, 2, ..., 14. From the second
        // pair on, branch to 1: when r3 is below the threshold in r11, with
        // r6 = current buffer row and r8 = number of rows still to fill.
        //
        // A fresh threshold is fetched after every step except the last one,
        // matching the other drivers in this file. Stopping the reloads
        // early would leave r11 stale for the remaining steps, so they could
        // never take the early exit (same output, needless work).
        // NOTE(review): relies on eob_16x32 holding one ascending threshold
        // per two-row step; the 16x64 driver walks the same table that way
        // for 15 steps -- confirm against the table definition.
.irp i, 0, 2, 4, 6, 8, 10, 12, 14
        add             r6,  r4,  #(\i*64*2)
.if \i > 0
        mov             r8,  #(16 - \i)
        cmp             r3,  r11
        blt             1f
.endif
        add             r7,  r2,  #(\i*4)
        mov             r8,  #16*4
        bl              inv_txfm_dct_clear_2s_x64_neon
        // r6 is recomputed because the helpers are allowed to change it.
        add             r6,  r4,  #(\i*64*2)
        mov             r9,  #-2 // shift
        bl              inv_txfm_horz_dct_64x2_neon
.if \i < 14
        ldrh            r11, [r10], #2
.endif
.endr
        b               3f

        // Early exit from pass 1: zero the remaining r8 rows of the buffer.
1:
        vmov.i16        q2,  #0
        vmov.i16        q3,  #0
2:
        // 8 stores of 32 bytes = two rows of 64 halfwords per iteration.
        subs            r8,  r8,  #2
.rept 8
        vst1.16         {q2, q3}, [r6, :128]!
.endr
        bgt             2b

        // Pass 2, unrolled over 16 strips of 4 columns: r6 = strip in the
        // output, r7 = strip in the buffer, r8 = buffer row stride in bytes.
        // NOTE(review): r5 is presumably the column transform that
        // inv_txfm_add_vert_4x16_neon calls indirectly -- confirm there.
3:
        movrel          r5,  X(inv_dct_4h_x16_neon)
.irp i, 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
        add             r6,  r0,  #(\i*2)
        add             r7,  r4,  #(\i*2)
        mov             r8,  #64*2
        bl              inv_txfm_add_vert_4x16_neon
.endr

        add_sp_align    64*16*2+64*4*2
        vpop            {q4-q7}
        pop             {r4-r11,pc}
endfunc
3580*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_dct_dct_16x64_16bpc_neon (exported)
//
// Two-pass driver. Pass 1 calls inv_txfm_horz_16x2_neon per row pair; pass 2
// runs the 64-point column helper and the add/store helper on 4 strips of
// 4 columns.
// Registers as used by the code below:
//   r0 = base of the output columns (pass 2 uses r0 + column*2)
//   r2 = base of the input coefficients (pass 1 uses r2 + row*4)
//   r3 = value compared against the eob_16x32 thresholds to end pass 1 early
//   r4 = address of inv_dct_2s_x16_neon
// NOTE(review): presumably r0/r1/r2/r3 are dst, stride, coeff and eob of the
// C prototype, idct_dc handles the DC-only case, and r4 is the transform
// that inv_txfm_horz_16x2_neon calls indirectly -- confirm.
//
// Stack frame: 64*4*2 bytes of scratch at sp (used by the 64-point helpers
// in pass 2), followed (at r5) by the intermediate buffer of 32 rows of
// 16 16-bit values.
3581*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_dct_dct_16x64_16bpc_neon, export=1
3582*c0909341SAndroid Build Coastguard Worker        idct_dc         16,  64,  2
3583*c0909341SAndroid Build Coastguard Worker
3584*c0909341SAndroid Build Coastguard Worker        push            {r4-r11,lr}
3585*c0909341SAndroid Build Coastguard Worker        vpush           {q4-q7}
3586*c0909341SAndroid Build Coastguard Worker
3587*c0909341SAndroid Build Coastguard Worker        sub_sp_align    16*32*2+64*4*2
3588*c0909341SAndroid Build Coastguard Worker        add             r5,  sp,  #64*4*2
3589*c0909341SAndroid Build Coastguard Worker
        // The first threshold is loaded up front and the next one right
        // after each successful compare.
3590*c0909341SAndroid Build Coastguard Worker        movrel_local    r10, eob_16x32
3591*c0909341SAndroid Build Coastguard Worker        ldrh            r11, [r10], #2
3592*c0909341SAndroid Build Coastguard Worker
3593*c0909341SAndroid Build Coastguard Worker        movrel_local    r4,  inv_dct_2s_x16_neon
        // Pass 1, unrolled over row pairs 0, 2, ..., 30. From the second
        // pair on, branch to 1: when r3 is below the threshold in r11, with
        // r6 = current buffer row and r8 = number of rows still to fill.
3594*c0909341SAndroid Build Coastguard Worker.irp i, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
3595*c0909341SAndroid Build Coastguard Worker        add             r6,  r5,  #(\i*16*2)
3596*c0909341SAndroid Build Coastguard Worker.if \i > 0
3597*c0909341SAndroid Build Coastguard Worker        mov             r8,  #(32 - \i)
3598*c0909341SAndroid Build Coastguard Worker        cmp             r3,  r11
3599*c0909341SAndroid Build Coastguard Worker        blt             1f
3600*c0909341SAndroid Build Coastguard Worker.if \i < 30
3601*c0909341SAndroid Build Coastguard Worker        ldrh            r11, [r10], #2
3602*c0909341SAndroid Build Coastguard Worker.endif
3603*c0909341SAndroid Build Coastguard Worker.endif
3604*c0909341SAndroid Build Coastguard Worker        add             r7,  r2,  #(\i*4)
3605*c0909341SAndroid Build Coastguard Worker        mov             r8,  #32*4
3606*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_horz_16x2_neon
3607*c0909341SAndroid Build Coastguard Worker.endr
3608*c0909341SAndroid Build Coastguard Worker        b               3f
3609*c0909341SAndroid Build Coastguard Worker
        // Early exit from pass 1: zero the remaining r8 rows of the buffer.
3610*c0909341SAndroid Build Coastguard Worker1:
3611*c0909341SAndroid Build Coastguard Worker        vmov.i16        q2,  #0
3612*c0909341SAndroid Build Coastguard Worker        vmov.i16        q3,  #0
3613*c0909341SAndroid Build Coastguard Worker2:
        // 2 stores of 32 bytes = two rows of 16 halfwords per iteration.
3614*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #2
3615*c0909341SAndroid Build Coastguard Worker.rept 2
3616*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3}, [r6, :128]!
3617*c0909341SAndroid Build Coastguard Worker.endr
3618*c0909341SAndroid Build Coastguard Worker        bgt             2b
3619*c0909341SAndroid Build Coastguard Worker
        // Pass 2, unrolled over 4 strips of 4 columns: r7 = strip in the
        // buffer, r8 = buffer row stride in bytes, then r6 = strip in the
        // output for the add/store helper.
3620*c0909341SAndroid Build Coastguard Worker3:
3621*c0909341SAndroid Build Coastguard Worker.irp i, 0, 4, 8, 12
3622*c0909341SAndroid Build Coastguard Worker        add             r7,  r5,  #(\i*2)
3623*c0909341SAndroid Build Coastguard Worker        mov             r8,  #16*2
3624*c0909341SAndroid Build Coastguard Worker        bl              X(inv_txfm_dct_4h_x64_neon)
3625*c0909341SAndroid Build Coastguard Worker        add             r6,  r0,  #(\i*2)
3626*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_add_vert_dct_4x64_neon
3627*c0909341SAndroid Build Coastguard Worker.endr
3628*c0909341SAndroid Build Coastguard Worker
3629*c0909341SAndroid Build Coastguard Worker        add_sp_align    16*32*2+64*4*2
3630*c0909341SAndroid Build Coastguard Worker        vpop            {q4-q7}
3631*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}
3632*c0909341SAndroid Build Coastguard Workerendfunc
3633