xref: /aosp_15_r20/external/libdav1d/src/arm/32/itx.S (revision c09093415860a1c2373dacd84c4fde00c507cdfd)
/******************************************************************************
 * Copyright © 2018, VideoLAN and dav1d authors
 * Copyright © 2020, Martin Storsjo
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *****************************************************************************/
27*c0909341SAndroid Build Coastguard Worker
28*c0909341SAndroid Build Coastguard Worker#include "src/arm/asm.S"
29*c0909341SAndroid Build Coastguard Worker#include "util.S"
30*c0909341SAndroid Build Coastguard Worker
// The exported functions in this file have the following signature:
// void itxfm_add(pixel *dst, ptrdiff_t dst_stride, coef *coeff, int eob);
33*c0909341SAndroid Build Coastguard Worker
// Most of the functions use the following register layout:
// r0-r3   external parameters
// r4      function pointer to first transform
// r5      function pointer to second transform
// r6      output parameter for helper function
// r7      input parameter for helper function
// r8      input stride for helper function
// r9      scratch variable for helper functions
// r10-r11 pointer to list of eob thresholds, eob threshold value,
//         scratch variables within helper functions (backed up)
44*c0909341SAndroid Build Coastguard Worker
// The SIMD registers most often use the following layout:
// d0-d3   multiplication coefficients
// d4-d7   scratch registers
// d8-d15  unused in some transforms, used for scratch registers in others
// d16-d31 inputs/outputs of transforms
50*c0909341SAndroid Build Coastguard Worker
// Potential further optimizations, that are left unimplemented for now:
// - Trying to keep multiplication coefficients in registers across multiple
//   transform functions. (The register layout is designed to potentially
//   allow this.)
// - Use a simplified version of the transforms themselves for cases where
//   we know a significant number of inputs are zero. E.g. if the eob value
//   indicates only a quarter of input values are set, for idct16 and up,
//   a significant amount of calculation can be skipped, at the cost of more
//   code duplication and special casing.
60*c0909341SAndroid Build Coastguard Worker
// Table of signed 16-bit multiplication constants, grouped by the transform
// size named in the label above each group (idct4, idct8, idct16, idct32).
// The second entry is the first one pre-multiplied by 8 (2896*8); that is the
// same value the idct_dc macro in this file feeds to vqrdmulh.
// NOTE(review): the values look like 12-bit fixed-point trigonometric
// constants (2896 ~ 4096/sqrt(2)) - confirm against the C reference.
const idct_coeffs, align=4
        // idct4
        .short          2896, 2896*8, 1567, 3784
        // idct8
        .short          799, 4017, 3406, 2276
        // idct16
        .short          401, 4076, 3166, 2598
        .short          1931, 3612, 3920, 1189
        // idct32
        .short          201, 4091, 3035, 2751
        .short          1751, 3703, 3857, 1380
        .short          995, 3973, 3513, 2106
        .short          2440, 3290, 4052, 601
endconst
75*c0909341SAndroid Build Coastguard Worker
// Table of signed 16-bit constants laid out as four groups of twelve values.
// In every group the first eight values are pre-multiplied by 8 and the last
// four are stored unscaled. Several entries are stored negated.
// NOTE(review): the *8 scaling presumably targets vqrdmulh while the unscaled
// values feed widening multiplies - confirm against the code that loads this
// table (not visible in this part of the file).
const idct64_coeffs, align=4
        .short          101*8, 4095*8, 2967*8, -2824*8
        .short          1660*8, 3745*8, 3822*8, -1474*8
        .short          4076, 401, 4017, 799

        .short          4036*8, -700*8, 2359*8, 3349*8
        .short          3461*8, -2191*8, 897*8, 3996*8
        .short          -3166, -2598, -799, -4017

        .short          501*8, 4065*8, 3229*8, -2520*8
        .short          2019*8, 3564*8, 3948*8, -1092*8
        .short          3612, 1931, 2276, 3406

        .short          4085*8, -301*8, 2675*8, 3102*8
        .short          3659*8, -1842*8, 1285*8, 3889*8
        .short          -3920, -1189, -3406, -2276
endconst
93*c0909341SAndroid Build Coastguard Worker
// Six signed 16-bit constants. The last two halfwords (3344, 0) are placed so
// that they can also be read as one 32-bit element holding 3344, as the note
// below points out.
const iadst4_coeffs, align=4
        // .h[4-5] can be interpreted as .s[2]
        .short          1321, 3803, 2482, 3344, 3344, 0
endconst
98*c0909341SAndroid Build Coastguard Worker
// Eight signed 16-bit constants followed by an eight-entry row labelled
// idct_coeffs. That row repeats 2896, 1567 and 3784 from the idct4 row of the
// idct_coeffs table; its second entry is 0 here instead of 2896*8, and the
// remaining four entries are zero.
const iadst8_coeffs, align=4
        .short          4076, 401, 3612, 1931
        .short          2598, 3166, 1189, 3920
        // idct_coeffs
        .short          2896, 0, 1567, 3784, 0, 0, 0, 0
endconst
105*c0909341SAndroid Build Coastguard Worker
// Sixteen signed 16-bit multiplication constants, stored as four rows of four.
// NOTE(review): presumably consumed pairwise by the 16-point inverse ADST -
// confirm against the code that loads this table.
const iadst16_coeffs, align=4
        .short          4091, 201, 3973, 995
        .short          3703, 1751, 3290, 2440
        .short          2751, 3035, 2106, 3513
        .short          1380, 3857, 601, 4052
endconst
112*c0909341SAndroid Build Coastguard Worker
// d0 = s0 * c0 + s1 * c1
// Signed 16-bit inputs, widened to 32-bit products and sum.
//   d0      destination (32-bit lanes)
//   s0, s1  source vectors (16-bit lanes)
//   c0, c1  multipliers
.macro vmull_vmlal d0, s0, s1, c0, c1
        vmull.s16       \d0, \s0, \c0
        vmlal.s16       \d0, \s1, \c1
.endm
117*c0909341SAndroid Build Coastguard Worker
// Two-register variant of vmull_vmlal:
//   d0 = s0 * c0 + s2 * c1
//   d1 = s1 * c0 + s3 * c1
// Note the argument order: s0/s1 are both multiplied by c0, s2/s3 both by c1.
// Signed 16-bit inputs, 32-bit results.
.macro vmull_vmlal_8h d0, d1, s0, s1, s2, s3, c0, c1
        vmull.s16       \d0, \s0, \c0
        vmlal.s16       \d0, \s2, \c1
        vmull.s16       \d1, \s1, \c0
        vmlal.s16       \d1, \s3, \c1
.endm
124*c0909341SAndroid Build Coastguard Worker
// d0 = s0 * c0 - s1 * c1
// Signed 16-bit inputs, widened to 32-bit products and difference.
//   d0      destination (32-bit lanes)
//   s0, s1  source vectors (16-bit lanes)
//   c0, c1  multipliers
.macro vmull_vmlsl d0, s0, s1, c0, c1
        vmull.s16       \d0, \s0, \c0
        vmlsl.s16       \d0, \s1, \c1
.endm
129*c0909341SAndroid Build Coastguard Worker
// Two-register variant of vmull_vmlsl:
//   d0 = s0 * c0 - s2 * c1
//   d1 = s1 * c0 - s3 * c1
// Note the argument order: s0/s1 are both multiplied by c0, s2/s3 both by c1.
// Signed 16-bit inputs, 32-bit results.
.macro vmull_vmlsl_8h d0, d1, s0, s1, s2, s3, c0, c1
        vmull.s16       \d0, \s0, \c0
        vmlsl.s16       \d0, \s2, \c1
        vmull.s16       \d1, \s1, \c0
        vmlsl.s16       \d1, \s3, \c1
.endm
136*c0909341SAndroid Build Coastguard Worker
// Narrow two vectors of signed 32-bit values to signed 16 bit, using a
// rounding right shift by the given immediate with saturation:
//   d0 = sat16(round(s0 >> shift))
//   d1 = sat16(round(s1 >> shift))
// shift is passed through verbatim, so it must include the leading '#'.
.macro vqrshrn_8h d0, d1, s0, s1, shift
        vqrshrn.s32     \d0, \s0, \shift
        vqrshrn.s32     \d1, \s1, \shift
.endm
141*c0909341SAndroid Build Coastguard Worker
// Scale up to eight vectors of signed 16-bit values in place by the
// multiplier c, using vqrdmulh (saturating, rounding, doubling multiply that
// returns the high half).
//   r0, r1   always scaled
//   r2, r3   scaled only when r2 is given
//   r4-r7    scaled only when r4 is given
// The parameter list now has a comma between r2 and r3 like every other
// parameter; gas accepts either commas or spaces as separators, so existing
// invocations expand exactly as before.
.macro scale_input c, r0, r1, r2, r3, r4, r5, r6, r7
        vqrdmulh.s16    \r0, \r0, \c
        vqrdmulh.s16    \r1, \r1, \c
.ifnb \r2
        vqrdmulh.s16    \r2, \r2, \c
        vqrdmulh.s16    \r3, \r3, \c
.endif
.ifnb \r4
        vqrdmulh.s16    \r4, \r4, \c
        vqrdmulh.s16    \r5, \r5, \c
        vqrdmulh.s16    \r6, \r6, \c
        vqrdmulh.s16    \r7, \r7, \c
.endif
.endm
156*c0909341SAndroid Build Coastguard Worker
// One step of the interleaved "add residual to destination pixels" sequence
// for rows that are 8 pixels wide. Each stage is emitted only when its
// controlling argument is non-blank, which lets the callers below overlap
// the stages of neighbouring rows:
//   load                  d register filled with 8 bytes from [src]
//   shift                 q register of 16-bit values, rounding-shifted
//                         right in place by shiftbits (default 4)
//   addsrc, adddst        adddst += zero-extended 8-bit lanes of addsrc
//                         (stage keyed on addsrc)
//   narrowsrc, narrowdst  narrowdst = narrowsrc narrowed to 8 bit with
//                         unsigned saturation (stage keyed on narrowsrc)
//   store                 d register written as 8 bytes to [dst]
// src and dst are post-incremented by r1 on every load/store and are
// accessed with a :64 alignment qualifier.
.macro load_add_store load, shift, addsrc, adddst, narrowsrc, narrowdst, store, dst, src, shiftbits=4
.ifnb \load
        vld1.8          {\load},  [\src, :64], r1
.endif
.ifnb \shift
        vrshr.s16       \shift,  \shift,  #\shiftbits
.endif
.ifnb \addsrc
        vaddw.u8        \adddst, \adddst, \addsrc
.endif
.ifnb \narrowsrc
        vqmovun.s16     \narrowdst, \narrowsrc
.endif
.ifnb \store
        vst1.8          {\store},  [\dst, :64], r1
.endif
.endm
// Add eight rows of 16-bit values held in q8-q15 to an 8x8 block of 8-bit
// pixels at dst. Each row is rounding-shifted right by shiftbits, added to
// the loaded pixels, saturated to unsigned 8 bit and stored back.
// src is overwritten with a copy of dst and used as the read pointer, while
// dst itself is the write pointer; both advance by r1 per row (eight rows).
// q8-q15 are modified in place and d2-d7 are used as pixel temporaries.
// The staircase below staggers load/shift, add, narrow and store so that
// each invocation works on several different rows at once.
.macro load_add_store_8x8 dst, src, shiftbits=4
        mov             \src, \dst
        load_add_store  d2,  q8,    ,    ,    ,    ,    , \dst, \src, \shiftbits
        load_add_store  d3,  q9,    ,    ,    ,    ,    , \dst, \src, \shiftbits
        load_add_store  d4,  q10, d2,  q8,    ,    ,    , \dst, \src, \shiftbits
        load_add_store  d5,  q11, d3,  q9,  q8,  d2,    , \dst, \src, \shiftbits
        load_add_store  d6,  q12, d4,  q10, q9,  d3,  d2, \dst, \src, \shiftbits
        load_add_store  d7,  q13, d5,  q11, q10, d4,  d3, \dst, \src, \shiftbits
        load_add_store  d2,  q14, d6,  q12, q11, d5,  d4, \dst, \src, \shiftbits
        load_add_store  d3,  q15, d7,  q13, q12, d6,  d5, \dst, \src, \shiftbits
        load_add_store    ,     , d2,  q14, q13, d7,  d6, \dst, \src, \shiftbits
        load_add_store    ,     , d3,  q15, q14, d2,  d7, \dst, \src, \shiftbits
        load_add_store    ,     ,   ,     , q15, d3,  d2, \dst, \src, \shiftbits
        load_add_store    ,     ,   ,     ,    ,   ,  d3, \dst, \src, \shiftbits
.endm
// Four-row counterpart of load_add_store_8x8: adds the 16-bit rows in q8-q11
// to an 8x4 block of 8-bit pixels at dst, using the default rounding shift of
// load_add_store (4 bits).
// src is overwritten with a copy of dst and used as the read pointer; dst is
// the write pointer. Both advance by r1 per row (four rows).
// q8-q11 are modified in place and d2-d5 are used as pixel temporaries.
.macro load_add_store_8x4 dst, src
        mov             \src, \dst
        load_add_store  d2,  q8,    ,    ,    ,    ,    ,  \dst, \src
        load_add_store  d3,  q9,    ,    ,    ,    ,    ,  \dst, \src
        load_add_store  d4,  q10, d2,  q8,    ,    ,    ,  \dst, \src
        load_add_store  d5,  q11, d3,  q9,  q8,  d2,    ,  \dst, \src
        load_add_store    ,     , d4,  q10, q9,  d3,  d2,  \dst, \src
        load_add_store    ,     , d5,  q11, q10, d4,  d3,  \dst, \src
        load_add_store    ,     ,   ,     , q11, d5,  d4,  \dst, \src
        load_add_store    ,     ,   ,     ,    ,   ,  d5,  \dst, \src
.endm
// Variant of load_add_store for rows that are 4 pixels wide. One d register
// carries two rows: 32-bit lane 0 holds one row and lane 1 the next, so each
// load or store stage touches memory twice and advances the pointer by r1
// after each access. Stages are emitted only when their controlling argument
// is non-blank:
//   load                  d register, two 4-byte rows read from [src]
//   shift                 q register of 16-bit values, rounding-shifted
//                         right in place by 4
//   addsrc, adddst        adddst += zero-extended 8-bit lanes of addsrc
//   narrowsrc, narrowdst  narrowdst = narrowsrc narrowed to 8 bit with
//                         unsigned saturation
//   store                 d register, two 4-byte rows written to [dst]
// Memory accesses use a :32 alignment qualifier. The arithmetic stages are
// interleaved between the two halves of the load and of the store.
.macro load_add_store4 load, shift, addsrc, adddst, narrowsrc, narrowdst, store, dst, src
.ifnb \load
        vld1.32         {\load[0]},  [\src, :32], r1
.endif
.ifnb \shift
        vrshr.s16       \shift,  \shift,  #4
.endif
.ifnb \load
        vld1.32         {\load[1]},  [\src, :32], r1
.endif
.ifnb \addsrc
        vaddw.u8        \adddst, \adddst, \addsrc
.endif
.ifnb \store
        vst1.32         {\store[0]},  [\dst, :32], r1
.endif
.ifnb \narrowsrc
        vqmovun.s16     \narrowdst, \narrowsrc
.endif
.ifnb \store
        vst1.32         {\store[1]},  [\dst, :32], r1
.endif
.endm
// Add the 16-bit values in q8-q15 (two 4-pixel rows per q register, sixteen
// rows in total) to a 4x16 block of 8-bit pixels at dst, with a rounding
// right shift by 4 and unsigned saturation to 8 bit.
// src is overwritten with a copy of dst and used as the read pointer; dst is
// the write pointer. Both advance by r1 per row.
// q8-q15 are modified in place and d0-d7 are used as pixel temporaries.
// The first invocation only loads; the shift of each q register is issued
// one step after the load of the pixels it will be added to.
.macro load_add_store_4x16 dst, src
        mov             \src, \dst
        load_add_store4 d0,    ,    ,    ,    ,    ,    ,  \dst, \src
        load_add_store4 d1,  q8,    ,    ,    ,    ,    ,  \dst, \src
        load_add_store4 d2,  q9,  d0,  q8,    ,    ,    ,  \dst, \src
        load_add_store4 d3,  q10, d1,  q9,  q8,  d0,    ,  \dst, \src
        load_add_store4 d4,  q11, d2,  q10, q9,  d1,  d0,  \dst, \src
        load_add_store4 d5,  q12, d3,  q11, q10, d2,  d1,  \dst, \src
        load_add_store4 d6,  q13, d4,  q12, q11, d3,  d2,  \dst, \src
        load_add_store4 d7,  q14, d5,  q13, q12, d4,  d3,  \dst, \src
        load_add_store4   ,  q15, d6,  q14, q13, d5,  d4,  \dst, \src
        load_add_store4   ,     , d7,  q15, q14, d6,  d5,  \dst, \src
        load_add_store4   ,     ,   ,     , q15, d7,  d6,  \dst, \src
        load_add_store4   ,     ,   ,     ,    ,   ,  d7,  \dst, \src
.endm
// Eight-row counterpart of load_add_store_4x16: adds the 16-bit values in
// q8-q11 (two 4-pixel rows per q register) to a 4x8 block of 8-bit pixels at
// dst, with a rounding right shift by 4 and unsigned saturation to 8 bit.
// src is overwritten with a copy of dst and used as the read pointer; dst is
// the write pointer. Both advance by r1 per row.
// q8-q11 are modified in place and d0-d3 are used as pixel temporaries.
.macro load_add_store_4x8 dst, src
        mov             \src, \dst
        load_add_store4 d0,    ,    ,    ,    ,    ,    ,  \dst, \src
        load_add_store4 d1,  q8,    ,    ,    ,    ,    ,  \dst, \src
        load_add_store4 d2,  q9,  d0,  q8,    ,    ,    ,  \dst, \src
        load_add_store4 d3,  q10, d1,  q9,  q8,  d0,    ,  \dst, \src
        load_add_store4   ,  q11, d2,  q10, q9,  d1,  d0,  \dst, \src
        load_add_store4   ,     , d3,  q11, q10, d2,  d1,  \dst, \src
        load_add_store4   ,     ,   ,     , q11, d3,  d2,  \dst, \src
        load_add_store4   ,     ,   ,     ,    ,   ,  d3,  \dst, \src
.endm
249*c0909341SAndroid Build Coastguard Worker
// Fast path for blocks where only the first coefficient is used.
//   w, h    block width and height; w selects the idct_dc_w<w>_neon helper
//   shift   extra rounding right shift applied between the scaling steps
//           (skipped when 0)
// Expects r2 = coefficient pointer and r3 = eob (fourth argument of the
// itxfm_add signature given in the comment at the top of the file).
// If r3 is nonzero, execution continues right after the macro (label 1).
// Otherwise the first coefficient is loaded into every lane of d16, its
// memory slot is cleared, and the value is scaled with vqrdmulh by 2896*8,
// i.e. multiplied by 2896/4096 with rounding:
//   - once unconditionally,
//   - once more when one dimension is exactly twice the other,
//   - then optionally shifted right by shift,
//   - then once more, followed by a rounding right shift by 4.
// The result is left in all lanes of q8 (d16 and d17), r3 is set to h, and
// control transfers to idct_dc_w<w>_neon with a plain branch, so that
// helper returns directly to the caller of the enclosing function.
// Clobbers r12, d0, d16, d17, d20, d30 and the condition flags.
.macro idct_dc w, h, shift
        cmp             r3,  #0
        bne             1f
        vmov.i16        d30, #0
        movw            r12, #2896*8
        vld1.16         {d16[]},  [r2, :16]
        vdup.16         d0,  r12
        vqrdmulh.s16    d16, d16, d0[0]
        vst1.16         {d30[0]}, [r2, :16]
.if (\w == 2*\h) || (2*\w == \h)
        vqrdmulh.s16    d16, d16, d0[0]
.endif
.if \shift > 0
        vrshr.s16       d16, d16, #\shift
.endif
        vqrdmulh.s16    d20, d16, d0[0]
        mov             r3,  #\h
        vrshr.s16       d16, d20, #4
        vrshr.s16       d17, d20, #4
        b               idct_dc_w\w\()_neon
1:
.endm
272*c0909341SAndroid Build Coastguard Worker
// Add the 16-bit values in q8 to every row of a block that is 4 pixels wide.
// In:  r0 = destination pointer, r1 = stride in bytes, r3 = number of rows,
//      q8 = per-lane value to add
// Four rows are handled per iteration (two rows per d register, one per
// 32-bit lane); the loop repeats while the remaining row count is above 0.
// Results are saturated to unsigned 8 bit.
// Clobbers r0, r3, d0, d1, q10, q11 and the condition flags.
function idct_dc_w4_neon
1:
        vld1.32         {d0[0]}, [r0, :32], r1
        vld1.32         {d0[1]}, [r0, :32], r1
        vld1.32         {d1[0]}, [r0, :32], r1
        vld1.32         {d1[1]}, [r0, :32], r1
        subs            r3,  r3,  #4
        // Step back to the first of the four rows just loaded.
        sub             r0,  r0,  r1, lsl #2
        vaddw.u8        q10, q8,  d0
        vqmovun.s16     d0,  q10
        vaddw.u8        q11, q8,  d1
        vst1.32         {d0[0]}, [r0, :32], r1
        vqmovun.s16     d1,  q11
        vst1.32         {d0[1]}, [r0, :32], r1
        vst1.32         {d1[0]}, [r0, :32], r1
        vst1.32         {d1[1]}, [r0, :32], r1
        // Flags still come from the subs above.
        bgt             1b
        bx              lr
endfunc
292*c0909341SAndroid Build Coastguard Worker
// Add the 16-bit values in q8 to every row of a block that is 8 pixels wide.
// In:  r0 = destination pointer, r1 = stride in bytes, r3 = number of rows,
//      q8 = per-lane value to add
// Four rows are handled per iteration; the loop repeats while the remaining
// row count is above 0. Results are saturated to unsigned 8 bit.
// Clobbers r0, r3, d0-d3, q10-q13 and the condition flags.
function idct_dc_w8_neon
1:
        vld1.8          {d0}, [r0, :64], r1
        vld1.8          {d1}, [r0, :64], r1
        vld1.8          {d2}, [r0, :64], r1
        vaddw.u8        q10, q8,  d0
        vld1.8          {d3}, [r0, :64], r1
        // Step back to the first of the four rows just loaded.
        sub             r0,  r0,  r1, lsl #2
        subs            r3,  r3,  #4
        vaddw.u8        q11, q8,  d1
        vqmovun.s16     d0,  q10
        vaddw.u8        q12, q8,  d2
        vqmovun.s16     d1,  q11
        vaddw.u8        q13, q8,  d3
        vst1.8          {d0}, [r0, :64], r1
        vqmovun.s16     d2,  q12
        vst1.8          {d1}, [r0, :64], r1
        vqmovun.s16     d3,  q13
        vst1.8          {d2}, [r0, :64], r1
        vst1.8          {d3}, [r0, :64], r1
        // Flags still come from the subs above.
        bgt             1b
        bx              lr
endfunc
316*c0909341SAndroid Build Coastguard Worker
// Add the 16-bit values in q8 to every row of a block that is 16 pixels wide.
// In:  r0 = destination pointer, r1 = stride in bytes, r3 = number of rows,
//      q8 = per-lane value to add (applied to both 8-pixel halves of a row)
// Four rows are handled per iteration; the loop repeats while the remaining
// row count is above 0. Results are saturated to unsigned 8 bit.
// q10 and q11 are reused for the fourth row once the first row has been
// narrowed.
// Clobbers r0, r3, q0-q3, q10-q15 and the condition flags.
function idct_dc_w16_neon
1:
        vld1.8          {q0}, [r0, :128], r1
        vld1.8          {q1}, [r0, :128], r1
        vld1.8          {q2}, [r0, :128], r1
        subs            r3,  r3,  #4
        vaddw.u8        q10, q8,  d0
        vaddw.u8        q11, q8,  d1
        vld1.8          {q3}, [r0, :128], r1
        vaddw.u8        q12, q8,  d2
        vaddw.u8        q13, q8,  d3
        // Step back to the first of the four rows just loaded.
        sub             r0,  r0,  r1, lsl #2
        vaddw.u8        q14, q8,  d4
        vaddw.u8        q15, q8,  d5
        vqmovun.s16     d0,  q10
        vqmovun.s16     d1,  q11
        vaddw.u8        q10, q8,  d6
        vaddw.u8        q11, q8,  d7
        vqmovun.s16     d2,  q12
        vqmovun.s16     d3,  q13
        vqmovun.s16     d4,  q14
        vqmovun.s16     d5,  q15
        vst1.8          {q0}, [r0, :128], r1
        vqmovun.s16     d6,  q10
        vqmovun.s16     d7,  q11
        vst1.8          {q1}, [r0, :128], r1
        vst1.8          {q2}, [r0, :128], r1
        vst1.8          {q3}, [r0, :128], r1
        // Flags still come from the subs above.
        bgt             1b
        bx              lr
endfunc
348*c0909341SAndroid Build Coastguard Worker
// Add the 16-bit values in q8 to every row of a block that is 32 pixels wide.
// In:  r0 = destination pointer, r1 = stride in bytes, r3 = number of rows,
//      q8 = per-lane value to add (applied to each 8-pixel group of a row)
// Two rows are handled per iteration (q0-q1 hold one row, q2-q3 the next);
// the loop repeats while the remaining row count is above 0. Results are
// saturated to unsigned 8 bit.
// q10 and q11 are reused for the last 16 pixels once the first 16 have been
// narrowed.
// Clobbers r0, r3, q0-q3, q10-q15 and the condition flags.
function idct_dc_w32_neon
1:
        vld1.8          {q0, q1}, [r0, :128], r1
        subs            r3,  r3,  #2
        vld1.8          {q2, q3}, [r0, :128], r1
        vaddw.u8        q10, q8,  d0
        vaddw.u8        q11, q8,  d1
        vaddw.u8        q12, q8,  d2
        vaddw.u8        q13, q8,  d3
        // Step back to the first of the two rows just loaded.
        sub             r0,  r0,  r1, lsl #1
        vaddw.u8        q14, q8,  d4
        vaddw.u8        q15, q8,  d5
        vqmovun.s16     d0,  q10
        vqmovun.s16     d1,  q11
        vaddw.u8        q10, q8,  d6
        vaddw.u8        q11, q8,  d7
        vqmovun.s16     d2,  q12
        vqmovun.s16     d3,  q13
        vqmovun.s16     d4,  q14
        vqmovun.s16     d5,  q15
        vst1.8          {q0, q1}, [r0, :128], r1
        vqmovun.s16     d6,  q10
        vqmovun.s16     d7,  q11
        vst1.8          {q2, q3}, [r0, :128], r1
        // Flags still come from the subs above.
        bgt             1b
        bx              lr
endfunc
376*c0909341SAndroid Build Coastguard Worker
// Add the 16-bit values in q8 to every row of a block that is 64 pixels wide.
// In:  r0 = destination pointer, r1 = stride in bytes, r3 = number of rows,
//      q8 = per-lane value to add (applied to each 8-pixel group of a row)
// One row is handled per iteration as two 32-byte halves; the loop repeats
// while the remaining row count is above 0. Results are saturated to
// unsigned 8 bit.
// r1 is reduced by 32 on entry: the first half of a row is accessed with
// writeback, which already advances r0 by 32 bytes, so post-incrementing by
// the reduced r1 after the second half lands on the start of the next row.
// Clobbers r0, r1, r3, q0-q3, q10-q15 and the condition flags.
function idct_dc_w64_neon
        sub             r1,  r1,  #32
1:
        vld1.8          {q0, q1}, [r0, :128]!
        subs            r3,  r3,  #1
        vld1.8          {q2, q3}, [r0, :128]
        vaddw.u8        q10, q8,  d0
        vaddw.u8        q11, q8,  d1
        vaddw.u8        q12, q8,  d2
        vaddw.u8        q13, q8,  d3
        // Back to the start of the current row for the stores.
        sub             r0,  r0,  #32
        vaddw.u8        q14, q8,  d4
        vaddw.u8        q15, q8,  d5
        vqmovun.s16     d0,  q10
        vqmovun.s16     d1,  q11
        vaddw.u8        q10, q8,  d6
        vaddw.u8        q11, q8,  d7
        vqmovun.s16     d2,  q12
        vqmovun.s16     d3,  q13
        vqmovun.s16     d4,  q14
        vqmovun.s16     d5,  q15
        vst1.8          {q0, q1}, [r0, :128]!
        vqmovun.s16     d6,  q10
        vqmovun.s16     d7,  q11
        vst1.8          {q2, q3}, [r0, :128], r1
        // Flags still come from the subs above.
        bgt             1b
        bx              lr
endfunc
405*c0909341SAndroid Build Coastguard Worker
// iwht4: in-place 4-point butterfly on d16-d19 (four s16 lanes each), built
// only from wrapping adds/subtracts and one arithmetic shift right by 1.
// Going by the name and its use in the wht_wht function this is one pass of
// the inverse Walsh-Hadamard transform.
//   in/out: d16, d17, d18, d19 (called in0..in3 / out0..out3 below)
//   scratch: d20, d21
// The instruction order matters: each input is overwritten only after its
// last read.
406*c0909341SAndroid Build Coastguard Worker.macro iwht4
407*c0909341SAndroid Build Coastguard Worker        vadd.i16        d16, d16, d17 // a    = in0 + in1
408*c0909341SAndroid Build Coastguard Worker        vsub.i16        d21, d18, d19 // b    = in2 - in3
409*c0909341SAndroid Build Coastguard Worker        vsub.i16        d20, d16, d21 // a - b
410*c0909341SAndroid Build Coastguard Worker        vshr.s16        d20, d20, #1  // e    = (a - b) >> 1
411*c0909341SAndroid Build Coastguard Worker        vsub.i16        d18, d20, d17 // out2 = e - in1
412*c0909341SAndroid Build Coastguard Worker        vsub.i16        d17, d20, d19 // out1 = e - in3
413*c0909341SAndroid Build Coastguard Worker        vadd.i16        d19, d21, d18 // out3 = b + out2
414*c0909341SAndroid Build Coastguard Worker        vsub.i16        d16, d16, d17 // out0 = a - out1
415*c0909341SAndroid Build Coastguard Worker.endm
416*c0909341SAndroid Build Coastguard Worker
// idct_4h_x4: 4-point inverse DCT, in place, on four d registers holding
// 4 x s16 each.
//   r0-r3 (macro arguments): inputs 0-3 on entry, outputs 0-3 on exit
//   d0: coefficients; lanes 0, 2 and 3 are used. The wrapper functions in
//       this file load it from idct_coeffs.
// Clobbers q1-q3 (d2-d7).
// vmull_vmlal and vmull_vmlsl are helper macros defined outside this
// section; presumably they compute dst = a*c0 + b*c1 and dst = a*c0 - b*c1
// as 32-bit products (verify against their definitions).
// Each 32-bit result is narrowed back to s16 with a rounding, saturating
// shift right by 12.
417*c0909341SAndroid Build Coastguard Worker.macro idct_4h_x4 r0, r1, r2, r3
418*c0909341SAndroid Build Coastguard Worker        vmull_vmlal     q3,  \r1, \r3, d0[3], d0[2] // -> t3 (odd part)
419*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl     q2,  \r1, \r3, d0[2], d0[3] // -> t2 (odd part)
420*c0909341SAndroid Build Coastguard Worker        vmull_vmlal     q1,  \r0, \r2, d0[0], d0[0] // -> t0 (even part)
421*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d6,  q3,  #12 // t3
422*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d7,  q2,  #12 // t2; frees q2 for reuse below
423*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl     q2,  \r0, \r2, d0[0], d0[0] // -> t1 (even part)
424*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d2,  q1,  #12 // t0
425*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d3,  q2,  #12 // t1
426*c0909341SAndroid Build Coastguard Worker        vqadd.s16       \r0, d2,  d6 // out0 = t0 + t3
427*c0909341SAndroid Build Coastguard Worker        vqsub.s16       \r3, d2,  d6 // out3 = t0 - t3
428*c0909341SAndroid Build Coastguard Worker        vqadd.s16       \r1, d3,  d7 // out1 = t1 + t2
429*c0909341SAndroid Build Coastguard Worker        vqsub.s16       \r2, d3,  d7 // out2 = t1 - t2
430*c0909341SAndroid Build Coastguard Worker.endm
431*c0909341SAndroid Build Coastguard Worker
// idct_8h_x4: 8-lane version of idct_4h_x4 -- a 4-point inverse DCT on four
// q registers holding 8 x s16 each.
//   q0-q3 (macro arguments): the four q registers that receive outputs 0-3
//   r0-r7 (macro arguments): the d-register halves of the four inputs, in
//       the order in0.lo, in0.hi, in1.lo, in1.hi, in2.lo, in2.hi, in3.lo,
//       in3.hi (see the invocation in inv_dct_8h_x4_neon)
//   d0: coefficients; lanes 0, 2 and 3 are used
// Clobbers q2-q7 (d4-d15).
// NOTE(review): d8-d15 are callee-saved under the AArch32 procedure call
// standard; nothing in this macro saves them, so the exported entry points
// presumably do -- verify.
// Register names written without a backslash in the body (q2-q7, d4-d15) are
// the physical scratch registers, not the macro arguments of the same name.
// The *_8h helper macros are defined outside this section; presumably they
// are the two-register-wide forms of vmull_vmlal / vmull_vmlsl / vqrshrn.
432*c0909341SAndroid Build Coastguard Worker.macro idct_8h_x4 q0, q1, q2, q3, r0, r1, r2, r3, r4, r5, r6, r7
433*c0909341SAndroid Build Coastguard Worker        vmull_vmlal_8h  q6,  q7,  \r2, \r3, \r6, \r7, d0[3], d0[2] // -> t3 (odd part)
434*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl_8h  q4,  q5,  \r2, \r3, \r6, \r7, d0[2], d0[3] // -> t2 (odd part)
435*c0909341SAndroid Build Coastguard Worker        vmull_vmlal_8h  q2,  q3,  \r0, \r1, \r4, \r5, d0[0], d0[0] // -> t0 (even part)
436*c0909341SAndroid Build Coastguard Worker        vqrshrn_8h      d12, d13, q6,  q7,  #12 // t3 -> physical q6
437*c0909341SAndroid Build Coastguard Worker        vqrshrn_8h      d14, d15, q4,  q5,  #12 // t2 -> physical q7; frees q4/q5
438*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl_8h  q4,  q5,  \r0, \r1, \r4, \r5, d0[0], d0[0] // -> t1 (even part)
439*c0909341SAndroid Build Coastguard Worker        vqrshrn_8h      d4,  d5,  q2,  q3,  #12 // t0 -> physical q2
440*c0909341SAndroid Build Coastguard Worker        vqrshrn_8h      d6,  d7,  q4,  q5,  #12 // t1 -> physical q3
441*c0909341SAndroid Build Coastguard Worker        vqadd.s16       \q0, q2,  q6 // out0 = t0 + t3
442*c0909341SAndroid Build Coastguard Worker        vqsub.s16       \q3, q2,  q6 // out3 = t0 - t3
443*c0909341SAndroid Build Coastguard Worker        vqadd.s16       \q1, q3,  q7 // out1 = t1 + t2
444*c0909341SAndroid Build Coastguard Worker        vqsub.s16       \q2, q3,  q7 // out2 = t1 - t2
445*c0909341SAndroid Build Coastguard Worker.endm
446*c0909341SAndroid Build Coastguard Worker
// inv_dct_4h_x4_neon: 4-point inverse DCT on d16-d19 (4 x s16 each), in
// place. Loads the first four 16-bit entries of idct_coeffs into d0 and
// expands idct_4h_x4.
// Clobbers r12, d0 and the idct_4h_x4 scratch registers q1-q3.
447*c0909341SAndroid Build Coastguard Workerfunction inv_dct_4h_x4_neon, export=1
448*c0909341SAndroid Build Coastguard Worker        movrel_local    r12, idct_coeffs
449*c0909341SAndroid Build Coastguard Worker        vld1.16         {d0}, [r12, :64]
450*c0909341SAndroid Build Coastguard Worker        idct_4h_x4      d16, d17, d18, d19
451*c0909341SAndroid Build Coastguard Worker        bx              lr
452*c0909341SAndroid Build Coastguard Workerendfunc
453*c0909341SAndroid Build Coastguard Worker
// inv_dct_8h_x4_neon: 4-point inverse DCT on q8-q11 (8 x s16 each), in
// place. Loads the first four 16-bit entries of idct_coeffs into d0 and
// expands idct_8h_x4 with q8-q11 as outputs and d16-d23 as the input halves.
// Clobbers r12, d0 and the idct_8h_x4 scratch registers q2-q7.
454*c0909341SAndroid Build Coastguard Workerfunction inv_dct_8h_x4_neon, export=1
455*c0909341SAndroid Build Coastguard Worker        movrel_local    r12, idct_coeffs
456*c0909341SAndroid Build Coastguard Worker        vld1.16         {d0}, [r12, :64]
457*c0909341SAndroid Build Coastguard Worker        idct_8h_x4      q8,  q9,  q10, q11, d16, d17, d18, d19, d20, d21, d22, d23
458*c0909341SAndroid Build Coastguard Worker        bx              lr
459*c0909341SAndroid Build Coastguard Workerendfunc
460*c0909341SAndroid Build Coastguard Worker
// iadst_4x4: 4-point inverse ADST on 4 x s16 lanes.
//   inputs:  always d16, d17, d18, d19 (in0..in3)
//   o0-o3 (macro arguments): d registers receiving out0..out3. They may be
//       d16-d19 in any order, because every input is read for the last time
//       before the first output is written.
// Loads iadst4_coeffs into d0/d1 through r12; d0[0..3] are used as 16-bit
// multipliers (c0..c3 below) and d1[0] as a 32-bit multiplier.
// Clobbers r12, q0-q3, q10, q11.
461*c0909341SAndroid Build Coastguard Worker.macro iadst_4x4 o0, o1, o2, o3
462*c0909341SAndroid Build Coastguard Worker        movrel_local    r12, iadst4_coeffs
463*c0909341SAndroid Build Coastguard Worker        vld1.16         {d0, d1}, [r12, :128]
464*c0909341SAndroid Build Coastguard Worker
465*c0909341SAndroid Build Coastguard Worker        vsubl.s16       q1,  d16, d18     // q1  = in0 - in2 (widened to 32 bit)
466*c0909341SAndroid Build Coastguard Worker        vmull.s16       q2,  d16, d0[0]   // q2  = in0*c0
467*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q2,  d18, d0[1]   //     + in2*c1
468*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q2,  d19, d0[2]   //     + in3*c2
469*c0909341SAndroid Build Coastguard Worker        vmull.s16       q10, d17, d0[3]   // q10 = in1*c3
470*c0909341SAndroid Build Coastguard Worker        vaddw.s16       q1,  q1,  d19     // q1  = in0 - in2 + in3
471*c0909341SAndroid Build Coastguard Worker        vmull.s16       q3,  d16, d0[2]   // q3  = in0*c2
472*c0909341SAndroid Build Coastguard Worker        vmlsl.s16       q3,  d18, d0[0]   //     - in2*c0
473*c0909341SAndroid Build Coastguard Worker        vmlsl.s16       q3,  d19, d0[1]   //     - in3*c1
474*c0909341SAndroid Build Coastguard Worker
475*c0909341SAndroid Build Coastguard Worker        vadd.s32        q11, q2,  q3      // q11 = q2 + q3 (out3 before subtracting q10)
476*c0909341SAndroid Build Coastguard Worker        vmul.s32        q1,  q1,  d1[0]   // out2 = (in0 - in2 + in3) * d1[0]
477*c0909341SAndroid Build Coastguard Worker        vadd.s32        q2,  q2,  q10     // out0 = q2 + in1*c3
478*c0909341SAndroid Build Coastguard Worker        vadd.s32        q3,  q3,  q10     // out1 = q3 + in1*c3
479*c0909341SAndroid Build Coastguard Worker        vsub.s32        q11, q11, q10     // out3 = q2 + q3 - in1*c3
480*c0909341SAndroid Build Coastguard Worker
481*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     \o0, q2,  #12     // round, shift by 12 and saturate to s16
482*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     \o2, q1,  #12
483*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     \o1, q3,  #12
484*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     \o3, q11, #12
485*c0909341SAndroid Build Coastguard Worker.endm
486*c0909341SAndroid Build Coastguard Worker
// inv_adst_4h_x4_neon: 4-point inverse ADST on d16-d19 (4 x s16 each), in
// place, with outputs 0..3 written to d16..d19 in that order.
// Clobbers everything listed for iadst_4x4 (r12, q0-q3, q10, q11).
487*c0909341SAndroid Build Coastguard Workerfunction inv_adst_4h_x4_neon, export=1
488*c0909341SAndroid Build Coastguard Worker        iadst_4x4       d16, d17, d18, d19
489*c0909341SAndroid Build Coastguard Worker        bx              lr
490*c0909341SAndroid Build Coastguard Workerendfunc
491*c0909341SAndroid Build Coastguard Worker
// inv_flipadst_4h_x4_neon: same computation as inv_adst_4h_x4_neon, but the
// output registers are passed in reverse, so out0..out3 land in d19..d16.
// Clobbers everything listed for iadst_4x4 (r12, q0-q3, q10, q11).
492*c0909341SAndroid Build Coastguard Workerfunction inv_flipadst_4h_x4_neon, export=1
493*c0909341SAndroid Build Coastguard Worker        iadst_4x4       d19, d18, d17, d16
494*c0909341SAndroid Build Coastguard Worker        bx              lr
495*c0909341SAndroid Build Coastguard Workerendfunc
496*c0909341SAndroid Build Coastguard Worker
// iadst_8x4: 8-lane version of iadst_4x4 -- a 4-point inverse ADST on
// 8 x s16 lanes, processed as low and high d-register halves.
//   inputs:  always q8-q11: in0 = d16/d17, in1 = d18/d19, in2 = d20/d21,
//            in3 = d22/d23
//   o0-o7 (macro arguments): d registers for out0 (o0/o1), out1 (o2/o3),
//            out2 (o4/o5) and out3 (o6/o7)
// out2 is always produced in d20/d21. o4/o5 are never written directly; the
// only other placement handled is o4 == d18, where q10 is copied to q9. The
// two invocations in this file use exactly these two placements.
// Loads iadst4_coeffs into d0/d1 through r12.
// Clobbers r12, q0, q2-q7, and uses q8-q11 as working registers.
// NOTE(review): d8-d15 (q4-q7) are callee-saved under the AArch32 procedure
// call standard; they are not saved here, so the exported entry points
// presumably do that -- verify.
497*c0909341SAndroid Build Coastguard Worker.macro iadst_8x4 o0, o1, o2, o3, o4, o5, o6, o7
498*c0909341SAndroid Build Coastguard Worker        movrel_local    r12, iadst4_coeffs
499*c0909341SAndroid Build Coastguard Worker        vld1.16         {d0, d1}, [r12, :128]
500*c0909341SAndroid Build Coastguard Worker
501*c0909341SAndroid Build Coastguard Worker        vsubl.s16       q2,  d16, d20 // q2/q3 = in0 - in2 (widened to 32 bit)
502*c0909341SAndroid Build Coastguard Worker        vsubl.s16       q3,  d17, d21
503*c0909341SAndroid Build Coastguard Worker        vmull.s16       q4,  d16, d0[0] // q4/q5 = in0*c0 + in2*c1 + in3*c2
504*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q4,  d20, d0[1]
505*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q4,  d22, d0[2]
506*c0909341SAndroid Build Coastguard Worker        vmull.s16       q5,  d17, d0[0]
507*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q5,  d21, d0[1]
508*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q5,  d23, d0[2]
509*c0909341SAndroid Build Coastguard Worker        vaddw.s16       q2,  q2,  d22 // q2/q3 = in0 - in2 + in3
510*c0909341SAndroid Build Coastguard Worker        vaddw.s16       q3,  q3,  d23
511*c0909341SAndroid Build Coastguard Worker        vmull.s16       q6,  d16, d0[2] // q6/q7 = in0*c2 - in2*c0 - in3*c1
512*c0909341SAndroid Build Coastguard Worker        vmlsl.s16       q6,  d20, d0[0]
513*c0909341SAndroid Build Coastguard Worker        vmlsl.s16       q6,  d22, d0[1]
514*c0909341SAndroid Build Coastguard Worker        vmull.s16       q7,  d17, d0[2]
515*c0909341SAndroid Build Coastguard Worker        vmlsl.s16       q7,  d21, d0[0]
516*c0909341SAndroid Build Coastguard Worker        vmlsl.s16       q7,  d23, d0[1]
517*c0909341SAndroid Build Coastguard Worker
518*c0909341SAndroid Build Coastguard Worker        vmul.s32        q10, q2,  d1[0] // out2 (32 bit); in2/in3 are dead from here on
519*c0909341SAndroid Build Coastguard Worker        vmul.s32        q11, q3,  d1[0]
520*c0909341SAndroid Build Coastguard Worker
521*c0909341SAndroid Build Coastguard Worker        vmull.s16       q2,  d18, d0[3] // q2/q3 = in1*c3
522*c0909341SAndroid Build Coastguard Worker        vmull.s16       q3,  d19, d0[3]
523*c0909341SAndroid Build Coastguard Worker
524*c0909341SAndroid Build Coastguard Worker        vadd.s32        q8,  q4,  q2 // out0
525*c0909341SAndroid Build Coastguard Worker        vadd.s32        q9,  q5,  q3
526*c0909341SAndroid Build Coastguard Worker
527*c0909341SAndroid Build Coastguard Worker        vadd.s32        q4,  q4,  q6 // out3, before in1*c3 is subtracted below
528*c0909341SAndroid Build Coastguard Worker        vadd.s32        q5,  q5,  q7
529*c0909341SAndroid Build Coastguard Worker
530*c0909341SAndroid Build Coastguard Worker        vadd.s32        q6,  q6,  q2 // out1
531*c0909341SAndroid Build Coastguard Worker        vadd.s32        q7,  q7,  q3
532*c0909341SAndroid Build Coastguard Worker
533*c0909341SAndroid Build Coastguard Worker        vsub.s32        q4,  q4,  q2 // out3
534*c0909341SAndroid Build Coastguard Worker        vsub.s32        q5,  q5,  q3
535*c0909341SAndroid Build Coastguard Worker
536*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d20, q10, #12 // out2 narrowed into q10 (d20/d21); frees q11
537*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d21, q11, #12
538*c0909341SAndroid Build Coastguard Worker
539*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     \o0, q8,  #12 // out0; q8/q9 are fully consumed after these two
540*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     \o1, q9,  #12
541*c0909341SAndroid Build Coastguard Worker
// Flipped output order: move out2 from q10 to q9 (o4/o5) now that q9 has
// been consumed, and before o2/o3 (d20/d21 in that order) overwrite q10.
542*c0909341SAndroid Build Coastguard Worker.ifc \o4, d18
543*c0909341SAndroid Build Coastguard Worker        vmov            q9,  q10
544*c0909341SAndroid Build Coastguard Worker.endif
545*c0909341SAndroid Build Coastguard Worker
546*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     \o2, q6,  #12 // out1
547*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     \o3, q7,  #12
548*c0909341SAndroid Build Coastguard Worker
549*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     \o6, q4,  #12 // out3
550*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     \o7, q5,  #12
551*c0909341SAndroid Build Coastguard Worker.endm
552*c0909341SAndroid Build Coastguard Worker
// inv_adst_8h_x4_neon: 4-point inverse ADST on q8-q11 (8 x s16 each), in
// place, with out0..out3 written to q8..q11 in that order.
// Clobbers everything listed for iadst_8x4 (r12, q0, q2-q7).
553*c0909341SAndroid Build Coastguard Workerfunction inv_adst_8h_x4_neon, export=1
554*c0909341SAndroid Build Coastguard Worker        iadst_8x4       d16, d17, d18, d19, d20, d21, d22, d23
555*c0909341SAndroid Build Coastguard Worker        bx              lr
556*c0909341SAndroid Build Coastguard Workerendfunc
557*c0909341SAndroid Build Coastguard Worker
// inv_flipadst_8h_x4_neon: same computation as inv_adst_8h_x4_neon, but the
// output register pairs are passed in reverse, so out0..out3 land in
// q11, q10, q9, q8. Passing d18 as the fifth argument selects the
// vmov q9, q10 path inside iadst_8x4.
// Clobbers everything listed for iadst_8x4 (r12, q0, q2-q7).
558*c0909341SAndroid Build Coastguard Workerfunction inv_flipadst_8h_x4_neon, export=1
559*c0909341SAndroid Build Coastguard Worker        iadst_8x4       d22, d23, d20, d21, d18, d19, d16, d17
560*c0909341SAndroid Build Coastguard Worker        bx              lr
561*c0909341SAndroid Build Coastguard Workerendfunc
562*c0909341SAndroid Build Coastguard Worker
// inv_identity_4h_x4_neon: scale the 16 s16 values in q8/q9 (d16-d19) by
// 5793/4096 (about 1.414), in place.
// The factor is split as x + x*(5793-4096)/4096: vqrdmulh returns the
// rounded, saturated high half of 2*x*c, i.e. x*c/32768, so with
// c = (5793-4096)*8 it yields x*(5793-4096)/4096, which is then added to x
// with saturation.
// Clobbers r12, d0, q2, q3.
563*c0909341SAndroid Build Coastguard Workerfunction inv_identity_4h_x4_neon, export=1
564*c0909341SAndroid Build Coastguard Worker        movw            r12, #(5793-4096)*8
565*c0909341SAndroid Build Coastguard Worker        vdup.16         d0,  r12
566*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s16    q2,  q8,  d0[0] // x * (5793-4096)/4096
567*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s16    q3,  q9,  d0[0]
568*c0909341SAndroid Build Coastguard Worker        vqadd.s16       q8,  q8,  q2    // x + x * (5793-4096)/4096
569*c0909341SAndroid Build Coastguard Worker        vqadd.s16       q9,  q9,  q3
570*c0909341SAndroid Build Coastguard Worker        bx              lr
571*c0909341SAndroid Build Coastguard Workerendfunc
572*c0909341SAndroid Build Coastguard Worker
// inv_identity_8h_x4_neon: scale the 32 s16 values in q8-q11 by 5793/4096
// (about 1.414), in place, as x + vqrdmulh(x, (5793-4096)*8) with a
// saturating add. vqrdmulh returns the rounded, saturated high half of
// 2*x*c, so the second term equals x*(5793-4096)/4096.
// q1 is used twice as a temporary (for q8 and then for q11), which is why
// the q8 add is issued before the q11 multiply.
// Clobbers r12, d0, q1-q3.
573*c0909341SAndroid Build Coastguard Workerfunction inv_identity_8h_x4_neon, export=1
574*c0909341SAndroid Build Coastguard Worker        movw            r12, #(5793-4096)*8
575*c0909341SAndroid Build Coastguard Worker        vdup.16         d0,  r12
576*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s16    q1,  q8,  d0[0]
577*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s16    q2,  q9,  d0[0]
578*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s16    q3,  q10, d0[0]
579*c0909341SAndroid Build Coastguard Worker        vqadd.s16       q8,  q8,  q1    // consumes q1 so it can be reused below
580*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s16    q1,  q11, d0[0]
581*c0909341SAndroid Build Coastguard Worker        vqadd.s16       q9,  q9,  q2
582*c0909341SAndroid Build Coastguard Worker        vqadd.s16       q10, q10, q3
583*c0909341SAndroid Build Coastguard Worker        vqadd.s16       q11, q11, q1
584*c0909341SAndroid Build Coastguard Worker        bx              lr
585*c0909341SAndroid Build Coastguard Workerendfunc
586*c0909341SAndroid Build Coastguard Worker
// identity_8x4_shift1: for each of the four q registers r0-r3 (8 x s16)
// replace x by the rounding halving sum of x and vqrdmulh(x, c), i.e.
//   x = (x + vqrdmulh(x, c) + 1) >> 1
//   r0-r3: registers updated in place, processed in argument order
//   c:     16-bit scalar multiplier operand, e.g. d0[0]
// Clobbers q1, so q1 must not be one of r0-r3.
.macro identity_8x4_shift1 r0, r1, r2, r3, c
        vqrdmulh.s16    q1,  \r0, \c
        vrhadd.s16      \r0, \r0, q1
        vqrdmulh.s16    q1,  \r1, \c
        vrhadd.s16      \r1, \r1, q1
        vqrdmulh.s16    q1,  \r2, \c
        vrhadd.s16      \r2, \r2, q1
        vqrdmulh.s16    q1,  \r3, \c
        vrhadd.s16      \r3, \r3, q1
.endm
593*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_wht_wht_4x4_8bpc_neon: 4x4 transform built from two iwht4
// passes with a transpose in between; the result is added to a 4x4 block of
// 8-bit pixels.
//   r0: pixel pointer (4 bytes read per row here, rewritten at itx_4x4_end)
//   r1: row stride in bytes
//   r2: 16 s16 coefficients, 16-byte aligned; all 32 bytes are overwritten
//       with zeros
// The function does not return by itself: it pushes {r4-r5,lr} and branches
// to L(itx_4x4_end) inside inv_txfm_add_4x4_neon, which does the add/store
// and pops {r4-r5,pc}. r4/r5 are not used on this path; they are pushed so
// that the frame matches that shared epilogue.
594*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_wht_wht_4x4_8bpc_neon, export=1
595*c0909341SAndroid Build Coastguard Worker        push            {r4-r5,lr}
596*c0909341SAndroid Build Coastguard Worker        vmov.i16        q15, #0
597*c0909341SAndroid Build Coastguard Worker        vld1.16         {d16, d17, d18, d19}, [r2, :128]
598*c0909341SAndroid Build Coastguard Worker        vst1.16         {q15}, [r2, :128]! // clear the first 16 bytes of coefficients
599*c0909341SAndroid Build Coastguard Worker
600*c0909341SAndroid Build Coastguard Worker        vshr.s16        q8,  q8,  #2 // arithmetic pre-shift of all coefficients by 2
601*c0909341SAndroid Build Coastguard Worker        vshr.s16        q9,  q9,  #2
602*c0909341SAndroid Build Coastguard Worker
603*c0909341SAndroid Build Coastguard Worker        iwht4
604*c0909341SAndroid Build Coastguard Worker
605*c0909341SAndroid Build Coastguard Worker        vst1.16         {q15}, [r2, :128]! // clear the remaining 16 bytes
606*c0909341SAndroid Build Coastguard Worker        transpose_4x4h  q8,  q9,  d16, d17, d18, d19
607*c0909341SAndroid Build Coastguard Worker
608*c0909341SAndroid Build Coastguard Worker        iwht4
609*c0909341SAndroid Build Coastguard Worker
610*c0909341SAndroid Build Coastguard Worker        vld1.32         {d0[]},  [r0, :32], r1 // row 0 (loaded to all lanes; lane 1 is replaced next)
611*c0909341SAndroid Build Coastguard Worker        vld1.32         {d0[1]}, [r0, :32], r1 // row 1
612*c0909341SAndroid Build Coastguard Worker        vld1.32         {d1[]},  [r0, :32], r1 // row 2
613*c0909341SAndroid Build Coastguard Worker        vld1.32         {d1[1]}, [r0, :32], r1 // row 3; r0 is now 4 rows past the block
614*c0909341SAndroid Build Coastguard Worker
615*c0909341SAndroid Build Coastguard Worker        b               L(itx_4x4_end) // no extra rounding shift on this path
616*c0909341SAndroid Build Coastguard Workerendfunc
617*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_4x4_neon: shared body of the 4x4 inverse transforms: first
// pass, transpose, second pass, rounding shift by 4, then add to a 4x4 block
// of 8-bit pixels.
//   r0: pixel pointer (4 bytes per row, 4-byte aligned per the :32 hints)
//   r1: row stride in bytes
//   r2: 16 s16 coefficients, 16-byte aligned; all 32 bytes are overwritten
//       with zeros
//   r4: address of the first-pass transform, called with blx
//   r5: address of the second-pass transform, called with blx
// Both transforms work in place on d16-d19 (q8/q9). The one called through
// r4 must leave r0-r2 and q15 untouched, since q15 (zero) is stored again
// after it returns.
// This code is reached by a branch, not a call: the def_fn_4x4 stubs push
// {r4-r5,lr} first (blx overwrites lr), and the pop at the end returns to
// the caller of the stub.
// L(itx_4x4_end) is a second entry point used by the DC-only and WHT paths.
// It expects the residual in q8/q9, the four pixel rows in d0/d1, r0 four
// rows past the block, and the same {r4-r5,lr} frame on the stack.
618*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_4x4_neon
619*c0909341SAndroid Build Coastguard Worker        vmov.i16        q15, #0
620*c0909341SAndroid Build Coastguard Worker        vld1.16         {d16, d17, d18, d19}, [r2, :128]
621*c0909341SAndroid Build Coastguard Worker        vst1.16         {q15}, [r2, :128]! // clear the first 16 bytes of coefficients
622*c0909341SAndroid Build Coastguard Worker
623*c0909341SAndroid Build Coastguard Worker        blx             r4 // first pass
624*c0909341SAndroid Build Coastguard Worker
625*c0909341SAndroid Build Coastguard Worker        vst1.16         {q15}, [r2, :128]! // clear the remaining 16 bytes
626*c0909341SAndroid Build Coastguard Worker        transpose_4x4h  q8,  q9,  d16, d17, d18, d19
627*c0909341SAndroid Build Coastguard Worker
628*c0909341SAndroid Build Coastguard Worker        blx             r5 // second pass
629*c0909341SAndroid Build Coastguard Worker
630*c0909341SAndroid Build Coastguard Worker        vld1.32         {d0[]},  [r0, :32], r1 // row 0 (loaded to all lanes; lane 1 is replaced next)
631*c0909341SAndroid Build Coastguard Worker        vld1.32         {d0[1]}, [r0, :32], r1 // row 1
632*c0909341SAndroid Build Coastguard Worker        vld1.32         {d1[]},  [r0, :32], r1 // row 2
633*c0909341SAndroid Build Coastguard Worker        vld1.32         {d1[1]}, [r0, :32], r1 // row 3
634*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q8,  q8,  #4 // final rounding shift of the residual
635*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q9,  q9,  #4
636*c0909341SAndroid Build Coastguard Worker
637*c0909341SAndroid Build Coastguard WorkerL(itx_4x4_end):
638*c0909341SAndroid Build Coastguard Worker        sub             r0,  r0,  r1, lsl #2 // rewind the four row loads
639*c0909341SAndroid Build Coastguard Worker        vaddw.u8        q8,  q8,  d0 // residual + pixels, rows 0-1
640*c0909341SAndroid Build Coastguard Worker        vqmovun.s16     d0,  q8      // saturate to u8
641*c0909341SAndroid Build Coastguard Worker        vaddw.u8        q9,  q9,  d1 // rows 2-3
642*c0909341SAndroid Build Coastguard Worker        vst1.32         {d0[0]}, [r0, :32], r1
643*c0909341SAndroid Build Coastguard Worker        vqmovun.s16     d1,  q9
644*c0909341SAndroid Build Coastguard Worker        vst1.32         {d0[1]}, [r0, :32], r1
645*c0909341SAndroid Build Coastguard Worker        vst1.32         {d1[0]}, [r0, :32], r1
646*c0909341SAndroid Build Coastguard Worker        vst1.32         {d1[1]}, [r0, :32], r1
647*c0909341SAndroid Build Coastguard Worker
648*c0909341SAndroid Build Coastguard Worker        pop             {r4-r5,pc} // matches the push done by whoever branched here
649*c0909341SAndroid Build Coastguard Workerendfunc
650*c0909341SAndroid Build Coastguard Worker
// def_fn_4x4 txfm1, txfm2: define the exported entry point
// inv_txfm_add_<txfm1>_<txfm2>_4x4_8bpc_neon.
// The generated function pushes {r4-r5,lr}, loads r4/r5 with the addresses
// of inv_<txfm1>_4h_x4_neon and inv_<txfm2>_4h_x4_neon, and branches to
// inv_txfm_add_4x4_neon, which runs both passes and returns to the caller
// through pop {r4-r5,pc}.
// For dct_dct only there is a fast path taken when r3 == 0 (r3 is
// presumably the end-of-block index, so 0 would mean only the DC coefficient
// is set -- confirm against the C prototype). That path:
//   - loads the first coefficient into all lanes of d16 and zeroes it in
//     memory,
//   - multiplies it twice by 2896/4096 (vqrdmulh by 2896*8), once per pass,
//   - applies the rounding shift by 4 and replicates it across q8/q9,
//   - loads the four pixel rows into d0/d1 and joins L(itx_4x4_end).
651*c0909341SAndroid Build Coastguard Worker.macro def_fn_4x4 txfm1, txfm2
652*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_\txfm1\()_\txfm2\()_4x4_8bpc_neon, export=1
653*c0909341SAndroid Build Coastguard Worker        push            {r4-r5,lr}
654*c0909341SAndroid Build Coastguard Worker
655*c0909341SAndroid Build Coastguard Worker.ifc \txfm1\()_\txfm2, dct_dct
656*c0909341SAndroid Build Coastguard Worker        cmp             r3,  #0
657*c0909341SAndroid Build Coastguard Worker        bne             1f
658*c0909341SAndroid Build Coastguard Worker        vmov.i16        d30, #0
659*c0909341SAndroid Build Coastguard Worker        movw            r12, #2896*8
660*c0909341SAndroid Build Coastguard Worker        vld1.16         {d16[]},  [r2, :16] // DC coefficient to all four lanes
661*c0909341SAndroid Build Coastguard Worker        vdup.16         d4,  r12
662*c0909341SAndroid Build Coastguard Worker        vst1.16         {d30[0]}, [r2, :16] // clear the DC coefficient in memory
663*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s16    d16, d16, d4[0]     // dc * 2896/4096 (first pass)
664*c0909341SAndroid Build Coastguard Worker        vld1.32         {d0[0]},  [r0, :32], r1
665*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s16    d20, d16, d4[0]     // * 2896/4096 again (second pass)
666*c0909341SAndroid Build Coastguard Worker        vld1.32         {d0[1]},  [r0, :32], r1
667*c0909341SAndroid Build Coastguard Worker        vrshr.s16       d16, d20, #4        // same rounding shift as the full path
668*c0909341SAndroid Build Coastguard Worker        vrshr.s16       d17, d20, #4
669*c0909341SAndroid Build Coastguard Worker        vld1.32         {d1[0]},  [r0, :32], r1
670*c0909341SAndroid Build Coastguard Worker        vmov            q9,  q8             // all 16 residual values are equal
671*c0909341SAndroid Build Coastguard Worker        vld1.32         {d1[1]}, [r0, :32], r1
672*c0909341SAndroid Build Coastguard Worker        b               L(itx_4x4_end)
673*c0909341SAndroid Build Coastguard Worker1:
674*c0909341SAndroid Build Coastguard Worker.endif
675*c0909341SAndroid Build Coastguard Worker        movrel_local    r4,  inv_\txfm1\()_4h_x4_neon
676*c0909341SAndroid Build Coastguard Worker        movrel_local    r5,  inv_\txfm2\()_4h_x4_neon
677*c0909341SAndroid Build Coastguard Worker        b               inv_txfm_add_4x4_neon // branch, not a call: lr is already on the stack
678*c0909341SAndroid Build Coastguard Workerendfunc
679*c0909341SAndroid Build Coastguard Worker.endm
680*c0909341SAndroid Build Coastguard Worker
// Instantiate the 4x4 entry points for all 16 combinations of
// {dct, adst, flipadst, identity} as first and second transform.
// Only the dct, dct instance contains the DC-only fast path.
681*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 dct, dct
682*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 identity, identity
683*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 dct, adst
684*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 dct, flipadst
685*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 dct, identity
686*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 adst, dct
687*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 adst, adst
688*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 adst, flipadst
689*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 flipadst, dct
690*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 flipadst, adst
691*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 flipadst, flipadst
692*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 identity, dct
693*c0909341SAndroid Build Coastguard Worker
694*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 adst, identity
695*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 flipadst, identity
696*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 identity, adst
697*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 identity, flipadst
698*c0909341SAndroid Build Coastguard Worker
// idct_8h_x8: 8-point inverse DCT on eight q registers of 8 x s16 each, in
// place.
//   q0-q7 (macro arguments):  q registers holding inputs 0-7 on entry and
//                             outputs 0-7 on exit
//   r0-r15 (macro arguments): the d-register halves of those registers in
//                             order (lo, hi of input 0, lo, hi of input 1...)
//   d0, d1: coefficients (the wrapper loads q0 from idct_coeffs); d0[0] and
//           d1[0..3] are used here, d0[0], d0[2], d0[3] by idct_8h_x4
// The even inputs (0, 2, 4, 6) go through idct_8h_x4; the odd inputs
// (1, 7, 5, 3) form t4..t7, which are then combined with the even results.
// Clobbers q2-q7 (d4-d15). Register names without a backslash in the body
// are those physical scratch registers, not the macro arguments.
// NOTE(review): d8-d15 are callee-saved under the AArch32 procedure call
// standard and are not saved here -- presumably the exported entry points
// do that; verify.
699*c0909341SAndroid Build Coastguard Worker.macro idct_8h_x8 q0, q1, q2, q3, q4, q5, q6, q7, r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15
700*c0909341SAndroid Build Coastguard Worker        idct_8h_x4      \q0, \q2, \q4, \q6, \r0, \r1, \r4, \r5, \r8, \r9, \r12, \r13
701*c0909341SAndroid Build Coastguard Worker
702*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl_8h  q2,   q3,   \r2,  \r3,  \r14, \r15, d1[0], d1[1] // -> t4a
703*c0909341SAndroid Build Coastguard Worker        vmull_vmlal_8h  q4,   q5,   \r2,  \r3,  \r14, \r15, d1[1], d1[0] // -> t7a
704*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl_8h  q6,   q7,   \r10, \r11, \r6,  \r7,  d1[2], d1[3] // -> t5a
705*c0909341SAndroid Build Coastguard Worker        vqrshrn_8h      \r2,  \r3,  q2,   q3,   #12         // t4a
706*c0909341SAndroid Build Coastguard Worker        vqrshrn_8h      \r14, \r15, q4,   q5,   #12         // t7a
707*c0909341SAndroid Build Coastguard Worker        vmull_vmlal_8h  q2,   q3,   \r10, \r11, \r6,  \r7,  d1[3], d1[2] // -> t6a
708*c0909341SAndroid Build Coastguard Worker        vqrshrn_8h      \r6,  \r7,  q6,   q7,   #12         // t5a
709*c0909341SAndroid Build Coastguard Worker        vqrshrn_8h      \r10, \r11, q2,   q3,   #12         // t6a
710*c0909341SAndroid Build Coastguard Worker
711*c0909341SAndroid Build Coastguard Worker        vqadd.s16       q2,   \q1,  \q3 // t4
712*c0909341SAndroid Build Coastguard Worker        vqsub.s16       \q1,  \q1,  \q3 // t5a
713*c0909341SAndroid Build Coastguard Worker        vqadd.s16       q3,   \q7,  \q5 // t7
714*c0909341SAndroid Build Coastguard Worker        vqsub.s16       \q3,  \q7,  \q5 // t6a
715*c0909341SAndroid Build Coastguard Worker
716*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl_8h  q4,   q5,   \r6,  \r7,  \r2,  \r3,  d0[0], d0[0] // -> t5
717*c0909341SAndroid Build Coastguard Worker        vmull_vmlal_8h  q6,   q7,   \r6,  \r7,  \r2,  \r3,  d0[0], d0[0] // -> t6
718*c0909341SAndroid Build Coastguard Worker        vqrshrn_8h      d8,   d9,   q4,   q5,  #12 // t5 -> physical q4
719*c0909341SAndroid Build Coastguard Worker        vqrshrn_8h      d10,  d11,  q6,   q7,  #12 // t6 -> physical q5
720*c0909341SAndroid Build Coastguard Worker
721*c0909341SAndroid Build Coastguard Worker        vqsub.s16       \q7,  \q0,  q3 // out7
722*c0909341SAndroid Build Coastguard Worker        vqadd.s16       \q0,  \q0,  q3 // out0
723*c0909341SAndroid Build Coastguard Worker        vqadd.s16       \q1,  \q2,  q5 // out1
724*c0909341SAndroid Build Coastguard Worker        vqsub.s16       q6,   \q2,  q5 // out6, parked in scratch q6: argument q6 is still read below
725*c0909341SAndroid Build Coastguard Worker        vqadd.s16       \q2,  \q4,  q4 // out2
726*c0909341SAndroid Build Coastguard Worker        vqsub.s16       \q5,  \q4,  q4 // out5
727*c0909341SAndroid Build Coastguard Worker        vqadd.s16       \q3,  \q6,  q2 // out3
728*c0909341SAndroid Build Coastguard Worker        vqsub.s16       \q4,  \q6,  q2 // out4
729*c0909341SAndroid Build Coastguard Worker        vmov            \q6,  q6       // out6
730*c0909341SAndroid Build Coastguard Worker.endm
731*c0909341SAndroid Build Coastguard Worker
// idct_4h_x8: 8-point inverse DCT on eight d registers of 4 x s16 each, in
// place.
//   r0-r7 (macro arguments): inputs 0-7 on entry, outputs 0-7 on exit
//   d0, d1: coefficients (the wrapper loads q0 from idct_coeffs); d0[0] and
//           d1[0..3] are used here, d0[0], d0[2], d0[3] by idct_4h_x4
// The even inputs (0, 2, 4, 6) go through idct_4h_x4; the odd inputs
// (1, 7, 5, 3) form t4..t7, which are then combined with the even results.
// Clobbers q1-q3 (d2-d7). Register names without a backslash in the body are
// those physical scratch registers, not the macro arguments.
732*c0909341SAndroid Build Coastguard Worker.macro idct_4h_x8 r0, r1, r2, r3, r4, r5, r6, r7
733*c0909341SAndroid Build Coastguard Worker        idct_4h_x4      \r0, \r2, \r4, \r6
734*c0909341SAndroid Build Coastguard Worker
735*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl     q1,   \r1,  \r7, d1[0], d1[1] // -> t4a
736*c0909341SAndroid Build Coastguard Worker        vmull_vmlal     q2,   \r1,  \r7, d1[1], d1[0] // -> t7a
737*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl     q3,   \r5,  \r3, d1[2], d1[3] // -> t5a
738*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     \r1,  q1,   #12               // t4a
739*c0909341SAndroid Build Coastguard Worker        vmull_vmlal     q1,   \r5,  \r3, d1[3], d1[2] // -> t6a
740*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     \r7,  q2,   #12               // t7a
741*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     \r3,  q3,   #12               // t5a
742*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     \r5,  q1,   #12               // t6a
743*c0909341SAndroid Build Coastguard Worker
744*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d2,   \r1,  \r3 // t4
745*c0909341SAndroid Build Coastguard Worker        vqsub.s16       \r1,  \r1,  \r3 // t5a
746*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d3,   \r7,  \r5 // t7
747*c0909341SAndroid Build Coastguard Worker        vqsub.s16       \r3,  \r7,  \r5 // t6a
748*c0909341SAndroid Build Coastguard Worker
749*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl     q2,   \r3,  \r1, d0[0], d0[0] // -> t5
750*c0909341SAndroid Build Coastguard Worker        vmull_vmlal     q3,   \r3,  \r1, d0[0], d0[0] // -> t6
751*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d4,   q2,   #12               // t5
752*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d5,   q3,   #12               // t6
753*c0909341SAndroid Build Coastguard Worker
754*c0909341SAndroid Build Coastguard Worker        vqsub.s16       \r7,  \r0,  d3 // out7
755*c0909341SAndroid Build Coastguard Worker        vqadd.s16       \r0,  \r0,  d3 // out0
756*c0909341SAndroid Build Coastguard Worker        vqadd.s16       \r1,  \r2,  d5 // out1
757*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d6,   \r2,  d5 // out6, parked in scratch d6: argument r6 is still read below
758*c0909341SAndroid Build Coastguard Worker        vqadd.s16       \r2,  \r4,  d4 // out2
759*c0909341SAndroid Build Coastguard Worker        vqsub.s16       \r5,  \r4,  d4 // out5
760*c0909341SAndroid Build Coastguard Worker        vqadd.s16       \r3,  \r6,  d2 // out3
761*c0909341SAndroid Build Coastguard Worker        vqsub.s16       \r4,  \r6,  d2 // out4
762*c0909341SAndroid Build Coastguard Worker        vmov            \r6,  d6       // out6
763*c0909341SAndroid Build Coastguard Worker.endm
764*c0909341SAndroid Build Coastguard Worker
// inv_dct_8h_x8_neon: 8-point inverse DCT on q8-q15 (8 x s16 each), in
// place. Loads the first eight 16-bit entries of idct_coeffs into q0 and
// expands idct_8h_x8 with q8-q15 and their halves d16-d31.
// Clobbers r12, q0 and the idct_8h_x8 scratch registers q2-q7.
765*c0909341SAndroid Build Coastguard Workerfunction inv_dct_8h_x8_neon, export=1
766*c0909341SAndroid Build Coastguard Worker        movrel_local    r12, idct_coeffs
767*c0909341SAndroid Build Coastguard Worker        vld1.16         {q0}, [r12, :128]
768*c0909341SAndroid Build Coastguard Worker        idct_8h_x8      q8,  q9,  q10, q11, q12, q13, q14, q15, d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31
769*c0909341SAndroid Build Coastguard Worker        bx              lr
770*c0909341SAndroid Build Coastguard Workerendfunc
771*c0909341SAndroid Build Coastguard Worker
// inv_dct_4h_x8_neon: 8-point inverse DCT on d16-d23 (4 x s16 each), in
// place. Loads the first eight 16-bit entries of idct_coeffs into q0 and
// expands idct_4h_x8.
// Clobbers r12, q0 and the idct_4h_x8 scratch registers q1-q3.
772*c0909341SAndroid Build Coastguard Workerfunction inv_dct_4h_x8_neon, export=1
773*c0909341SAndroid Build Coastguard Worker        movrel_local    r12, idct_coeffs
774*c0909341SAndroid Build Coastguard Worker        vld1.16         {q0}, [r12, :128]
775*c0909341SAndroid Build Coastguard Worker        idct_4h_x8      d16, d17, d18, d19, d20, d21, d22, d23
776*c0909341SAndroid Build Coastguard Worker        bx              lr
777*c0909341SAndroid Build Coastguard Workerendfunc
778*c0909341SAndroid Build Coastguard Worker
// iadst_8h_x8: 8-point inverse ADST butterfly on eight vectors of 8 x int16.
//
// Inputs are always read from q8-q15 (d16-d31); the register names used by
// the first stage are hard-coded and do not depend on the macro arguments.
// Outputs out0..out7 are written to the registers named by the arguments:
//   \q0, \q1, \q3, \q5, \q6, \q7  receive out0, out1, out3, out5, out6, out7
//   \r4/\r5 and \r8/\r9           receive out2 and out4 as d-register pairs
// The remaining arguments (\q2, \q4 and the other \rN) are not referenced in
// the body.
//
// Clobbers: r12 (coefficient pointer), d0-d2 (coefficients), q2-q7
// (temporaries); q8-q15 are reused as scratch between stages.
// Note: a bare rN/qN below is the real register; only \rN/\qN is an argument.
// vmull_vmlal_8h, vmull_vmlsl_8h and vqrshrn_8h are helper macros defined
// outside this section; presumably a widening multiply-accumulate/-subtract
// into two q accumulators and a narrowing shift back to two d registers --
// confirm against their definitions.
779*c0909341SAndroid Build Coastguard Worker.macro iadst_8h_x8 q0, q1, q2, q3, q4, q5, q6, q7, r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15
780*c0909341SAndroid Build Coastguard Worker        movrel_local    r12, iadst8_coeffs
781*c0909341SAndroid Build Coastguard Worker        vld1.16         {d0, d1, d2}, [r12, :64]
782*c0909341SAndroid Build Coastguard Worker
        // Stage 1: rotate the input pairs (q15,q8), (q13,q10), (q11,q12) and
        // (q9,q14) by the coefficient pairs in d0/d1. Multiplies and narrows
        // are interleaved, and each narrow overwrites an input that has
        // already been consumed by both products of its pair.
783*c0909341SAndroid Build Coastguard Worker        vmull_vmlal_8h  q2,  q3,  d30, d31, d16, d17, d0[0], d0[1]
784*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl_8h  q4,  q5,  d30, d31, d16, d17, d0[1], d0[0]
785*c0909341SAndroid Build Coastguard Worker        vmull_vmlal_8h  q6,  q7,  d26, d27, d20, d21, d0[2], d0[3]
786*c0909341SAndroid Build Coastguard Worker        vqrshrn_8h      d16, d17, q2,  q3,  #12  // t0a
787*c0909341SAndroid Build Coastguard Worker        vqrshrn_8h      d30, d31, q4,  q5,  #12  // t1a
788*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl_8h  q2,  q3,  d26, d27, d20, d21, d0[3], d0[2]
789*c0909341SAndroid Build Coastguard Worker        vmull_vmlal_8h  q4,  q5,  d22, d23, d24, d25, d1[0], d1[1]
790*c0909341SAndroid Build Coastguard Worker        vqrshrn_8h      d20, d21, q6,  q7,  #12  // t2a
791*c0909341SAndroid Build Coastguard Worker        vqrshrn_8h      d26, d27, q2,  q3,  #12  // t3a
792*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl_8h  q6,  q7,  d22, d23, d24, d25, d1[1], d1[0]
793*c0909341SAndroid Build Coastguard Worker        vmull_vmlal_8h  q2,  q3,  d18, d19, d28, d29, d1[2], d1[3]
794*c0909341SAndroid Build Coastguard Worker        vqrshrn_8h      d24, d25, q4,  q5,  #12  // t4a
795*c0909341SAndroid Build Coastguard Worker        vqrshrn_8h      d22, d23, q6,  q7,  #12  // t5a
796*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl_8h  q4,  q5,  d18, d19, d28, d29, d1[3], d1[2]
797*c0909341SAndroid Build Coastguard Worker        vqrshrn_8h      d28, d29, q2,  q3,  #12  // t6a
798*c0909341SAndroid Build Coastguard Worker        vqrshrn_8h      d18, d19, q4,  q5,  #12  // t7a
799*c0909341SAndroid Build Coastguard Worker
        // Stage 2: saturating add/sub butterflies on the stage-1 results.
        // t0..t6 go to q2-q7 and q10; t7 goes to q11.
800*c0909341SAndroid Build Coastguard Worker        vqadd.s16       q2,  q8,  q12 // t0
801*c0909341SAndroid Build Coastguard Worker        vqsub.s16       q3,  q8,  q12 // t4
802*c0909341SAndroid Build Coastguard Worker        vqadd.s16       q4,  q15, q11 // t1
803*c0909341SAndroid Build Coastguard Worker        vqsub.s16       q5,  q15, q11 // t5
804*c0909341SAndroid Build Coastguard Worker        vqadd.s16       q6,  q10, q14 // t2
805*c0909341SAndroid Build Coastguard Worker        vqsub.s16       q7,  q10, q14 // t6
806*c0909341SAndroid Build Coastguard Worker        vqadd.s16       q10, q13, q9  // t3
807*c0909341SAndroid Build Coastguard Worker        vqsub.s16       q11, q13, q9  // t7
808*c0909341SAndroid Build Coastguard Worker
        // Stage 3: rotate (t4,t5) and (t7,t6) by the d2[2]/d2[3] pair.
        // q8/q9 are used as an accumulator twice, so the first result is
        // narrowed into d6/d7 before the second product is formed.
809*c0909341SAndroid Build Coastguard Worker        vmull_vmlal_8h  q8,  q9,  d6,  d7,  d10, d11, d2[3], d2[2]
810*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl_8h  q12, q13, d6,  d7,  d10, d11, d2[2], d2[3]
811*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl_8h  q14, q15, d22, d23, d14, d15, d2[3], d2[2]
812*c0909341SAndroid Build Coastguard Worker
813*c0909341SAndroid Build Coastguard Worker        vqrshrn_8h      d6,  d7,  q8,  q9,  #12  // t4a
814*c0909341SAndroid Build Coastguard Worker        vqrshrn_8h      d10, d11, q12, q13, #12  // t5a
815*c0909341SAndroid Build Coastguard Worker
816*c0909341SAndroid Build Coastguard Worker        vmull_vmlal_8h  q8,  q9,  d22, d23, d14, d15, d2[2], d2[3]
817*c0909341SAndroid Build Coastguard Worker
818*c0909341SAndroid Build Coastguard Worker        vqrshrn_8h      d14, d15, q14, q15, #12  // t6a
819*c0909341SAndroid Build Coastguard Worker        vqrshrn_8h      d22, d23, q8,  q9,  #12  // t7a
820*c0909341SAndroid Build Coastguard Worker
        // Stage 4: final butterflies. From here on results are written to the
        // caller-selected output registers; out7 and out1 are negated.
821*c0909341SAndroid Build Coastguard Worker        vqadd.s16       \q0, q2,  q6  // out0
822*c0909341SAndroid Build Coastguard Worker        vqsub.s16       q2,  q2,  q6  // t2
823*c0909341SAndroid Build Coastguard Worker        vqadd.s16       \q7, q4,  q10 // out7
824*c0909341SAndroid Build Coastguard Worker        vqsub.s16       q4,  q4,  q10 // t3
825*c0909341SAndroid Build Coastguard Worker        vqneg.s16       \q7, \q7     // out7
826*c0909341SAndroid Build Coastguard Worker
827*c0909341SAndroid Build Coastguard Worker        vqadd.s16       \q1, q3,  q7  // out1
828*c0909341SAndroid Build Coastguard Worker        vqsub.s16       q3,  q3,  q7  // t6
829*c0909341SAndroid Build Coastguard Worker        vqadd.s16       \q6, q5,  q11 // out6
830*c0909341SAndroid Build Coastguard Worker        vqsub.s16       q5,  q5,  q11 // t7
831*c0909341SAndroid Build Coastguard Worker        vqneg.s16       \q1, \q1     // out1
832*c0909341SAndroid Build Coastguard Worker
        // Stage 5: out2..out5 are the sum/difference of (t2,t3) and (t6,t7),
        // each multiplied by d2[0]. The "(qA or qB)" notes name the
        // destination for the two argument orders used by the callers in this
        // file (straight / reversed). The narrows are ordered so that an
        // output register is only written once the accumulator sharing that
        // register has been consumed, for both orders.
833*c0909341SAndroid Build Coastguard Worker        vmull_vmlal_8h  q10, q11, d4,  d5,  d8,  d9,  d2[0], d2[0] // -> out3 (q11 or q12)
834*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl_8h  q6,  q7,  d4,  d5,  d8,  d9,  d2[0], d2[0] // -> out4 (q12 or q11)
835*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl_8h  q12, q13, d6,  d7,  d10, d11, d2[0], d2[0] // -> out5 (q13 or q10)
836*c0909341SAndroid Build Coastguard Worker        vqrshrn_8h      d4,  d5,  q10, q11, #12 // out3
837*c0909341SAndroid Build Coastguard Worker        vmull_vmlal_8h  q10, q11, d6,  d7,  d10, d11, d2[0], d2[0] // -> out2 (q10 or q13)
838*c0909341SAndroid Build Coastguard Worker        vqrshrn_8h      d6,  d7,  q12, q13, #12 // out5
839*c0909341SAndroid Build Coastguard Worker        vqrshrn_8h      \r4, \r5, q10, q11, #12 // out2 (q10 or q13)
840*c0909341SAndroid Build Coastguard Worker        vqrshrn_8h      \r8, \r9, q6,  q7,  #12 // out4 (q12 or q11)
841*c0909341SAndroid Build Coastguard Worker
        // out3 and out5 were parked in q2/q3; negate them into their final
        // registers.
842*c0909341SAndroid Build Coastguard Worker        vqneg.s16       \q3, q2     // out3
843*c0909341SAndroid Build Coastguard Worker        vqneg.s16       \q5, q3     // out5
844*c0909341SAndroid Build Coastguard Worker.endm
845*c0909341SAndroid Build Coastguard Worker
845*c0909341SAndroid Build Coastguard Worker
// iadst_4h_x8: 8-point inverse ADST butterfly on eight vectors of 4 x int16.
//
// Inputs are always read from d16-d23 (hard-coded below). Outputs out0..out7
// are written to the d registers passed as \r0..\r7, so the caller chooses
// the output order.
//
// Clobbers: r12 (coefficient pointer), d0-d2 (coefficients), q2-q7
// (16-bit temporaries in d4-d9 and 32-bit accumulators); q8-q11 are reused
// as accumulators between stages.
// vmull_vmlal / vmull_vmlsl are helper macros defined outside this section;
// presumably a widening multiply-accumulate / multiply-subtract into a
// 32-bit q accumulator -- confirm against their definitions.
846*c0909341SAndroid Build Coastguard Worker.macro iadst_4h_x8 r0, r1, r2, r3, r4, r5, r6, r7
847*c0909341SAndroid Build Coastguard Worker        movrel_local    r12, iadst8_coeffs
848*c0909341SAndroid Build Coastguard Worker        vld1.16         {d0, d1, d2}, [r12, :64]
849*c0909341SAndroid Build Coastguard Worker
        // Stage 1: rotate the input pairs (d23,d16), (d21,d18), (d19,d20) and
        // (d17,d22) by the coefficient pairs in d0/d1, then narrow each
        // 32-bit result back to 16 bit with a rounding, saturating shift by
        // 12. Each narrow overwrites an input whose products are complete.
850*c0909341SAndroid Build Coastguard Worker        vmull_vmlal     q2,  d23, d16, d0[0], d0[1]
851*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl     q3,  d23, d16, d0[1], d0[0]
852*c0909341SAndroid Build Coastguard Worker        vmull_vmlal     q4,  d21, d18, d0[2], d0[3]
853*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d16, q2,  #12 // t0a
854*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d23, q3,  #12 // t1a
855*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl     q5,  d21, d18, d0[3], d0[2]
856*c0909341SAndroid Build Coastguard Worker        vmull_vmlal     q6,  d19, d20, d1[0], d1[1]
857*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d18, q4,  #12 // t2a
858*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d21, q5,  #12 // t3a
859*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl     q7,  d19, d20, d1[1], d1[0]
860*c0909341SAndroid Build Coastguard Worker        vmull_vmlal     q2,  d17, d22, d1[2], d1[3]
861*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d20, q6,  #12 // t4a
862*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d19, q7,  #12 // t5a
863*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl     q3,  d17, d22, d1[3], d1[2]
864*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d22, q2,  #12 // t6a
865*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d17, q3,  #12 // t7a
866*c0909341SAndroid Build Coastguard Worker
        // Stage 2: saturating add/sub butterflies. t0..t6 go to d4-d9 and
        // d18; t7 goes to d19.
867*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d4,  d16, d20 // t0
868*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d5,  d16, d20 // t4
869*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d6,  d23, d19 // t1
870*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d7,  d23, d19 // t5
871*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d8,  d18, d22 // t2
872*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d9,  d18, d22 // t6
873*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d18, d21, d17 // t3
874*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d19, d21, d17 // t7
875*c0909341SAndroid Build Coastguard Worker
        // Stage 3: rotate (t4,t5) and (t7,t6) by the d2[2]/d2[3] pair. q8 is
        // used as an accumulator twice, so the first result is narrowed into
        // d5 before the second product is formed.
876*c0909341SAndroid Build Coastguard Worker        vmull_vmlal     q8,  d5,  d7,  d2[3], d2[2]
877*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl     q10, d5,  d7,  d2[2], d2[3]
878*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl     q11, d19, d9,  d2[3], d2[2]
879*c0909341SAndroid Build Coastguard Worker
880*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d5,  q8,  #12 // t4a
881*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d7,  q10, #12 // t5a
882*c0909341SAndroid Build Coastguard Worker
883*c0909341SAndroid Build Coastguard Worker        vmull_vmlal     q8,  d19, d9,  d2[2], d2[3]
884*c0909341SAndroid Build Coastguard Worker
885*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d9,  q11, #12 // t6a
886*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d19, q8,  #12 // t7a
887*c0909341SAndroid Build Coastguard Worker
        // Stage 4: final butterflies, written to the caller-selected output
        // registers; out7 and out1 are negated.
888*c0909341SAndroid Build Coastguard Worker        vqadd.s16       \r0, d4,  d8  // out0
889*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d4,  d4,  d8  // t2
890*c0909341SAndroid Build Coastguard Worker        vqadd.s16       \r7, d6,  d18 // out7
891*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d6,  d6,  d18 // t3
892*c0909341SAndroid Build Coastguard Worker        vqneg.s16       \r7, \r7      // out7
893*c0909341SAndroid Build Coastguard Worker
894*c0909341SAndroid Build Coastguard Worker        vqadd.s16       \r1, d5,  d9  // out1
895*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d5,  d5,  d9  // t6
896*c0909341SAndroid Build Coastguard Worker        vqadd.s16       \r6, d7,  d19 // out6
897*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d7,  d7,  d19 // t7
898*c0909341SAndroid Build Coastguard Worker        vqneg.s16       \r1, \r1      // out1
899*c0909341SAndroid Build Coastguard Worker
        // Stage 5: out2..out5 are the sum/difference of (t2,t3) and (t6,t7),
        // each multiplied by d2[0]. The "(dA or dB)" notes name the
        // destination for the straight / reversed argument orders used by the
        // callers in this file.
900*c0909341SAndroid Build Coastguard Worker        vmull_vmlal     q9,  d4,  d6,  d2[0], d2[0] // -> out3 (d19 or d20)
901*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl     q4,  d4,  d6,  d2[0], d2[0] // -> out4 (d20 or d19)
902*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl     q10, d5,  d7,  d2[0], d2[0] // -> out5 (d21 or d18)
903*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d4,  q9,  #12 // out3
904*c0909341SAndroid Build Coastguard Worker        vmull_vmlal     q9,  d5,  d7,  d2[0], d2[0] // -> out2 (d18 or d21)
905*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d5,  q10, #12 // out5
906*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     \r2, q9,  #12 // out2 (d18 or d21)
907*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     \r4, q4,  #12 // out4 (d20 or d19)
908*c0909341SAndroid Build Coastguard Worker
        // out3 and out5 were parked in d4/d5; negate them into their final
        // registers.
909*c0909341SAndroid Build Coastguard Worker        vqneg.s16       \r3, d4       // out3
910*c0909341SAndroid Build Coastguard Worker        vqneg.s16       \r5, d5       // out5
911*c0909341SAndroid Build Coastguard Worker.endm
912*c0909341SAndroid Build Coastguard Worker
912*c0909341SAndroid Build Coastguard Worker
// inv_adst_8h_x8_neon: 8-point inverse ADST on q8-q15, in place.
// Expands iadst_8h_x8 with the straight register order, so out0..out7 end up
// in q8..q15. Clobbers everything the macro clobbers (r12, d0-d2, q2-q7).
913*c0909341SAndroid Build Coastguard Workerfunction inv_adst_8h_x8_neon, export=1
914*c0909341SAndroid Build Coastguard Worker        iadst_8h_x8     q8,  q9,  q10, q11, q12, q13, q14, q15, d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31
915*c0909341SAndroid Build Coastguard Worker        bx              lr
916*c0909341SAndroid Build Coastguard Workerendfunc
917*c0909341SAndroid Build Coastguard Worker
// inv_flipadst_8h_x8_neon: same computation as inv_adst_8h_x8_neon, but the
// output registers are passed in reverse, so out0..out7 end up in q15..q8.
// The inputs are still read from q8-q15 in the normal order (the macro
// hard-codes its input registers).
918*c0909341SAndroid Build Coastguard Workerfunction inv_flipadst_8h_x8_neon, export=1
919*c0909341SAndroid Build Coastguard Worker        iadst_8h_x8     q15, q14, q13, q12, q11, q10, q9,  q8,  d30, d31, d28, d29, d26, d27, d24, d25, d22, d23, d20, d21, d18, d19, d16, d17
920*c0909341SAndroid Build Coastguard Worker        bx              lr
921*c0909341SAndroid Build Coastguard Workerendfunc
922*c0909341SAndroid Build Coastguard Worker
// inv_adst_4h_x8_neon: 8-point inverse ADST on d16-d23, in place.
// Expands iadst_4h_x8 with the straight register order, so out0..out7 end up
// in d16..d23. Clobbers everything the macro clobbers (r12, d0-d2, q2-q7).
923*c0909341SAndroid Build Coastguard Workerfunction inv_adst_4h_x8_neon, export=1
924*c0909341SAndroid Build Coastguard Worker        iadst_4h_x8     d16, d17, d18, d19, d20, d21, d22, d23
925*c0909341SAndroid Build Coastguard Worker        bx              lr
926*c0909341SAndroid Build Coastguard Workerendfunc
927*c0909341SAndroid Build Coastguard Worker
// inv_flipadst_4h_x8_neon: same computation as inv_adst_4h_x8_neon, but the
// output registers are passed in reverse, so out0..out7 end up in d23..d16.
// The inputs are still read from d16-d23 in the normal order.
928*c0909341SAndroid Build Coastguard Workerfunction inv_flipadst_4h_x8_neon, export=1
929*c0909341SAndroid Build Coastguard Worker        iadst_4h_x8     d23, d22, d21, d20, d19, d18, d17, d16
930*c0909341SAndroid Build Coastguard Worker        bx              lr
931*c0909341SAndroid Build Coastguard Workerendfunc
932*c0909341SAndroid Build Coastguard Worker
// inv_identity_8h_x8_neon: doubles every signed 16-bit lane of q8-q15 in
// place, saturating on overflow (vqshl #1). No other register is modified.
933*c0909341SAndroid Build Coastguard Workerfunction inv_identity_8h_x8_neon, export=1
934*c0909341SAndroid Build Coastguard Worker        vqshl.s16       q8,  q8,  #1
935*c0909341SAndroid Build Coastguard Worker        vqshl.s16       q9,  q9,  #1
936*c0909341SAndroid Build Coastguard Worker        vqshl.s16       q10, q10, #1
937*c0909341SAndroid Build Coastguard Worker        vqshl.s16       q11, q11, #1
938*c0909341SAndroid Build Coastguard Worker        vqshl.s16       q12, q12, #1
939*c0909341SAndroid Build Coastguard Worker        vqshl.s16       q13, q13, #1
940*c0909341SAndroid Build Coastguard Worker        vqshl.s16       q14, q14, #1
941*c0909341SAndroid Build Coastguard Worker        vqshl.s16       q15, q15, #1
942*c0909341SAndroid Build Coastguard Worker        bx              lr
943*c0909341SAndroid Build Coastguard Workerendfunc
944*c0909341SAndroid Build Coastguard Worker
// inv_identity_4h_x8_neon: doubles every signed 16-bit lane of d16-d23 in
// place, saturating on overflow. The eight d registers are processed two at a
// time through their q aliases q8-q11.
945*c0909341SAndroid Build Coastguard Workerfunction inv_identity_4h_x8_neon, export=1
946*c0909341SAndroid Build Coastguard Worker        vqshl.s16       q8,  q8,  #1
947*c0909341SAndroid Build Coastguard Worker        vqshl.s16       q9,  q9,  #1
948*c0909341SAndroid Build Coastguard Worker        vqshl.s16       q10, q10, #1
949*c0909341SAndroid Build Coastguard Worker        vqshl.s16       q11, q11, #1
950*c0909341SAndroid Build Coastguard Worker        bx              lr
951*c0909341SAndroid Build Coastguard Workerendfunc
952*c0909341SAndroid Build Coastguard Worker
// def_fn_8x8_base: emits the shared body of the 8x8 inverse transform,
// named inv_txfm_<variant>add_8x8_neon.
//
// Register contract, as set up by the def_fn_8x8 entry stubs in this file:
//   r2  coefficient buffer (read, then overwritten with zeros)
//   r4  first-pass transform function (not used by the identity_ variant)
//   r5  second-pass transform function
//   r0, r7  passed on to load_add_store_8x8 (defined outside this section)
// The stub has already pushed {r4-r5,r7,lr} and {q4-q7}; this body pops them
// and returns by popping the saved lr into pc.
//
// With variant == identity_ only the load/clear and a branch are emitted; the
// branch targets L(itx_8x8_epilog), which exists only in the expansion with
// an empty variant. Both expansions are therefore required.
953*c0909341SAndroid Build Coastguard Worker.macro def_fn_8x8_base variant
954*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_\variant\()add_8x8_neon
        // q0/q1 = 0; used to clear the buffer as it is read.
955*c0909341SAndroid Build Coastguard Worker        vmov.i16        q0,  #0
956*c0909341SAndroid Build Coastguard Worker        vmov.i16        q1,  #0
        // Load 4 x 32 bytes into q8-q15, storing zeros over each chunk right
        // after it is loaded. The first three stores post-increment r2; the
        // last one does not.
957*c0909341SAndroid Build Coastguard Worker        vld1.16         {q8,  q9},  [r2, :128]
958*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0,  q1},  [r2, :128]!
959*c0909341SAndroid Build Coastguard Worker        vld1.16         {q10, q11}, [r2, :128]
960*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0,  q1},  [r2, :128]!
961*c0909341SAndroid Build Coastguard Worker        vld1.16         {q12, q13}, [r2, :128]
962*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0,  q1},  [r2, :128]!
963*c0909341SAndroid Build Coastguard Worker        vld1.16         {q14, q15}, [r2, :128]
964*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0,  q1},  [r2, :128]
965*c0909341SAndroid Build Coastguard Worker
966*c0909341SAndroid Build Coastguard Worker.ifc \variant, identity_
967*c0909341SAndroid Build Coastguard Worker        // The identity shl #1 and downshift srshr #1 cancel out
968*c0909341SAndroid Build Coastguard Worker
        // Skip the first pass and its downshift entirely and join the common
        // tail emitted by the non-identity expansion of this macro.
969*c0909341SAndroid Build Coastguard Worker        b               L(itx_8x8_epilog)
970*c0909341SAndroid Build Coastguard Worker.else
        // First pass. blx overwrites lr, which is why the stub saved it.
971*c0909341SAndroid Build Coastguard Worker        blx             r4
972*c0909341SAndroid Build Coastguard Worker
        // Intermediate downshift: rounding shift right by 1 of every lane.
973*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q8,  q8,  #1
974*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q9,  q9,  #1
975*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q10, q10, #1
976*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q11, q11, #1
977*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q12, q12, #1
978*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q13, q13, #1
979*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q14, q14, #1
980*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q15, q15, #1
981*c0909341SAndroid Build Coastguard Worker
        // Common tail, also entered from the identity_ expansion: transpose,
        // run the second pass, add the result to the destination and return.
982*c0909341SAndroid Build Coastguard WorkerL(itx_8x8_epilog):
983*c0909341SAndroid Build Coastguard Worker        transpose_8x8h  q8,  q9,  q10, q11, q12, q13, q14, q15, d17, d19, d21, d23, d24, d26, d28, d30
984*c0909341SAndroid Build Coastguard Worker
985*c0909341SAndroid Build Coastguard Worker        blx             r5
986*c0909341SAndroid Build Coastguard Worker
987*c0909341SAndroid Build Coastguard Worker        load_add_store_8x8 r0, r7
        // Undo the pushes done by the entry stub; popping pc returns to the
        // stub's caller.
988*c0909341SAndroid Build Coastguard Worker        vpop            {q4-q7}
989*c0909341SAndroid Build Coastguard Worker        pop             {r4-r5,r7,pc}
990*c0909341SAndroid Build Coastguard Worker.endif
991*c0909341SAndroid Build Coastguard Workerendfunc
992*c0909341SAndroid Build Coastguard Worker.endm
993*c0909341SAndroid Build Coastguard Worker
// Emit inv_txfm_identity_add_8x8_neon and inv_txfm_add_8x8_neon. The first
// branches forward to L(itx_8x8_epilog), which the second expansion defines.
994*c0909341SAndroid Build Coastguard Workerdef_fn_8x8_base identity_
995*c0909341SAndroid Build Coastguard Workerdef_fn_8x8_base
996*c0909341SAndroid Build Coastguard Worker
// def_fn_8x8: emits the entry point
// inv_txfm_add_<txfm1>_<txfm2>_8x8_8bpc_neon.
//
// The stub saves {r4-r5,r7,lr} and {q4-q7}, loads the per-pass transform
// functions and tail-branches into the shared body, which restores the saved
// registers and returns:
//   r5 = inv_<txfm2>_8h_x8_neon   (second pass)
//   r4 = inv_<txfm1>_8h_x8_neon   (first pass; not loaded for identity)
// When txfm1 is identity, the identity body is used instead and r4 is left
// untouched, because that body never calls it.
// For dct_dct, idct_dc (defined outside this section) is expanded first;
// presumably a DC-only shortcut that may return early -- confirm against its
// definition.
997*c0909341SAndroid Build Coastguard Worker.macro def_fn_8x8 txfm1, txfm2
998*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_\txfm1\()_\txfm2\()_8x8_8bpc_neon, export=1
999*c0909341SAndroid Build Coastguard Worker.ifc \txfm1\()_\txfm2, dct_dct
1000*c0909341SAndroid Build Coastguard Worker        idct_dc         8,   8,   1
1001*c0909341SAndroid Build Coastguard Worker.endif
1002*c0909341SAndroid Build Coastguard Worker        push            {r4-r5,r7,lr}
1003*c0909341SAndroid Build Coastguard Worker        vpush           {q4-q7}
1004*c0909341SAndroid Build Coastguard Worker        movrel_local    r5,  inv_\txfm2\()_8h_x8_neon
1005*c0909341SAndroid Build Coastguard Worker.ifc \txfm1, identity
1006*c0909341SAndroid Build Coastguard Worker        b               inv_txfm_identity_add_8x8_neon
1007*c0909341SAndroid Build Coastguard Worker.else
1008*c0909341SAndroid Build Coastguard Worker        movrel_local    r4,  inv_\txfm1\()_8h_x8_neon
1009*c0909341SAndroid Build Coastguard Worker        b               inv_txfm_add_8x8_neon
1010*c0909341SAndroid Build Coastguard Worker.endif
1011*c0909341SAndroid Build Coastguard Workerendfunc
1012*c0909341SAndroid Build Coastguard Worker.endm
1013*c0909341SAndroid Build Coastguard Worker
// Emit the 8x8 entry points: all 16 (txfm1, txfm2) combinations of
// dct, adst, flipadst and identity.
1014*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 dct, dct
1015*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 identity, identity
1016*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 dct, adst
1017*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 dct, flipadst
1018*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 dct, identity
1019*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 adst, dct
1020*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 adst, adst
1021*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 adst, flipadst
1022*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 flipadst, dct
1023*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 flipadst, adst
1024*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 flipadst, flipadst
1025*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 identity, dct
1026*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 adst, identity
1027*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 flipadst, identity
1028*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 identity, adst
1029*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 identity, flipadst
1030*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_8x4_neon: shared body of the 8x4 inverse transform.
//
// Entered by a branch from the def_fn_48 stubs, which have already pushed
// {r4-r5,r7,lr} and {q4-q7}:
//   r2  coefficient buffer (64 bytes read, then overwritten with zeros)
//   r4  first-pass transform function, r5 second-pass transform function
//   r0, r7  passed on to load_add_store_8x4 (defined outside this section)
// Clobbers r12, d0, q14, q15 plus whatever the called transforms clobber.
1031*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_8x4_neon
1032*c0909341SAndroid Build Coastguard Worker        vmov.i16        q14, #0
1033*c0909341SAndroid Build Coastguard Worker        vmov.i16        q15, #0
        // d0 = 2896*8 in every lane, the factor handed to scale_input below.
        // 2896/4096 is approximately 1/sqrt(2); presumably the rectangular
        // block scaling -- confirm against the scale_input definition.
1034*c0909341SAndroid Build Coastguard Worker        movw            r12, #2896*8
1035*c0909341SAndroid Build Coastguard Worker        vdup.16         d0,  r12
        // Load the coefficients into d16-d23 and clear the buffer behind us.
1036*c0909341SAndroid Build Coastguard Worker        vld1.16         {d16, d17, d18, d19}, [r2, :128]
1037*c0909341SAndroid Build Coastguard Worker        vst1.16         {q14, q15}, [r2, :128]!
1038*c0909341SAndroid Build Coastguard Worker        vld1.16         {d20, d21, d22, d23}, [r2, :128]
1039*c0909341SAndroid Build Coastguard Worker        vst1.16         {q14, q15}, [r2, :128]
1040*c0909341SAndroid Build Coastguard Worker
1041*c0909341SAndroid Build Coastguard Worker        scale_input     d0[0], q8,  q9, q10, q11
1042*c0909341SAndroid Build Coastguard Worker
        // First pass (overwrites lr; the stub saved it on the stack).
1043*c0909341SAndroid Build Coastguard Worker        blx             r4
1044*c0909341SAndroid Build Coastguard Worker
        // Transpose d16-d19 and d20-d23 as two separate 4x4 blocks, then
        // shuffle the d registers so that each q register pairs register i of
        // the first block with register i of the second:
        //   q8 = {d16, d20}, q9 = {d17, d21}, q10 = {d18, d22},
        //   q11 = {d19, d23}   (register names as they were before the swaps)
1045*c0909341SAndroid Build Coastguard Worker        transpose_4x4h  q8,  q9,  d16, d17, d18, d19
1046*c0909341SAndroid Build Coastguard Worker        transpose_4x4h  q10, q11, d20, d21, d22, d23
1047*c0909341SAndroid Build Coastguard Worker        vswp            d17, d20
1048*c0909341SAndroid Build Coastguard Worker        vswp            d19, d21
1049*c0909341SAndroid Build Coastguard Worker        vswp            d18, d20
1050*c0909341SAndroid Build Coastguard Worker        vswp            d21, d22
1051*c0909341SAndroid Build Coastguard Worker
        // Second pass.
1052*c0909341SAndroid Build Coastguard Worker        blx             r5
1053*c0909341SAndroid Build Coastguard Worker
1054*c0909341SAndroid Build Coastguard Worker        load_add_store_8x4 r0, r7
        // Undo the stub's pushes; popping pc returns to the stub's caller.
1055*c0909341SAndroid Build Coastguard Worker        vpop            {q4-q7}
1056*c0909341SAndroid Build Coastguard Worker        pop             {r4-r5,r7,pc}
1057*c0909341SAndroid Build Coastguard Workerendfunc
1058*c0909341SAndroid Build Coastguard Worker
1058*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_4x8_neon: shared body of the 4x8 inverse transform.
//
// Entered by a branch from the def_fn_48 stubs, which have already pushed
// {r4-r5,r7,lr} and {q4-q7}:
//   r2  coefficient buffer (64 bytes read, then overwritten with zeros)
//   r4  first-pass transform function, r5 second-pass transform function
//   r0, r7  passed on to load_add_store_4x8 (defined outside this section)
// Clobbers r12, d0, q14, q15 plus whatever the called transforms clobber.
1059*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_4x8_neon
1060*c0909341SAndroid Build Coastguard Worker        vmov.i16        q14, #0
1061*c0909341SAndroid Build Coastguard Worker        vmov.i16        q15, #0
        // d0 = 2896*8 in every lane, the factor handed to scale_input below.
        // 2896/4096 is approximately 1/sqrt(2); presumably the rectangular
        // block scaling -- confirm against the scale_input definition.
1062*c0909341SAndroid Build Coastguard Worker        movw            r12, #2896*8
1063*c0909341SAndroid Build Coastguard Worker        vdup.16         d0,  r12
        // Load the coefficients into q8-q11 and clear the buffer behind us.
1064*c0909341SAndroid Build Coastguard Worker        vld1.16         {q8,  q9},  [r2, :128]
1065*c0909341SAndroid Build Coastguard Worker        vst1.16         {q14, q15}, [r2, :128]!
1066*c0909341SAndroid Build Coastguard Worker        vld1.16         {q10, q11}, [r2, :128]
1067*c0909341SAndroid Build Coastguard Worker        vst1.16         {q14, q15}, [r2, :128]
1068*c0909341SAndroid Build Coastguard Worker
1069*c0909341SAndroid Build Coastguard Worker        scale_input     d0[0], q8,  q9, q10, q11
1070*c0909341SAndroid Build Coastguard Worker
        // First pass (overwrites lr; the stub saved it on the stack).
1071*c0909341SAndroid Build Coastguard Worker        blx             r4
1072*c0909341SAndroid Build Coastguard Worker
        // Transpose, then de-interleave the d registers: after the four swaps
        // d16-d19 hold what were the low halves of q8-q11 (d16, d18, d20,
        // d22) and d20-d23 hold the former high halves (d17, d19, d21, d23).
1073*c0909341SAndroid Build Coastguard Worker        transpose_4x8h  q8,  q9,  q10, q11
1074*c0909341SAndroid Build Coastguard Worker        vswp            d17, d20
1075*c0909341SAndroid Build Coastguard Worker        vswp            d19, d21
1076*c0909341SAndroid Build Coastguard Worker        vswp            d17, d18
1077*c0909341SAndroid Build Coastguard Worker        vswp            d19, d22
1078*c0909341SAndroid Build Coastguard Worker
        // Second pass.
1079*c0909341SAndroid Build Coastguard Worker        blx             r5
1080*c0909341SAndroid Build Coastguard Worker
1081*c0909341SAndroid Build Coastguard Worker        load_add_store_4x8 r0, r7
        // Undo the stub's pushes; popping pc returns to the stub's caller.
1082*c0909341SAndroid Build Coastguard Worker        vpop            {q4-q7}
1083*c0909341SAndroid Build Coastguard Worker        pop             {r4-r5,r7,pc}
1084*c0909341SAndroid Build Coastguard Workerendfunc
1085*c0909341SAndroid Build Coastguard Worker
1085*c0909341SAndroid Build Coastguard Worker
// def_fn_48: emits the entry point
// inv_txfm_add_<txfm1>_<txfm2>_<w>x<h>_8bpc_neon for the 4x8 / 8x4 sizes.
//
// The stub saves {r4-r5,r7,lr} and {q4-q7}, selects the two 1-D transforms
// and tail-branches into inv_txfm_add_<w>x<h>_neon, which restores the saved
// registers and returns:
//   r4 = inv_<txfm1>_<h>h_x<w>_neon   (first pass)
//   r5 = inv_<txfm2>_<w>h_x<h>_neon   (second pass)
// For dct_dct, idct_dc (defined outside this section) is expanded first;
// presumably a DC-only shortcut that may return early -- confirm against its
// definition.
1086*c0909341SAndroid Build Coastguard Worker.macro def_fn_48 w, h, txfm1, txfm2
1087*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_\txfm1\()_\txfm2\()_\w\()x\h\()_8bpc_neon, export=1
1088*c0909341SAndroid Build Coastguard Worker.ifc \txfm1\()_\txfm2, dct_dct
1089*c0909341SAndroid Build Coastguard Worker        idct_dc         \w,  \h,  0
1090*c0909341SAndroid Build Coastguard Worker.endif
1091*c0909341SAndroid Build Coastguard Worker        push            {r4-r5,r7,lr}
1092*c0909341SAndroid Build Coastguard Worker        vpush           {q4-q7}
1093*c0909341SAndroid Build Coastguard Worker        movrel_local    r4,  inv_\txfm1\()_\h\()h_x\w\()_neon
1094*c0909341SAndroid Build Coastguard Worker        movrel_local    r5,  inv_\txfm2\()_\w\()h_x\h\()_neon
1095*c0909341SAndroid Build Coastguard Worker        b               inv_txfm_add_\w\()x\h\()_neon
1096*c0909341SAndroid Build Coastguard Workerendfunc
1097*c0909341SAndroid Build Coastguard Worker.endm
1098*c0909341SAndroid Build Coastguard Worker
// def_fns_48: expands def_fn_48 for all 16 (txfm1, txfm2) combinations of
// dct, adst, flipadst and identity at the given block size w x h.
1099*c0909341SAndroid Build Coastguard Worker.macro def_fns_48 w, h
1100*c0909341SAndroid Build Coastguard Workerdef_fn_48 \w, \h, dct, dct
1101*c0909341SAndroid Build Coastguard Workerdef_fn_48 \w, \h, identity, identity
1102*c0909341SAndroid Build Coastguard Workerdef_fn_48 \w, \h, dct, adst
1103*c0909341SAndroid Build Coastguard Workerdef_fn_48 \w, \h, dct, flipadst
1104*c0909341SAndroid Build Coastguard Workerdef_fn_48 \w, \h, dct, identity
1105*c0909341SAndroid Build Coastguard Workerdef_fn_48 \w, \h, adst, dct
1106*c0909341SAndroid Build Coastguard Workerdef_fn_48 \w, \h, adst, adst
1107*c0909341SAndroid Build Coastguard Workerdef_fn_48 \w, \h, adst, flipadst
1108*c0909341SAndroid Build Coastguard Workerdef_fn_48 \w, \h, flipadst, dct
1109*c0909341SAndroid Build Coastguard Workerdef_fn_48 \w, \h, flipadst, adst
1110*c0909341SAndroid Build Coastguard Workerdef_fn_48 \w, \h, flipadst, flipadst
1111*c0909341SAndroid Build Coastguard Workerdef_fn_48 \w, \h, identity, dct
1112*c0909341SAndroid Build Coastguard Workerdef_fn_48 \w, \h, adst, identity
1113*c0909341SAndroid Build Coastguard Workerdef_fn_48 \w, \h, flipadst, identity
1114*c0909341SAndroid Build Coastguard Workerdef_fn_48 \w, \h, identity, adst
1115*c0909341SAndroid Build Coastguard Workerdef_fn_48 \w, \h, identity, flipadst
1116*c0909341SAndroid Build Coastguard Worker.endm
1117*c0909341SAndroid Build Coastguard Worker
1117*c0909341SAndroid Build Coastguard Worker
// Emit the 16 entry points for each of the 4x8 and 8x4 block sizes.
1118*c0909341SAndroid Build Coastguard Workerdef_fns_48 4, 8
1119*c0909341SAndroid Build Coastguard Workerdef_fns_48 8, 4
1120*c0909341SAndroid Build Coastguard Worker
// inv_dct_4h_x16_neon: 16-point inverse DCT on sixteen vectors of 4 x int16.
//
// In:   d16-d31 hold inputs 0..15 in order.
// Out:  d16-d31 hold out0..out15 in order.
// Clobbers: r12 (coefficient pointer), q0-q1 (coefficients), q2-q4
// (temporaries), plus whatever the idct_4h_x8 macro clobbers.
//
// The odd-numbered input registers (d17, d19, ..., d31) go through the
// rotations and butterflies written out below; the even-numbered ones are
// handed to the 8-point idct_4h_x8 macro. The two halves are combined in the
// final add/sub block.
1121*c0909341SAndroid Build Coastguard Workerfunction inv_dct_4h_x16_neon, export=1
1122*c0909341SAndroid Build Coastguard Worker        movrel_local    r12, idct_coeffs
1123*c0909341SAndroid Build Coastguard Worker        vld1.16         {q0, q1}, [r12, :128]
1124*c0909341SAndroid Build Coastguard Worker
        // Odd half, stage 1: rotate the pairs (d17,d31), (d25,d23),
        // (d21,d27) and (d29,d19) by the coefficient pairs in d2/d3. Each
        // narrow overwrites an input whose products are already complete.
1125*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl     q2,  d17, d31, d2[0], d2[1]  // -> t8a
1126*c0909341SAndroid Build Coastguard Worker        vmull_vmlal     q3,  d17, d31, d2[1], d2[0]  // -> t15a
1127*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl     q4,  d25, d23, d2[2], d2[3]  // -> t9a
1128*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d17, q2,  #12                // t8a
1129*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d31, q3,  #12                // t15a
1130*c0909341SAndroid Build Coastguard Worker        vmull_vmlal     q2,  d25, d23, d2[3], d2[2]  // -> t14a
1131*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl     q3,  d21, d27, d3[0], d3[1]  // -> t10a
1132*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d23, q4,  #12                // t9a
1133*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d25, q2,  #12                // t14a
1134*c0909341SAndroid Build Coastguard Worker        vmull_vmlal     q4,  d21, d27, d3[1], d3[0]  // -> t13a
1135*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl     q2,  d29, d19, d3[2], d3[3]  // -> t11a
1136*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d21, q3,  #12                // t10a
1137*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d27, q4,  #12                // t13a
1138*c0909341SAndroid Build Coastguard Worker        vmull_vmlal     q3,  d29, d19, d3[3], d3[2]  // -> t12a
1139*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d19, q2,  #12                // t11a
1140*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d29, q3,  #12                // t12a
1141*c0909341SAndroid Build Coastguard Worker
        // Even half: 8-point transform of the even-numbered registers, in
        // place. The code below keeps using d0 without reloading it, so this
        // assumes the macro leaves d0 intact -- confirm against its
        // definition.
1142*c0909341SAndroid Build Coastguard Worker        idct_4h_x8      d16, d18, d20, d22, d24, d26, d28, d30
1143*c0909341SAndroid Build Coastguard Worker
        // Odd half, stage 2: saturating butterflies. t9 and t14 are parked in
        // d4/d5 for the rotation that follows.
1144*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d4,  d17, d23  // t9
1145*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d17, d17, d23  // t8
1146*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d5,  d31, d25  // t14
1147*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d31, d31, d25  // t15
1148*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d23, d19, d21  // t10
1149*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d19, d19, d21  // t11
1150*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d25, d29, d27  // t12
1151*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d29, d29, d27  // t13
1152*c0909341SAndroid Build Coastguard Worker
        // Odd half, stage 3: rotate (t14,t9) and (t13,t10) by d0[2]/d0[3].
1153*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl     q3,  d5,  d4,  d0[2], d0[3]  // -> t9a
1154*c0909341SAndroid Build Coastguard Worker        vmull_vmlal     q4,  d5,  d4,  d0[3], d0[2]  // -> t14a
1155*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d21, q3,  #12                // t9a
1156*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d27, q4,  #12                // t14a
1157*c0909341SAndroid Build Coastguard Worker
1158*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl     q3,  d29, d23, d0[2], d0[3]  // -> t13a
1159*c0909341SAndroid Build Coastguard Worker        vmull_vmlal     q4,  d29, d23, d0[3], d0[2]  // -> t10a
1160*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d29, q3,  #12                // t13a
        // The t10a product is negated at 32-bit precision before narrowing.
1161*c0909341SAndroid Build Coastguard Worker        vneg.s32        q4,  q4
1162*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d23, q4,  #12                // t10a
1163*c0909341SAndroid Build Coastguard Worker
        // Odd half, stage 4: saturating butterflies; t11a and t12a are parked
        // in d4/d5.
1164*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d4,  d17, d19  // t11a
1165*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d17, d17, d19  // t8a
1166*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d5,  d31, d25  // t12a
1167*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d31, d31, d25  // t15a
1168*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d19, d21, d23  // t9
1169*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d21, d21, d23  // t10
1170*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d25, d27, d29  // t13
1171*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d27, d27, d29  // t14
1172*c0909341SAndroid Build Coastguard Worker
        // Odd half, stage 5: sum/difference of (t12a,t11a) and (t13,t10),
        // each multiplied by d0[0]. q4 is used as an accumulator twice, so
        // t12 is narrowed into d7 before the second product is formed.
1173*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl     q3,  d5,  d4,  d0[0], d0[0]  // -> t11
1174*c0909341SAndroid Build Coastguard Worker        vmull_vmlal     q4,  d5,  d4,  d0[0], d0[0]  // -> t12
1175*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl     q2,  d25, d21, d0[0], d0[0]  // -> t10a
1176*c0909341SAndroid Build Coastguard Worker
1177*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d6,  q3,  #12  // t11
1178*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d7,  q4,  #12  // t12
1179*c0909341SAndroid Build Coastguard Worker        vmull_vmlal     q4,  d25, d21, d0[0], d0[0]  // -> t13a
1180*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d4,  q2,  #12  // t10a
1181*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d5,  q4,  #12  // t13a
1182*c0909341SAndroid Build Coastguard Worker
        // Final stage: combine the even-half results (in the even-numbered
        // registers) with the odd-half terms. out0, out8 and out6 are first
        // built in d8, d9 and d5, because their destination registers (d16,
        // d24, d22) are still needed as inputs further down; they are moved
        // into place by the vmov instructions.
1183*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d8,  d16, d31  // out0
1184*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d31, d16, d31  // out15
1185*c0909341SAndroid Build Coastguard Worker        vmov            d16, d8
1186*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d23, d30, d17  // out7
1187*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d9,  d30, d17  // out8
1188*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d17, d18, d27  // out1
1189*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d30, d18, d27  // out14
1190*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d18, d20, d5   // out2
1191*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d29, d20, d5   // out13
1192*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d5,  d28, d19  // out6
1193*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d25, d28, d19  // out9
1194*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d19, d22, d7   // out3
1195*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d28, d22, d7   // out12
1196*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d20, d24, d6   // out4
1197*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d27, d24, d6   // out11
1198*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d21, d26, d4   // out5
1199*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d26, d26, d4   // out10
1200*c0909341SAndroid Build Coastguard Worker        vmov            d24, d9
1201*c0909341SAndroid Build Coastguard Worker        vmov            d22, d5
1202*c0909341SAndroid Build Coastguard Worker
1203*c0909341SAndroid Build Coastguard Worker        bx              lr
1204*c0909341SAndroid Build Coastguard Workerendfunc
1205*c0909341SAndroid Build Coastguard Worker
1205*c0909341SAndroid Build Coastguard Worker
// iadst_16 o0 .. o15
//
// 16-point inverse ADST butterfly network operating on 4-lane signed 16-bit
// vectors.
//
// Input:    d16-d31 (the first stage pairs d31/d16, d29/d18, d27/d20,
//           d25/d22, d23/d24, d21/d26, d19/d28 and d17/d30).
// Output:   \o0-\o15 receive out0-out15.
// Clobbers: r12, q0-q4 (d0-d9) and all of d16-d31.
//
// The coefficient tables iadst16_coeffs and idct_coeffs are defined
// elsewhere in the file. Every product is accumulated in 32 bits and
// narrowed back to 16 bits with a saturating, rounding shift by 12
// (vqrshrn.s32 #12); additions and subtractions saturate as well.
// vmull_vmlal / vmull_vmlsl are helper macros defined elsewhere
// (presumably dst = a*c0 + b*c1 and dst = a*c0 - b*c1; confirm there).
//
// The final stage leaves out8 in d24 and out10 in d26 and only relocates
// them when \o8 is d23, so as written the macro supports exactly the two
// output orderings used in this file: d16..d31 (adst) and d31..d16
// (flipadst).
.macro iadst_16 o0, o1, o2, o3, o4, o5, o6, o7, o8, o9, o10, o11, o12, o13, o14, o15
        movrel_local    r12, iadst16_coeffs
        vld1.16         {q0, q1}, [r12, :128]
        // Point r12 at the second table already; it is loaded after stage 1.
        movrel_local    r12, idct_coeffs

        // Stage 1: rotate each input pair by the iadst16 coefficients in q0/q1.
        vmull_vmlal     q2,  d31, d16, d0[0], d0[1] // -> t0
        vmull_vmlsl     q3,  d31, d16, d0[1], d0[0] // -> t1
        vmull_vmlal     q4,  d29, d18, d0[2], d0[3] // -> t2
        vqrshrn.s32     d16, q2,  #12               // t0
        vqrshrn.s32     d31, q3,  #12               // t1
        vmull_vmlsl     q2,  d29, d18, d0[3], d0[2] // -> t3
        vmull_vmlal     q3,  d27, d20, d1[0], d1[1] // -> t4
        vqrshrn.s32     d18, q4,  #12               // t2
        vqrshrn.s32     d29, q2,  #12               // t3
        vmull_vmlsl     q4,  d27, d20, d1[1], d1[0] // -> t5
        vmull_vmlal     q2,  d25, d22, d1[2], d1[3] // -> t6
        vqrshrn.s32     d20, q3,  #12               // t4
        vqrshrn.s32     d27, q4,  #12               // t5
        vmull_vmlsl     q3,  d25, d22, d1[3], d1[2] // -> t7
        vmull_vmlal     q4,  d23, d24, d2[0], d2[1] // -> t8
        vqrshrn.s32     d22, q2,  #12               // t6
        vqrshrn.s32     d25, q3,  #12               // t7
        vmull_vmlsl     q2,  d23, d24, d2[1], d2[0] // -> t9
        vmull_vmlal     q3,  d21, d26, d2[2], d2[3] // -> t10
        vqrshrn.s32     d23, q4,  #12               // t8
        vqrshrn.s32     d24, q2,  #12               // t9
        vmull_vmlsl     q4,  d21, d26, d2[3], d2[2] // -> t11
        vmull_vmlal     q2,  d19, d28, d3[0], d3[1] // -> t12
        vqrshrn.s32     d21, q3,  #12               // t10
        vqrshrn.s32     d26, q4,  #12               // t11
        vmull_vmlsl     q3,  d19, d28, d3[1], d3[0] // -> t13
        vmull_vmlal     q4,  d17, d30, d3[2], d3[3] // -> t14
        vqrshrn.s32     d19, q2,  #12               // t12
        vqrshrn.s32     d28, q3,  #12               // t13
        vmull_vmlsl     q2,  d17, d30, d3[3], d3[2] // -> t15
        vqrshrn.s32     d17, q4,  #12               // t14
        vqrshrn.s32     d30, q2,  #12               // t15

        // q0/q1 (iadst16 coefficients) are dead from here on: q0 is reloaded
        // with idct_coeffs, d2/d3 become scratch registers.
        vld1.16         {q0}, [r12, :128]

        // Stage 2: saturating butterflies.
        vqsub.s16       d2,  d16, d23 // t8a
        vqadd.s16       d16, d16, d23 // t0a
        vqsub.s16       d3,  d31, d24 // t9a
        vqadd.s16       d31, d31, d24 // t1a
        vqadd.s16       d23, d18, d21 // t2a
        vqsub.s16       d18, d18, d21 // t10a
        vqadd.s16       d24, d29, d26 // t3a
        vqsub.s16       d29, d29, d26 // t11a
        vqadd.s16       d21, d20, d19 // t4a
        vqsub.s16       d20, d20, d19 // t12a
        vqadd.s16       d26, d27, d28 // t5a
        vqsub.s16       d27, d27, d28 // t13a
        vqadd.s16       d19, d22, d17 // t6a
        vqsub.s16       d22, d22, d17 // t14a
        vqadd.s16       d28, d25, d30 // t7a
        vqsub.s16       d25, d25, d30 // t15a

        // Stage 3: rotations of t8a..t15a using d1[0..3].
        vmull_vmlal     q2,  d2,  d3,  d1[1], d1[0] // -> t8
        vmull_vmlsl     q3,  d2,  d3,  d1[0], d1[1] // -> t9
        vmull_vmlal     q4,  d18, d29, d1[3], d1[2] // -> t10
        vqrshrn.s32     d17, q2,  #12               // t8
        vqrshrn.s32     d30, q3,  #12               // t9
        vmull_vmlsl     q2,  d18, d29, d1[2], d1[3] // -> t11
        vmull_vmlsl     q3,  d27, d20, d1[1], d1[0] // -> t12
        vqrshrn.s32     d18, q4,  #12               // t10
        vqrshrn.s32     d29, q2,  #12               // t11
        vmull_vmlal     q4,  d27, d20, d1[0], d1[1] // -> t13
        vmull_vmlsl     q2,  d25, d22, d1[3], d1[2] // -> t14
        vqrshrn.s32     d27, q3,  #12               // t12
        vqrshrn.s32     d20, q4,  #12               // t13
        vmull_vmlal     q3,  d25, d22, d1[2], d1[3] // -> t15
        vqrshrn.s32     d25, q2,  #12               // t14
        vqrshrn.s32     d22, q3,  #12               // t15

        // Stage 4: saturating butterflies.
        vqsub.s16       d2,  d16, d21 // t4
        vqadd.s16       d16, d16, d21 // t0
        vqsub.s16       d3,  d31, d26 // t5
        vqadd.s16       d31, d31, d26 // t1
        vqadd.s16       d21, d23, d19 // t2
        vqsub.s16       d23, d23, d19 // t6
        vqadd.s16       d26, d24, d28 // t3
        vqsub.s16       d24, d24, d28 // t7
        vqadd.s16       d19, d17, d27 // t8a
        vqsub.s16       d17, d17, d27 // t12a
        vqadd.s16       d28, d30, d20 // t9a
        vqsub.s16       d30, d30, d20 // t13a
        vqadd.s16       d27, d18, d25 // t10a
        vqsub.s16       d18, d18, d25 // t14a
        vqadd.s16       d20, d29, d22 // t11a
        vqsub.s16       d29, d29, d22 // t15a

        // Stage 5: rotations using d0[2]/d0[3].
        vmull_vmlal     q2,  d2,  d3,  d0[3], d0[2] // -> t4a
        vmull_vmlsl     q3,  d2,  d3,  d0[2], d0[3] // -> t5a
        vmull_vmlsl     q4,  d24, d23, d0[3], d0[2] // -> t6a
        vqrshrn.s32     d22, q2,  #12               // t4a
        vqrshrn.s32     d25, q3,  #12               // t5a
        vmull_vmlal     q2,  d24, d23, d0[2], d0[3] // -> t7a
        vmull_vmlal     q3,  d17, d30, d0[3], d0[2] // -> t12
        vqrshrn.s32     d24, q4,  #12               // t6a
        vqrshrn.s32     d23, q2,  #12               // t7a
        vmull_vmlsl     q4,  d17, d30, d0[2], d0[3] // -> t13
        vmull_vmlsl     q2,  d29, d18, d0[3], d0[2] // -> t14
        vqrshrn.s32     d17, q3,  #12               // t12
        vmull_vmlal     q3,  d29, d18, d0[2], d0[3] // -> t15
        vqrshrn.s32     d29, q4,  #12               // t13
        vqrshrn.s32     d30, q2,  #12               // t14
        vqrshrn.s32     d18, q3,  #12               // t15

        // Stage 6: final butterflies. Outputs with odd index are negated.
        vqsub.s16       d2,  d16, d21 // t2a
.ifc \o0, d16
        vqadd.s16       \o0, d16, d21 // out0
        vqsub.s16       d21, d31, d26 // t3a
        vqadd.s16       \o15,d31, d26 // out15
.else
        // \o0 is not d16 here (d31 for flipadst) and is still read by the
        // next two instructions, so out0 is parked in d4 until they are done.
        vqadd.s16       d4,  d16, d21 // out0
        vqsub.s16       d21, d31, d26 // t3a
        vqadd.s16       \o15,d31, d26 // out15
        vmov            \o0, d4
.endif
        vqneg.s16       \o15, \o15    // out15

        vqsub.s16       d3,  d29, d18 // t15a
        vqadd.s16       \o13,d29, d18 // out13
        vqadd.s16       \o2, d17, d30 // out2
        vqsub.s16       d26, d17, d30 // t14a
        vqneg.s16       \o13,\o13     // out13

        vqadd.s16       \o1, d19, d27 // out1
        vqsub.s16       d27, d19, d27 // t10
        vqadd.s16       \o14,d28, d20 // out14
        vqsub.s16       d20, d28, d20 // t11
        vqneg.s16       \o1, \o1      // out1

        vqadd.s16       \o3, d22, d24 // out3
        vqsub.s16       d22, d22, d24 // t6
        vqadd.s16       \o12,d25, d23 // out12
        vqsub.s16       d23, d25, d23 // t7
        vqneg.s16       \o3, \o3      // out3

        // Stage 7: the middle eight outputs are sums/differences scaled by
        // d0[0]. The register pairs in the comments are the destinations for
        // the (adst or flipadst) output ordering.
        vmull_vmlsl     q12, d2,  d21, d0[0], d0[0] // -> out8 (d24 or d23)
        vmull_vmlal     q2,  d2,  d21, d0[0], d0[0] // -> out7 (d23 or d24)
        vmull_vmlal     q3,  d26, d3,  d0[0], d0[0] // -> out5 (d21 or d26)

        vqrshrn.s32     d24, q12, #12 // out8
        vqrshrn.s32     d4,  q2,  #12 // out7
        vqrshrn.s32     d5,  q3,  #12 // out5
        vmull_vmlsl     q4,  d26, d3,  d0[0], d0[0] // -> out10 (d26 or d21)
        vmull_vmlal     q1,  d22, d23, d0[0], d0[0] // -> out4 (d20 or d27)
        vqrshrn.s32     d26, q4,  #12 // out10

        vmull_vmlsl     q4,  d22, d23, d0[0], d0[0] // -> out11 (d27 or d20)
        vmull_vmlal     q11, d27, d20, d0[0], d0[0] // -> out6 (d22 or d25)
        vmull_vmlsl     q3,  d27, d20, d0[0], d0[0] // -> out9 (d25 or d22)

        vqrshrn.s32     \o4, q1,  #12 // out4
        vqrshrn.s32     d7,  q3,  #12 // out9
        vqrshrn.s32     d6,  q4,  #12 // out11
        vqrshrn.s32     \o6, q11, #12 // out6

        // out8/out10 were produced in d24/d26, which is already the right
        // place for the adst ordering; only the flipped ordering moves them.
.ifc \o8, d23
        vmov            \o8, d24
        vmov            \o10,d26
.endif

        vqneg.s16       \o7, d4  // out7
        vqneg.s16       \o5, d5  // out5
        vqneg.s16       \o11,d6  // out11
        vqneg.s16       \o9, d7  // out9
.endm
1375*c0909341SAndroid Build Coastguard Worker
// 16-point inverse ADST on 4-lane 16-bit vectors.
// In/out: d16-d31, with out0..out15 written to d16..d31 in that order.
// Clobbers r12 and q0-q4 (see iadst_16). Returns with bx lr.
function inv_adst_4h_x16_neon, export=1
        iadst_16        d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31
        bx              lr
endfunc
1380*c0909341SAndroid Build Coastguard Worker
// Same transform as inv_adst_4h_x16_neon, but with the output order
// reversed: out0..out15 are written to d31..d16.
// Clobbers r12 and q0-q4 (see iadst_16). Returns with bx lr.
function inv_flipadst_4h_x16_neon, export=1
        iadst_16        d31, d30, d29, d28, d27, d26, d25, d24, d23, d22, d21, d20, d19, d18, d17, d16
        bx              lr
endfunc
1385*c0909341SAndroid Build Coastguard Worker
// 16-point identity "transform": scales every 16-bit lane of q8-q15 in
// place.
//
// For each lane x the result is sat(sat(2*x) + vqrdmulh(x, c)) with
// c = 2*(5793-4096)*8, i.e. approximately x * 2*5793/4096.
//
// Clobbers r12, d0 and q1. Returns with bx lr.
function inv_identity_4h_x16_neon, export=1
        movw            r12, #2*(5793-4096)*8
        vdup.16         d0,  r12
.irp i, q8, q9, q10, q11, q12, q13, q14, q15
        vqrdmulh.s16    q1,  \i,  d0[0]         // fractional part of the scale
        vqadd.s16       \i,  \i,  \i            // 2*x
        vqadd.s16       \i,  \i,  q1
.endr
        bx              lr
endfunc
1396*c0909341SAndroid Build Coastguard Worker
// identity_4x16_shift2 c
//
// For each of q8-q15, in place:
//     \i = vrhadd(\i, vqrdmulh(\i, \c) >> 1)
// where the >> 1 is a truncating arithmetic shift (vshr) and vrhadd is the
// rounding halving add.
// \c is a 16-bit scalar operand such as d0[0]. Clobbers q2.
.macro identity_4x16_shift2 c
.irp i, q8, q9, q10, q11, q12, q13, q14, q15
        vqrdmulh.s16    q2,  \i,  \c
        vshr.s16        q2,  q2,  #1
        vrhadd.s16      \i,  \i,  q2
.endr
.endm
1404*c0909341SAndroid Build Coastguard Worker
// identity_4x16_shift1 c
//
// For each of q8-q15, in place:
//     \i = sat(\i + vrshr(vqrdmulh(\i, \c), 1))
// where vrshr is the rounding shift right.
// \c is a 16-bit scalar operand such as d0[0]. Clobbers q2.
.macro identity_4x16_shift1 c
.irp i, q8, q9, q10, q11, q12, q13, q14, q15
        vqrdmulh.s16    q2,  \i,  \c
        vrshr.s16       q2,  q2,  #1
        vqadd.s16       \i,  \i,  q2
.endr
.endm
1412*c0909341SAndroid Build Coastguard Worker
// identity_8x8_shift1 c
//
// Alias for identity_4x16_shift1: both operate on the same eight
// q registers (q8-q15), so the expansion is identical. Clobbers q2.
.macro identity_8x8_shift1 c
        identity_4x16_shift1 \c
.endm
1416*c0909341SAndroid Build Coastguard Worker
// identity_8x8 c
//
// For each of q8-q15, in place:
//     \i = sat(sat(2 * \i) + vqrdmulh(\i, \c))
// \c is a 16-bit scalar operand such as d0[0]. Clobbers q2.
.macro identity_8x8 c
.irp i, q8, q9, q10, q11, q12, q13, q14, q15
        vqrdmulh.s16    q2,  \i,  \c
        vqadd.s16       \i,  \i,  \i
        vqadd.s16       \i,  \i,  q2
.endr
.endm
1424*c0909341SAndroid Build Coastguard Worker
// def_horz_16 scale=0, identity=0, shift=2, suffix
//
// Emits function inv_txfm_horz\suffix\()_16x4_neon, the first (horizontal)
// pass over a 16x4 strip of coefficients.
//
// Macro parameters:
//   scale    - non-zero: multiply the input by 2896*8 through scale_input
//              (macro defined elsewhere) before transforming.
//   identity - non-zero: apply the identity scaling inline instead of
//              calling the transform in r4.
//   shift    - identity == 0: rounding right shift applied to the result.
//              identity != 0: only selects the identity macro
//              (-2 -> identity_4x16_shift2, otherwise identity_4x16_shift1).
//   suffix   - inserted into the generated function name.
//
// Registers used by the generated function:
//   r4 - 1-D transform to call (identity == 0 only)
//   r6 - output pointer; advanced by the 16 post-incrementing 8-byte stores
//   r7 - input pointer; sixteen 4x16-bit rows are read r8 bytes apart and
//        each is overwritten with zero as it is read
//   r8 - input stride in bytes
//   Clobbers r12, d0/d1 (depending on the variant), d7, q8-q15 and whatever
//   the callee in r4 clobbers.
//
// Only the instantiation with identity == 0 and shift != 1 emits the shared
// tail L(horz_16x4_epilog) (transpose, store, pop {pc}); every other
// instantiation branches into it. This is safe because all variants push
// exactly {lr}.
.macro def_horz_16 scale=0, identity=0, shift=2, suffix
function inv_txfm_horz\suffix\()_16x4_neon
        push            {lr}
        vmov.i16        d7,  #0                 // zero used to clear the input
.if \identity
        movw            r12, #2*(5793-4096)*8
        vdup.16         d0,  r12
.endif
.if \scale
        movw            r12, #2896*8
        vdup.16         d1,  r12
.endif
.irp i, d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31
        vld1.16         {\i}, [r7, :64]
        vst1.16         {d7}, [r7, :64], r8     // clear what was just read
.endr
.if \scale
        scale_input     d1[0], q8,  q9, q10, q11, q12, q13, q14, q15
.endif
.if \identity
.if \shift == -2
        identity_4x16_shift2 d0[0]
.else
        identity_4x16_shift1 d0[0]
.endif
        b               L(horz_16x4_epilog)
.else
        blx             r4
.irp i, q8, q9, q10, q11, q12, q13, q14, q15
        vrshr.s16       \i,  \i,  #\shift
.endr
.if \shift == 1
        b               L(horz_16x4_epilog)
.else
L(horz_16x4_epilog):
        transpose_4x4h  q8,  q9,  d16, d17, d18, d19
        transpose_4x4h  q10, q11, d20, d21, d22, d23
        transpose_4x4h  q12, q13, d24, d25, d26, d27
        transpose_4x4h  q14, q15, d28, d29, d30, d31

        // Store order interleaves the four transposed 4x4 blocks.
.irp i, d16, d20, d24, d28, d17, d21, d25, d29, d18, d22, d26, d30, d19, d23, d27, d31
        vst1.16         {\i}, [r6, :64]!
.endr

        pop             {pc}
.endif
.endif
endfunc
.endm
1474*c0909341SAndroid Build Coastguard Worker
// Instantiate the four horizontal 16x4 passes:
//   inv_txfm_horz_scale_identity_16x4_neon
//   inv_txfm_horz_identity_16x4_neon
//   inv_txfm_horz_scale_16x4_neon
//   inv_txfm_horz_16x4_neon
// The last one (identity=0, shift=2) is the only one that defines
// L(horz_16x4_epilog); the first three branch forward into it.
def_horz_16 scale=1, identity=1, shift=-1, suffix=_scale_identity
def_horz_16 scale=0, identity=1, shift=-2, suffix=_identity
def_horz_16 scale=1, identity=0, shift=1, suffix=_scale
def_horz_16 scale=0, identity=0, shift=2
1479*c0909341SAndroid Build Coastguard Worker
// Second (vertical) pass for one 4-column by 16-row strip.
//
//   r5 - 1-D transform to call; it operates in place on d16-d31
//   r6 - first argument to load_add_store_4x16
//        (set from the destination pointer by the caller in this file)
//   r7 - pointer to the intermediate coefficients; sixteen 4x16-bit rows
//        are loaded r8 bytes apart. After the loads r7 is passed as the
//        second argument to load_add_store_4x16.
//   r8 - row stride of the intermediate buffer, in bytes
//
// load_add_store_4x16 is defined elsewhere in the file (presumably it adds
// the residual to the destination pixels; confirm there).
function inv_txfm_add_vert_4x16_neon
        push            {lr}
.irp i, d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31
        vld1.16         {\i}, [r7, :64], r8
.endr
        blx             r5
        load_add_store_4x16 r6, r7
        pop             {pc}
endfunc
1489*c0909341SAndroid Build Coastguard Worker
// Shared body of all 16x16 inverse transforms. It is entered by a plain
// branch from the def_fn_16x16 entry points, which have already done
// push {r4-r11,lr} and vpush {q4}; the matching pops are at the end here.
//
//   r0  - destination pointer
//   r2  - coefficient pointer
//   r3  - eob; compared (signed) against the thresholds read via r10
//   r4  - first-pass 1-D transform (used by the function in r9)
//   r5  - second-pass 1-D transform (used by inv_txfm_add_vert_4x16_neon)
//   r9  - horizontal-pass function, called once per 4-row strip
//   r10 - pointer to a table of 16-bit eob thresholds
//
// A 512-byte scratch buffer (16x16 16-bit values) is reserved on the stack
// between the two passes.
function inv_txfm_add_16x16_neon
        sub_sp_align    512
        ldrh            r11, [r10], #2
        // First pass: strips of 4 rows. Before each strip after the first,
        // stop if eob is below the current threshold and zero-fill the rest.
.irp i, 0, 4, 8, 12
        add             r6,  sp,  #(\i*16*2)
.if \i > 0
        mov             r8,  #(16 - \i)         // rows left to zero if we bail out
        cmp             r3,  r11
        blt             1f
.if \i < 12
        ldrh            r11, [r10], #2
.endif
.endif
        add             r7,  r2,  #(\i*2)
        mov             r8,  #16*2
        blx             r9
.endr
        b               3f
1:
        vmov.i16        q2,  #0
        vmov.i16        q3,  #0
2:
        // Each iteration clears 4 rows: 4 stores * 32 bytes = 4 * 16 * 2 bytes.
        subs            r8,  r8,  #4
.rept 4
        vst1.16         {q2, q3}, [r6, :128]!
.endr
        bgt             2b
3:
        // Second pass: four strips of 4 columns each.
.irp i, 0, 4, 8, 12
        add             r6,  r0,  #(\i)
        add             r7,  sp,  #(\i*2)
        mov             r8,  #32
        bl              inv_txfm_add_vert_4x16_neon
.endr

        add_sp_align    512
        vpop            {q4}
        pop             {r4-r11,pc}
endfunc
1529*c0909341SAndroid Build Coastguard Worker
// eob thresholds read through r10 by inv_txfm_add_16x16_neon. Only the
// first three entries are consulted there, before the 2nd, 3rd and 4th
// strips of 4 rows.
const eob_16x16
        .short 10, 36, 78, 256
endconst
1533*c0909341SAndroid Build Coastguard Worker
// eob thresholds selected by def_fn_16x16 when exactly one of the two
// transforms is the identity. Same layout as eob_16x16.
const eob_16x16_identity
        .short 4, 8, 12, 256
endconst
1537*c0909341SAndroid Build Coastguard Worker
// def_fn_16x16 txfm1, txfm2
//
// Emits the exported entry point
// inv_txfm_add_\txfm1\()_\txfm2\()_16x16_8bpc_neon.
//
// The entry point saves r4-r11, lr and q4, then sets up:
//   r9  - horizontal-pass function (identity variant when txfm1 is identity)
//   r4  - 1-D function for txfm1 (not set for identity, which does not use it)
//   r5  - 1-D function for txfm2
//   r10 - eob threshold table: eob_16x16_identity when exactly one of
//         txfm1/txfm2 is identity, eob_16x16 otherwise
// and tail-branches to inv_txfm_add_16x16_neon, which restores the saved
// registers and returns.
//
// For dct_dct, the idct_dc macro (defined elsewhere) is expanded before the
// pushes (presumably a DC-only fast path; confirm at its definition).
.macro def_fn_16x16 txfm1, txfm2
function inv_txfm_add_\txfm1\()_\txfm2\()_16x16_8bpc_neon, export=1
.ifc \txfm1\()_\txfm2, dct_dct
        idct_dc         16,  16,  2
.endif
        push            {r4-r11,lr}
        vpush           {q4}
.ifc \txfm1, identity
        movrel_local    r9,  inv_txfm_horz_identity_16x4_neon
.else
        movrel_local    r9,  inv_txfm_horz_16x4_neon
        movrel_local    r4,  inv_\txfm1\()_4h_x16_neon
.endif
        movrel_local    r5,  inv_\txfm2\()_4h_x16_neon
.ifc \txfm1, identity
.ifc \txfm2, identity
        movrel_local    r10, eob_16x16
.else
        movrel_local    r10, eob_16x16_identity
.endif
.else
.ifc \txfm2, identity
        movrel_local    r10, eob_16x16_identity
.else
        movrel_local    r10, eob_16x16
.endif
.endif
        b               inv_txfm_add_16x16_neon
endfunc
.endm
1568*c0909341SAndroid Build Coastguard Worker
// Instantiate the 16x16 entry points, one per (txfm1, txfm2) combination
// listed here. Identity is only paired with dct or with itself.
def_fn_16x16 dct, dct
def_fn_16x16 identity, identity
def_fn_16x16 dct, adst
def_fn_16x16 dct, flipadst
def_fn_16x16 dct, identity
def_fn_16x16 adst, dct
def_fn_16x16 adst, adst
def_fn_16x16 adst, flipadst
def_fn_16x16 flipadst, dct
def_fn_16x16 flipadst, adst
def_fn_16x16 flipadst, flipadst
def_fn_16x16 identity, dct
1581*c0909341SAndroid Build Coastguard Worker
// def_fn_416_base variant
//
// Emits the two shared bodies (declared without export=1) behind the
// 16x4 and 4x16 inverse transform entry points:
//   inv_txfm_(variant)add_16x4_neon
//   inv_txfm_(variant)add_4x16_neon
// variant is either empty (first pass is an indirect call through r4) or
// identity_ (first pass is done inline with the identity_*_shift1 macros).
//
// Register usage, as visible in the code below and in def_fn_416:
//   r0  - base address handed to the load_add_store_* macros
//         (presumably the destination pixels - TODO confirm in the macros)
//   r2  - coefficient buffer; every chunk that is loaded is overwritten
//         with zeros
//   r3  - compared against r10 in the 4x16 body (presumably the eob)
//   r4  - first pass transform (not called by the identity_ variant)
//   r5  - second pass transform
//   r10 - threshold for r3 (4x16 body only)
// Both bodies finish with vpop {q4-q7} / pop {r4-r11,pc}, matching the
// push / vpush done in def_fn_416.
//
// The L(itx_16x4_epilog) and L(itx_4x16_epilog) labels are emitted only by
// the non-identity expansion; the identity_ expansion branches to them, so
// both expansions must be present in the file.
1582*c0909341SAndroid Build Coastguard Worker.macro def_fn_416_base variant
1583*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_\variant\()add_16x4_neon
1584*c0909341SAndroid Build Coastguard Worker
1585*c0909341SAndroid Build Coastguard Worker.ifc \variant, identity_
        // Identity first pass: read 16 chunks of 64 bits from r2, clearing
        // each chunk right after it is read. Consecutive chunks fill the
        // low halves of q8-q11, then their high halves, then the same
        // pattern for q12-q15.
1586*c0909341SAndroid Build Coastguard Worker        vmov.i16        d4,  #0
1587*c0909341SAndroid Build Coastguard Worker.irp i, d16, d18, d20, d22
1588*c0909341SAndroid Build Coastguard Worker        vld1.16         {\i}, [r2, :64]
1589*c0909341SAndroid Build Coastguard Worker        vst1.16         {d4}, [r2, :64]!
1590*c0909341SAndroid Build Coastguard Worker.endr
1591*c0909341SAndroid Build Coastguard Worker.irp i, d17, d19, d21, d23
1592*c0909341SAndroid Build Coastguard Worker        vld1.16         {\i}, [r2, :64]
1593*c0909341SAndroid Build Coastguard Worker        vst1.16         {d4}, [r2, :64]!
1594*c0909341SAndroid Build Coastguard Worker.endr
        // Scale constant broadcast into d0 for identity_4x16_shift1.
        // 5793/4096 is approximately sqrt(2); how the factor is applied is
        // defined inside that macro (not visible in this block).
1595*c0909341SAndroid Build Coastguard Worker        movw            r12, #2*(5793-4096)*8
1596*c0909341SAndroid Build Coastguard Worker        vdup.16         d0,  r12
1597*c0909341SAndroid Build Coastguard Worker.irp i, d24, d26, d28, d30
1598*c0909341SAndroid Build Coastguard Worker        vld1.16         {\i}, [r2, :64]
1599*c0909341SAndroid Build Coastguard Worker        vst1.16         {d4}, [r2, :64]!
1600*c0909341SAndroid Build Coastguard Worker.endr
1601*c0909341SAndroid Build Coastguard Worker.irp i, d25, d27, d29, d31
1602*c0909341SAndroid Build Coastguard Worker        vld1.16         {\i}, [r2, :64]
1603*c0909341SAndroid Build Coastguard Worker        vst1.16         {d4}, [r2, :64]!
1604*c0909341SAndroid Build Coastguard Worker.endr
1605*c0909341SAndroid Build Coastguard Worker
1606*c0909341SAndroid Build Coastguard Worker        identity_4x16_shift1 d0[0]
1607*c0909341SAndroid Build Coastguard Worker
        // Continue in the second pass code of the non-identity expansion.
1608*c0909341SAndroid Build Coastguard Worker        b               L(itx_16x4_epilog)
1609*c0909341SAndroid Build Coastguard Worker.else
        // Generic first pass: load 128 bytes from r2 into d16-d31 in
        // memory order and clear the source with the zeroed q2/q3.
1610*c0909341SAndroid Build Coastguard Worker        vmov.i16        q2,  #0
1611*c0909341SAndroid Build Coastguard Worker        vmov.i16        q3,  #0
1612*c0909341SAndroid Build Coastguard Worker        vld1.16         {d16, d17, d18, d19}, [r2, :128]
1613*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2,  q3}, [r2, :128]!
1614*c0909341SAndroid Build Coastguard Worker        vld1.16         {d20, d21, d22, d23}, [r2, :128]
1615*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2,  q3}, [r2, :128]!
1616*c0909341SAndroid Build Coastguard Worker        vld1.16         {d24, d25, d26, d27}, [r2, :128]
1617*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2,  q3}, [r2, :128]!
1618*c0909341SAndroid Build Coastguard Worker        vld1.16         {d28, d29, d30, d31}, [r2, :128]
1619*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2,  q3}, [r2, :128]!
1620*c0909341SAndroid Build Coastguard Worker
        // First pass transform selected by def_fn_416.
1621*c0909341SAndroid Build Coastguard Worker        blx             r4
1622*c0909341SAndroid Build Coastguard Worker
        // Regroup the sixteen d outputs into q pairs. After these swaps
        // q8-q11 hold (d16,d20) (d17,d21) (d18,d22) (d19,d23) of the
        // transform output and q12-q15 hold (d24,d28) (d25,d29) (d26,d30)
        // (d27,d31).
1623*c0909341SAndroid Build Coastguard Worker        vswp            d17, d20
1624*c0909341SAndroid Build Coastguard Worker        vswp            d19, d22
1625*c0909341SAndroid Build Coastguard Worker        vswp            d18, d20
1626*c0909341SAndroid Build Coastguard Worker        vswp            d19, d21
1627*c0909341SAndroid Build Coastguard Worker        vswp            d25, d28
1628*c0909341SAndroid Build Coastguard Worker        vswp            d27, d30
1629*c0909341SAndroid Build Coastguard Worker        vswp            d26, d28
1630*c0909341SAndroid Build Coastguard Worker        vswp            d27, d29
        // Rounding right shift by one bit on every first pass output.
1631*c0909341SAndroid Build Coastguard Worker.irp i, q8, q9, q10, q11, q12, q13, q14, q15
1632*c0909341SAndroid Build Coastguard Worker        vrshr.s16       \i,  \i,  #1
1633*c0909341SAndroid Build Coastguard Worker.endr
1634*c0909341SAndroid Build Coastguard Worker
        // Second pass, shared with the identity_ expansion. q8-q11 are
        // processed and added at r0, then q12-q15 are copied into q8-q11
        // and processed the same way at r0 + 8 (presumably the right-hand
        // 8 pixels of the 8bpc block - TODO confirm).
1635*c0909341SAndroid Build Coastguard WorkerL(itx_16x4_epilog):
1636*c0909341SAndroid Build Coastguard Worker        transpose_4x8h  q8,  q9,  q10, q11
1637*c0909341SAndroid Build Coastguard Worker        blx             r5
1638*c0909341SAndroid Build Coastguard Worker        mov             r6,  r0
1639*c0909341SAndroid Build Coastguard Worker        load_add_store_8x4 r6, r7
1640*c0909341SAndroid Build Coastguard Worker
1641*c0909341SAndroid Build Coastguard Worker        vmov            q8,  q12
1642*c0909341SAndroid Build Coastguard Worker        vmov            q9,  q13
1643*c0909341SAndroid Build Coastguard Worker        vmov            q10, q14
1644*c0909341SAndroid Build Coastguard Worker        vmov            q11, q15
1645*c0909341SAndroid Build Coastguard Worker
1646*c0909341SAndroid Build Coastguard Worker        transpose_4x8h  q8,  q9,  q10, q11
1647*c0909341SAndroid Build Coastguard Worker        blx             r5
1648*c0909341SAndroid Build Coastguard Worker        add             r6,  r0,  #8
1649*c0909341SAndroid Build Coastguard Worker        load_add_store_8x4 r6, r7
1650*c0909341SAndroid Build Coastguard Worker
        // Undo the vpush / push from the exported entry point and return.
1651*c0909341SAndroid Build Coastguard Worker        vpop            {q4-q7}
1652*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}
1653*c0909341SAndroid Build Coastguard Worker.endif
1654*c0909341SAndroid Build Coastguard Workerendfunc
1655*c0909341SAndroid Build Coastguard Worker
1656*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_\variant\()add_4x16_neon
1657*c0909341SAndroid Build Coastguard Worker        vmov.i16        q2,  #0
1658*c0909341SAndroid Build Coastguard Worker
        // r11 = 32 byte step between loads. The buffer is read as 16 byte
        // chunks: the ones at offset 16 (via r6) go through the first pass
        // here, the ones at offset 0 (via r2) are handled from label 2.
        // When r3 is below r10 the offset 16 chunks are not read at all and
        // q12-q15 are simply zeroed at label 1.
1659*c0909341SAndroid Build Coastguard Worker        mov             r11, #32
1660*c0909341SAndroid Build Coastguard Worker        cmp             r3,  r10
1661*c0909341SAndroid Build Coastguard Worker        blt             1f
1662*c0909341SAndroid Build Coastguard Worker
1663*c0909341SAndroid Build Coastguard Worker        add             r6,  r2,  #16
1664*c0909341SAndroid Build Coastguard Worker.ifc \variant, identity_
1665*c0909341SAndroid Build Coastguard Worker.irp i, q12, q13, q14, q15
1666*c0909341SAndroid Build Coastguard Worker        vld1.16         {\i}, [r6, :128]
1667*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2}, [r6, :128], r11
1668*c0909341SAndroid Build Coastguard Worker.endr
        // Scale constant for identity_8x4_shift1; 5793/4096 is
        // approximately sqrt(2). The macro itself is defined elsewhere.
1669*c0909341SAndroid Build Coastguard Worker        movw            r12, #(5793-4096)*8
1670*c0909341SAndroid Build Coastguard Worker        vdup.16         d0,  r12
1671*c0909341SAndroid Build Coastguard Worker        identity_8x4_shift1 q12, q13, q14, q15, d0[0]
1672*c0909341SAndroid Build Coastguard Worker.else
1673*c0909341SAndroid Build Coastguard Worker.irp i, q8,  q9,  q10, q11
1674*c0909341SAndroid Build Coastguard Worker        vld1.16         {\i}, [r6, :128]
1675*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2}, [r6, :128], r11
1676*c0909341SAndroid Build Coastguard Worker.endr
1677*c0909341SAndroid Build Coastguard Worker        blx             r4
        // The rounding shift also moves the results from q8-q11 into
        // q12-q15, freeing q8-q11 for the chunks loaded at label 2.
1678*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q12, q8,  #1
1679*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q13, q9,  #1
1680*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q14, q10, #1
1681*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q15, q11, #1
1682*c0909341SAndroid Build Coastguard Worker.endif
        // After the transpose, gather what were the low halves of q12-q15
        // into d24-d27 and the high halves into d28-d31.
1683*c0909341SAndroid Build Coastguard Worker        transpose_4x8h  q12, q13, q14, q15
1684*c0909341SAndroid Build Coastguard Worker        vswp            d27, d29
1685*c0909341SAndroid Build Coastguard Worker        vswp            d26, d28
1686*c0909341SAndroid Build Coastguard Worker        vswp            d27, d30
1687*c0909341SAndroid Build Coastguard Worker        vswp            d25, d28
1688*c0909341SAndroid Build Coastguard Worker
1689*c0909341SAndroid Build Coastguard Worker        b               2f
1690*c0909341SAndroid Build Coastguard Worker1:
        // Skipped input: use zeros in place of the first pass output.
1691*c0909341SAndroid Build Coastguard Worker.irp i, q12, q13, q14, q15
1692*c0909341SAndroid Build Coastguard Worker        vmov.i16        \i,  #0
1693*c0909341SAndroid Build Coastguard Worker.endr
1694*c0909341SAndroid Build Coastguard Worker2:
        // q2 is zeroed again before it is used to clear the buffer;
        // presumably the call through r4 above may have clobbered it
        // (TODO confirm against the transform functions).
1695*c0909341SAndroid Build Coastguard Worker        vmov.i16        q2,  #0
1696*c0909341SAndroid Build Coastguard Worker.irp i, q8,  q9,  q10, q11
1697*c0909341SAndroid Build Coastguard Worker        vld1.16         {\i}, [r2, :128]
1698*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2}, [r2, :128], r11
1699*c0909341SAndroid Build Coastguard Worker.endr
1700*c0909341SAndroid Build Coastguard Worker.ifc \variant, identity_
1701*c0909341SAndroid Build Coastguard Worker        movw            r12, #(5793-4096)*8
1702*c0909341SAndroid Build Coastguard Worker        vdup.16         d0,  r12
1703*c0909341SAndroid Build Coastguard Worker        identity_8x4_shift1 q8,  q9,  q10, q11, d0[0]
1704*c0909341SAndroid Build Coastguard Worker
        // Continue in the second pass code of the non-identity expansion.
1705*c0909341SAndroid Build Coastguard Worker        b               L(itx_4x16_epilog)
1706*c0909341SAndroid Build Coastguard Worker.else
1707*c0909341SAndroid Build Coastguard Worker        blx             r4
1708*c0909341SAndroid Build Coastguard Worker.irp i, q8, q9, q10, q11
1709*c0909341SAndroid Build Coastguard Worker        vrshr.s16       \i,  \i,  #1
1710*c0909341SAndroid Build Coastguard Worker.endr
        // Second pass, shared with the identity_ expansion. Same
        // transpose and half-gathering as done for q12-q15 above, now on
        // q8-q11 (low halves end up in d16-d19, high halves in d20-d23).
1711*c0909341SAndroid Build Coastguard WorkerL(itx_4x16_epilog):
1712*c0909341SAndroid Build Coastguard Worker        transpose_4x8h  q8,  q9,  q10, q11
1713*c0909341SAndroid Build Coastguard Worker        vswp            d19, d21
1714*c0909341SAndroid Build Coastguard Worker        vswp            d18, d20
1715*c0909341SAndroid Build Coastguard Worker        vswp            d19, d22
1716*c0909341SAndroid Build Coastguard Worker        vswp            d17, d20
1717*c0909341SAndroid Build Coastguard Worker
        // Second pass transform selected by def_fn_416.
1718*c0909341SAndroid Build Coastguard Worker        blx             r5
1719*c0909341SAndroid Build Coastguard Worker
1720*c0909341SAndroid Build Coastguard Worker        load_add_store_4x16 r0, r6
1721*c0909341SAndroid Build Coastguard Worker
        // Undo the vpush / push from the exported entry point and return.
1722*c0909341SAndroid Build Coastguard Worker        vpop            {q4-q7}
1723*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}
1724*c0909341SAndroid Build Coastguard Worker.endif
1725*c0909341SAndroid Build Coastguard Workerendfunc
1726*c0909341SAndroid Build Coastguard Worker.endm
1727*c0909341SAndroid Build Coastguard Worker
// Emit both variants of the 16x4 / 4x16 bodies. The identity_ expansion
// branches to L(itx_16x4_epilog) and L(itx_4x16_epilog), which are defined
// only by the expansion with an empty variant, so both lines are required.
1728*c0909341SAndroid Build Coastguard Workerdef_fn_416_base identity_
1729*c0909341SAndroid Build Coastguard Workerdef_fn_416_base
1730*c0909341SAndroid Build Coastguard Worker
// def_fn_416 w, h, txfm1, txfm2, eob_half
//
// Emits one exported 8bpc entry point for a 4x16 or 16x4 inverse transform
// with the given first (txfm1) and second (txfm2) transform types.
// The entry point saves r4-r11, lr and q4-q7, selects the transform
// functions, and then branches to the shared body generated by
// def_fn_416_base, which restores the registers and returns.
//   r4  - first pass function; left unset when txfm1 is identity, in which
//         case the identity_ body is used instead
//   r5  - second pass function
//   r10 - eob_half, loaded only when w == 4 (the 16x4 body does not read
//         r10)
1731*c0909341SAndroid Build Coastguard Worker.macro def_fn_416 w, h, txfm1, txfm2, eob_half
1732*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_\txfm1\()_\txfm2\()_\w\()x\h\()_8bpc_neon, export=1
        // Only the dct_dct combination runs idct_dc first (presumably a
        // fast path for DC-only input; the macro is defined elsewhere).
        // It is placed before the register saves.
1733*c0909341SAndroid Build Coastguard Worker.ifc \txfm1\()_\txfm2, dct_dct
1734*c0909341SAndroid Build Coastguard Worker        idct_dc         \w,  \h,  1
1735*c0909341SAndroid Build Coastguard Worker.endif
1736*c0909341SAndroid Build Coastguard Worker        push            {r4-r11,lr}
1737*c0909341SAndroid Build Coastguard Worker        vpush           {q4-q7}
        // w == 4: first pass uses the 8h flavour, second pass the 4h one.
        // Otherwise (w == 16) the flavours are swapped.
1738*c0909341SAndroid Build Coastguard Worker.if \w == 4
1739*c0909341SAndroid Build Coastguard Worker.ifnc \txfm1, identity
1740*c0909341SAndroid Build Coastguard Worker        movrel_local    r4,  inv_\txfm1\()_8h_x\w\()_neon
1741*c0909341SAndroid Build Coastguard Worker.endif
1742*c0909341SAndroid Build Coastguard Worker        movrel_local    r5,  inv_\txfm2\()_4h_x\h\()_neon
1743*c0909341SAndroid Build Coastguard Worker        mov             r10, #\eob_half
1744*c0909341SAndroid Build Coastguard Worker.else
1745*c0909341SAndroid Build Coastguard Worker.ifnc \txfm1, identity
1746*c0909341SAndroid Build Coastguard Worker        movrel_local    r4,  inv_\txfm1\()_4h_x\w\()_neon
1747*c0909341SAndroid Build Coastguard Worker.endif
1748*c0909341SAndroid Build Coastguard Worker        movrel_local    r5,  inv_\txfm2\()_8h_x\h\()_neon
1749*c0909341SAndroid Build Coastguard Worker.endif
        // Tail branch: the shared body pops the registers saved above.
1750*c0909341SAndroid Build Coastguard Worker.ifc \txfm1, identity
1751*c0909341SAndroid Build Coastguard Worker        b               inv_txfm_identity_add_\w\()x\h\()_neon
1752*c0909341SAndroid Build Coastguard Worker.else
1753*c0909341SAndroid Build Coastguard Worker        b               inv_txfm_add_\w\()x\h\()_neon
1754*c0909341SAndroid Build Coastguard Worker.endif
1755*c0909341SAndroid Build Coastguard Workerendfunc
1756*c0909341SAndroid Build Coastguard Worker.endm
1757*c0909341SAndroid Build Coastguard Worker
// def_fns_416 w, h
//
// Expands def_fn_416 for all sixteen (txfm1, txfm2) combinations of
// dct, adst, flipadst and identity at the given block size.
// The last argument is eob_half, which def_fn_416 loads into r10 only when
// w == 4. Values used below:
//   29 - neither transform is identity, or both are
//    8 - only txfm2 is identity
//   32 - only txfm1 is identity
1758*c0909341SAndroid Build Coastguard Worker.macro def_fns_416 w, h
1759*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, dct, dct, 29
1760*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, identity, identity, 29
1761*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, dct, adst, 29
1762*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, dct, flipadst, 29
1763*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, dct, identity, 8
1764*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, adst, dct, 29
1765*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, adst, adst, 29
1766*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, adst, flipadst, 29
1767*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, flipadst, dct, 29
1768*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, flipadst, adst, 29
1769*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, flipadst, flipadst, 29
1770*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, identity, dct, 32
1771*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, adst, identity, 8
1772*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, flipadst, identity, 8
1773*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, identity, adst, 32
1774*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, identity, flipadst, 32
1775*c0909341SAndroid Build Coastguard Worker.endm
1776*c0909341SAndroid Build Coastguard Worker
// Generate the exported entry points for both orientations: 4 wide by
// 16 high, and 16 wide by 4 high.
1777*c0909341SAndroid Build Coastguard Workerdef_fns_416 4, 16
1778*c0909341SAndroid Build Coastguard Workerdef_fns_416 16, 4
1779*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_16x8_neon
//
// Shared body behind the exported 16x8 entry points; def_fn_816 branches
// here for every transform pair when w == 16, after saving r4-r11, lr and
// q4-q7. This function restores them and returns.
//
// Register usage as visible here and in def_fn_816:
//   r0  - base address for load_add_store_8x8 (presumably destination)
//   r2  - coefficient buffer
//   r3  - compared against r10 (presumably the eob)
//   r5  - second pass transform
//   r9  - first pass helper, inv_txfm_horz_scale_16x4_neon or its identity
//         counterpart; called with r6 = output slot in the stack buffer,
//         r7 = input pointer, r8 = 16 (presumably the input stride in
//         bytes - TODO confirm in the helper)
//   r10 - threshold below which the second first-pass call is skipped
// A 256 byte scratch area is requested through sub_sp_align and released
// through add_sp_align (both macros are defined elsewhere).
1780*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_16x8_neon
1781*c0909341SAndroid Build Coastguard Worker        sub_sp_align    256
1782*c0909341SAndroid Build Coastguard Worker
        // First pass, two calls: output slots at sp and sp + 128, inputs
        // at r2 and r2 + 8. The second call is skipped when r3 is below
        // r10; its 128 byte output slot is zero filled at label 1 instead.
1783*c0909341SAndroid Build Coastguard Worker.irp i, 0, 4
1784*c0909341SAndroid Build Coastguard Worker        add             r6,  sp,  #(\i*16*2)
1785*c0909341SAndroid Build Coastguard Worker.if \i > 0
1786*c0909341SAndroid Build Coastguard Worker        cmp             r3,  r10
1787*c0909341SAndroid Build Coastguard Worker        blt             1f
1788*c0909341SAndroid Build Coastguard Worker.endif
1789*c0909341SAndroid Build Coastguard Worker        add             r7,  r2,  #(\i*2)
1790*c0909341SAndroid Build Coastguard Worker        mov             r8,  #8*2
1791*c0909341SAndroid Build Coastguard Worker        blx             r9
1792*c0909341SAndroid Build Coastguard Worker.endr
1793*c0909341SAndroid Build Coastguard Worker        b               2f
1794*c0909341SAndroid Build Coastguard Worker1:
        // Four stores of 32 bytes: clears the 128 bytes starting at r6.
1795*c0909341SAndroid Build Coastguard Worker        vmov.i16        q2,  #0
1796*c0909341SAndroid Build Coastguard Worker        vmov.i16        q3,  #0
1797*c0909341SAndroid Build Coastguard Worker.rept 4
1798*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3}, [r6, :128]!
1799*c0909341SAndroid Build Coastguard Worker.endr
1800*c0909341SAndroid Build Coastguard Worker2:
1801*c0909341SAndroid Build Coastguard Worker
        // Second pass, two halves: read eight 16 byte chunks with a
        // 32 byte step starting at sp (then sp + 16), transform through
        // r5, and add the result at r0 (then r0 + 8).
1802*c0909341SAndroid Build Coastguard Worker.irp i, 0, 8
1803*c0909341SAndroid Build Coastguard Worker        add             r7,  sp,  #(\i*2)
1804*c0909341SAndroid Build Coastguard Worker        mov             r8,  #32
1805*c0909341SAndroid Build Coastguard Worker.irp j, q8, q9, q10, q11, q12, q13, q14, q15
1806*c0909341SAndroid Build Coastguard Worker        vld1.16         {\j}, [r7, :128], r8
1807*c0909341SAndroid Build Coastguard Worker.endr
1808*c0909341SAndroid Build Coastguard Worker        blx             r5
1809*c0909341SAndroid Build Coastguard Worker
1810*c0909341SAndroid Build Coastguard Worker        add             r6,  r0,  #(\i)
1811*c0909341SAndroid Build Coastguard Worker        load_add_store_8x8 r6, r7
1812*c0909341SAndroid Build Coastguard Worker.endr
1813*c0909341SAndroid Build Coastguard Worker
        // Release the scratch area, then undo the vpush / push done by the
        // exported entry point and return to its caller.
1814*c0909341SAndroid Build Coastguard Worker        add_sp_align    256
1815*c0909341SAndroid Build Coastguard Worker        vpop            {q4-q7}
1816*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}
1817*c0909341SAndroid Build Coastguard Workerendfunc
1818*c0909341SAndroid Build Coastguard Worker
// def_fn_816_base variant
//
// Emits the shared 8x16 body, named inv_txfm_add_8x16_neon when variant is
// empty and inv_txfm_identity_add_8x16_neon when variant is identity_.
// def_fn_816 branches here after saving r4-r11, lr and q4-q7; the epilog
// restores them and returns.
//
// Register usage as visible here and in def_fn_816:
//   r0  - base address passed on in r6 to inv_txfm_add_vert_4x16_neon
//         (presumably the destination)
//   r2  - coefficient buffer; each chunk is cleared after it is read
//   r3  - compared against r10 (presumably the eob)
//   r4  - first pass transform (not called by the identity_ variant)
//   r10 - threshold below which the second first-pass block is skipped
// The second pass function is not called directly here; r5 is set by
// def_fn_816 and presumably consumed inside inv_txfm_add_vert_4x16_neon.
//
// L(itx_8x16_epilog) is emitted only by the non-identity expansion; the
// identity_ expansion branches to it.
1819*c0909341SAndroid Build Coastguard Worker.macro def_fn_816_base variant
1820*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_\variant\()add_8x16_neon
1821*c0909341SAndroid Build Coastguard Worker        sub_sp_align    256
1822*c0909341SAndroid Build Coastguard Worker
        // First pass, two blocks: output slots at sp and sp + 128, inputs
        // at r2 and r2 + 16, read as eight 16 byte chunks with a 32 byte
        // step. The second block is skipped when r3 is below r10 and its
        // output slot is zero filled at label 1 instead.
1823*c0909341SAndroid Build Coastguard Worker.irp i, 0, 8
1824*c0909341SAndroid Build Coastguard Worker        add             r6,  sp,  #(\i*8*2)
1825*c0909341SAndroid Build Coastguard Worker.if \i > 0
1826*c0909341SAndroid Build Coastguard Worker        cmp             r3,  r10
1827*c0909341SAndroid Build Coastguard Worker        blt             1f
1828*c0909341SAndroid Build Coastguard Worker.endif
1829*c0909341SAndroid Build Coastguard Worker        add             r7,  r2,  #(\i*2)
1830*c0909341SAndroid Build Coastguard Worker        mov             r8,  #16*2
1831*c0909341SAndroid Build Coastguard Worker
        // d0 holds the factor for scale_input; 2896/4096 is approximately
        // 1/sqrt(2). The macro itself is defined elsewhere.
1832*c0909341SAndroid Build Coastguard Worker        vmov.i16        q2,  #0
1833*c0909341SAndroid Build Coastguard Worker        movw            r12, #2896*8
1834*c0909341SAndroid Build Coastguard Worker        vdup.16         d0,  r12
1835*c0909341SAndroid Build Coastguard Worker
1836*c0909341SAndroid Build Coastguard Worker.irp j, q8, q9, q10, q11, q12, q13, q14, q15
1837*c0909341SAndroid Build Coastguard Worker        vld1.16         {\j}, [r7, :128]
1838*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2}, [r7, :128], r8
1839*c0909341SAndroid Build Coastguard Worker.endr
1840*c0909341SAndroid Build Coastguard Worker        scale_input     d0[0], q8,  q9,  q10, q11, q12, q13, q14, q15
1841*c0909341SAndroid Build Coastguard Worker.ifc \variant, identity_
1842*c0909341SAndroid Build Coastguard Worker        // The identity shl #1 and downshift vrshr #1 cancel out
1843*c0909341SAndroid Build Coastguard Worker.else
1844*c0909341SAndroid Build Coastguard Worker        blx             r4
1845*c0909341SAndroid Build Coastguard Worker.irp j, q8, q9, q10, q11, q12, q13, q14, q15
1846*c0909341SAndroid Build Coastguard Worker        vrshr.s16       \j,  \j,  #1
1847*c0909341SAndroid Build Coastguard Worker.endr
1848*c0909341SAndroid Build Coastguard Worker.endif
        // Transpose the 8x8 block and write all 128 bytes to the stack slot.
1849*c0909341SAndroid Build Coastguard Worker        transpose_8x8h  q8,  q9,  q10, q11, q12, q13, q14, q15, d17, d19, d21, d23, d24, d26, d28, d30
1850*c0909341SAndroid Build Coastguard Worker        vst1.16         {q8,  q9},  [r6, :128]!
1851*c0909341SAndroid Build Coastguard Worker        vst1.16         {q10, q11}, [r6, :128]!
1852*c0909341SAndroid Build Coastguard Worker        vst1.16         {q12, q13}, [r6, :128]!
1853*c0909341SAndroid Build Coastguard Worker        vst1.16         {q14, q15}, [r6, :128]!
1854*c0909341SAndroid Build Coastguard Worker.endr
1855*c0909341SAndroid Build Coastguard Worker        b               2f
1856*c0909341SAndroid Build Coastguard Worker1:
        // Four stores of 32 bytes: clears the 128 bytes starting at r6.
1857*c0909341SAndroid Build Coastguard Worker        vmov.i16        q2,  #0
1858*c0909341SAndroid Build Coastguard Worker        vmov.i16        q3,  #0
1859*c0909341SAndroid Build Coastguard Worker.rept 4
1860*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3}, [r6, :128]!
1861*c0909341SAndroid Build Coastguard Worker.endr
1862*c0909341SAndroid Build Coastguard Worker2:
1863*c0909341SAndroid Build Coastguard Worker
1864*c0909341SAndroid Build Coastguard Worker.ifc \variant, identity_
        // Continue in the second pass code of the non-identity expansion.
1865*c0909341SAndroid Build Coastguard Worker        b               L(itx_8x16_epilog)
1866*c0909341SAndroid Build Coastguard Worker.else
        // Second pass: two calls to inv_txfm_add_vert_4x16_neon with
        // r6 = r0 and r0 + 4, r7 = sp and sp + 8, r8 = 16 (presumably the
        // byte step between rows of the stack buffer - TODO confirm).
1867*c0909341SAndroid Build Coastguard WorkerL(itx_8x16_epilog):
1868*c0909341SAndroid Build Coastguard Worker.irp i, 0, 4
1869*c0909341SAndroid Build Coastguard Worker        add             r6,  r0,  #(\i)
1870*c0909341SAndroid Build Coastguard Worker        add             r7,  sp,  #(\i*2)
1871*c0909341SAndroid Build Coastguard Worker        mov             r8,  #16
1872*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_add_vert_4x16_neon
1873*c0909341SAndroid Build Coastguard Worker.endr
1874*c0909341SAndroid Build Coastguard Worker
        // Release the scratch area, then undo the vpush / push done by the
        // exported entry point and return to its caller.
1875*c0909341SAndroid Build Coastguard Worker        add_sp_align    256
1876*c0909341SAndroid Build Coastguard Worker        vpop            {q4-q7}
1877*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}
1878*c0909341SAndroid Build Coastguard Worker.endif
1879*c0909341SAndroid Build Coastguard Workerendfunc
1880*c0909341SAndroid Build Coastguard Worker.endm
1881*c0909341SAndroid Build Coastguard Worker
// Emit both variants of the 8x16 body. The identity_ expansion branches to
// L(itx_8x16_epilog), which is defined only by the expansion with an empty
// variant, so both lines are required.
1882*c0909341SAndroid Build Coastguard Workerdef_fn_816_base identity_
1883*c0909341SAndroid Build Coastguard Workerdef_fn_816_base
1884*c0909341SAndroid Build Coastguard Worker
1885*c0909341SAndroid Build Coastguard Worker/* Define symbols used in .if statement */
// def_fn_816 evaluates a numeric .if expression that compares its txfm1
// argument with identity, so each transform name needs a numeric value.
// The values only have to be distinct from one another.
1886*c0909341SAndroid Build Coastguard Worker.equ dct, 1
1887*c0909341SAndroid Build Coastguard Worker.equ identity, 2
1888*c0909341SAndroid Build Coastguard Worker.equ adst, 3
1889*c0909341SAndroid Build Coastguard Worker.equ flipadst, 4
1890*c0909341SAndroid Build Coastguard Worker
// def_fn_816 w, h, txfm1, txfm2, eob_8x8, eob_4x4
//
// Emits one exported 8bpc entry point for an 8x16 or 16x8 inverse
// transform with the given first (txfm1) and second (txfm2) transform
// types. The entry point saves r4-r11, lr and q4-q7, sets up the registers
// below, and branches to a shared body which restores them and returns.
//   r4  - first pass function (left unset when txfm1 is identity)
//   r5  - second pass function
//   r9  - w == 16 only: horizontal first pass helper
//   r10 - eob_8x8 when w == 8, eob_4x4 otherwise
1891*c0909341SAndroid Build Coastguard Worker.macro def_fn_816 w, h, txfm1, txfm2, eob_8x8, eob_4x4
1892*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_\txfm1\()_\txfm2\()_\w\()x\h\()_8bpc_neon, export=1
        // Only the dct_dct combination runs idct_dc first (presumably a
        // fast path for DC-only input; the macro is defined elsewhere).
        // It is placed before the register saves.
1893*c0909341SAndroid Build Coastguard Worker.ifc \txfm1\()_\txfm2, dct_dct
1894*c0909341SAndroid Build Coastguard Worker        idct_dc         \w,  \h,  1
1895*c0909341SAndroid Build Coastguard Worker.endif
1896*c0909341SAndroid Build Coastguard Worker        push            {r4-r11,lr}
1897*c0909341SAndroid Build Coastguard Worker        vpush           {q4-q7}
1898*c0909341SAndroid Build Coastguard Worker.if \w == 8
1899*c0909341SAndroid Build Coastguard Worker.ifnc \txfm1, identity
1900*c0909341SAndroid Build Coastguard Worker        movrel_local    r4,  inv_\txfm1\()_8h_x8_neon
1901*c0909341SAndroid Build Coastguard Worker.endif
1902*c0909341SAndroid Build Coastguard Worker        movrel_local    r5,  inv_\txfm2\()_4h_x16_neon
1903*c0909341SAndroid Build Coastguard Worker.else
        // w == 16: the first pass goes through a helper in r9. The
        // identity helper is selected without setting r4.
1904*c0909341SAndroid Build Coastguard Worker.ifc \txfm1, identity
1905*c0909341SAndroid Build Coastguard Worker        movrel_local    r9,  inv_txfm_horz_scale_identity_16x4_neon
1906*c0909341SAndroid Build Coastguard Worker.else
1907*c0909341SAndroid Build Coastguard Worker        movrel_local    r4,  inv_\txfm1\()_4h_x16_neon
1908*c0909341SAndroid Build Coastguard Worker        movrel_local    r9,  inv_txfm_horz_scale_16x4_neon
1909*c0909341SAndroid Build Coastguard Worker.endif
1910*c0909341SAndroid Build Coastguard Worker        movrel_local    r5,  inv_\txfm2\()_8h_x8_neon
1911*c0909341SAndroid Build Coastguard Worker.endif
1912*c0909341SAndroid Build Coastguard Worker.if \w == 8
1913*c0909341SAndroid Build Coastguard Worker        mov             r10, #\eob_8x8
1914*c0909341SAndroid Build Coastguard Worker.else
1915*c0909341SAndroid Build Coastguard Worker        mov             r10, #\eob_4x4
1916*c0909341SAndroid Build Coastguard Worker.endif
        // Numeric comparison: relies on the transform names being defined
        // as symbols with .equ. Only the 8 wide identity case has a
        // dedicated identity body; every 16 wide case shares
        // inv_txfm_add_16x8_neon.
1917*c0909341SAndroid Build Coastguard Worker.if \w == 8 && \txfm1 == identity
1918*c0909341SAndroid Build Coastguard Worker        b               inv_txfm_identity_add_\w\()x\h\()_neon
1919*c0909341SAndroid Build Coastguard Worker.else
1920*c0909341SAndroid Build Coastguard Worker        b               inv_txfm_add_\w\()x\h\()_neon
1921*c0909341SAndroid Build Coastguard Worker.endif
1922*c0909341SAndroid Build Coastguard Workerendfunc
1923*c0909341SAndroid Build Coastguard Worker.endm
1924*c0909341SAndroid Build Coastguard Worker
// def_fns_816 w, h
//
// Expands def_fn_816 for all sixteen (txfm1, txfm2) combinations of
// dct, adst, flipadst and identity at the given block size.
// The last two arguments are eob_8x8 and eob_4x4; def_fn_816 loads the
// first into r10 when w == 8 and the second otherwise. Values used below:
//   43, 10 - neither transform is identity, or both are
//    8,  4 - only txfm2 is identity
//   64,  4 - only txfm1 is identity
1925*c0909341SAndroid Build Coastguard Worker.macro def_fns_816 w, h
1926*c0909341SAndroid Build Coastguard Workerdef_fn_816 \w, \h, dct, dct, 43, 10
1927*c0909341SAndroid Build Coastguard Workerdef_fn_816 \w, \h, identity, identity, 43, 10
1928*c0909341SAndroid Build Coastguard Workerdef_fn_816 \w, \h, dct, adst, 43, 10
1929*c0909341SAndroid Build Coastguard Workerdef_fn_816 \w, \h, dct, flipadst, 43, 10
1930*c0909341SAndroid Build Coastguard Workerdef_fn_816 \w, \h, dct, identity, 8, 4
1931*c0909341SAndroid Build Coastguard Workerdef_fn_816 \w, \h, adst, dct, 43, 10
1932*c0909341SAndroid Build Coastguard Workerdef_fn_816 \w, \h, adst, adst, 43, 10
1933*c0909341SAndroid Build Coastguard Workerdef_fn_816 \w, \h, adst, flipadst, 43, 10
1934*c0909341SAndroid Build Coastguard Workerdef_fn_816 \w, \h, flipadst, dct, 43, 10
1935*c0909341SAndroid Build Coastguard Workerdef_fn_816 \w, \h, flipadst, adst, 43, 10
1936*c0909341SAndroid Build Coastguard Workerdef_fn_816 \w, \h, flipadst, flipadst, 43, 10
1937*c0909341SAndroid Build Coastguard Workerdef_fn_816 \w, \h, identity, dct, 64, 4
1938*c0909341SAndroid Build Coastguard Workerdef_fn_816 \w, \h, adst, identity, 8, 4
1939*c0909341SAndroid Build Coastguard Workerdef_fn_816 \w, \h, flipadst, identity, 8, 4
1940*c0909341SAndroid Build Coastguard Workerdef_fn_816 \w, \h, identity, adst, 64, 4
1941*c0909341SAndroid Build Coastguard Workerdef_fn_816 \w, \h, identity, flipadst, 64, 4
1942*c0909341SAndroid Build Coastguard Worker.endm
1943*c0909341SAndroid Build Coastguard Worker
// Generate the exported entry points for both orientations: 8 wide by
// 16 high, and 16 wide by 8 high.
1944*c0909341SAndroid Build Coastguard Workerdef_fns_816 8, 16
1945*c0909341SAndroid Build Coastguard Workerdef_fns_816 16, 8
1946*c0909341SAndroid Build Coastguard Worker
1947*c0909341SAndroid Build Coastguard Workerfunction inv_dct32_odd_4h_x16_neon, export=1
1948*c0909341SAndroid Build Coastguard Worker        movrel_local    r12, idct_coeffs, 2*16
1949*c0909341SAndroid Build Coastguard Worker        vld1.16         {q0, q1}, [r12, :128]
1950*c0909341SAndroid Build Coastguard Worker        sub             r12, r12, #2*16
1951*c0909341SAndroid Build Coastguard Worker
1952*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl     q2,  d16, d31, d0[0], d0[1] // -> t16a
1953*c0909341SAndroid Build Coastguard Worker        vmull_vmlal     q3,  d16, d31, d0[1], d0[0] // -> t31a
1954*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl     q4,  d24, d23, d0[2], d0[3] // -> t17a
1955*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d16, q2,  #12               // t16a
1956*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d31, q3,  #12               // t31a
1957*c0909341SAndroid Build Coastguard Worker        vmull_vmlal     q2,  d24, d23, d0[3], d0[2] // -> t30a
1958*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl     q3,  d20, d27, d1[0], d1[1] // -> t18a
1959*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d24, q4,  #12               // t17a
1960*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d23, q2,  #12               // t30a
1961*c0909341SAndroid Build Coastguard Worker        vmull_vmlal     q4,  d20, d27, d1[1], d1[0] // -> t29a
1962*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl     q2,  d28, d19, d1[2], d1[3] // -> t19a
1963*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d20, q3,  #12               // t18a
1964*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d27, q4,  #12               // t29a
1965*c0909341SAndroid Build Coastguard Worker        vmull_vmlal     q3,  d28, d19, d1[3], d1[2] // -> t28a
1966*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl     q4,  d18, d29, d2[0], d2[1] // -> t20a
1967*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d28, q2,  #12               // t19a
1968*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d19, q3,  #12               // t28a
1969*c0909341SAndroid Build Coastguard Worker        vmull_vmlal     q2,  d18, d29, d2[1], d2[0] // -> t27a
1970*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl     q3,  d26, d21, d2[2], d2[3] // -> t21a
1971*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d18, q4,  #12               // t20a
1972*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d29, q2,  #12               // t27a
1973*c0909341SAndroid Build Coastguard Worker        vmull_vmlal     q4,  d26, d21, d2[3], d2[2] // -> t26a
1974*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl     q2,  d22, d25, d3[0], d3[1] // -> t22a
1975*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d26, q3,  #12               // t21a
1976*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d21, q4,  #12               // t26a
1977*c0909341SAndroid Build Coastguard Worker        vmull_vmlal     q3,  d22, d25, d3[1], d3[0] // -> t25a
1978*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl     q4,  d30, d17, d3[2], d3[3] // -> t23a
1979*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d22, q2,  #12               // t22a
1980*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d25, q3,  #12               // t25a
1981*c0909341SAndroid Build Coastguard Worker        vmull_vmlal     q2,  d30, d17, d3[3], d3[2] // -> t24a
1982*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d30, q4,  #12               // t23a
1983*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d17, q2,  #12               // t24a
1984*c0909341SAndroid Build Coastguard Worker
1985*c0909341SAndroid Build Coastguard Worker        vld1.16         {q0}, [r12, :128]
1986*c0909341SAndroid Build Coastguard Worker
1987*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d2,  d16, d24 // t17
1988*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d16, d16, d24 // t16
1989*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d3,  d31, d23 // t30
1990*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d31, d31, d23 // t31
1991*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d24, d28, d20 // t18
1992*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d28, d28, d20 // t19
1993*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d23, d18, d26 // t20
1994*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d18, d18, d26 // t21
1995*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d20, d30, d22 // t22
1996*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d30, d30, d22 // t23
1997*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d26, d17, d25 // t24
1998*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d17, d17, d25 // t25
1999*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d22, d29, d21 // t26
2000*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d29, d29, d21 // t27
2001*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d25, d19, d27 // t28
2002*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d19, d19, d27 // t29
2003*c0909341SAndroid Build Coastguard Worker
2004*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl     q2,  d3,  d2,  d1[0], d1[1] // -> t17a
2005*c0909341SAndroid Build Coastguard Worker        vmull_vmlal     q3,  d3,  d2,  d1[1], d1[0] // -> t30a
2006*c0909341SAndroid Build Coastguard Worker        vmull_vmlal     q4,  d19, d24, d1[1], d1[0] // -> t18a
2007*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d21, q2,  #12               // t17a
2008*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d27, q3,  #12               // t30a
2009*c0909341SAndroid Build Coastguard Worker        vneg.s32        q4,  q4                     // -> t18a
2010*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl     q1,  d19, d24, d1[0], d1[1] // -> t29a
2011*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl     q2,  d22, d18, d1[2], d1[3] // -> t21a
2012*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d19, q4,  #12               // t18a
2013*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d24, q1,  #12               // t29a
2014*c0909341SAndroid Build Coastguard Worker        vmull_vmlal     q3,  d22, d18, d1[3], d1[2] // -> t26a
2015*c0909341SAndroid Build Coastguard Worker        vmull_vmlal     q4,  d17, d20, d1[3], d1[2] // -> t22a
2016*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d22, q2,  #12               // t21a
2017*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d18, q3,  #12               // t26a
2018*c0909341SAndroid Build Coastguard Worker        vneg.s32        q4,  q4                     // -> t22a
2019*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl     q1,  d17, d20, d1[2], d1[3] // -> t25a
2020*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d17, q4,  #12               // t22a
2021*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d20, q1,  #12               // t25a
2022*c0909341SAndroid Build Coastguard Worker
2023*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d2,  d27, d24 // t29
2024*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d27, d27, d24 // t30
2025*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d3,  d21, d19 // t18
2026*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d21, d21, d19 // t17
2027*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d24, d16, d28 // t19a
2028*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d16, d16, d28 // t16a
2029*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d19, d30, d23 // t20a
2030*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d30, d30, d23 // t23a
2031*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d28, d17, d22 // t21
2032*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d17, d17, d22 // t22
2033*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d23, d26, d29 // t24a
2034*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d26, d26, d29 // t27a
2035*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d22, d20, d18 // t25
2036*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d20, d20, d18 // t26
2037*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d29, d31, d25 // t28a
2038*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d31, d31, d25 // t31a
2039*c0909341SAndroid Build Coastguard Worker
2040*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl     q2,  d2,  d3,  d0[2], d0[3] // -> t18a
2041*c0909341SAndroid Build Coastguard Worker        vmull_vmlal     q3,  d2,  d3,  d0[3], d0[2] // -> t29a
2042*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl     q4,  d29, d24, d0[2], d0[3] // -> t19
2043*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d18, q2,  #12               // t18a
2044*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d25, q3,  #12               // t29a
2045*c0909341SAndroid Build Coastguard Worker        vmull_vmlal     q1,  d29, d24, d0[3], d0[2] // -> t28
2046*c0909341SAndroid Build Coastguard Worker        vmull_vmlal     q2,  d26, d19, d0[3], d0[2] // -> t20
2047*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d29, q4,  #12               // t19
2048*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d24, q1,  #12               // t28
2049*c0909341SAndroid Build Coastguard Worker        vneg.s32        q2,  q2                     // -> t20
2050*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl     q3,  d26, d19, d0[2], d0[3] // -> t27
2051*c0909341SAndroid Build Coastguard Worker        vmull_vmlal     q4,  d20, d28, d0[3], d0[2] // -> t21a
2052*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d26, q2,  #12               // t20
2053*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d19, q3,  #12               // t27
2054*c0909341SAndroid Build Coastguard Worker        vneg.s32        q4,  q4                     // -> t21a
2055*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl     q1,  d20, d28, d0[2], d0[3] // -> t26a
2056*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d20, q4,  #12               // t21a
2057*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d28, q1,  #12               // t26a
2058*c0909341SAndroid Build Coastguard Worker
2059*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d2,  d16, d30 // t23
2060*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d16, d16, d30 // t16 = out16
2061*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d3,  d31, d23 // t24
2062*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d31, d31, d23 // t31 = out31
2063*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d23, d21, d17 // t22a
2064*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d17, d21, d17 // t17a = out17
2065*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d30, d27, d22 // t30a = out30
2066*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d21, d27, d22 // t25a
2067*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d27, d18, d20 // t21
2068*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d18, d18, d20 // t18 = out18
2069*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d4,  d29, d26 // t19a = out19
2070*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d26, d29, d26 // t20a
2071*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d29, d25, d28 // t29 = out29
2072*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d25, d25, d28 // t26
2073*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d28, d24, d19 // t28a = out28
2074*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d24, d24, d19 // t27a
2075*c0909341SAndroid Build Coastguard Worker        vmov            d19, d4       // out19
2076*c0909341SAndroid Build Coastguard Worker
2077*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl     q2,  d24, d26, d0[0], d0[0] // -> t20
2078*c0909341SAndroid Build Coastguard Worker        vmull_vmlal     q3,  d24, d26, d0[0], d0[0] // -> t27
2079*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d20, q2,  #12   // t20
2080*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d22, q3,  #12   // t27
2081*c0909341SAndroid Build Coastguard Worker
2082*c0909341SAndroid Build Coastguard Worker        vmull_vmlal     q2,  d25, d27, d0[0], d0[0] // -> t26a
2083*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl     q3,  d25, d27, d0[0], d0[0] // -> t21a
2084*c0909341SAndroid Build Coastguard Worker        vmov            d27, d22        // t27
2085*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d26, q2,  #12   // t26a
2086*c0909341SAndroid Build Coastguard Worker
2087*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl     q12, d21, d23, d0[0], d0[0] // -> t22
2088*c0909341SAndroid Build Coastguard Worker        vmull_vmlal     q2,  d21, d23, d0[0], d0[0] // -> t25
2089*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d21, q3,  #12   // t21a
2090*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d22, q12, #12   // t22
2091*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d25, q2,  #12   // t25
2092*c0909341SAndroid Build Coastguard Worker
2093*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl     q2,  d3,  d2,  d0[0], d0[0] // -> t23a
2094*c0909341SAndroid Build Coastguard Worker        vmull_vmlal     q3,  d3,  d2,  d0[0], d0[0] // -> t24a
2095*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d23, q2,  #12   // t23a
2096*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d24, q3,  #12   // t24a
2097*c0909341SAndroid Build Coastguard Worker
2098*c0909341SAndroid Build Coastguard Worker        bx              lr
2099*c0909341SAndroid Build Coastguard Workerendfunc
2100*c0909341SAndroid Build Coastguard Worker
// def_horz_32: emits inv_txfm_horz<suffix>_dct_32x4_neon, which runs a
// 32-point inverse DCT over a strip that is 4 lanes wide, as a 16-point
// "even" half (inv_dct_4h_x16_neon) plus a 16-point "odd" half
// (inv_dct32_odd_4h_x16_neon) that are combined at the end.
//
// Macro arguments:
//   scale  - when non-zero, both halves of the input go through scale_input
//            with the constant 2896*8 before being transformed.
//   shift  - rounding right shift (vrshr.s16) applied to the combined output.
//   suffix - text inserted into the generated function name.
//
// Register usage of the generated function, as visible in the body:
//   r6  - output pointer; four 64-byte (32 x int16) chunks are written and
//         r6 ends up 256 bytes past its entry value.
//   r7  - input pointer; every vector that is loaded is overwritten with
//         zero. r7 is not rewound after the second load pass.
//   r8  - byte step between consecutive input vectors; doubled on entry and
//         left doubled on return.
//   r12 - scratch, only written when scale is set.
//   d0-d7 and q8-q15 are overwritten here; the called transforms may clobber
//   more (their full bodies are not part of this macro).
2101*c0909341SAndroid Build Coastguard Worker.macro def_horz_32 scale=0, shift=2, suffix
2102*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_horz\suffix\()_dct_32x4_neon
2103*c0909341SAndroid Build Coastguard Worker        push            {lr}
// d7 is the zero vector used to clear each input entry right after loading.
2104*c0909341SAndroid Build Coastguard Worker        vmov.i16        d7,  #0
// Double the step so that one load loop visits every second input vector.
2105*c0909341SAndroid Build Coastguard Worker        lsl             r8,  r8,  #1
2106*c0909341SAndroid Build Coastguard Worker.if \scale
2107*c0909341SAndroid Build Coastguard Worker        movw            r12, #2896*8
2108*c0909341SAndroid Build Coastguard Worker        vdup.16         d0,  r12
2109*c0909341SAndroid Build Coastguard Worker.endif
2110*c0909341SAndroid Build Coastguard Worker
// Pass 1: load the 16 even-indexed input vectors into d16-d31, zeroing them.
2111*c0909341SAndroid Build Coastguard Worker.irp i, d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31
2112*c0909341SAndroid Build Coastguard Worker        vld1.16         {\i}, [r7, :64]
2113*c0909341SAndroid Build Coastguard Worker        vst1.16         {d7}, [r7, :64], r8
2114*c0909341SAndroid Build Coastguard Worker.endr
// Rewind the 16 doubled steps, then advance by one original step (r8 / 2) so
// that r7 points at the first odd-indexed input vector for pass 2.
2115*c0909341SAndroid Build Coastguard Worker        sub             r7,  r7,  r8, lsl #4
2116*c0909341SAndroid Build Coastguard Worker        add             r7,  r7,  r8, lsr #1
2117*c0909341SAndroid Build Coastguard Worker.if \scale
2118*c0909341SAndroid Build Coastguard Worker        scale_input     d0[0], q8,  q9,  q10, q11, q12, q13, q14, q15
2119*c0909341SAndroid Build Coastguard Worker.endif
2120*c0909341SAndroid Build Coastguard Worker        bl              inv_dct_4h_x16_neon
2121*c0909341SAndroid Build Coastguard Worker        transpose_4x4h  q8,  q9,  d16, d17, d18, d19
2122*c0909341SAndroid Build Coastguard Worker        transpose_4x4h  q10, q11, d20, d21, d22, d23
2123*c0909341SAndroid Build Coastguard Worker        transpose_4x4h  q12, q13, d24, d25, d26, d27
2124*c0909341SAndroid Build Coastguard Worker        transpose_4x4h  q14, q15, d28, d29, d30, d31
2125*c0909341SAndroid Build Coastguard Worker
// store1: write four d registers (32 bytes) of the even half to r6, then skip
// another 32 bytes, so each invocation advances r6 by 64 bytes. The skipped
// 32 bytes are filled in by store2 below.
2126*c0909341SAndroid Build Coastguard Worker.macro store1 r0, r1, r2, r3
2127*c0909341SAndroid Build Coastguard Worker        vst1.16         {\r0}, [r6, :64]!
2128*c0909341SAndroid Build Coastguard Worker        vst1.16         {\r1}, [r6, :64]!
2129*c0909341SAndroid Build Coastguard Worker        vst1.16         {\r2}, [r6, :64]!
2130*c0909341SAndroid Build Coastguard Worker        vst1.16         {\r3}, [r6, :64]!
2131*c0909341SAndroid Build Coastguard Worker        add             r6,  r6,  #32
2132*c0909341SAndroid Build Coastguard Worker.endm
2133*c0909341SAndroid Build Coastguard Worker        store1          d16, d20, d24, d28
2134*c0909341SAndroid Build Coastguard Worker        store1          d17, d21, d25, d29
2135*c0909341SAndroid Build Coastguard Worker        store1          d18, d22, d26, d30
2136*c0909341SAndroid Build Coastguard Worker        store1          d19, d23, d27, d31
2137*c0909341SAndroid Build Coastguard Worker.purgem store1
// Rewind r6 to the first output chunk (4 invocations x 64 bytes).
2138*c0909341SAndroid Build Coastguard Worker        sub             r6,  r6,  #64*4
2139*c0909341SAndroid Build Coastguard Worker
// Pass 2: re-create the zero vector (presumably d7 does not survive the code
// above - confirm against inv_dct_4h_x16_neon), then load and clear the 16
// odd-indexed input vectors.
2140*c0909341SAndroid Build Coastguard Worker        vmov.i16        d7,  #0
2141*c0909341SAndroid Build Coastguard Worker.irp i, d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31
2142*c0909341SAndroid Build Coastguard Worker        vld1.16         {\i}, [r7, :64]
2143*c0909341SAndroid Build Coastguard Worker        vst1.16         {d7}, [r7, :64], r8
2144*c0909341SAndroid Build Coastguard Worker.endr
2145*c0909341SAndroid Build Coastguard Worker.if \scale
2146*c0909341SAndroid Build Coastguard Worker        // This relies on the fact that the idct also leaves the right coeff in d0[1]
2147*c0909341SAndroid Build Coastguard Worker        scale_input     d0[1], q8,  q9,  q10, q11, q12, q13, q14, q15
2148*c0909341SAndroid Build Coastguard Worker.endif
2149*c0909341SAndroid Build Coastguard Worker        bl              inv_dct32_odd_4h_x16_neon
// The odd half is transposed with the register order reversed (d31 first), to
// match the mirrored pairing used by store2.
2150*c0909341SAndroid Build Coastguard Worker        transpose_4x4h  q15, q14, d31, d30, d29, d28
2151*c0909341SAndroid Build Coastguard Worker        transpose_4x4h  q13, q12, d27, d26, d25, d24
2152*c0909341SAndroid Build Coastguard Worker        transpose_4x4h  q11, q10, d23, d22, d21, d20
2153*c0909341SAndroid Build Coastguard Worker        transpose_4x4h  q9,  q8,  d19, d18, d17, d16
// store2: reload the 32 bytes of even-half data written by store1 (d0-d3),
// form even + odd in d0-d3 and even - odd in d7..d4 (reverse register order),
// reverse the 16-bit lanes inside each difference register (vrev64.16), apply
// the rounding shift, and write the whole 64-byte chunk: sums first, then the
// mirrored differences.
2154*c0909341SAndroid Build Coastguard Worker.macro store2 r0, r1, r2, r3, shift
2155*c0909341SAndroid Build Coastguard Worker        vld1.16         {q0, q1}, [r6, :128]
2156*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d7,  d0,  \r0
2157*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d0,  d0,  \r0
2158*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d6,  d1,  \r1
2159*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d1,  d1,  \r1
2160*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d5,  d2,  \r2
2161*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d2,  d2,  \r2
2162*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d4,  d3,  \r3
2163*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d3,  d3,  \r3
2164*c0909341SAndroid Build Coastguard Worker        vrev64.16       q2,  q2
2165*c0909341SAndroid Build Coastguard Worker        vrev64.16       q3,  q3
2166*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q0,  q0,  #\shift
2167*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q1,  q1,  #\shift
2168*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q2,  q2,  #\shift
2169*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q3,  q3,  #\shift
2170*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1}, [r6, :128]!
2171*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3}, [r6, :128]!
2172*c0909341SAndroid Build Coastguard Worker.endm
2173*c0909341SAndroid Build Coastguard Worker
2174*c0909341SAndroid Build Coastguard Worker        store2          d31, d27, d23, d19, \shift
2175*c0909341SAndroid Build Coastguard Worker        store2          d30, d26, d22, d18, \shift
2176*c0909341SAndroid Build Coastguard Worker        store2          d29, d25, d21, d17, \shift
2177*c0909341SAndroid Build Coastguard Worker        store2          d28, d24, d20, d16, \shift
2178*c0909341SAndroid Build Coastguard Worker.purgem store2
2179*c0909341SAndroid Build Coastguard Worker        pop             {pc}
2180*c0909341SAndroid Build Coastguard Workerendfunc
2181*c0909341SAndroid Build Coastguard Worker.endm
2182*c0909341SAndroid Build Coastguard Worker
// Two variants are generated: the plain one (no input scaling, final shift 2)
// and the "_scale" one (input scaling enabled, final shift 1).
2183*c0909341SAndroid Build Coastguard Workerdef_horz_32 scale=0, shift=2
2184*c0909341SAndroid Build Coastguard Workerdef_horz_32 scale=1, shift=1, suffix=_scale
2185*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_vert_dct_4x32_neon: vertical 32-point inverse DCT over a
// column strip 4 pixels wide, with the result rounded, added to 8-bit pixels
// and stored.
//
// Register usage, as visible in the body:
//   r6  - pixel pointer used for the stores; advanced by 32 rows.
//   r1  - pixel row stride (used as the post-increment for pixel accesses).
//   r7  - pointer to the int16 intermediate rows (4 values = 8 bytes each);
//         the even-half results are written back into this buffer in place.
//   r8  - byte step between intermediate rows; doubled on entry and left
//         doubled.
//   r9  - overwritten with -r8 without being saved here (callers presumably
//         preserve it - confirm).
//   r10 - pixel read pointer, a copy of r6 that runs ahead of the stores.
2186*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_vert_dct_4x32_neon
2187*c0909341SAndroid Build Coastguard Worker        push            {r10-r11,lr}
// Double the row step so the load loops visit every second row.
2188*c0909341SAndroid Build Coastguard Worker        lsl             r8,  r8,  #1
2189*c0909341SAndroid Build Coastguard Worker
// Load the 16 even-indexed rows and rewind r7.
2190*c0909341SAndroid Build Coastguard Worker.irp i, d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31
2191*c0909341SAndroid Build Coastguard Worker        vld1.16         {\i}, [r7, :64], r8
2192*c0909341SAndroid Build Coastguard Worker.endr
2193*c0909341SAndroid Build Coastguard Worker        sub             r7,  r7,  r8, lsl #4
2194*c0909341SAndroid Build Coastguard Worker
2195*c0909341SAndroid Build Coastguard Worker        bl              inv_dct_4h_x16_neon
2196*c0909341SAndroid Build Coastguard Worker
// Park the 16 even-half results in the slots the even rows were read from,
// freeing d16-d31 for the odd half.
2197*c0909341SAndroid Build Coastguard Worker.irp i, d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31
2198*c0909341SAndroid Build Coastguard Worker        vst1.16         {\i}, [r7, :64], r8
2199*c0909341SAndroid Build Coastguard Worker.endr
// Rewind, then step by one original row (r8 / 2) to reach the odd rows.
2200*c0909341SAndroid Build Coastguard Worker        sub             r7,  r7,  r8, lsl #4
2201*c0909341SAndroid Build Coastguard Worker        add             r7,  r7,  r8, lsr #1
2202*c0909341SAndroid Build Coastguard Worker
2203*c0909341SAndroid Build Coastguard Worker.irp i, d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31
2204*c0909341SAndroid Build Coastguard Worker        vld1.16         {\i}, [r7, :64], r8
2205*c0909341SAndroid Build Coastguard Worker.endr
// Return r7 to its entry value, i.e. the first parked even-half result.
2206*c0909341SAndroid Build Coastguard Worker        sub             r7,  r7,  r8, lsl #4
2207*c0909341SAndroid Build Coastguard Worker        sub             r7,  r7,  r8, lsr #1
2208*c0909341SAndroid Build Coastguard Worker        bl              inv_dct32_odd_4h_x16_neon
2209*c0909341SAndroid Build Coastguard Worker
// r9 = negative step for walking the even-half results backwards;
// r10 = separate pixel read pointer so loads can be issued ahead of stores.
2210*c0909341SAndroid Build Coastguard Worker        neg             r9,  r8
2211*c0909341SAndroid Build Coastguard Worker        mov             r10, r6
// combine: produce four output rows. Each row is
//   (even_half_row op odd_half_register), rounded >> 4, added to four 8-bit
//   pixels, saturated back to 8 bits and stored.
// Inside the macro body, the argument forms written with a backslash are the
// macro arguments (NEON d registers), whereas the bare "r1" is the ARM core
// register r1 used as the pixel stride.
2212*c0909341SAndroid Build Coastguard Worker.macro combine r0, r1, r2, r3, op, stride
2213*c0909341SAndroid Build Coastguard Worker        vld1.16         {d4},    [r7,  :64], \stride
2214*c0909341SAndroid Build Coastguard Worker        vld1.32         {d2[0]}, [r10, :32], r1
2215*c0909341SAndroid Build Coastguard Worker        vld1.16         {d5},    [r7,  :64],  \stride
2216*c0909341SAndroid Build Coastguard Worker        vld1.32         {d2[1]}, [r10, :32], r1
2217*c0909341SAndroid Build Coastguard Worker        \op\().s16      d4,  d4,  \r0
2218*c0909341SAndroid Build Coastguard Worker        vld1.16         {d6},    [r7,  :64], \stride
2219*c0909341SAndroid Build Coastguard Worker        vld1.32         {d3[0]}, [r10, :32], r1
2220*c0909341SAndroid Build Coastguard Worker        \op\().s16      d5,  d5,  \r1
2221*c0909341SAndroid Build Coastguard Worker        vld1.32         {d3[1]}, [r10, :32], r1
2222*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q2,  q2,  #4
2223*c0909341SAndroid Build Coastguard Worker        \op\().s16      d6,  d6,  \r2
2224*c0909341SAndroid Build Coastguard Worker        vld1.16         {d7},    [r7,  :64], \stride
2225*c0909341SAndroid Build Coastguard Worker        vaddw.u8        q2,  q2,  d2
2226*c0909341SAndroid Build Coastguard Worker        \op\().s16      d7,  d7,  \r3
2227*c0909341SAndroid Build Coastguard Worker        vqmovun.s16     d2,  q2
2228*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q3,  q3,  #4
2229*c0909341SAndroid Build Coastguard Worker        vst1.32         {d2[0]}, [r6,  :32], r1
2230*c0909341SAndroid Build Coastguard Worker        vaddw.u8        q3,  q3,  d3
2231*c0909341SAndroid Build Coastguard Worker        vst1.32         {d2[1]}, [r6,  :32], r1
2232*c0909341SAndroid Build Coastguard Worker        vqmovun.s16     d3,  q3
2233*c0909341SAndroid Build Coastguard Worker        vst1.32         {d3[0]}, [r6,  :32], r1
2234*c0909341SAndroid Build Coastguard Worker        vst1.32         {d3[1]}, [r6,  :32], r1
2235*c0909341SAndroid Build Coastguard Worker.endm
// First 16 output rows: walk the even-half results forwards and add the
// odd-half registers in descending order (d31 down to d16).
2236*c0909341SAndroid Build Coastguard Worker        combine         d31, d30, d29, d28, vqadd, r8
2237*c0909341SAndroid Build Coastguard Worker        combine         d27, d26, d25, d24, vqadd, r8
2238*c0909341SAndroid Build Coastguard Worker        combine         d23, d22, d21, d20, vqadd, r8
2239*c0909341SAndroid Build Coastguard Worker        combine         d19, d18, d17, d16, vqadd, r8
// r7 is one step past the last even-half row; step back onto it, then walk
// backwards (stride r9) subtracting the odd-half registers in ascending
// order for the last 16 output rows.
2240*c0909341SAndroid Build Coastguard Worker        sub             r7,  r7,  r8
2241*c0909341SAndroid Build Coastguard Worker        combine         d16, d17, d18, d19, vqsub, r9
2242*c0909341SAndroid Build Coastguard Worker        combine         d20, d21, d22, d23, vqsub, r9
2243*c0909341SAndroid Build Coastguard Worker        combine         d24, d25, d26, d27, vqsub, r9
2244*c0909341SAndroid Build Coastguard Worker        combine         d28, d29, d30, d31, vqsub, r9
2245*c0909341SAndroid Build Coastguard Worker.purgem combine
2246*c0909341SAndroid Build Coastguard Worker
2247*c0909341SAndroid Build Coastguard Worker        pop             {r10-r11,pc}
2248*c0909341SAndroid Build Coastguard Workerendfunc
2249*c0909341SAndroid Build Coastguard Worker
// 16-bit thresholds that the 32x32 functions compare against r3 to decide
// whether more of the block has to be processed. In the part of the file
// around this table, inv_txfm_add_identity_identity_32x32_8bpc_neon walks it
// in 4-byte steps (every other entry; presumably starting at the second one
// via the movrel_local offset argument - confirm), while
// inv_txfm_add_dct_dct_32x32_8bpc_neon walks it in 2-byte steps from the
// start.
2250*c0909341SAndroid Build Coastguard Workerconst eob_32x32
2251*c0909341SAndroid Build Coastguard Worker        .short 10, 36, 78, 136, 210, 300, 406, 1024
2252*c0909341SAndroid Build Coastguard Workerendconst
2253*c0909341SAndroid Build Coastguard Worker
// 16-bit thresholds compared against r3 for 16x32 / 32x16 blocks. The
// def_identity_1632 macro selects this table when its wshort/hshort argument
// is empty and reads it in 4-byte steps (every other entry; presumably
// starting at the second one via the movrel_local offset argument - confirm).
2254*c0909341SAndroid Build Coastguard Workerconst eob_16x32
2255*c0909341SAndroid Build Coastguard Worker        .short 10, 36, 78, 151, 215, 279, 343, 512
2256*c0909341SAndroid Build Coastguard Workerendconst
2257*c0909341SAndroid Build Coastguard Worker
// Shorter 16-bit threshold table for 16x32 / 32x16 blocks. The
// def_identity_1632 macro selects it when its wshort/hshort argument is
// "_shortside" and reads it in 4-byte steps (every other entry; presumably
// starting at the second one via the movrel_local offset argument - confirm).
2258*c0909341SAndroid Build Coastguard Workerconst eob_16x32_shortside
2259*c0909341SAndroid Build Coastguard Worker        .short 10, 36, 78, 512
2260*c0909341SAndroid Build Coastguard Workerendconst
2261*c0909341SAndroid Build Coastguard Worker
// 16-bit thresholds compared against r3 for 8x32 / 32x8 blocks. The
// def_identity_832 macro reads one entry (2-byte step) per 8x8 tile.
2262*c0909341SAndroid Build Coastguard Workerconst eob_8x32
2263*c0909341SAndroid Build Coastguard Worker        // Contrary to the others, this one is only ever used in increments of 8x8
2264*c0909341SAndroid Build Coastguard Worker        .short 43, 107, 171, 256
2265*c0909341SAndroid Build Coastguard Workerendconst
2266*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_identity_identity_32x32_8bpc_neon
// Handles a 32x32 block in 8x8 tiles: each tile of int16 coefficients is
// loaded from r2 (and cleared to zero), transposed, and handed to
// load_add_store_8x8 (a macro defined outside this excerpt; presumably it
// adds the shifted values to the pixels at r0 - confirm).
//
// Registers as used below:
//   r0, r1 - pointer given to load_add_store_8x8 and the stride used to
//            step it (presumably destination pixels / pixel stride).
//   r2     - coefficient pointer.
//   r3     - value compared against the eob_32x32 thresholds (presumably the
//            end-of-block position).
//   r4, r5 - cursors into eob_32x32 for the inner and outer loop.
//   r6     - byte distance between consecutively loaded coefficient vectors.
//   r7     - second register handed to load_add_store_8x8.
//   r12    - 8 x the number of tiles done in the current inner loop.
2267*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_identity_identity_32x32_8bpc_neon, export=1
2268*c0909341SAndroid Build Coastguard Worker        push            {r4-r7,lr}
// q0 stays zero and is used to clear coefficients after they are loaded.
2269*c0909341SAndroid Build Coastguard Worker        vmov.i16        q0,  #0
2270*c0909341SAndroid Build Coastguard Worker        movrel_local    r5,  eob_32x32, 2
2271*c0909341SAndroid Build Coastguard Worker
2272*c0909341SAndroid Build Coastguard Worker        mov             r6,  #2*32
// Outer loop: one pass per group of tiles; restarts the inner cursor.
2273*c0909341SAndroid Build Coastguard Worker1:
2274*c0909341SAndroid Build Coastguard Worker        mov             r12, #0
2275*c0909341SAndroid Build Coastguard Worker        movrel_local    r4,  eob_32x32, 2
// Inner loop: one 8x8 tile per iteration.
2276*c0909341SAndroid Build Coastguard Worker2:
2277*c0909341SAndroid Build Coastguard Worker        add             r12, r12, #8
2278*c0909341SAndroid Build Coastguard Worker.irp i, q8, q9, q10, q11, q12, q13, q14, q15
2279*c0909341SAndroid Build Coastguard Worker        vld1.16         {\i}, [r2, :128]
2280*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0}, [r2, :128], r6
2281*c0909341SAndroid Build Coastguard Worker.endr
2282*c0909341SAndroid Build Coastguard Worker        transpose_8x8h  q8,  q9,  q10, q11, q12, q13, q14, q15, d17, d19, d21, d23, d24, d26, d28, d30
2283*c0909341SAndroid Build Coastguard Worker
2284*c0909341SAndroid Build Coastguard Worker        load_add_store_8x8 r0, r7, shiftbits=2
// Fetch the next inner threshold (skipping one entry). The sub/add below have
// no S suffix, so the flags set by cmp are still valid at the bge.
// r0 is moved back up 8 rows and 8 bytes along the row for the next tile.
2285*c0909341SAndroid Build Coastguard Worker        ldrh            lr,  [r4], #4
2286*c0909341SAndroid Build Coastguard Worker        sub             r0,  r0,  r1, lsl #3
2287*c0909341SAndroid Build Coastguard Worker        cmp             r3,  lr
2288*c0909341SAndroid Build Coastguard Worker        add             r0,  r0,  #8
2289*c0909341SAndroid Build Coastguard Worker        bge             2b
2290*c0909341SAndroid Build Coastguard Worker
// Outer termination test: stop once r3 is below the next outer threshold.
2291*c0909341SAndroid Build Coastguard Worker        ldrh            lr,  [r5], #4
2292*c0909341SAndroid Build Coastguard Worker        cmp             r3,  lr
2293*c0909341SAndroid Build Coastguard Worker        blt             9f
2294*c0909341SAndroid Build Coastguard Worker
// Prepare the next outer pass: undo the r12 bytes of horizontal movement on
// r0 and go down 8 rows; rewind r2 by r6 * r12 bytes (mls: r2 = r2 - r6*r12)
// and advance it by 8 coefficients (16 bytes).
2295*c0909341SAndroid Build Coastguard Worker        sub             r0,  r0,  r12
2296*c0909341SAndroid Build Coastguard Worker        add             r0,  r0,  r1, lsl #3
2297*c0909341SAndroid Build Coastguard Worker        mls             r2,  r6,  r12, r2
2298*c0909341SAndroid Build Coastguard Worker        add             r2,  r2,  #2*8
2299*c0909341SAndroid Build Coastguard Worker        b               1b
2300*c0909341SAndroid Build Coastguard Worker9:
2301*c0909341SAndroid Build Coastguard Worker        pop             {r4-r7,pc}
2302*c0909341SAndroid Build Coastguard Workerendfunc
2303*c0909341SAndroid Build Coastguard Worker
// shift_8_regs: applies the instruction "op" with the immediate "shift" to
// each of q8-q15 in place (destination and source are the same register).
2304*c0909341SAndroid Build Coastguard Worker.macro shift_8_regs op, shift
2305*c0909341SAndroid Build Coastguard Worker.irp i, q8, q9, q10, q11, q12, q13, q14, q15
2306*c0909341SAndroid Build Coastguard Worker        \op             \i,  \i,  #\shift
2307*c0909341SAndroid Build Coastguard Worker.endr
2308*c0909341SAndroid Build Coastguard Worker.endm
2309*c0909341SAndroid Build Coastguard Worker
// def_identity_1632: emits inv_txfm_add_identity_identity_<w>x<h>_8bpc_neon
// for the 16x32 and 32x16 sizes. The block is processed in 8x8 tiles: load
// and clear coefficients, scale them, apply the identity step, transpose and
// pass the tile to load_add_store_8x8 (macro defined outside this excerpt).
//
// Macro arguments:
//   w, h    - block width and height; h sets the coefficient load stride
//             (2*h bytes), w selects the identity variant and the final
//             shiftbits value.
//   wshort  - suffix appended to "eob_16x32" for the inner-loop thresholds.
//   hshort  - suffix appended to "eob_16x32" for the outer-loop thresholds.
//
// Registers as used below:
//   r0, r1 - pointer given to load_add_store_8x8 and the stride used to
//            step it (presumably destination pixels / pixel stride).
//   r2     - coefficient pointer.
//   r3     - value compared against the thresholds (presumably the eob).
//   r4, r5 - inner / outer threshold cursors.
//   r6     - first a constant for d0, then the coefficient load stride.
//   r7     - first a constant for d0[1], then handed to load_add_store_8x8.
//   r12    - 8 x the number of tiles done in the current inner loop.
2310*c0909341SAndroid Build Coastguard Worker.macro def_identity_1632 w, h, wshort, hshort
2311*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_identity_identity_\w\()x\h\()_8bpc_neon, export=1
2312*c0909341SAndroid Build Coastguard Worker        push            {r4-r7,lr}
// d0 lanes are all 2896*8, except lane 1 which is 2*(5793-4096)*8.
// (2896/4096 is about 0.7071 and 5793/4096 about 1.4143; how the constants
// are applied is defined by scale_input / identity_8x8*, not shown here.)
2313*c0909341SAndroid Build Coastguard Worker        movw            r6,  #2896*8
2314*c0909341SAndroid Build Coastguard Worker        movw            r7,  #2*(5793-4096)*8
2315*c0909341SAndroid Build Coastguard Worker        vdup.i16        d0,  r6
2316*c0909341SAndroid Build Coastguard Worker        movrel_local    r5,  eob_16x32\hshort, 2
2317*c0909341SAndroid Build Coastguard Worker        vmov.16         d0[1], r7
2318*c0909341SAndroid Build Coastguard Worker
// From here on r6 is the byte distance between loaded coefficient vectors.
2319*c0909341SAndroid Build Coastguard Worker        mov             r6,  #2*\h
// Outer loop: restarts the inner threshold cursor and the tile counter.
2320*c0909341SAndroid Build Coastguard Worker1:
2321*c0909341SAndroid Build Coastguard Worker        mov             r12, #0
2322*c0909341SAndroid Build Coastguard Worker        movrel_local    r4,  eob_16x32\wshort, 2
// Inner loop: one 8x8 tile per iteration. q1 is re-zeroed every iteration
// (presumably because the macros below overwrite it - confirm).
2323*c0909341SAndroid Build Coastguard Worker2:
2324*c0909341SAndroid Build Coastguard Worker        vmov.i16        q1,  #0
2325*c0909341SAndroid Build Coastguard Worker        add             r12, r12, #8
2326*c0909341SAndroid Build Coastguard Worker.irp i, q8, q9, q10, q11, q12, q13, q14, q15
2327*c0909341SAndroid Build Coastguard Worker        vld1.16         {\i}, [r2, :128]
2328*c0909341SAndroid Build Coastguard Worker        vst1.16         {q1}, [r2, :128], r6
2329*c0909341SAndroid Build Coastguard Worker.endr
2330*c0909341SAndroid Build Coastguard Worker        scale_input     d0[0], q8,  q9, q10, q11, q12, q13, q14, q15
2331*c0909341SAndroid Build Coastguard Worker
2332*c0909341SAndroid Build Coastguard Worker.if \w == 16
2333*c0909341SAndroid Build Coastguard Worker        // 16x32
2334*c0909341SAndroid Build Coastguard Worker        identity_8x8_shift1 d0[1]
2335*c0909341SAndroid Build Coastguard Worker.else
2336*c0909341SAndroid Build Coastguard Worker        // 32x16
2337*c0909341SAndroid Build Coastguard Worker        shift_8_regs    vqshl.s16, 1
2338*c0909341SAndroid Build Coastguard Worker        identity_8x8    d0[1]
2339*c0909341SAndroid Build Coastguard Worker.endif
2340*c0909341SAndroid Build Coastguard Worker
2341*c0909341SAndroid Build Coastguard Worker        transpose_8x8h  q8,  q9,  q10, q11, q12, q13, q14, q15, d17, d19, d21, d23, d24, d26, d28, d30
2342*c0909341SAndroid Build Coastguard Worker
2343*c0909341SAndroid Build Coastguard Worker.if \w == 16
2344*c0909341SAndroid Build Coastguard Worker        load_add_store_8x8 r0, r7, shiftbits=2
2345*c0909341SAndroid Build Coastguard Worker.else
2346*c0909341SAndroid Build Coastguard Worker        load_add_store_8x8 r0, r7, shiftbits=4
2347*c0909341SAndroid Build Coastguard Worker.endif
// Fetch the next inner threshold (skipping one entry). The sub/add below have
// no S suffix, so the flags set by cmp are still valid at the bge.
// r0 is moved back up 8 rows and 8 bytes along the row for the next tile.
2348*c0909341SAndroid Build Coastguard Worker        ldrh            lr,  [r4], #4
2349*c0909341SAndroid Build Coastguard Worker        sub             r0,  r0,  r1, lsl #3
2350*c0909341SAndroid Build Coastguard Worker        cmp             r3,  lr
2351*c0909341SAndroid Build Coastguard Worker        add             r0,  r0,  #8
2352*c0909341SAndroid Build Coastguard Worker        bge             2b
2353*c0909341SAndroid Build Coastguard Worker
// Outer termination test: stop once r3 is below the next outer threshold.
2354*c0909341SAndroid Build Coastguard Worker        ldrh            lr,  [r5], #4
2355*c0909341SAndroid Build Coastguard Worker        cmp             r3,  lr
2356*c0909341SAndroid Build Coastguard Worker        blt             9f
2357*c0909341SAndroid Build Coastguard Worker
// Prepare the next outer pass: undo the r12 bytes of horizontal movement on
// r0 and go down 8 rows; rewind r2 by r6 * r12 bytes (mls: r2 = r2 - r6*r12)
// and advance it by 8 coefficients (16 bytes).
2358*c0909341SAndroid Build Coastguard Worker        sub             r0,  r0,  r12
2359*c0909341SAndroid Build Coastguard Worker        add             r0,  r0,  r1, lsl #3
2360*c0909341SAndroid Build Coastguard Worker        mls             r2,  r6,  r12, r2
2361*c0909341SAndroid Build Coastguard Worker        add             r2,  r2,  #2*8
2362*c0909341SAndroid Build Coastguard Worker        b               1b
2363*c0909341SAndroid Build Coastguard Worker9:
2364*c0909341SAndroid Build Coastguard Worker        pop             {r4-r7,pc}
2365*c0909341SAndroid Build Coastguard Workerendfunc
2366*c0909341SAndroid Build Coastguard Worker.endm
2367*c0909341SAndroid Build Coastguard Worker
// 16x32 uses the "_shortside" table for the inner loop; 32x16 uses it for the
// outer loop. The other loop uses the full eob_16x32 table in each case.
2368*c0909341SAndroid Build Coastguard Workerdef_identity_1632 16, 32, _shortside,
2369*c0909341SAndroid Build Coastguard Workerdef_identity_1632 32, 16, , _shortside
2370*c0909341SAndroid Build Coastguard Worker
2371*c0909341SAndroid Build Coastguard Worker.macro def_identity_832 w, h
2372*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_identity_identity_\w\()x\h\()_8bpc_neon, export=1
2373*c0909341SAndroid Build Coastguard Worker        push            {r4-r5,lr}
2374*c0909341SAndroid Build Coastguard Worker        vmov.i16        q0,  #0
2375*c0909341SAndroid Build Coastguard Worker        movrel_local    r4,  eob_8x32
2376*c0909341SAndroid Build Coastguard Worker
2377*c0909341SAndroid Build Coastguard Worker        mov             r12, #2*\h
2378*c0909341SAndroid Build Coastguard Worker1:
2379*c0909341SAndroid Build Coastguard Worker        ldrh            lr,  [r4], #2
2380*c0909341SAndroid Build Coastguard Worker.irp i, q8, q9, q10, q11, q12, q13, q14, q15
2381*c0909341SAndroid Build Coastguard Worker        vld1.16         {\i}, [r2, :128]
2382*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0}, [r2, :128], r12
2383*c0909341SAndroid Build Coastguard Worker.endr
2384*c0909341SAndroid Build Coastguard Worker
2385*c0909341SAndroid Build Coastguard Worker.if \w == 8
2386*c0909341SAndroid Build Coastguard Worker        // 8x32
2387*c0909341SAndroid Build Coastguard Worker        shift_8_regs    vrshr.s16, 1
2388*c0909341SAndroid Build Coastguard Worker.endif
2389*c0909341SAndroid Build Coastguard Worker
2390*c0909341SAndroid Build Coastguard Worker        transpose_8x8h  q8,  q9,  q10, q11, q12, q13, q14, q15, d17, d19, d21, d23, d24, d26, d28, d30
2391*c0909341SAndroid Build Coastguard Worker
2392*c0909341SAndroid Build Coastguard Worker        cmp             r3,  lr
2393*c0909341SAndroid Build Coastguard Worker.if \w == 8
2394*c0909341SAndroid Build Coastguard Worker        load_add_store_8x8 r0, r5, shiftbits=2
2395*c0909341SAndroid Build Coastguard Worker.else
2396*c0909341SAndroid Build Coastguard Worker        load_add_store_8x8 r0, r5, shiftbits=3
2397*c0909341SAndroid Build Coastguard Worker.endif
2398*c0909341SAndroid Build Coastguard Worker
2399*c0909341SAndroid Build Coastguard Worker        blt             9f
2400*c0909341SAndroid Build Coastguard Worker.if \w == 8
2401*c0909341SAndroid Build Coastguard Worker        sub             r2,  r2,  r12, lsl #3
2402*c0909341SAndroid Build Coastguard Worker        add             r2,  r2,  #2*8
2403*c0909341SAndroid Build Coastguard Worker.else
2404*c0909341SAndroid Build Coastguard Worker        sub             r0,  r0,  r1, lsl #3
2405*c0909341SAndroid Build Coastguard Worker        add             r0,  r0,  #8
2406*c0909341SAndroid Build Coastguard Worker.endif
2407*c0909341SAndroid Build Coastguard Worker        b               1b
2408*c0909341SAndroid Build Coastguard Worker
2409*c0909341SAndroid Build Coastguard Worker9:
2410*c0909341SAndroid Build Coastguard Worker        pop             {r4-r5,pc}
2411*c0909341SAndroid Build Coastguard Workerendfunc
2412*c0909341SAndroid Build Coastguard Worker.endm
2413*c0909341SAndroid Build Coastguard Worker
// Instantiate the def_identity_832 macro twice, once with the arguments
// (8, 32) and once with (32, 8).
2414*c0909341SAndroid Build Coastguard Workerdef_identity_832 8, 32
2415*c0909341SAndroid Build Coastguard Workerdef_identity_832 32, 8
2416*c0909341SAndroid Build Coastguard Worker
// 32x32 DCT/DCT inverse transform with add, 8 bpc (exported).
//
// Register use visible in this function:
//   r0  - base of the second pass output (r6 = r0 + i); presumably the
//         destination pixel pointer - confirm against the C prototype.
//   r2  - base of the first pass input (r7 = r2 + i*2); presumably the
//         coefficient buffer - confirm.
//   r3  - compared against thresholds read from eob_32x32; presumably the
//         eob value of the block - confirm.
//   r10 - read cursor into eob_32x32, r11 - threshold currently loaded.
//   sp  - 2048 bytes of scratch (32 units of 32*2 bytes) shared by the passes.
//
// Flow: expand idct_dc (defined elsewhere), run up to eight first pass calls
// that write into the scratch area, zero-fill whatever part of the scratch
// area was skipped, then run eight second pass calls that read the scratch.
2417*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_dct_dct_32x32_8bpc_neon, export=1
2418*c0909341SAndroid Build Coastguard Worker        idct_dc         32,  32,  2
2419*c0909341SAndroid Build Coastguard Worker
2420*c0909341SAndroid Build Coastguard Worker        push            {r4-r11,lr}
2421*c0909341SAndroid Build Coastguard Worker        vpush           {q4}
2422*c0909341SAndroid Build Coastguard Worker        sub_sp_align    2048
2423*c0909341SAndroid Build Coastguard Worker        movrel_local    r10, eob_32x32
2424*c0909341SAndroid Build Coastguard Worker        ldrh            r11, [r10], #2
2425*c0909341SAndroid Build Coastguard Worker
// First pass, unrolled over i = 0, 4, ..., 28. For every slice but the first,
// r8 is preloaded with the number of 64-byte scratch units still unwritten
// (32 - i) so that it is valid if the branch to 1: is taken; on the
// fall-through path it is overwritten with 32*2 before the call.
// No further threshold is loaded for the last slice (i == 28).
2426*c0909341SAndroid Build Coastguard Worker.irp i, 0, 4, 8, 12, 16, 20, 24, 28
2427*c0909341SAndroid Build Coastguard Worker        add             r6,  sp,  #(\i*32*2)
2428*c0909341SAndroid Build Coastguard Worker.if \i > 0
2429*c0909341SAndroid Build Coastguard Worker        mov             r8,  #(32 - \i)
2430*c0909341SAndroid Build Coastguard Worker        cmp             r3,  r11
2431*c0909341SAndroid Build Coastguard Worker        blt             1f
2432*c0909341SAndroid Build Coastguard Worker.if \i < 28
2433*c0909341SAndroid Build Coastguard Worker        ldrh            r11, [r10], #2
2434*c0909341SAndroid Build Coastguard Worker.endif
2435*c0909341SAndroid Build Coastguard Worker.endif
2436*c0909341SAndroid Build Coastguard Worker        add             r7,  r2,  #(\i*2)
2437*c0909341SAndroid Build Coastguard Worker        mov             r8,  #32*2
2438*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_horz_dct_32x4_neon
2439*c0909341SAndroid Build Coastguard Worker.endr
2440*c0909341SAndroid Build Coastguard Worker        b               3f
2441*c0909341SAndroid Build Coastguard Worker
// Zero-fill the rest of the scratch area starting at r6. Each iteration
// stores 4 * 32 = 128 bytes (two 64-byte units) and takes 2 off r8; the
// flags written by subs are still intact at bgt since vst1 does not set flags.
2442*c0909341SAndroid Build Coastguard Worker1:
2443*c0909341SAndroid Build Coastguard Worker        vmov.i16        q2,  #0
2444*c0909341SAndroid Build Coastguard Worker        vmov.i16        q3,  #0
2445*c0909341SAndroid Build Coastguard Worker2:
2446*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #2
2447*c0909341SAndroid Build Coastguard Worker.rept 4
2448*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3}, [r6, :128]!
2449*c0909341SAndroid Build Coastguard Worker.endr
2450*c0909341SAndroid Build Coastguard Worker        bgt             2b
2451*c0909341SAndroid Build Coastguard Worker
// Second pass, unrolled over i = 0, 4, ..., 28: r6 = r0 + i, r7 = scratch + i*2,
// r8 = 32*2.
2452*c0909341SAndroid Build Coastguard Worker3:
2453*c0909341SAndroid Build Coastguard Worker.irp i, 0, 4, 8, 12, 16, 20, 24, 28
2454*c0909341SAndroid Build Coastguard Worker        add             r6,  r0,  #(\i)
2455*c0909341SAndroid Build Coastguard Worker        add             r7,  sp,  #(\i*2)
2456*c0909341SAndroid Build Coastguard Worker        mov             r8,  #32*2
2457*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_add_vert_dct_4x32_neon
2458*c0909341SAndroid Build Coastguard Worker.endr
2459*c0909341SAndroid Build Coastguard Worker
// Release the scratch area and restore the registers saved on entry;
// popping pc returns to the caller.
2460*c0909341SAndroid Build Coastguard Worker        add_sp_align    2048
2461*c0909341SAndroid Build Coastguard Worker        vpop            {q4}
2462*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}
2463*c0909341SAndroid Build Coastguard Workerendfunc
2464*c0909341SAndroid Build Coastguard Worker
// 16x32 DCT/DCT inverse transform with add, 8 bpc (exported).
//
// Same structure as the other large dct_dct functions in this file: expand
// idct_dc, run a first pass into a stack scratch area (1024 bytes here, in
// units of 16*2 bytes), zero-fill what was skipped, then run a second pass
// that reads the scratch area.
//
// Register use visible in this function:
//   r0  - base of the second pass output (r6 = r0 + i); presumably the
//         destination pixel pointer - confirm against the C prototype.
//   r2  - base of the first pass input (r7 = r2 + i*2); presumably the
//         coefficient buffer - confirm.
//   r3  - compared against thresholds read from eob_16x32; presumably the
//         eob value of the block - confirm.
//   r4  - address of inv_dct_4h_x16_neon; not used directly here, presumably
//         consumed by inv_txfm_horz_scale_16x4_neon - confirm.
//   r10 - read cursor into eob_16x32, r11 - threshold currently loaded.
2465*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_dct_dct_16x32_8bpc_neon, export=1
2466*c0909341SAndroid Build Coastguard Worker        idct_dc         16,  32,  1
2467*c0909341SAndroid Build Coastguard Worker
2468*c0909341SAndroid Build Coastguard Worker        push            {r4-r11,lr}
2469*c0909341SAndroid Build Coastguard Worker        vpush           {q4}
2470*c0909341SAndroid Build Coastguard Worker        sub_sp_align    1024
2471*c0909341SAndroid Build Coastguard Worker        movrel_local    r10, eob_16x32
2472*c0909341SAndroid Build Coastguard Worker        ldrh            r11, [r10], #2
2473*c0909341SAndroid Build Coastguard Worker        movrel_local    r4,  inv_dct_4h_x16_neon
2474*c0909341SAndroid Build Coastguard Worker
// First pass, unrolled over i = 0, 4, ..., 28. For every slice but the first,
// r8 is preloaded with the number of 32-byte scratch units still unwritten
// (32 - i) so that it is valid at 1:; on the fall-through path it is
// overwritten with 2*32 before the call.
2475*c0909341SAndroid Build Coastguard Worker.irp i, 0, 4, 8, 12, 16, 20, 24, 28
2476*c0909341SAndroid Build Coastguard Worker        add             r6,  sp,  #(\i*16*2)
2477*c0909341SAndroid Build Coastguard Worker        add             r7,  r2,  #(\i*2)
2478*c0909341SAndroid Build Coastguard Worker.if \i > 0
2479*c0909341SAndroid Build Coastguard Worker        mov             r8,  #(32 - \i)
2480*c0909341SAndroid Build Coastguard Worker        cmp             r3,  r11
2481*c0909341SAndroid Build Coastguard Worker        blt             1f
2482*c0909341SAndroid Build Coastguard Worker.if \i < 28
2483*c0909341SAndroid Build Coastguard Worker        ldrh            r11, [r10], #2
2484*c0909341SAndroid Build Coastguard Worker.endif
2485*c0909341SAndroid Build Coastguard Worker.endif
2486*c0909341SAndroid Build Coastguard Worker        mov             r8,  #2*32
2487*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_horz_scale_16x4_neon
2488*c0909341SAndroid Build Coastguard Worker.endr
2489*c0909341SAndroid Build Coastguard Worker        b               3f
2490*c0909341SAndroid Build Coastguard Worker
// Zero-fill the rest of the scratch area starting at r6. Each iteration
// stores 4 * 32 = 128 bytes (four 32-byte units) and takes 4 off r8.
2491*c0909341SAndroid Build Coastguard Worker1:
2492*c0909341SAndroid Build Coastguard Worker        vmov.i16        q2,  #0
2493*c0909341SAndroid Build Coastguard Worker        vmov.i16        q3,  #0
2494*c0909341SAndroid Build Coastguard Worker2:
2495*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #4
2496*c0909341SAndroid Build Coastguard Worker.rept 4
2497*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3}, [r6, :128]!
2498*c0909341SAndroid Build Coastguard Worker.endr
2499*c0909341SAndroid Build Coastguard Worker        bgt             2b
2500*c0909341SAndroid Build Coastguard Worker
// Second pass, unrolled over i = 0, 4, 8, 12: r6 = r0 + i, r7 = scratch + i*2,
// r8 = 16*2.
2501*c0909341SAndroid Build Coastguard Worker3:
2502*c0909341SAndroid Build Coastguard Worker.irp i, 0, 4, 8, 12
2503*c0909341SAndroid Build Coastguard Worker        add             r6,  r0,  #(\i)
2504*c0909341SAndroid Build Coastguard Worker        add             r7,  sp,  #(\i*2)
2505*c0909341SAndroid Build Coastguard Worker        mov             r8,  #16*2
2506*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_add_vert_dct_4x32_neon
2507*c0909341SAndroid Build Coastguard Worker.endr
2508*c0909341SAndroid Build Coastguard Worker
// Release the scratch area and restore the registers saved on entry.
2509*c0909341SAndroid Build Coastguard Worker        add_sp_align    1024
2510*c0909341SAndroid Build Coastguard Worker        vpop            {q4}
2511*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}
2512*c0909341SAndroid Build Coastguard Workerendfunc
2513*c0909341SAndroid Build Coastguard Worker
// 32x16 DCT/DCT inverse transform with add, 8 bpc (exported).
//
// Expands idct_dc, runs a first pass into a 1024-byte stack scratch area
// (units of 32*2 bytes), zero-fills what was skipped, then runs a second
// pass that reads the scratch area.
//
// Register use visible in this function:
//   r0  - base of the second pass output (r6 = r0 + i); presumably the
//         destination pixel pointer - confirm against the C prototype.
//   r2  - base of the first pass input (r7 = r2 + i*2); presumably the
//         coefficient buffer - confirm.
//   r3  - compared against thresholds read from the table; presumably the
//         eob value of the block - confirm.
//   r5  - address of inv_dct_4h_x16_neon; not used directly here, presumably
//         consumed by inv_txfm_add_vert_4x16_neon - confirm.
//   r10 - read cursor into the threshold table, r11 - threshold currently
//         loaded.
// Note that the threshold table used here is eob_16x32, the same one the
// 16x32 function uses.
2514*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_dct_dct_32x16_8bpc_neon, export=1
2515*c0909341SAndroid Build Coastguard Worker        idct_dc         32,  16,  1
2516*c0909341SAndroid Build Coastguard Worker
2517*c0909341SAndroid Build Coastguard Worker        push            {r4-r11,lr}
2518*c0909341SAndroid Build Coastguard Worker        vpush           {q4}
2519*c0909341SAndroid Build Coastguard Worker        sub_sp_align    1024
2520*c0909341SAndroid Build Coastguard Worker        movrel_local    r10, eob_16x32
2521*c0909341SAndroid Build Coastguard Worker        ldrh            r11, [r10], #2
2522*c0909341SAndroid Build Coastguard Worker        movrel_local    r5,  inv_dct_4h_x16_neon
2523*c0909341SAndroid Build Coastguard Worker
// First pass, unrolled over i = 0, 4, 8, 12. For every slice but the first,
// r8 is preloaded with the number of 64-byte scratch units still unwritten
// (16 - i) so that it is valid at 1:; on the fall-through path it is
// overwritten with 2*16 before the call.
2524*c0909341SAndroid Build Coastguard Worker.irp i, 0, 4, 8, 12
2525*c0909341SAndroid Build Coastguard Worker        add             r6,  sp,  #(\i*32*2)
2526*c0909341SAndroid Build Coastguard Worker        add             r7,  r2,  #(\i*2)
2527*c0909341SAndroid Build Coastguard Worker.if \i > 0
2528*c0909341SAndroid Build Coastguard Worker        mov             r8,  #(16 - \i)
2529*c0909341SAndroid Build Coastguard Worker        cmp             r3,  r11
2530*c0909341SAndroid Build Coastguard Worker        blt             1f
2531*c0909341SAndroid Build Coastguard Worker.if \i < 12
2532*c0909341SAndroid Build Coastguard Worker        ldrh            r11, [r10], #2
2533*c0909341SAndroid Build Coastguard Worker.endif
2534*c0909341SAndroid Build Coastguard Worker.endif
2535*c0909341SAndroid Build Coastguard Worker        mov             r8,  #2*16
2536*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_horz_scale_dct_32x4_neon
2537*c0909341SAndroid Build Coastguard Worker.endr
2538*c0909341SAndroid Build Coastguard Worker        b               3f
2539*c0909341SAndroid Build Coastguard Worker
// Zero-fill the rest of the scratch area starting at r6. Each iteration
// stores 4 * 32 = 128 bytes (two 64-byte units) and takes 2 off r8.
2540*c0909341SAndroid Build Coastguard Worker1:
2541*c0909341SAndroid Build Coastguard Worker        vmov.i16        q2,  #0
2542*c0909341SAndroid Build Coastguard Worker        vmov.i16        q3,  #0
2543*c0909341SAndroid Build Coastguard Worker2:
2544*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #2
2545*c0909341SAndroid Build Coastguard Worker.rept 4
2546*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3}, [r6, :128]!
2547*c0909341SAndroid Build Coastguard Worker.endr
2548*c0909341SAndroid Build Coastguard Worker        bgt             2b
2549*c0909341SAndroid Build Coastguard Worker
// Second pass, unrolled over i = 0, 4, ..., 28: r6 = r0 + i, r7 = scratch + i*2,
// r8 = 32*2.
2550*c0909341SAndroid Build Coastguard Worker3:
2551*c0909341SAndroid Build Coastguard Worker.irp i, 0, 4, 8, 12, 16, 20, 24, 28
2552*c0909341SAndroid Build Coastguard Worker        add             r6,  r0,  #(\i)
2553*c0909341SAndroid Build Coastguard Worker        add             r7,  sp,  #(\i*2)
2554*c0909341SAndroid Build Coastguard Worker        mov             r8,  #32*2
2555*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_add_vert_4x16_neon
2556*c0909341SAndroid Build Coastguard Worker.endr
2557*c0909341SAndroid Build Coastguard Worker
// Release the scratch area and restore the registers saved on entry.
2558*c0909341SAndroid Build Coastguard Worker        add_sp_align    1024
2559*c0909341SAndroid Build Coastguard Worker        vpop            {q4}
2560*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}
2561*c0909341SAndroid Build Coastguard Workerendfunc
2562*c0909341SAndroid Build Coastguard Worker
// 8x32 DCT/DCT inverse transform with add, 8 bpc (exported).
//
// The first pass is done inline, 8x8 values at a time, into a 512-byte stack
// scratch area; the second pass is two calls to
// inv_txfm_add_vert_dct_4x32_neon.
//
// Register use visible in this function:
//   r0  - base of the second pass output (r6 = r0 + i); presumably the
//         destination pixel pointer - confirm against the C prototype.
//   r2  - input pointer; every location read from it is overwritten with
//         zeros. Presumably the coefficient buffer - confirm.
//   r3  - compared against thresholds read from eob_8x32; presumably the
//         eob value of the block - confirm.
//   r6  - write cursor into the scratch area.
//   r8  - input stride in bytes (2*32) during the first pass.
//   r9  - countdown starting at 32, reduced by 8 per 128 bytes of scratch
//         written.
//   r10 - read cursor into eob_8x32, r11 - threshold currently loaded.
2563*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_dct_dct_8x32_8bpc_neon, export=1
2564*c0909341SAndroid Build Coastguard Worker        idct_dc         8,   32,  2
2565*c0909341SAndroid Build Coastguard Worker
2566*c0909341SAndroid Build Coastguard Worker        push            {r4-r11,lr}
2567*c0909341SAndroid Build Coastguard Worker        vpush           {q4-q7}
2568*c0909341SAndroid Build Coastguard Worker        sub_sp_align    512
2569*c0909341SAndroid Build Coastguard Worker
2570*c0909341SAndroid Build Coastguard Worker        movrel_local    r10, eob_8x32
2571*c0909341SAndroid Build Coastguard Worker
2572*c0909341SAndroid Build Coastguard Worker        mov             r8,  #2*32
2573*c0909341SAndroid Build Coastguard Worker        mov             r9,  #32
2574*c0909341SAndroid Build Coastguard Worker        mov             r6,  sp
// First pass loop: load eight q registers from r2 (stride r8), clearing each
// source location with the zeros in q0 as it is read.
2575*c0909341SAndroid Build Coastguard Worker1:
2576*c0909341SAndroid Build Coastguard Worker        vmov.i16        q0,  #0
2577*c0909341SAndroid Build Coastguard Worker.irp i, q8, q9, q10, q11, q12, q13, q14, q15
2578*c0909341SAndroid Build Coastguard Worker        vld1.16         {\i}, [r2, :128]
2579*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0}, [r2, :128], r8
2580*c0909341SAndroid Build Coastguard Worker.endr
// Fetch the next threshold, then move r2 back by the eight strides just
// walked and forward by 2*8 bytes for the next iteration.
2581*c0909341SAndroid Build Coastguard Worker        ldrh            r11, [r10], #2
2582*c0909341SAndroid Build Coastguard Worker        sub             r2,  r2,  r8, lsl #3
2583*c0909341SAndroid Build Coastguard Worker        sub             r9,  r9,  #8
2584*c0909341SAndroid Build Coastguard Worker        add             r2,  r2,  #2*8
2585*c0909341SAndroid Build Coastguard Worker
2586*c0909341SAndroid Build Coastguard Worker        bl              inv_dct_8h_x8_neon
2587*c0909341SAndroid Build Coastguard Worker
// Rounding right shift by 2 of all eight result registers.
2588*c0909341SAndroid Build Coastguard Worker.irp i, q8, q9, q10, q11, q12, q13, q14, q15
2589*c0909341SAndroid Build Coastguard Worker        vrshr.s16       \i,  \i,  #2
2590*c0909341SAndroid Build Coastguard Worker.endr
2591*c0909341SAndroid Build Coastguard Worker
2592*c0909341SAndroid Build Coastguard Worker        transpose_8x8h  q8,  q9,  q10, q11, q12, q13, q14, q15, d17, d19, d21, d23, d24, d26, d28, d30
2593*c0909341SAndroid Build Coastguard Worker
// Store the 128 bytes of results to the scratch area. The cmp is interleaved
// with the stores; its flags are consumed by the bge below (vst1 does not
// set flags).
2594*c0909341SAndroid Build Coastguard Worker        vst1.16         {q8,  q9},  [r6, :128]!
2595*c0909341SAndroid Build Coastguard Worker        cmp             r3,  r11
2596*c0909341SAndroid Build Coastguard Worker        vst1.16         {q10, q11}, [r6, :128]!
2597*c0909341SAndroid Build Coastguard Worker        vst1.16         {q12, q13}, [r6, :128]!
2598*c0909341SAndroid Build Coastguard Worker        vst1.16         {q14, q15}, [r6, :128]!
2599*c0909341SAndroid Build Coastguard Worker
// Continue while r3 >= threshold. NOTE(review): this loop has no bound on r9
// of its own; it presumably relies on the last eob_8x32 entry being larger
// than any r3 passed in - confirm against the table definition.
2600*c0909341SAndroid Build Coastguard Worker        bge             1b
2601*c0909341SAndroid Build Coastguard Worker        cmp             r9,  #0
2602*c0909341SAndroid Build Coastguard Worker        beq             3f
2603*c0909341SAndroid Build Coastguard Worker
// Zero-fill the rest of the scratch area: 128 bytes per iteration, taking 8
// off r9 each time.
2604*c0909341SAndroid Build Coastguard Worker        vmov.i16        q2,  #0
2605*c0909341SAndroid Build Coastguard Worker        vmov.i16        q3,  #0
2606*c0909341SAndroid Build Coastguard Worker2:
2607*c0909341SAndroid Build Coastguard Worker        subs            r9,  r9,  #8
2608*c0909341SAndroid Build Coastguard Worker.rept 4
2609*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3}, [r6, :128]!
2610*c0909341SAndroid Build Coastguard Worker.endr
2611*c0909341SAndroid Build Coastguard Worker        bgt             2b
2612*c0909341SAndroid Build Coastguard Worker
// Second pass, unrolled over i = 0, 4: r6 = r0 + i, r7 = scratch + i*2,
// r8 = 8*2.
2613*c0909341SAndroid Build Coastguard Worker3:
2614*c0909341SAndroid Build Coastguard Worker.irp i, 0, 4
2615*c0909341SAndroid Build Coastguard Worker        add             r6,  r0,  #(\i)
2616*c0909341SAndroid Build Coastguard Worker        add             r7,  sp,  #(\i*2)
2617*c0909341SAndroid Build Coastguard Worker        mov             r8,  #8*2
2618*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_add_vert_dct_4x32_neon
2619*c0909341SAndroid Build Coastguard Worker.endr
2620*c0909341SAndroid Build Coastguard Worker
// Release the scratch area and restore the registers saved on entry.
2621*c0909341SAndroid Build Coastguard Worker        add_sp_align    512
2622*c0909341SAndroid Build Coastguard Worker        vpop            {q4-q7}
2623*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}
2624*c0909341SAndroid Build Coastguard Workerendfunc
2625*c0909341SAndroid Build Coastguard Worker
// 32x8 DCT/DCT inverse transform with add, 8 bpc (exported).
//
// The first pass is up to two calls to inv_txfm_horz_dct_32x4_neon writing
// into a 512-byte stack scratch area; the second pass is done inline, 8x8
// values at a time, with inv_dct_8h_x8_neon and load_add_store_8x8.
//
// Register use visible in this function:
//   r0  - base of the second pass output (r6 = r0 + r9); presumably the
//         destination pixel pointer - confirm against the C prototype.
//   r2  - base of the first pass input (r7 = r2 + i*2); presumably the
//         coefficient buffer - confirm.
//   r3  - compared against the constant 10; presumably the eob value of
//         the block - confirm.
//   r9  - second pass offset, 0, 8, 16, 24.
//
// The numeric label 1: is defined twice. The "blt 1f" in the first pass
// resolves to the zero-fill code, the "blt 1b" at the end resolves to the
// second pass loop head.
2626*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_dct_dct_32x8_8bpc_neon, export=1
2627*c0909341SAndroid Build Coastguard Worker        idct_dc         32,  8,   2
2628*c0909341SAndroid Build Coastguard Worker
2629*c0909341SAndroid Build Coastguard Worker        push            {r4-r11,lr}
2630*c0909341SAndroid Build Coastguard Worker        vpush           {q4-q7}
2631*c0909341SAndroid Build Coastguard Worker        sub_sp_align    512
2632*c0909341SAndroid Build Coastguard Worker
// First pass, unrolled over i = 0, 4. The second slice is skipped when
// r3 < 10, in which case its half of the scratch area is zero-filled instead.
2633*c0909341SAndroid Build Coastguard Worker.irp i, 0, 4
2634*c0909341SAndroid Build Coastguard Worker        add             r6,  sp,  #(\i*32*2)
2635*c0909341SAndroid Build Coastguard Worker        add             r7,  r2,  #(\i*2)
2636*c0909341SAndroid Build Coastguard Worker.if \i > 0
2637*c0909341SAndroid Build Coastguard Worker        cmp             r3,  #10
2638*c0909341SAndroid Build Coastguard Worker        blt             1f
2639*c0909341SAndroid Build Coastguard Worker.endif
2640*c0909341SAndroid Build Coastguard Worker        mov             r8,  #8*2
2641*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_horz_dct_32x4_neon
2642*c0909341SAndroid Build Coastguard Worker.endr
2643*c0909341SAndroid Build Coastguard Worker        b               2f
2644*c0909341SAndroid Build Coastguard Worker
// Zero-fill 8 * 32 = 256 bytes starting at r6 (the second half of the
// scratch area).
2645*c0909341SAndroid Build Coastguard Worker1:
2646*c0909341SAndroid Build Coastguard Worker        vmov.i16        q2,  #0
2647*c0909341SAndroid Build Coastguard Worker        vmov.i16        q3,  #0
2648*c0909341SAndroid Build Coastguard Worker.rept 8
2649*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3}, [r6, :128]!
2650*c0909341SAndroid Build Coastguard Worker.endr
2651*c0909341SAndroid Build Coastguard Worker
// Second pass setup: r8 = scratch stride in bytes (2*32), r9 = offset.
2652*c0909341SAndroid Build Coastguard Worker2:
2653*c0909341SAndroid Build Coastguard Worker        mov             r8,  #2*32
2654*c0909341SAndroid Build Coastguard Worker        mov             r9,  #0
// Second pass loop head: r6 = r0 + r9, r7 = scratch + r9*2.
2655*c0909341SAndroid Build Coastguard Worker1:
2656*c0909341SAndroid Build Coastguard Worker        add             r6,  r0,  r9
2657*c0909341SAndroid Build Coastguard Worker        add             r7,  sp,  r9, lsl #1 // #(\i*2)
2658*c0909341SAndroid Build Coastguard Worker
// Load eight q registers from the scratch area, one per stride.
2659*c0909341SAndroid Build Coastguard Worker.irp i, q8, q9, q10, q11, q12, q13, q14, q15
2660*c0909341SAndroid Build Coastguard Worker        vld1.16         {\i}, [r7, :128], r8
2661*c0909341SAndroid Build Coastguard Worker.endr
2662*c0909341SAndroid Build Coastguard Worker        add             r9,  r9,  #8
2663*c0909341SAndroid Build Coastguard Worker
2664*c0909341SAndroid Build Coastguard Worker        bl              inv_dct_8h_x8_neon
2665*c0909341SAndroid Build Coastguard Worker
// The loop condition is evaluated here, ahead of the macro expansion, and
// consumed by the blt after it; presumably load_add_store_8x8 leaves the
// flags untouched - confirm against the macro definition.
2666*c0909341SAndroid Build Coastguard Worker        cmp             r9,  #32
2667*c0909341SAndroid Build Coastguard Worker
2668*c0909341SAndroid Build Coastguard Worker        load_add_store_8x8 r6, r7
2669*c0909341SAndroid Build Coastguard Worker
2670*c0909341SAndroid Build Coastguard Worker        blt             1b
2671*c0909341SAndroid Build Coastguard Worker
// Release the scratch area and restore the registers saved on entry.
2672*c0909341SAndroid Build Coastguard Worker        add_sp_align    512
2673*c0909341SAndroid Build Coastguard Worker        vpop            {q4-q7}
2674*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}
2675*c0909341SAndroid Build Coastguard Workerendfunc
2676*c0909341SAndroid Build Coastguard Worker
// First step of the 64-point inverse DCT for one group of four inputs.
//
// In:    d16-d19 = the four inputs of this group (see the table below)
//        r12     = pointer to 12 16-bit coefficients; advanced by 24 bytes
//        r6      = output pointer; advanced by 64 bytes
// Out:   eight d registers (d16-d23) stored to the memory at r6
// Clobb: d0-d2, q2-q4, d16-d31 as written below; the vmull_vmlal and
//        vmull_vmlsl macros are defined elsewhere and may touch more.
// Leaf function, returns with bx lr.
2677*c0909341SAndroid Build Coastguard Workerfunction inv_dct64_step1_neon
2678*c0909341SAndroid Build Coastguard Worker        // in1/31/17/15 -> t32a/33/34a/35/60/61a/62/63a
2679*c0909341SAndroid Build Coastguard Worker        // in7/25/23/ 9 -> t56a/57/58a/59/36/37a/38/39a
2680*c0909341SAndroid Build Coastguard Worker        // in5/27/21/11 -> t40a/41/42a/43/52/53a/54/55a
2681*c0909341SAndroid Build Coastguard Worker        // in3/29/19/13 -> t48a/49/50a/51/44/45a/46/47a
2682*c0909341SAndroid Build Coastguard Worker
// d0, d1: per-input multipliers for the vqrdmulh stage; d2: rotation
// constants for the vmull_vmlal/vmull_vmlsl stages.
2683*c0909341SAndroid Build Coastguard Worker        vld1.16         {d0, d1, d2}, [r12, :64]!
2684*c0909341SAndroid Build Coastguard Worker
// Each input is multiplied by two different coefficients, giving eight
// values. The second product of each pair overwrites the input register, so
// it has to come after the first.
2685*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s16    d23, d16, d0[1]  // t63a
2686*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s16    d16, d16, d0[0]  // t32a
2687*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s16    d22, d17, d0[2]  // t62a
2688*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s16    d17, d17, d0[3]  // t33a
2689*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s16    d21, d18, d1[1]  // t61a
2690*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s16    d18, d18, d1[0]  // t34a
2691*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s16    d20, d19, d1[2]  // t60a
2692*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s16    d19, d19, d1[3]  // t35a
2693*c0909341SAndroid Build Coastguard Worker
// Saturating butterflies.
2694*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d24, d16, d17    // t32
2695*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d25, d16, d17    // t33
2696*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d26, d19, d18    // t34
2697*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d27, d19, d18    // t35
2698*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d28, d20, d21    // t60
2699*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d29, d20, d21    // t61
2700*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d30, d23, d22    // t62
2701*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d31, d23, d22    // t63
2702*c0909341SAndroid Build Coastguard Worker
// Rotations with d2[0]/d2[1]. The 32-bit results in q2-q4 are narrowed with
// a saturating rounding shift by 12; q2 is used twice, so its first result
// is narrowed before it is overwritten.
2703*c0909341SAndroid Build Coastguard Worker        vmull_vmlal     q2,  d29, d26, d2[0], d2[1] // -> t34a
2704*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl     q3,  d29, d26, d2[1], d2[0] // -> t61a
2705*c0909341SAndroid Build Coastguard Worker        vneg.s32        q2,  q2                     // t34a
2706*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl     q4,  d30, d25, d2[1], d2[0] // -> t33a
2707*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d26, q2,  #12               // t34a
2708*c0909341SAndroid Build Coastguard Worker        vmull_vmlal     q2,  d30, d25, d2[0], d2[1] // -> t62a
2709*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d29, q3,  #12               // t61a
2710*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d25, q4,  #12               // t33a
2711*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d30, q2,  #12               // t62a
2712*c0909341SAndroid Build Coastguard Worker
// Second round of saturating butterflies, written back into d16-d23.
2713*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d16, d24, d27    // t32a
2714*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d19, d24, d27    // t35a
2715*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d17, d25, d26    // t33
2716*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d18, d25, d26    // t34
2717*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d20, d31, d28    // t60a
2718*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d23, d31, d28    // t63a
2719*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d21, d30, d29    // t61
2720*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d22, d30, d29    // t62
2721*c0909341SAndroid Build Coastguard Worker
// Rotations with d2[2]/d2[3], again narrowing q2 before it is reused.
2722*c0909341SAndroid Build Coastguard Worker        vmull_vmlal     q2,  d21, d18, d2[2], d2[3] // -> t61a
2723*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl     q3,  d21, d18, d2[3], d2[2] // -> t34a
2724*c0909341SAndroid Build Coastguard Worker        vmull_vmlal     q4,  d20, d19, d2[2], d2[3] // -> t60
2725*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d21, q2,  #12               // t61a
2726*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d18, q3,  #12               // t34a
2727*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl     q2,  d20, d19, d2[3], d2[2] // -> t35
2728*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d20, q4,  #12               // t60
2729*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d19, q2,  #12               // t35
2730*c0909341SAndroid Build Coastguard Worker
// Store the eight results (64 bytes) and advance r6 past them.
2731*c0909341SAndroid Build Coastguard Worker        vst1.16         {d16, d17, d18, d19}, [r6, :128]!
2732*c0909341SAndroid Build Coastguard Worker        vst1.16         {d20, d21, d22, d23}, [r6, :128]!
2733*c0909341SAndroid Build Coastguard Worker
2734*c0909341SAndroid Build Coastguard Worker        bx              lr
2735*c0909341SAndroid Build Coastguard Workerendfunc
2736*c0909341SAndroid Build Coastguard Worker
// Second step of the 64-point inverse DCT, done in place on the values
// written by step 1.
//
// In:    r6 = low cursor, r9 = high cursor into the same buffer. Each
//        iteration reads and writes one d register at byte offsets 0, 64,
//        128 and 192 (2*4*0/8/16/24) from both cursors.
// Loop:  r6 moves up and r9 moves down by 8 bytes per iteration; the loop
//        runs while r6 is below r9.
// Clobb: r12, d0, q2-q4, d16-d31 as written below; the vmull_vmlal and
//        vmull_vmlsl macros are defined elsewhere and may touch more.
// Leaf function, returns with bx lr.
2737*c0909341SAndroid Build Coastguard Workerfunction inv_dct64_step2_neon
2738*c0909341SAndroid Build Coastguard Worker        movrel_local    r12, idct_coeffs
2739*c0909341SAndroid Build Coastguard Worker        vld1.16         {d0}, [r12, :64]
2740*c0909341SAndroid Build Coastguard Worker1:
2741*c0909341SAndroid Build Coastguard Worker        // t32a/33/34a/35/60/61a/62/63a
2742*c0909341SAndroid Build Coastguard Worker        // t56a/57/58a/59/36/37a/38/39a
2743*c0909341SAndroid Build Coastguard Worker        // t40a/41/42a/43/52/53a/54/55a
2744*c0909341SAndroid Build Coastguard Worker        // t48a/49/50a/51/44/45a/46/47a
2745*c0909341SAndroid Build Coastguard Worker        vldr            d16, [r6, #2*4*0]  // t32a
2746*c0909341SAndroid Build Coastguard Worker        vldr            d17, [r9, #2*4*8]  // t39a
2747*c0909341SAndroid Build Coastguard Worker        vldr            d18, [r9, #2*4*0]  // t63a
2748*c0909341SAndroid Build Coastguard Worker        vldr            d19, [r6, #2*4*8]  // t56a
2749*c0909341SAndroid Build Coastguard Worker        vldr            d20, [r6, #2*4*16] // t40a
2750*c0909341SAndroid Build Coastguard Worker        vldr            d21, [r9, #2*4*24] // t47a
2751*c0909341SAndroid Build Coastguard Worker        vldr            d22, [r9, #2*4*16] // t55a
2752*c0909341SAndroid Build Coastguard Worker        vldr            d23, [r6, #2*4*24] // t48a
2753*c0909341SAndroid Build Coastguard Worker
// Saturating butterflies.
2754*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d24, d16, d17      // t32
2755*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d25, d16, d17      // t39
2756*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d26, d18, d19      // t63
2757*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d27, d18, d19      // t56
2758*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d28, d21, d20      // t40
2759*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d29, d21, d20      // t47
2760*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d30, d23, d22      // t48
2761*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d31, d23, d22      // t55
2762*c0909341SAndroid Build Coastguard Worker
// Rotations with d0[2]/d0[3]. q2 is used twice, so its first result is
// narrowed before it is overwritten.
2763*c0909341SAndroid Build Coastguard Worker        vmull_vmlal     q2,  d27, d25, d0[3], d0[2] // -> t56a
2764*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl     q3,  d27, d25, d0[2], d0[3] // -> t39a
2765*c0909341SAndroid Build Coastguard Worker        vmull_vmlal     q4,  d31, d28, d0[3], d0[2] // -> t40a
2766*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d25, q2,  #12               // t56a
2767*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d27, q3,  #12               // t39a
2768*c0909341SAndroid Build Coastguard Worker        vneg.s32        q4,  q4                     // t40a
2769*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl     q2,  d31, d28, d0[2], d0[3] // -> t55a
2770*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d31, q4,  #12               // t40a
2771*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d28, q2,  #12               // t55a
2772*c0909341SAndroid Build Coastguard Worker
// Second round of saturating butterflies, written back into d16-d23.
2773*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d16, d24, d29      // t32a
2774*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d19, d24, d29      // t47a
2775*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d17, d27, d31      // t39
2776*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d18, d27, d31      // t40
2777*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d20, d26, d30      // t48a
2778*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d23, d26, d30      // t63a
2779*c0909341SAndroid Build Coastguard Worker        vqsub.s16       d21, d25, d28      // t55
2780*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d22, d25, d28      // t56
2781*c0909341SAndroid Build Coastguard Worker
// Rotations that use d0[0] for both factors.
2782*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl     q2,  d21, d18, d0[0], d0[0] // -> t40a
2783*c0909341SAndroid Build Coastguard Worker        vmull_vmlal     q3,  d21, d18, d0[0], d0[0] // -> t55a
2784*c0909341SAndroid Build Coastguard Worker        vmull_vmlsl     q4,  d20, d19, d0[0], d0[0] // -> t47
2785*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d18, q2,  #12               // t40a
2786*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d21, q3,  #12               // t55a
2787*c0909341SAndroid Build Coastguard Worker        vmull_vmlal     q2,  d20, d19, d0[0], d0[0] // -> t48
2788*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d19, q4,  #12               // t47
2789*c0909341SAndroid Build Coastguard Worker        vqrshrn.s32     d20, q2,  #12               // t48
2790*c0909341SAndroid Build Coastguard Worker
// Write the results back in place; note that the slot assignment differs
// from the one used by the loads above.
2791*c0909341SAndroid Build Coastguard Worker        vstr            d16, [r6, #2*4*0]  // t32a
2792*c0909341SAndroid Build Coastguard Worker        vstr            d17, [r9, #2*4*0]  // t39
2793*c0909341SAndroid Build Coastguard Worker        vstr            d18, [r6, #2*4*8]  // t40a
2794*c0909341SAndroid Build Coastguard Worker        vstr            d19, [r9, #2*4*8]  // t47
2795*c0909341SAndroid Build Coastguard Worker        vstr            d20, [r6, #2*4*16] // t48
2796*c0909341SAndroid Build Coastguard Worker        vstr            d21, [r9, #2*4*16] // t55a
2797*c0909341SAndroid Build Coastguard Worker        vstr            d22, [r6, #2*4*24] // t56
2798*c0909341SAndroid Build Coastguard Worker        vstr            d23, [r9, #2*4*24] // t63a
2799*c0909341SAndroid Build Coastguard Worker
// Move both cursors one d register towards each other. r6 and r9 hold
// addresses, so the loop condition uses the unsigned "lower" condition; a
// signed comparison would give the wrong answer for a buffer that straddles
// address 0x80000000.
2800*c0909341SAndroid Build Coastguard Worker        add             r6,  r6,  #2*4
2801*c0909341SAndroid Build Coastguard Worker        sub             r9,  r9,  #2*4
2802*c0909341SAndroid Build Coastguard Worker        cmp             r6,  r9
2803*c0909341SAndroid Build Coastguard Worker        blo             1b
2804*c0909341SAndroid Build Coastguard Worker        bx              lr
2805*c0909341SAndroid Build Coastguard Workerendfunc
2806*c0909341SAndroid Build Coastguard Worker
// load8 src, strd, zero, clear
// Loads d16-d23, one d register at a time, from [src] (64-bit aligned),
// advancing src by strd after each register.
// When clear is non-zero at assembly time, every location that was read is
// also overwritten with the register given as zero; in that variant the
// pointer advance is done by the store instead of the load.
2807*c0909341SAndroid Build Coastguard Worker.macro load8 src, strd, zero, clear
2808*c0909341SAndroid Build Coastguard Worker.irp i, d16, d17, d18, d19, d20, d21, d22, d23
2809*c0909341SAndroid Build Coastguard Worker.if \clear
2810*c0909341SAndroid Build Coastguard Worker        vld1.16         {\i}, [\src, :64]
2811*c0909341SAndroid Build Coastguard Worker        vst1.16         {\zero}, [\src, :64], \strd
2812*c0909341SAndroid Build Coastguard Worker.else
2813*c0909341SAndroid Build Coastguard Worker        vld1.16         {\i}, [\src, :64], \strd
2814*c0909341SAndroid Build Coastguard Worker.endif
2815*c0909341SAndroid Build Coastguard Worker.endr
2816*c0909341SAndroid Build Coastguard Worker.endm
2817*c0909341SAndroid Build Coastguard Worker
// store16 dst
// Stores q8-q15 (128 bytes) to [dst] with a 128-bit alignment hint,
// post-incrementing dst past the data written.
2818*c0909341SAndroid Build Coastguard Worker.macro store16 dst
2819*c0909341SAndroid Build Coastguard Worker        vst1.16         {q8,  q9},  [\dst, :128]!
2820*c0909341SAndroid Build Coastguard Worker        vst1.16         {q10, q11}, [\dst, :128]!
2821*c0909341SAndroid Build Coastguard Worker        vst1.16         {q12, q13}, [\dst, :128]!
2822*c0909341SAndroid Build Coastguard Worker        vst1.16         {q14, q15}, [\dst, :128]!
2823*c0909341SAndroid Build Coastguard Worker.endm
2824*c0909341SAndroid Build Coastguard Worker
// clear_upper8
// Sets q12, q13, q14 and q15 to zero.
2825*c0909341SAndroid Build Coastguard Worker.macro clear_upper8
2826*c0909341SAndroid Build Coastguard Worker.irp i, q12, q13, q14, q15
2827*c0909341SAndroid Build Coastguard Worker        vmov.i16        \i,  #0
2828*c0909341SAndroid Build Coastguard Worker.endr
2829*c0909341SAndroid Build Coastguard Worker.endm
2830*c0909341SAndroid Build Coastguard Worker
// vmov_if: emits a vmov.i16 of the immediate val into reg when cond is
// nonzero at assembly time, and emits nothing otherwise.
2831*c0909341SAndroid Build Coastguard Worker.macro vmov_if reg, val, cond
2832*c0909341SAndroid Build Coastguard Worker.if \cond
2833*c0909341SAndroid Build Coastguard Worker        vmov.i16        \reg, \val
2834*c0909341SAndroid Build Coastguard Worker.endif
2835*c0909341SAndroid Build Coastguard Worker.endm
2836*c0909341SAndroid Build Coastguard Worker
// movdup_if: when cond is nonzero at assembly time, loads the 16-bit
// immediate val into the scratch GPR gpr and broadcasts it to every
// 16-bit lane of reg. gpr is clobbered. Emits nothing when cond is zero.
2837*c0909341SAndroid Build Coastguard Worker.macro movdup_if reg, gpr, val, cond
2838*c0909341SAndroid Build Coastguard Worker.if \cond
2839*c0909341SAndroid Build Coastguard Worker        movw            \gpr, \val
2840*c0909341SAndroid Build Coastguard Worker        vdup.16         \reg, \gpr
2841*c0909341SAndroid Build Coastguard Worker.endif
2842*c0909341SAndroid Build Coastguard Worker.endm
2843*c0909341SAndroid Build Coastguard Worker
// vst1_if: emits a vst1.16 store when cond is nonzero at assembly time,
// and nothing otherwise.
// The address operand arrives split in two: macro arguments are separated
// at commas, so a call written with the operand [r7, :64] passes the text
// before the comma as dst and the text after it as dstalign. The macro
// glues the two halves back together with a comma.
2844*c0909341SAndroid Build Coastguard Worker.macro vst1_if regs, dst, dstalign, cond
2845*c0909341SAndroid Build Coastguard Worker.if \cond
2846*c0909341SAndroid Build Coastguard Worker        vst1.16         \regs, \dst, \dstalign
2847*c0909341SAndroid Build Coastguard Worker.endif
2848*c0909341SAndroid Build Coastguard Worker.endm
2849*c0909341SAndroid Build Coastguard Worker
// scale_if: forwards c and up to eight registers to the scale_input macro
// (defined elsewhere in this file) when cond is nonzero at assembly time;
// emits nothing otherwise.
// Callers in this part of the file pass only two or four registers, so the
// trailing parameters are forwarded empty.
// NOTE(review): this relies on scale_input tolerating empty trailing
// arguments - confirm against its definition.
2850*c0909341SAndroid Build Coastguard Worker.macro scale_if cond, c, r0, r1, r2, r3, r4, r5, r6, r7
2851*c0909341SAndroid Build Coastguard Worker.if \cond
2852*c0909341SAndroid Build Coastguard Worker        scale_input     \c, \r0, \r1, \r2, \r3, \r4, \r5, \r6, \r7
2853*c0909341SAndroid Build Coastguard Worker.endif
2854*c0909341SAndroid Build Coastguard Worker.endm
2855*c0909341SAndroid Build Coastguard Worker
// def_dct64_func: defines the exported function
// inv_txfm_dct<suffix>_4h_x64_neon.
//
// Macro parameters:
//   suffix - text inserted into the function name
//   clear  - nonzero: every input row that is read is overwritten with zeros
//   scale  - nonzero: the loaded inputs are passed through scale_input with
//            the constant 2896*8 broadcast in d0
//
// Register use of the generated function, as visible here and in the
// callers in this file:
//   r7  in  - address of the first input row (four 16-bit values per row)
//   r8  in  - distance in bytes between consecutive input rows
//   sp  in  - the working buffer and result live at the sp value the
//             function was entered with (captured in r6 before the push);
//             callers in this file reserve 64*4*2 bytes there
//   r10, r11, lr are saved and restored; r6, r7, r8, r9, r12, d0, d2-d7 and
//   q8-q15 are overwritten here, and the helpers that are called may
//   overwrite more.
//
// Below, an "entry" is one 8-byte slot (four 16-bit values) of the buffer.
2856*c0909341SAndroid Build Coastguard Worker.macro def_dct64_func suffix, clear=0, scale=0
2857*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_dct\suffix\()_4h_x64_neon, export=1
        // r6 = buffer cursor; must be taken before the push moves sp.
2858*c0909341SAndroid Build Coastguard Worker        mov             r6,  sp
2859*c0909341SAndroid Build Coastguard Worker
2860*c0909341SAndroid Build Coastguard Worker        push            {r10-r11,lr}
2861*c0909341SAndroid Build Coastguard Worker
        // r8 = 4 * row distance, the step used by the first two loads.
2862*c0909341SAndroid Build Coastguard Worker        lsl             r8,  r8,  #2
2863*c0909341SAndroid Build Coastguard Worker
        // First load: rows 0, 4, 8, ..., 28 into d16-d23, d24-d31 zeroed.
        // Afterwards r7 is rewound by the 8 steps load8 took and moved
        // forward half a step, i.e. to row 2.
2864*c0909341SAndroid Build Coastguard Worker        movdup_if       d0,  r12, #2896*8, \scale
2865*c0909341SAndroid Build Coastguard Worker        vmov_if         d7,  #0,  \clear
2866*c0909341SAndroid Build Coastguard Worker        load8           r7,  r8,  d7,  \clear
2867*c0909341SAndroid Build Coastguard Worker        clear_upper8
2868*c0909341SAndroid Build Coastguard Worker        sub             r7,  r7,  r8, lsl #3
2869*c0909341SAndroid Build Coastguard Worker        add             r7,  r7,  r8, lsr #1
2870*c0909341SAndroid Build Coastguard Worker        scale_if        \scale, d0[0], q8, q9, q10, q11
2871*c0909341SAndroid Build Coastguard Worker
2872*c0909341SAndroid Build Coastguard Worker        bl              inv_dct_4h_x16_neon
2873*c0909341SAndroid Build Coastguard Worker
        // Park q8-q15 in entries 0-15; r6 is left pointing at entry 16.
2874*c0909341SAndroid Build Coastguard Worker        store16         r6
2875*c0909341SAndroid Build Coastguard Worker
        // Second load: rows 2, 6, 10, ..., 30. d0 and d7 are set up again
        // because the call above may have overwritten them.
        // Afterwards r7 is rewound to row 2, r8 is halved to 2 * row
        // distance, and r7 steps back half of that, i.e. to row 1.
2876*c0909341SAndroid Build Coastguard Worker        movdup_if       d0,  r12, #2896*8, \scale
2877*c0909341SAndroid Build Coastguard Worker        vmov_if         d7,  #0,  \clear
2878*c0909341SAndroid Build Coastguard Worker        load8           r7,  r8,  d7,  \clear
2879*c0909341SAndroid Build Coastguard Worker        clear_upper8
2880*c0909341SAndroid Build Coastguard Worker        sub             r7,  r7,  r8, lsl #3
2881*c0909341SAndroid Build Coastguard Worker        lsr             r8,  r8,  #1
2882*c0909341SAndroid Build Coastguard Worker        sub             r7,  r7,  r8, lsr #1
2883*c0909341SAndroid Build Coastguard Worker        scale_if        \scale, d0[0], q8, q9, q10, q11
2884*c0909341SAndroid Build Coastguard Worker
2885*c0909341SAndroid Build Coastguard Worker        bl              inv_dct32_odd_4h_x16_neon
2886*c0909341SAndroid Build Coastguard Worker
        // r10 = entry 31, r6 = entry 0 (r6 was at entry 16 after store16).
2887*c0909341SAndroid Build Coastguard Worker        add             r10, r6,  #8*15
2888*c0909341SAndroid Build Coastguard Worker        sub             r6,  r6,  #8*16
2889*c0909341SAndroid Build Coastguard Worker
        // r9 = post-increment that moves r10 down one entry per store.
2890*c0909341SAndroid Build Coastguard Worker        mov             r9,  #-8
2891*c0909341SAndroid Build Coastguard Worker
        // store_addsub: takes the next four parked entries at r6 and four
        // registers. The saturating sums replace the four entries at r6
        // (r6 ends up four entries further on); the saturating differences
        // (entry minus register) are stored at r10, walking downward.
        // Loads, arithmetic and stores are interleaved.
2892*c0909341SAndroid Build Coastguard Worker.macro store_addsub r0, r1, r2, r3
2893*c0909341SAndroid Build Coastguard Worker        vld1.16         {d2},  [r6, :64]!
2894*c0909341SAndroid Build Coastguard Worker        vld1.16         {d3},  [r6, :64]!
2895*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d6,  d2,  \r0
2896*c0909341SAndroid Build Coastguard Worker        vqsub.s16       \r0, d2,  \r0
2897*c0909341SAndroid Build Coastguard Worker        vld1.16         {d4},  [r6, :64]!
2898*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d7,  d3,  \r1
2899*c0909341SAndroid Build Coastguard Worker        vqsub.s16       \r1, d3,  \r1
2900*c0909341SAndroid Build Coastguard Worker        vld1.16         {d5},  [r6, :64]!
2901*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d2,  d4,  \r2
2902*c0909341SAndroid Build Coastguard Worker        sub             r6,  r6,  #8*4
2903*c0909341SAndroid Build Coastguard Worker        vqsub.s16       \r2, d4,  \r2
2904*c0909341SAndroid Build Coastguard Worker        vst1.16         {d6},  [r6,  :64]!
2905*c0909341SAndroid Build Coastguard Worker        vst1.16         {\r0}, [r10, :64], r9
2906*c0909341SAndroid Build Coastguard Worker        vqadd.s16       d3,  d5,  \r3
2907*c0909341SAndroid Build Coastguard Worker        vqsub.s16       \r3, d5,  \r3
2908*c0909341SAndroid Build Coastguard Worker        vst1.16         {d7},  [r6,  :64]!
2909*c0909341SAndroid Build Coastguard Worker        vst1.16         {\r1}, [r10, :64], r9
2910*c0909341SAndroid Build Coastguard Worker        vst1.16         {d2},  [r6,  :64]!
2911*c0909341SAndroid Build Coastguard Worker        vst1.16         {\r2}, [r10, :64], r9
2912*c0909341SAndroid Build Coastguard Worker        vst1.16         {d3},  [r6,  :64]!
2913*c0909341SAndroid Build Coastguard Worker        vst1.16         {\r3}, [r10, :64], r9
2914*c0909341SAndroid Build Coastguard Worker.endm
        // Entries 0-15 are combined with d31 down to d16, so entry 0 pairs
        // with d31 and entry 15 with d16. Sums land in entries 0-15,
        // differences in entries 31 down to 16.
2915*c0909341SAndroid Build Coastguard Worker        store_addsub    d31, d30, d29, d28
2916*c0909341SAndroid Build Coastguard Worker        store_addsub    d27, d26, d25, d24
2917*c0909341SAndroid Build Coastguard Worker        store_addsub    d23, d22, d21, d20
2918*c0909341SAndroid Build Coastguard Worker        store_addsub    d19, d18, d17, d16
2919*c0909341SAndroid Build Coastguard Worker.purgem store_addsub
2920*c0909341SAndroid Build Coastguard Worker
        // r6 was at entry 16 after the four groups; move it to entry 32.
2921*c0909341SAndroid Build Coastguard Worker        add             r6,  r6,  #2*4*16
2922*c0909341SAndroid Build Coastguard Worker
        // Four calls to inv_dct64_step1_neon follow, each fed four rows in
        // d16-d19. In the trailing comments an "offset" counts steps of r8
        // (two rows) from row 1, which is where r7 points on the first call.
        // r12 now holds the idct64_coeffs address, so lr is used as the
        // scratch GPR for movdup_if; the real return address was pushed on
        // entry and is restored by the final pop.
        // NOTE(review): r12 is presumably the coefficient pointer consumed
        // by inv_dct64_step1_neon - confirm against its definition.
2923*c0909341SAndroid Build Coastguard Worker        movrel_local    r12, idct64_coeffs
2924*c0909341SAndroid Build Coastguard Worker        movdup_if       d0,  lr,  #2896*8, \scale
2925*c0909341SAndroid Build Coastguard Worker        vmov_if         d7,  #0,  \clear
2926*c0909341SAndroid Build Coastguard Worker        add             r9,  r7,  r8, lsl #4 // offset 16
2927*c0909341SAndroid Build Coastguard Worker        add             r10, r7,  r8, lsl #3 // offset 8
2928*c0909341SAndroid Build Coastguard Worker        sub             r9,  r9,  r8         // offset 15
2929*c0909341SAndroid Build Coastguard Worker        sub             r11, r10, r8         // offset 7
2930*c0909341SAndroid Build Coastguard Worker        vld1.16         {d16}, [r7,  :64]    // in1  (offset 0)
2931*c0909341SAndroid Build Coastguard Worker        vld1.16         {d17}, [r9,  :64]    // in31 (offset 15)
2932*c0909341SAndroid Build Coastguard Worker        vld1.16         {d18}, [r10, :64]    // in17 (offset 8)
2933*c0909341SAndroid Build Coastguard Worker        vld1.16         {d19}, [r11, :64]    // in15 (offset 7)
2934*c0909341SAndroid Build Coastguard Worker        vst1_if         {d7},  [r7,  :64], \clear
2935*c0909341SAndroid Build Coastguard Worker        vst1_if         {d7},  [r9,  :64], \clear
2936*c0909341SAndroid Build Coastguard Worker        vst1_if         {d7},  [r10, :64], \clear
2937*c0909341SAndroid Build Coastguard Worker        vst1_if         {d7},  [r11, :64], \clear
2938*c0909341SAndroid Build Coastguard Worker        scale_if        \scale, d0[0], q8, q9
2939*c0909341SAndroid Build Coastguard Worker        bl              inv_dct64_step1_neon
2940*c0909341SAndroid Build Coastguard Worker        movdup_if       d0,  lr,  #2896*8, \scale
2941*c0909341SAndroid Build Coastguard Worker        vmov_if         d7,  #0,  \clear
2942*c0909341SAndroid Build Coastguard Worker        add             r7,  r7,  r8, lsl #2 // offset 4
2943*c0909341SAndroid Build Coastguard Worker        sub             r9,  r9,  r8, lsl #2 // offset 11
2944*c0909341SAndroid Build Coastguard Worker        sub             r10, r7,  r8         // offset 3
2945*c0909341SAndroid Build Coastguard Worker        add             r11, r9,  r8         // offset 12
2946*c0909341SAndroid Build Coastguard Worker        vld1.16         {d16}, [r10, :64]    // in7  (offset 3)
2947*c0909341SAndroid Build Coastguard Worker        vld1.16         {d17}, [r11, :64]    // in25 (offset 12)
2948*c0909341SAndroid Build Coastguard Worker        vld1.16         {d18}, [r9,  :64]    // in23 (offset 11)
2949*c0909341SAndroid Build Coastguard Worker        vld1.16         {d19}, [r7,  :64]    // in9  (offset 4)
2950*c0909341SAndroid Build Coastguard Worker        vst1_if         {d7},  [r7,  :64], \clear
2951*c0909341SAndroid Build Coastguard Worker        vst1_if         {d7},  [r9,  :64], \clear
2952*c0909341SAndroid Build Coastguard Worker        vst1_if         {d7},  [r10, :64], \clear
2953*c0909341SAndroid Build Coastguard Worker        vst1_if         {d7},  [r11, :64], \clear
2954*c0909341SAndroid Build Coastguard Worker        scale_if        \scale, d0[0], q8, q9
2955*c0909341SAndroid Build Coastguard Worker        bl              inv_dct64_step1_neon
2956*c0909341SAndroid Build Coastguard Worker        movdup_if       d0,  lr,  #2896*8, \scale
2957*c0909341SAndroid Build Coastguard Worker        vmov_if         d7,  #0,  \clear
2958*c0909341SAndroid Build Coastguard Worker        sub             r10, r10, r8, lsl #1 // offset 1
2959*c0909341SAndroid Build Coastguard Worker        sub             r9,  r9,  r8, lsl #1 // offset 9
2960*c0909341SAndroid Build Coastguard Worker        add             r10, r10, r8         // offset 2
2961*c0909341SAndroid Build Coastguard Worker        add             r9,  r9,  r8         // offset 10
2962*c0909341SAndroid Build Coastguard Worker        add             r7,  r7,  r8         // offset 5
2963*c0909341SAndroid Build Coastguard Worker        add             r11, r11, r8         // offset 13
2964*c0909341SAndroid Build Coastguard Worker        vld1.16         d16, [r10, :64]      // in5  (offset 2)
2965*c0909341SAndroid Build Coastguard Worker        vld1.16         d17, [r11, :64]      // in27 (offset 13)
2966*c0909341SAndroid Build Coastguard Worker        vld1.16         d18, [r9,  :64]      // in21 (offset 10)
2967*c0909341SAndroid Build Coastguard Worker        vld1.16         d19, [r7,  :64]      // in11 (offset 5)
2968*c0909341SAndroid Build Coastguard Worker        vst1_if         d7,  [r10, :64], \clear
2969*c0909341SAndroid Build Coastguard Worker        vst1_if         d7,  [r11, :64], \clear
2970*c0909341SAndroid Build Coastguard Worker        vst1_if         d7,  [r9,  :64], \clear
2971*c0909341SAndroid Build Coastguard Worker        vst1_if         d7,  [r7,  :64], \clear
2972*c0909341SAndroid Build Coastguard Worker        scale_if        \scale, d0[0], q8, q9
2973*c0909341SAndroid Build Coastguard Worker        bl              inv_dct64_step1_neon
2974*c0909341SAndroid Build Coastguard Worker        movdup_if       d0,  lr,  #2896*8, \scale
2975*c0909341SAndroid Build Coastguard Worker        vmov_if         d7,  #0,  \clear
2976*c0909341SAndroid Build Coastguard Worker        sub             r10, r10, r8         // offset 1
2977*c0909341SAndroid Build Coastguard Worker        sub             r9,  r9,  r8         // offset 9
2978*c0909341SAndroid Build Coastguard Worker        add             r11, r11, r8         // offset 14
2979*c0909341SAndroid Build Coastguard Worker        add             r7,  r7,  r8         // offset 6
2980*c0909341SAndroid Build Coastguard Worker        vld1.16         d16, [r10, :64]      // in3  (offset 1)
2981*c0909341SAndroid Build Coastguard Worker        vld1.16         d17, [r11, :64]      // in29 (offset 14)
2982*c0909341SAndroid Build Coastguard Worker        vld1.16         d18, [r9,  :64]      // in19 (offset 9)
2983*c0909341SAndroid Build Coastguard Worker        vld1.16         d19, [r7,  :64]      // in13 (offset 6)
2984*c0909341SAndroid Build Coastguard Worker        vst1_if         d7,  [r10, :64], \clear
2985*c0909341SAndroid Build Coastguard Worker        vst1_if         d7,  [r11, :64], \clear
2986*c0909341SAndroid Build Coastguard Worker        vst1_if         d7,  [r9,  :64], \clear
2987*c0909341SAndroid Build Coastguard Worker        vst1_if         d7,  [r7,  :64], \clear
2988*c0909341SAndroid Build Coastguard Worker        scale_if        \scale, d0[0], q8, q9
2989*c0909341SAndroid Build Coastguard Worker        bl              inv_dct64_step1_neon
2990*c0909341SAndroid Build Coastguard Worker
        // Move r6 back by 32 entries and set r9 seven entries above it for
        // inv_dct64_step2_neon.
        // NOTE(review): this presumably undoes the r6 advance made by the
        // four step1 calls, leaving r6 at entry 32 again - confirm against
        // the definition of inv_dct64_step1_neon.
2991*c0909341SAndroid Build Coastguard Worker        sub             r6,  r6,  #2*4*32
2992*c0909341SAndroid Build Coastguard Worker        add             r9,  r6,  #2*4*7
2993*c0909341SAndroid Build Coastguard Worker
2994*c0909341SAndroid Build Coastguard Worker        bl              inv_dct64_step2_neon
2995*c0909341SAndroid Build Coastguard Worker
        // Restore r10 and r11 and return by popping the saved lr into pc.
2996*c0909341SAndroid Build Coastguard Worker        pop             {r10-r11,pc}
2997*c0909341SAndroid Build Coastguard Workerendfunc
2998*c0909341SAndroid Build Coastguard Worker.endm
2999*c0909341SAndroid Build Coastguard Worker
// Instantiate the three variants used in this file:
//   inv_txfm_dct_4h_x64_neon             - input left untouched, no scaling
//   inv_txfm_dct_clear_4h_x64_neon       - input zeroed after being read
//   inv_txfm_dct_clear_scale_4h_x64_neon - input zeroed and scaled
3000*c0909341SAndroid Build Coastguard Workerdef_dct64_func
3001*c0909341SAndroid Build Coastguard Workerdef_dct64_func _clear, clear=1
3002*c0909341SAndroid Build Coastguard Workerdef_dct64_func _clear_scale, clear=1, scale=1
3003*c0909341SAndroid Build Coastguard Worker
// inv_txfm_horz_dct_64x4_neon
//
// Reads the 64-entry buffer found at the sp value the function is entered
// with (one entry = four 16-bit values, 8 bytes), pairs entries from the
// front with entries from the back, and writes four output rows of 64
// values each.
//
//   r6  in - address of the first output row; rows are 2*64 bytes apart.
//            Left advanced by 64 bytes on return.
//   r9  in - shift count applied with vrshl.s16 to every result. The callers
//            in this file pass -2 or -1, i.e. a rounding right shift.
//   sp  in - address of the buffer (must be 128-bit aligned)
//
// r10, r11 and lr are saved and restored. r7, r8, r9, q0, q1, q3 and
// q8-q15 are overwritten.
//
// Per loop pass, eight entries are taken from the front (r7, ascending) and
// eight from the back (r8, descending) and transposed in 4x4 blocks with the
// transpose_4x4h macro (defined elsewhere in this file). The saturating sums
// are written at r6, moving right; the saturating differences are lane
// reversed and written at r9, moving left from column 56.
function inv_txfm_horz_dct_64x4_neon
        vdup.16         q3,  r9                 // shift count in every lane

        mov             r7,  sp                 // r7 -> entry 0
        add             r8,  sp,  #2*4*(64 - 4) // r8 -> entry 60
        add             r9,  r6,  #2*56         // r9 -> column 56 of output row 0

        push            {r10-r11,lr}

        mov             r10, #2*64              // output row distance in bytes
        mov             r11, #-2*4*4            // r8 moves down four entries per load

1:
        vld1.16         {d16, d17, d18, d19}, [r7, :128]!
        vld1.16         {d28, d29, d30, d31}, [r8, :128], r11
        vld1.16         {d20, d21, d22, d23}, [r7, :128]!
        vld1.16         {d24, d25, d26, d27}, [r8, :128], r11
        transpose_4x4h  q8,  q9,  d16, d17, d18, d19
        transpose_4x4h  q15, q14, d31, d30, d29, d28
        transpose_4x4h  q10, q11, d20, d21, d22, d23
        transpose_4x4h  q13, q12, d27, d26, d25, d24

        // One output row per invocation: q0 = sums, q1 = differences.
        // The difference halves are placed swapped (d3 then d2) and each
        // half is lane reversed by vrev64, which together reverses all
        // eight values.
.macro store_addsub src0, src1, src2, src3
        vqsub.s16       d3,  \src0,  \src1
        vqsub.s16       d2,  \src2,  \src3
        vqadd.s16       d0,  \src0,  \src1
        vqadd.s16       d1,  \src2,  \src3
        vrshl.s16       q1,  q1,  q3
        vrshl.s16       q0,  q0,  q3
        vrev64.16       q1,  q1
        vst1.16         {q0},  [r6, :128], r10
        vst1.16         {q1},  [r9, :128], r10
.endm
        store_addsub    d16, d31, d20, d27
        store_addsub    d17, d30, d21, d26
        store_addsub    d18, d29, d22, d25
        store_addsub    d19, d28, d23, d24
.purgem store_addsub
        // Rewind the four rows just written and step to the next eight
        // columns: r6 moves right, r9 moves left.
        sub             r6,  r6,  r10, lsl #2
        sub             r9,  r9,  r10, lsl #2
        add             r6,  r6,  #16
        sub             r9,  r9,  #16

        // r7 and r8 are addresses, so the loop test must use the unsigned
        // condition. A signed test gives the wrong answer if the buffer
        // happens to straddle address 0x80000000.
        cmp             r7,  r8
        blo             1b
        pop             {r10-r11,pc}
endfunc
3051*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_vert_dct_4x64_neon
//
// Reads the 64-entry buffer found at the sp value the function is entered
// with (one entry = four 16-bit values, 8 bytes), pairs entries from the
// front with entries from the back, and adds the results to a 4 pixel wide,
// 64 row tall column of 8-bit pixels.
//
//   r6  in - address of the top row of the 4-pixel column (32-bit aligned)
//   r1  in - distance in bytes between pixel rows
//   sp  in - address of the buffer (must be 128-bit aligned)
//
// r10, r11 and lr are saved and restored. r6, r7, r8, r9, q0, q2, q3 and
// q8-q15 are overwritten. r8 is not an input: it is loaded with a buffer
// address before it is ever read.
//
// For entry pair (n, 63 - n) the saturating sum goes to pixel row n and the
// saturating difference to pixel row 63 - n. Both get a rounding right shift
// by 4, are added to the existing pixels and saturated back to 8 bits.
function inv_txfm_add_vert_dct_4x64_neon
        mov             r7,  sp                 // r7 -> entry 0, walks upward
        add             r8,  sp,  #2*4*(64 - 4) // r8 -> entry 60, walks downward
        add             r9,  r6,  r1, lsl #6
        sub             r9,  r9,  r1            // r9 -> pixel row 63

        push            {r10-r11,lr}

        neg             r10, r1                 // r9 moves upward by one row per step
        mov             r11, #-2*4*4            // r8 moves down four entries per load

1:
        vld1.16         {d16, d17, d18, d19}, [r7, :128]!
        vld1.16         {d28, d29, d30, d31}, [r8, :128], r11
        vld1.16         {d20, d21, d22, d23}, [r7, :128]!
        vld1.16         {d24, d25, d26, d27}, [r8, :128], r11

        // Handles two rows from the top (via r6) and two from the bottom
        // (via r9). The pixel loads step one row and then read the second
        // row without writeback, so r6 and r9 are moved back one row before
        // the stores, which then advance both pointers by two rows.
.macro add_dest_addsub src0, src1, src2, src3
        vld1.32         {d0[0]}, [r6, :32], r1
        vld1.32         {d1[0]}, [r9, :32], r10
        vqadd.s16       d4,  \src0,  \src1
        vld1.32         {d0[1]}, [r6, :32]
        vqadd.s16       d5,  \src2,  \src3
        vld1.32         {d1[1]}, [r9, :32]
        vqsub.s16       d6,  \src0,  \src1
        vqsub.s16       d7,  \src2,  \src3
        sub             r6,  r6,  r1
        sub             r9,  r9,  r10
        vrshr.s16       q2,  q2,  #4
        vrshr.s16       q3,  q3,  #4
        vaddw.u8        q2,  q2,  d0
        vaddw.u8        q3,  q3,  d1
        vqmovun.s16     d0,  q2
        vqmovun.s16     d1,  q3
        vst1.32         {d0[0]}, [r6, :32], r1
        vst1.32         {d1[0]}, [r9, :32], r10
        vst1.32         {d0[1]}, [r6, :32], r1
        vst1.32         {d1[1]}, [r9, :32], r10
.endm
        add_dest_addsub d16, d31, d17, d30
        add_dest_addsub d18, d29, d19, d28
        add_dest_addsub d20, d27, d21, d26
        add_dest_addsub d22, d25, d23, d24
.purgem add_dest_addsub
        // r7 and r8 are addresses, so the loop test must use the unsigned
        // condition. A signed test gives the wrong answer if the buffer
        // happens to straddle address 0x80000000.
        cmp             r7,  r8
        blo             1b

        pop             {r10-r11,pc}
endfunc
3103*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_dct_dct_64x64_8bpc_neon (exported)
//
// Two-pass transform driver. The first pass fills an intermediate buffer of
// 32 rows by 64 16-bit values on the stack, four rows per step; the second
// pass walks that buffer in 16 groups of four columns and adds the result
// to the destination.
//
// Registers read without being set here:
//   r0 - base address for the second pass output (r6 = r0 + column)
//   r1 - not touched here; inv_txfm_add_vert_dct_4x64_neon uses it as the
//        distance between pixel rows
//   r2 - base address of the input for the first pass (r7 = r2 + 2*column)
//   r3 - compared against the thresholds read from eob_32x32
// NOTE(review): r0/r1/r2/r3 are presumably dst, stride, coeff and eob of
// the C prototype - confirm against the declaration.
// NOTE(review): idct_dc is defined elsewhere; presumably it handles the
// DC-only case and returns early - confirm.
3104*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_dct_dct_64x64_8bpc_neon, export=1
3105*c0909341SAndroid Build Coastguard Worker        idct_dc         64,  64,  2
3106*c0909341SAndroid Build Coastguard Worker
3107*c0909341SAndroid Build Coastguard Worker        push            {r4-r11,lr}
3108*c0909341SAndroid Build Coastguard Worker        vpush           {q4}
3109*c0909341SAndroid Build Coastguard Worker
        // Stack layout: 64*4*2 bytes at sp are the scratch area the 4h_x64
        // helper writes at its entry sp; the 64*32*2 byte intermediate
        // buffer sits above it, addressed through r5.
3110*c0909341SAndroid Build Coastguard Worker        sub_sp_align    64*32*2+64*4*2
3111*c0909341SAndroid Build Coastguard Worker        add             r5,  sp,  #64*4*2
3112*c0909341SAndroid Build Coastguard Worker
3113*c0909341SAndroid Build Coastguard Worker        movrel_local    r10, eob_32x32
3114*c0909341SAndroid Build Coastguard Worker
        // First pass, unrolled for i = 0, 4, ..., 28. From the second step
        // on, r3 is compared with the threshold that the previous step
        // loaded into r11; if r3 is lower, the remaining 32 - i rows are
        // zero filled at label 1 instead (r6 and r8 are set up for that
        // before the branch).
3115*c0909341SAndroid Build Coastguard Worker.irp i, 0, 4, 8, 12, 16, 20, 24, 28
3116*c0909341SAndroid Build Coastguard Worker        add             r6,  r5,  #(\i*64*2)
3117*c0909341SAndroid Build Coastguard Worker.if \i > 0
3118*c0909341SAndroid Build Coastguard Worker        mov             r8,  #(32 - \i)
3119*c0909341SAndroid Build Coastguard Worker        cmp             r3,  r11
3120*c0909341SAndroid Build Coastguard Worker        blt             1f
3121*c0909341SAndroid Build Coastguard Worker.endif
        // r7 = input, i 16-bit values in; r8 = 32*2 byte row distance.
3122*c0909341SAndroid Build Coastguard Worker        add             r7,  r2,  #(\i*2)
3123*c0909341SAndroid Build Coastguard Worker        mov             r8,  #32*2
3124*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_dct_clear_4h_x64_neon
        // r6 is set again because the call above overwrites it.
3125*c0909341SAndroid Build Coastguard Worker        add             r6,  r5,  #(\i*64*2)
3126*c0909341SAndroid Build Coastguard Worker        mov             r9,  #-2 // shift
3127*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_horz_dct_64x4_neon
        // Fetch the threshold for the next step.
3128*c0909341SAndroid Build Coastguard Worker.if \i < 28
3129*c0909341SAndroid Build Coastguard Worker        ldrh            r11, [r10], #2
3130*c0909341SAndroid Build Coastguard Worker.endif
3131*c0909341SAndroid Build Coastguard Worker.endr
3132*c0909341SAndroid Build Coastguard Worker        b               3f
3133*c0909341SAndroid Build Coastguard Worker
        // Zero fill: r6 = first row to clear, r8 = number of rows. Each pass
        // of loop 2 writes 8 * 32 bytes, which is two rows of 64 values.
3134*c0909341SAndroid Build Coastguard Worker1:
3135*c0909341SAndroid Build Coastguard Worker        vmov.i16        q2,  #0
3136*c0909341SAndroid Build Coastguard Worker        vmov.i16        q3,  #0
3137*c0909341SAndroid Build Coastguard Worker2:
3138*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #2
3139*c0909341SAndroid Build Coastguard Worker.rept 8
3140*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3}, [r6, :128]!
3141*c0909341SAndroid Build Coastguard Worker.endr
3142*c0909341SAndroid Build Coastguard Worker        bgt             2b
3143*c0909341SAndroid Build Coastguard Worker
        // Second pass, unrolled for column groups 0, 4, ..., 60: r7 = column
        // i of the intermediate buffer with a 64*2 byte row distance, then
        // r6 = r0 + i for the add-to-destination step.
3144*c0909341SAndroid Build Coastguard Worker3:
3145*c0909341SAndroid Build Coastguard Worker.irp i, 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
3146*c0909341SAndroid Build Coastguard Worker        add             r7,  r5,  #(\i*2)
3147*c0909341SAndroid Build Coastguard Worker        mov             r8,  #64*2
3148*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_dct_4h_x64_neon
3149*c0909341SAndroid Build Coastguard Worker        add             r6,  r0,  #(\i)
3150*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_add_vert_dct_4x64_neon
3151*c0909341SAndroid Build Coastguard Worker.endr
3152*c0909341SAndroid Build Coastguard Worker
3153*c0909341SAndroid Build Coastguard Worker        add_sp_align    64*32*2+64*4*2
3154*c0909341SAndroid Build Coastguard Worker        vpop            {q4}
3155*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}
3156*c0909341SAndroid Build Coastguard Workerendfunc
3157*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_dct_dct_64x32_8bpc_neon (exported)
//
// Two-pass transform driver. The first pass fills an intermediate buffer of
// 32 rows by 64 16-bit values on the stack, four rows per step, using the
// clearing and scaling 4h_x64 variant and a shift of -1. The second pass
// hands 16 groups of four columns to inv_txfm_add_vert_dct_4x32_neon
// (defined elsewhere in this file).
//
// Registers read without being set here:
//   r0 - base address for the second pass output (r6 = r0 + column)
//   r2 - base address of the input for the first pass (r7 = r2 + 2*column)
//   r3 - compared against the thresholds read from eob_32x32
// NOTE(review): r0/r2/r3 are presumably dst, coeff and eob of the C
// prototype - confirm against the declaration.
// NOTE(review): idct_dc is defined elsewhere; presumably it handles the
// DC-only case and returns early - confirm.
3158*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_dct_dct_64x32_8bpc_neon, export=1
3159*c0909341SAndroid Build Coastguard Worker        idct_dc         64,  32,  1
3160*c0909341SAndroid Build Coastguard Worker
3161*c0909341SAndroid Build Coastguard Worker        push            {r4-r11,lr}
3162*c0909341SAndroid Build Coastguard Worker        vpush           {q4}
3163*c0909341SAndroid Build Coastguard Worker
        // Stack layout: 64*4*2 bytes at sp are the scratch area the 4h_x64
        // helper writes at its entry sp; the 64*32*2 byte intermediate
        // buffer sits above it, addressed through r5.
3164*c0909341SAndroid Build Coastguard Worker        sub_sp_align    64*32*2+64*4*2
3165*c0909341SAndroid Build Coastguard Worker        add             r5,  sp,  #64*4*2
3166*c0909341SAndroid Build Coastguard Worker
3167*c0909341SAndroid Build Coastguard Worker        movrel_local    r10, eob_32x32
3168*c0909341SAndroid Build Coastguard Worker
        // First pass, unrolled for i = 0, 4, ..., 28. From the second step
        // on, r3 is compared with the threshold that the previous step
        // loaded into r11; if r3 is lower, the remaining 32 - i rows are
        // zero filled at label 1 instead.
3169*c0909341SAndroid Build Coastguard Worker.irp i, 0, 4, 8, 12, 16, 20, 24, 28
3170*c0909341SAndroid Build Coastguard Worker        add             r6,  r5,  #(\i*64*2)
3171*c0909341SAndroid Build Coastguard Worker.if \i > 0
3172*c0909341SAndroid Build Coastguard Worker        mov             r8,  #(32 - \i)
3173*c0909341SAndroid Build Coastguard Worker        cmp             r3,  r11
3174*c0909341SAndroid Build Coastguard Worker        blt             1f
3175*c0909341SAndroid Build Coastguard Worker.endif
        // r7 = input, i 16-bit values in; r8 = 32*2 byte row distance.
3176*c0909341SAndroid Build Coastguard Worker        add             r7,  r2,  #(\i*2)
3177*c0909341SAndroid Build Coastguard Worker        mov             r8,  #32*2
3178*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_dct_clear_scale_4h_x64_neon
        // r6 is set again because the call above overwrites it.
3179*c0909341SAndroid Build Coastguard Worker        add             r6,  r5,  #(\i*64*2)
3180*c0909341SAndroid Build Coastguard Worker        mov             r9,  #-1 // shift
3181*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_horz_dct_64x4_neon
        // Fetch the threshold for the next step.
3182*c0909341SAndroid Build Coastguard Worker.if \i < 28
3183*c0909341SAndroid Build Coastguard Worker        ldrh            r11, [r10], #2
3184*c0909341SAndroid Build Coastguard Worker.endif
3185*c0909341SAndroid Build Coastguard Worker.endr
3186*c0909341SAndroid Build Coastguard Worker        b               3f
3187*c0909341SAndroid Build Coastguard Worker
        // Zero fill: r6 = first row to clear, r8 = number of rows. Each pass
        // of loop 2 writes 8 * 32 bytes, which is two rows of 64 values.
3188*c0909341SAndroid Build Coastguard Worker1:
3189*c0909341SAndroid Build Coastguard Worker        vmov.i16        q2,  #0
3190*c0909341SAndroid Build Coastguard Worker        vmov.i16        q3,  #0
3191*c0909341SAndroid Build Coastguard Worker2:
3192*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #2
3193*c0909341SAndroid Build Coastguard Worker.rept 8
3194*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3}, [r6, :128]!
3195*c0909341SAndroid Build Coastguard Worker.endr
3196*c0909341SAndroid Build Coastguard Worker        bgt             2b
3197*c0909341SAndroid Build Coastguard Worker
        // Second pass, unrolled for column groups 0, 4, ..., 60: r6 = r0 + i,
        // r7 = column i of the intermediate buffer, r8 = 64*2 byte row
        // distance of that buffer.
3198*c0909341SAndroid Build Coastguard Worker3:
3199*c0909341SAndroid Build Coastguard Worker.irp i, 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
3200*c0909341SAndroid Build Coastguard Worker        add             r6,  r0,  #(\i)
3201*c0909341SAndroid Build Coastguard Worker        add             r7,  r5,  #(\i*2)
3202*c0909341SAndroid Build Coastguard Worker        mov             r8,  #64*2
3203*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_add_vert_dct_4x32_neon
3204*c0909341SAndroid Build Coastguard Worker.endr
3205*c0909341SAndroid Build Coastguard Worker
3206*c0909341SAndroid Build Coastguard Worker        add_sp_align    64*32*2+64*4*2
3207*c0909341SAndroid Build Coastguard Worker        vpop            {q4}
3208*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}
3209*c0909341SAndroid Build Coastguard Workerendfunc
3210*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_dct_dct_32x64_8bpc_neon (exported)
//
// Two-pass transform driver. The first pass fills an intermediate buffer of
// 32 rows by 32 16-bit values on the stack, four rows per step, through
// inv_txfm_horz_scale_dct_32x4_neon (defined elsewhere in this file). The
// second pass walks that buffer in 8 groups of four columns, running the
// plain 4h_x64 variant and then adding the result to the destination.
//
// Registers read without being set here:
//   r0 - base address for the second pass output (r6 = r0 + column)
//   r1 - not touched here; inv_txfm_add_vert_dct_4x64_neon uses it as the
//        distance between pixel rows
//   r2 - base address of the input for the first pass (r7 = r2 + 2*column)
//   r3 - compared against the thresholds read from eob_32x32
// NOTE(review): r0/r1/r2/r3 are presumably dst, stride, coeff and eob of
// the C prototype - confirm against the declaration.
// NOTE(review): idct_dc is defined elsewhere; presumably it handles the
// DC-only case and returns early - confirm.
3211*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_dct_dct_32x64_8bpc_neon, export=1
3212*c0909341SAndroid Build Coastguard Worker        idct_dc         32,  64,  1
3213*c0909341SAndroid Build Coastguard Worker
3214*c0909341SAndroid Build Coastguard Worker        push            {r4-r11,lr}
3215*c0909341SAndroid Build Coastguard Worker        vpush           {q4}
3216*c0909341SAndroid Build Coastguard Worker
        // Stack layout: 64*4*2 bytes at sp are the scratch area the 4h_x64
        // helper writes at its entry sp; the 32*32*2 byte intermediate
        // buffer sits above it, addressed through r5.
3217*c0909341SAndroid Build Coastguard Worker        sub_sp_align    32*32*2+64*4*2
3218*c0909341SAndroid Build Coastguard Worker        add             r5,  sp,  #64*4*2
3219*c0909341SAndroid Build Coastguard Worker
        // Unlike the 64-wide drivers in this file, the first threshold is
        // loaded up front and the next one right after each passed test.
3220*c0909341SAndroid Build Coastguard Worker        movrel_local    r10, eob_32x32
3221*c0909341SAndroid Build Coastguard Worker        ldrh            r11, [r10], #2
3222*c0909341SAndroid Build Coastguard Worker
        // First pass, unrolled for i = 0, 4, ..., 28. From the second step
        // on, if r3 is lower than the current threshold the remaining
        // 32 - i rows are zero filled at label 1 instead.
3223*c0909341SAndroid Build Coastguard Worker.irp i, 0, 4, 8, 12, 16, 20, 24, 28
3224*c0909341SAndroid Build Coastguard Worker        add             r6,  r5,  #(\i*32*2)
3225*c0909341SAndroid Build Coastguard Worker.if \i > 0
3226*c0909341SAndroid Build Coastguard Worker        mov             r8,  #(32 - \i)
3227*c0909341SAndroid Build Coastguard Worker        cmp             r3,  r11
3228*c0909341SAndroid Build Coastguard Worker        blt             1f
3229*c0909341SAndroid Build Coastguard Worker.if \i < 28
3230*c0909341SAndroid Build Coastguard Worker        ldrh            r11, [r10], #2
3231*c0909341SAndroid Build Coastguard Worker.endif
3232*c0909341SAndroid Build Coastguard Worker.endif
        // r6 = output row i, r7 = input i 16-bit values in, r8 = 32*2 byte
        // row distance.
3233*c0909341SAndroid Build Coastguard Worker        add             r7,  r2,  #(\i*2)
3234*c0909341SAndroid Build Coastguard Worker        mov             r8,  #32*2
3235*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_horz_scale_dct_32x4_neon
3236*c0909341SAndroid Build Coastguard Worker.endr
3237*c0909341SAndroid Build Coastguard Worker        b               3f
3238*c0909341SAndroid Build Coastguard Worker
        // Zero fill: r6 = first row to clear, r8 = number of rows. Each pass
        // of loop 2 writes 4 * 32 bytes, which is two rows of 32 values.
3239*c0909341SAndroid Build Coastguard Worker1:
3240*c0909341SAndroid Build Coastguard Worker        vmov.i16        q2,  #0
3241*c0909341SAndroid Build Coastguard Worker        vmov.i16        q3,  #0
3242*c0909341SAndroid Build Coastguard Worker2:
3243*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #2
3244*c0909341SAndroid Build Coastguard Worker.rept 4
3245*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3}, [r6, :128]!
3246*c0909341SAndroid Build Coastguard Worker.endr
3247*c0909341SAndroid Build Coastguard Worker        bgt             2b
3248*c0909341SAndroid Build Coastguard Worker
        // Second pass, unrolled for column groups 0, 4, ..., 28: r7 = column
        // i of the intermediate buffer with a 32*2 byte row distance, then
        // r6 = r0 + i for the add-to-destination step.
3249*c0909341SAndroid Build Coastguard Worker3:
3250*c0909341SAndroid Build Coastguard Worker.irp i, 0, 4, 8, 12, 16, 20, 24, 28
3251*c0909341SAndroid Build Coastguard Worker        add             r7,  r5,  #(\i*2)
3252*c0909341SAndroid Build Coastguard Worker        mov             r8,  #32*2
3253*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_dct_4h_x64_neon
3254*c0909341SAndroid Build Coastguard Worker        add             r6,  r0,  #(\i)
3255*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_add_vert_dct_4x64_neon
3256*c0909341SAndroid Build Coastguard Worker.endr
3257*c0909341SAndroid Build Coastguard Worker
3258*c0909341SAndroid Build Coastguard Worker        add_sp_align    32*32*2+64*4*2
3259*c0909341SAndroid Build Coastguard Worker        vpop            {q4}
3260*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}
3261*c0909341SAndroid Build Coastguard Workerendfunc
3262*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_dct_dct_64x16_8bpc_neon (exported)
//
// Two-pass routine working through a 64*16*2 byte intermediate buffer on the
// stack. Register roles below are inferred from how this code uses them and
// should be confirmed against the caller and the helpers:
//   r0  - output base; pass 2 hands r0 + i to the add/vert helper
//   r2  - input base; pass 1 hands r2 + i*2 to the first helper
//   r3  - value compared against the eob_16x32 thresholds (presumably eob)
//   r4  - start of the intermediate buffer
//   r10 - cursor into eob_16x32, r11 - current 16-bit threshold
// NOTE(review): idct_dc is a macro defined outside this view; presumably it
// handles the DC-only case and returns early - confirm.
3263*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_dct_dct_64x16_8bpc_neon, export=1
3264*c0909341SAndroid Build Coastguard Worker        idct_dc         64,  16,  2
3265*c0909341SAndroid Build Coastguard Worker
// Save the core registers and q4 that the epilogue restores.
3266*c0909341SAndroid Build Coastguard Worker        push            {r4-r11,lr}
3267*c0909341SAndroid Build Coastguard Worker        vpush           {q4}
3268*c0909341SAndroid Build Coastguard Worker
// Reserve 64*16*2 + 64*4*2 bytes of stack; r4 skips the first 64*4*2 bytes
// and points at the 64*16*2 byte intermediate buffer.
// NOTE(review): the leading 64*4*2 bytes are presumably scratch used by the
// x64 helper called in pass 1 - confirm.
3269*c0909341SAndroid Build Coastguard Worker        sub_sp_align    64*16*2+64*4*2
3270*c0909341SAndroid Build Coastguard Worker        add             r4,  sp,  #64*4*2
3271*c0909341SAndroid Build Coastguard Worker
// Only the table address is set up here; r11 is first loaded at the end of
// the i = 0 iteration below, before the first comparison at i = 4.
3272*c0909341SAndroid Build Coastguard Worker        movrel_local    r10, eob_16x32
3273*c0909341SAndroid Build Coastguard Worker
// Pass 1, unrolled for strips i = 0, 4, 8, 12. r6 = r4 + i*64*2 is the
// output position of the strip. For every strip but the first, r8 is preset
// to 16 - i and r3 is compared with the threshold: if r3 is (signed) lower,
// control jumps to the zero fill at 1: with r6/r8 describing what is left.
// Otherwise two helpers run back to back: the first gets r7 = r2 + i*2 and
// r8 = 16*2, the second gets r6 (set again to the strip position) and the
// shift value -2 in r9. The next threshold is then fetched, except after the
// last strip.
// NOTE(review): r6 is reloaded between the calls, presumably because the
// first helper does not preserve it - confirm.
3274*c0909341SAndroid Build Coastguard Worker.irp i, 0, 4, 8, 12
3275*c0909341SAndroid Build Coastguard Worker        add             r6,  r4,  #(\i*64*2)
3276*c0909341SAndroid Build Coastguard Worker.if \i > 0
3277*c0909341SAndroid Build Coastguard Worker        mov             r8,  #(16 - \i)
3278*c0909341SAndroid Build Coastguard Worker        cmp             r3,  r11
3279*c0909341SAndroid Build Coastguard Worker        blt             1f
3280*c0909341SAndroid Build Coastguard Worker.endif
3281*c0909341SAndroid Build Coastguard Worker        add             r7,  r2,  #(\i*2)
3282*c0909341SAndroid Build Coastguard Worker        mov             r8,  #16*2
3283*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_dct_clear_4h_x64_neon
3284*c0909341SAndroid Build Coastguard Worker        add             r6,  r4,  #(\i*64*2)
3285*c0909341SAndroid Build Coastguard Worker        mov             r9,  #-2 // shift
3286*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_horz_dct_64x4_neon
3287*c0909341SAndroid Build Coastguard Worker.if \i < 12
3288*c0909341SAndroid Build Coastguard Worker        ldrh            r11, [r10], #2
3289*c0909341SAndroid Build Coastguard Worker.endif
3290*c0909341SAndroid Build Coastguard Worker.endr
// Every strip was transformed, so there is nothing to clear.
3291*c0909341SAndroid Build Coastguard Worker        b               3f
3292*c0909341SAndroid Build Coastguard Worker
// Zero fill for the strips that were skipped. Each trip through 2: stores
// 8 * 32 = 256 bytes of zeros at r6 and subtracts 2 from r8, so starting
// from r8 = 16 - i a total of (16 - i) * 64*2 bytes are cleared, which is
// exactly the distance from r6 to the end of the 64*16*2 byte buffer.
// The vst1 stores do not change the flags set by subs, so bgt still tests
// the updated r8.
3293*c0909341SAndroid Build Coastguard Worker1:
3294*c0909341SAndroid Build Coastguard Worker        vmov.i16        q2,  #0
3295*c0909341SAndroid Build Coastguard Worker        vmov.i16        q3,  #0
3296*c0909341SAndroid Build Coastguard Worker2:
3297*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #2
3298*c0909341SAndroid Build Coastguard Worker.rept 8
3299*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3}, [r6, :128]!
3300*c0909341SAndroid Build Coastguard Worker.endr
3301*c0909341SAndroid Build Coastguard Worker        bgt             2b
3302*c0909341SAndroid Build Coastguard Worker
// Pass 2, unrolled for i = 0, 4, ..., 60: the helper is called with
// r6 = r0 + i, r7 = r4 + i*2 (position inside the intermediate buffer) and
// r8 = 64*2. r5 holds the address of inv_dct_4h_x16_neon for all calls.
// NOTE(review): r5 is presumably the transform routine that
// inv_txfm_add_vert_4x16_neon calls indirectly, and r8 the byte stride of
// the intermediate buffer (64 entries of 2 bytes) - confirm.
3303*c0909341SAndroid Build Coastguard Worker3:
3304*c0909341SAndroid Build Coastguard Worker        movrel_local    r5,  inv_dct_4h_x16_neon
3305*c0909341SAndroid Build Coastguard Worker.irp i, 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
3306*c0909341SAndroid Build Coastguard Worker        add             r6,  r0,  #(\i)
3307*c0909341SAndroid Build Coastguard Worker        add             r7,  r4,  #(\i*2)
3308*c0909341SAndroid Build Coastguard Worker        mov             r8,  #64*2
3309*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_add_vert_4x16_neon
3310*c0909341SAndroid Build Coastguard Worker.endr
3311*c0909341SAndroid Build Coastguard Worker
// Release the stack area reserved above and restore the saved registers;
// popping into pc returns to the caller.
3312*c0909341SAndroid Build Coastguard Worker        add_sp_align    64*16*2+64*4*2
3313*c0909341SAndroid Build Coastguard Worker        vpop            {q4}
3314*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}
3315*c0909341SAndroid Build Coastguard Workerendfunc
3316*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_dct_dct_16x64_8bpc_neon (exported)
//
// Two-pass routine working through a 16*32*2 byte intermediate buffer on the
// stack. Register roles below are inferred from how this code uses them and
// should be confirmed against the caller and the helpers:
//   r0  - output base; pass 2 hands r0 + i to the add/vert helper
//   r2  - input base; pass 1 hands r2 + i*2 to the horizontal helper
//   r3  - value compared against the eob_16x32 thresholds (presumably eob)
//   r4  - address of inv_dct_4h_x16_neon, kept for all pass 1 calls
//   r5  - start of the intermediate buffer
//   r10 - cursor into eob_16x32, r11 - current 16-bit threshold
// NOTE(review): idct_dc is a macro defined outside this view; presumably it
// handles the DC-only case and returns early - confirm.
3317*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_dct_dct_16x64_8bpc_neon, export=1
3318*c0909341SAndroid Build Coastguard Worker        idct_dc         16,  64,  2
3319*c0909341SAndroid Build Coastguard Worker
// Save the core registers and q4 that the epilogue restores.
3320*c0909341SAndroid Build Coastguard Worker        push            {r4-r11,lr}
3321*c0909341SAndroid Build Coastguard Worker        vpush           {q4}
3322*c0909341SAndroid Build Coastguard Worker
// Reserve 16*32*2 + 64*4*2 bytes of stack; r5 skips the first 64*4*2 bytes
// and points at the 16*32*2 byte intermediate buffer.
// NOTE(review): the leading 64*4*2 bytes are presumably scratch used by the
// x64 helpers called in pass 2 - confirm.
3323*c0909341SAndroid Build Coastguard Worker        sub_sp_align    16*32*2+64*4*2
3324*c0909341SAndroid Build Coastguard Worker        add             r5,  sp,  #64*4*2
3325*c0909341SAndroid Build Coastguard Worker
// Load the first threshold from eob_16x32 and advance the cursor by 2 bytes.
3326*c0909341SAndroid Build Coastguard Worker        movrel_local    r10, eob_16x32
3327*c0909341SAndroid Build Coastguard Worker        ldrh            r11, [r10], #2
3328*c0909341SAndroid Build Coastguard Worker
// Pass 1, unrolled for strips i = 0, 4, ..., 28. r6 = r5 + i*16*2 is the
// output position of the strip. For every strip but the first, r8 is preset
// to 32 - i and r3 is compared with the threshold: if r3 is (signed) lower,
// control jumps to the zero fill at 1: with r6/r8 describing what is left.
// Otherwise the next threshold is fetched (not needed after i = 28) and the
// helper is called with r7 = r2 + i*2 and r8 = 32*2.
// NOTE(review): r4 is presumably the transform routine that
// inv_txfm_horz_16x4_neon calls indirectly, and r8 the input stride in
// bytes - confirm.
3329*c0909341SAndroid Build Coastguard Worker        movrel_local    r4,  inv_dct_4h_x16_neon
3330*c0909341SAndroid Build Coastguard Worker.irp i, 0, 4, 8, 12, 16, 20, 24, 28
3331*c0909341SAndroid Build Coastguard Worker        add             r6,  r5,  #(\i*16*2)
3332*c0909341SAndroid Build Coastguard Worker.if \i > 0
3333*c0909341SAndroid Build Coastguard Worker        mov             r8,  #(32 - \i)
3334*c0909341SAndroid Build Coastguard Worker        cmp             r3,  r11
3335*c0909341SAndroid Build Coastguard Worker        blt             1f
3336*c0909341SAndroid Build Coastguard Worker.if \i < 28
3337*c0909341SAndroid Build Coastguard Worker        ldrh            r11, [r10], #2
3338*c0909341SAndroid Build Coastguard Worker.endif
3339*c0909341SAndroid Build Coastguard Worker.endif
3340*c0909341SAndroid Build Coastguard Worker        add             r7,  r2,  #(\i*2)
3341*c0909341SAndroid Build Coastguard Worker        mov             r8,  #32*2
3342*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_horz_16x4_neon
3343*c0909341SAndroid Build Coastguard Worker.endr
// Every strip was transformed, so there is nothing to clear.
3344*c0909341SAndroid Build Coastguard Worker        b               3f
3345*c0909341SAndroid Build Coastguard Worker
// Zero fill for the strips that were skipped. Each trip through 2: stores
// 4 * 32 = 128 bytes of zeros at r6 and subtracts 4 from r8, so starting
// from r8 = 32 - i a total of (32 - i) * 16*2 bytes are cleared, which is
// exactly the distance from r6 to the end of the 16*32*2 byte buffer.
// The vst1 stores do not change the flags set by subs, so bgt still tests
// the updated r8.
3346*c0909341SAndroid Build Coastguard Worker1:
3347*c0909341SAndroid Build Coastguard Worker        vmov.i16        q2,  #0
3348*c0909341SAndroid Build Coastguard Worker        vmov.i16        q3,  #0
3349*c0909341SAndroid Build Coastguard Worker2:
3350*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #4
3351*c0909341SAndroid Build Coastguard Worker.rept 4
3352*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3}, [r6, :128]!
3353*c0909341SAndroid Build Coastguard Worker.endr
3354*c0909341SAndroid Build Coastguard Worker        bgt             2b
3355*c0909341SAndroid Build Coastguard Worker
// Pass 2, unrolled for i = 0, 4, 8, 12: r7 = r5 + i*2 selects the position
// inside the intermediate buffer and r8 = 16*2 is passed along with it, then
// the second helper is called with r6 = r0 + i.
// NOTE(review): r8 is presumably the byte stride of the intermediate buffer
// (16 entries of 2 bytes) - confirm against inv_txfm_dct_4h_x64_neon.
3356*c0909341SAndroid Build Coastguard Worker3:
3357*c0909341SAndroid Build Coastguard Worker.irp i, 0, 4, 8, 12
3358*c0909341SAndroid Build Coastguard Worker        add             r7,  r5,  #(\i*2)
3359*c0909341SAndroid Build Coastguard Worker        mov             r8,  #16*2
3360*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_dct_4h_x64_neon
3361*c0909341SAndroid Build Coastguard Worker        add             r6,  r0,  #(\i)
3362*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_add_vert_dct_4x64_neon
3363*c0909341SAndroid Build Coastguard Worker.endr
3364*c0909341SAndroid Build Coastguard Worker
// Release the stack area reserved above and restore the saved registers;
// popping into pc returns to the caller.
3365*c0909341SAndroid Build Coastguard Worker        add_sp_align    16*32*2+64*4*2
3366*c0909341SAndroid Build Coastguard Worker        vpop            {q4}
3367*c0909341SAndroid Build Coastguard Worker        pop             {r4-r11,pc}
3368*c0909341SAndroid Build Coastguard Workerendfunc
3369