xref: /aosp_15_r20/external/libdav1d/src/arm/64/itx.S (revision c09093415860a1c2373dacd84c4fde00c507cdfd)
1*c0909341SAndroid Build Coastguard Worker/******************************************************************************
2*c0909341SAndroid Build Coastguard Worker * Copyright © 2018, VideoLAN and dav1d authors
3*c0909341SAndroid Build Coastguard Worker * Copyright © 2019, Martin Storsjo
4*c0909341SAndroid Build Coastguard Worker * All rights reserved.
5*c0909341SAndroid Build Coastguard Worker *
6*c0909341SAndroid Build Coastguard Worker * Redistribution and use in source and binary forms, with or without
7*c0909341SAndroid Build Coastguard Worker * modification, are permitted provided that the following conditions are met:
8*c0909341SAndroid Build Coastguard Worker *
9*c0909341SAndroid Build Coastguard Worker * 1. Redistributions of source code must retain the above copyright notice, this
10*c0909341SAndroid Build Coastguard Worker *    list of conditions and the following disclaimer.
11*c0909341SAndroid Build Coastguard Worker *
12*c0909341SAndroid Build Coastguard Worker * 2. Redistributions in binary form must reproduce the above copyright notice,
13*c0909341SAndroid Build Coastguard Worker *    this list of conditions and the following disclaimer in the documentation
14*c0909341SAndroid Build Coastguard Worker *    and/or other materials provided with the distribution.
15*c0909341SAndroid Build Coastguard Worker *
16*c0909341SAndroid Build Coastguard Worker * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17*c0909341SAndroid Build Coastguard Worker * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18*c0909341SAndroid Build Coastguard Worker * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19*c0909341SAndroid Build Coastguard Worker * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20*c0909341SAndroid Build Coastguard Worker * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21*c0909341SAndroid Build Coastguard Worker * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22*c0909341SAndroid Build Coastguard Worker * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23*c0909341SAndroid Build Coastguard Worker * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24*c0909341SAndroid Build Coastguard Worker * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25*c0909341SAndroid Build Coastguard Worker * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26*c0909341SAndroid Build Coastguard Worker *****************************************************************************/
27*c0909341SAndroid Build Coastguard Worker
28*c0909341SAndroid Build Coastguard Worker#include "src/arm/asm.S"
29*c0909341SAndroid Build Coastguard Worker#include "util.S"
30*c0909341SAndroid Build Coastguard Worker
31*c0909341SAndroid Build Coastguard Worker// The exported functions in this file have got the following signature:
32*c0909341SAndroid Build Coastguard Worker// void itxfm_add(pixel *dst, ptrdiff_t dst_stride, coef *coeff, int eob);
33*c0909341SAndroid Build Coastguard Worker
34*c0909341SAndroid Build Coastguard Worker// Most of the functions use the following register layout:
35*c0909341SAndroid Build Coastguard Worker// x0-x3  external parameters
36*c0909341SAndroid Build Coastguard Worker// x4     function pointer to first transform
37*c0909341SAndroid Build Coastguard Worker// x5     function pointer to second transform
38*c0909341SAndroid Build Coastguard Worker// x6     output parameter for helper function
39*c0909341SAndroid Build Coastguard Worker// x7     input parameter for helper function
40*c0909341SAndroid Build Coastguard Worker// x8     input stride for helper function
41*c0909341SAndroid Build Coastguard Worker// x9-x12 scratch variables for helper functions
42*c0909341SAndroid Build Coastguard Worker// x13    pointer to list of eob thresholds
43*c0909341SAndroid Build Coastguard Worker// x14    return pointer for helper function
44*c0909341SAndroid Build Coastguard Worker// x15    return pointer for main function
45*c0909341SAndroid Build Coastguard Worker
46*c0909341SAndroid Build Coastguard Worker// The SIMD registers most often use the following layout:
47*c0909341SAndroid Build Coastguard Worker// v0-v1   multiplication coefficients
48*c0909341SAndroid Build Coastguard Worker// v2-v7   scratch registers
49*c0909341SAndroid Build Coastguard Worker// v8-v15  unused
50*c0909341SAndroid Build Coastguard Worker// v16-v31 inputs/outputs of transforms
51*c0909341SAndroid Build Coastguard Worker
52*c0909341SAndroid Build Coastguard Worker// Potential further optimizations, that are left unimplemented for now:
53*c0909341SAndroid Build Coastguard Worker// - Trying to keep multiplication coefficients in registers across multiple
54*c0909341SAndroid Build Coastguard Worker//   transform functions. (The register layout is designed to potentially
55*c0909341SAndroid Build Coastguard Worker//   allow this.)
56*c0909341SAndroid Build Coastguard Worker// - Use a simplified version of the transforms themselves for cases where
57*c0909341SAndroid Build Coastguard Worker//   we know a significant number of inputs are zero. E.g. if the eob value
58*c0909341SAndroid Build Coastguard Worker//   indicates only a quarter of input values are set, for idct16 and up,
59*c0909341SAndroid Build Coastguard Worker//   a significant amount of calculation can be skipped, at the cost of more
60*c0909341SAndroid Build Coastguard Worker//   code duplication and special casing.
61*c0909341SAndroid Build Coastguard Worker
// Multiplier table for the inverse DCT: 32 signed halfwords, grouped by the
// transform size named in each section comment.
// The second entry is 2896 pre-multiplied by 8, the same constant that the
// idct_dc macro feeds to sqrdmulh.
// NOTE(review): the values look like round(4096 * cos/sin(k*pi/N)) fixed-point
// constants (e.g. 2896 ~= 4096/sqrt(2)) -- confirm against the C reference.
62*c0909341SAndroid Build Coastguard Workerconst idct_coeffs, align=4
63*c0909341SAndroid Build Coastguard Worker        // idct4
64*c0909341SAndroid Build Coastguard Worker        .short          2896, 2896*8, 1567, 3784
65*c0909341SAndroid Build Coastguard Worker        // idct8
66*c0909341SAndroid Build Coastguard Worker        .short          799, 4017, 3406, 2276
67*c0909341SAndroid Build Coastguard Worker        // idct16
68*c0909341SAndroid Build Coastguard Worker        .short          401, 4076, 3166, 2598
69*c0909341SAndroid Build Coastguard Worker        .short          1931, 3612, 3920, 1189
70*c0909341SAndroid Build Coastguard Worker        // idct32
71*c0909341SAndroid Build Coastguard Worker        .short          201, 4091, 3035, 2751
72*c0909341SAndroid Build Coastguard Worker        .short          1751, 3703, 3857, 1380
73*c0909341SAndroid Build Coastguard Worker        .short          995, 3973, 3513, 2106
74*c0909341SAndroid Build Coastguard Worker        .short          2440, 3290, 4052, 601
75*c0909341SAndroid Build Coastguard Workerendconst
76*c0909341SAndroid Build Coastguard Worker
// Multiplier table for the 64-point inverse DCT.
// Laid out as four groups of 16 halfwords (32 bytes per group). In every
// group the first 8 entries are pre-multiplied by 8, the next 4 are unscaled,
// and the last 4 are zero.
// NOTE(review): the "*8" entries are presumably meant for sqrdmulh and the
// unscaled ones for widening multiplies, with the zeros acting as padding so
// each group fills two 16-byte vectors -- confirm against the idct64 code.
77*c0909341SAndroid Build Coastguard Workerconst idct64_coeffs, align=4
78*c0909341SAndroid Build Coastguard Worker        .short          101*8, 4095*8, 2967*8, -2824*8
79*c0909341SAndroid Build Coastguard Worker        .short          1660*8, 3745*8, 3822*8, -1474*8
80*c0909341SAndroid Build Coastguard Worker        .short          4076, 401, 4017, 799
81*c0909341SAndroid Build Coastguard Worker        .short          0, 0, 0, 0
82*c0909341SAndroid Build Coastguard Worker
83*c0909341SAndroid Build Coastguard Worker        .short          4036*8, -700*8, 2359*8, 3349*8
84*c0909341SAndroid Build Coastguard Worker        .short          3461*8, -2191*8, 897*8, 3996*8
85*c0909341SAndroid Build Coastguard Worker        .short          -3166, -2598, -799, -4017
86*c0909341SAndroid Build Coastguard Worker        .short          0, 0, 0, 0
87*c0909341SAndroid Build Coastguard Worker
88*c0909341SAndroid Build Coastguard Worker        .short          501*8, 4065*8, 3229*8, -2520*8
89*c0909341SAndroid Build Coastguard Worker        .short          2019*8, 3564*8, 3948*8, -1092*8
90*c0909341SAndroid Build Coastguard Worker        .short          3612, 1931, 2276, 3406
91*c0909341SAndroid Build Coastguard Worker        .short          0, 0, 0, 0
92*c0909341SAndroid Build Coastguard Worker
93*c0909341SAndroid Build Coastguard Worker        .short          4085*8, -301*8, 2675*8, 3102*8
94*c0909341SAndroid Build Coastguard Worker        .short          3659*8, -1842*8, 1285*8, 3889*8
95*c0909341SAndroid Build Coastguard Worker        .short          -3920, -1189, -3406, -2276
96*c0909341SAndroid Build Coastguard Worker        .short          0, 0, 0, 0
97*c0909341SAndroid Build Coastguard Workerendconst
98*c0909341SAndroid Build Coastguard Worker
// Multiplier table for the 4-point inverse ADST: six halfwords.
// The last two halfwords are 3344 followed by 0, so on a little-endian
// layout they also read back as the 32-bit value 3344 (element .s[2]).
99*c0909341SAndroid Build Coastguard Workerconst iadst4_coeffs, align=4
100*c0909341SAndroid Build Coastguard Worker        // .h[4-5] can be interpreted as .s[2]
101*c0909341SAndroid Build Coastguard Worker        .short          1321, 3803, 2482, 3344, 3344, 0
102*c0909341SAndroid Build Coastguard Workerendconst
103*c0909341SAndroid Build Coastguard Worker
// Multiplier table for the 8-point inverse ADST: 8 ADST halfwords followed by
// an 8-halfword row labelled "idct_coeffs".
// That last row starts like the idct4 row of idct_coeffs (2896, x, 1567, 3784)
// but holds 0 where idct_coeffs holds 2896*8, and is zero-filled up to 8
// halfwords.
104*c0909341SAndroid Build Coastguard Workerconst iadst8_coeffs, align=4
105*c0909341SAndroid Build Coastguard Worker        .short          4076, 401, 3612, 1931
106*c0909341SAndroid Build Coastguard Worker        .short          2598, 3166, 1189, 3920
107*c0909341SAndroid Build Coastguard Worker        // idct_coeffs
108*c0909341SAndroid Build Coastguard Worker        .short          2896, 0, 1567, 3784, 0, 0, 0, 0
109*c0909341SAndroid Build Coastguard Workerendconst
110*c0909341SAndroid Build Coastguard Worker
// Multiplier table for the 16-point inverse ADST: 16 signed halfwords
// (32 bytes), stored as eight pairs.
// NOTE(review): the pairs use the same magnitudes as the idct32 rows of
// idct_coeffs but in a different order -- presumably matching the order in
// which the iadst16 butterflies consume them; confirm against the users.
111*c0909341SAndroid Build Coastguard Workerconst iadst16_coeffs, align=4
112*c0909341SAndroid Build Coastguard Worker        .short          4091, 201, 3973, 995
113*c0909341SAndroid Build Coastguard Worker        .short          3703, 1751, 3290, 2440
114*c0909341SAndroid Build Coastguard Worker        .short          2751, 3035, 2106, 3513
115*c0909341SAndroid Build Coastguard Worker        .short          1380, 3857, 601, 4052
116*c0909341SAndroid Build Coastguard Workerendconst
117*c0909341SAndroid Build Coastguard Worker
// smull_smlal: widening 16x16->32 bit multiply of two sources, summed.
//   d0 = s0[0..3] * c0 + s1[0..3] * c1
//   d1 = s0[4..7] * c0 + s1[4..7] * c1     (only emitted when sz is ".8h")
// d0, d1, s0 and s1 are bare register names; the macro appends the
// .4s/.4h/.8h arrangement itself. c0 and c1 are pasted verbatim as the
// multiplier operand. When sz is anything other than ".8h", d1 is not written.
118*c0909341SAndroid Build Coastguard Worker.macro smull_smlal d0, d1, s0, s1, c0, c1, sz
119*c0909341SAndroid Build Coastguard Worker        smull           \d0\().4s, \s0\().4h, \c0
120*c0909341SAndroid Build Coastguard Worker        smlal           \d0\().4s, \s1\().4h, \c1
121*c0909341SAndroid Build Coastguard Worker.ifc \sz, .8h
122*c0909341SAndroid Build Coastguard Worker        smull2          \d1\().4s, \s0\().8h, \c0
123*c0909341SAndroid Build Coastguard Worker        smlal2          \d1\().4s, \s1\().8h, \c1
124*c0909341SAndroid Build Coastguard Worker.endif
125*c0909341SAndroid Build Coastguard Worker.endm
126*c0909341SAndroid Build Coastguard Worker
// smull_smlsl: widening 16x16->32 bit multiply of two sources, subtracted.
//   d0 = s0[0..3] * c0 - s1[0..3] * c1
//   d1 = s0[4..7] * c0 - s1[4..7] * c1     (only emitted when sz is ".8h")
// Same argument conventions as smull_smlal: bare register names for
// d0/d1/s0/s1, verbatim multiplier operands for c0/c1, and d1 is left
// untouched unless sz is ".8h".
127*c0909341SAndroid Build Coastguard Worker.macro smull_smlsl d0, d1, s0, s1, c0, c1, sz
128*c0909341SAndroid Build Coastguard Worker        smull           \d0\().4s, \s0\().4h, \c0
129*c0909341SAndroid Build Coastguard Worker        smlsl           \d0\().4s, \s1\().4h, \c1
130*c0909341SAndroid Build Coastguard Worker.ifc \sz, .8h
131*c0909341SAndroid Build Coastguard Worker        smull2          \d1\().4s, \s0\().8h, \c0
132*c0909341SAndroid Build Coastguard Worker        smlsl2          \d1\().4s, \s1\().8h, \c1
133*c0909341SAndroid Build Coastguard Worker.endif
134*c0909341SAndroid Build Coastguard Worker.endm
135*c0909341SAndroid Build Coastguard Worker
// sqrshrn_sz: narrow 32-bit intermediates back to 16 bit with a signed,
// saturating, rounding right shift.
//   d0[0..3] = narrow(s0 >> shift)
//   d0[4..7] = narrow(s1 >> shift)         (only emitted when sz is ".8h")
// d0, s0 and s1 are bare register names; shift is pasted verbatim as the
// shift operand. s1 is ignored unless sz is ".8h".
136*c0909341SAndroid Build Coastguard Worker.macro sqrshrn_sz d0, s0, s1, shift, sz
137*c0909341SAndroid Build Coastguard Worker        sqrshrn         \d0\().4h, \s0\().4s, \shift
138*c0909341SAndroid Build Coastguard Worker.ifc \sz, .8h
139*c0909341SAndroid Build Coastguard Worker        sqrshrn2        \d0\().8h, \s1\().4s, \shift
140*c0909341SAndroid Build Coastguard Worker.endif
141*c0909341SAndroid Build Coastguard Worker.endm
142*c0909341SAndroid Build Coastguard Worker
// scale_input: multiply up to eight registers in place by one constant,
// using sqrdmulh (signed saturating rounding doubling multiply, high half).
//   sz     arrangement suffix appended directly to each register name
//   c      multiplier operand, pasted verbatim
//   r0-r3  registers that are always scaled
//   r4-r7  optional; scaled only when r4 is supplied (then all four are used)
// The parameter list is now comma separated throughout; the original had
// "r2 r3" separated by whitespace only, which GNU as accepts but which was
// inconsistent with every other parameter. The expansion is unchanged.
143*c0909341SAndroid Build Coastguard Worker.macro scale_input sz, c, r0, r1, r2, r3, r4, r5, r6, r7
144*c0909341SAndroid Build Coastguard Worker        sqrdmulh        \r0\sz,  \r0\sz,  \c
145*c0909341SAndroid Build Coastguard Worker        sqrdmulh        \r1\sz,  \r1\sz,  \c
146*c0909341SAndroid Build Coastguard Worker        sqrdmulh        \r2\sz,  \r2\sz,  \c
147*c0909341SAndroid Build Coastguard Worker        sqrdmulh        \r3\sz,  \r3\sz,  \c
148*c0909341SAndroid Build Coastguard Worker.ifnb \r4
149*c0909341SAndroid Build Coastguard Worker        sqrdmulh        \r4\sz,  \r4\sz,  \c
150*c0909341SAndroid Build Coastguard Worker        sqrdmulh        \r5\sz,  \r5\sz,  \c
151*c0909341SAndroid Build Coastguard Worker        sqrdmulh        \r6\sz,  \r6\sz,  \c
152*c0909341SAndroid Build Coastguard Worker        sqrdmulh        \r7\sz,  \r7\sz,  \c
153*c0909341SAndroid Build Coastguard Worker.endif
154*c0909341SAndroid Build Coastguard Worker.endm
155*c0909341SAndroid Build Coastguard Worker
// load_add_store: one step of the "add residual to destination" sequence for
// 8 pixel wide rows. Every stage is optional and is skipped when its argument
// is blank, so callers can stagger the stages of different rows.
// Stages, in emission order:
//   load                  ld1 a row of pixels from [src], src += x1
//   shift                 rounding right shift of a coefficient row by
//                         shiftbits (default 4), in place
//   addsrc, adddst        adddst += zero-extended pixels from addsrc
//   narrowsrc, narrowdst  saturate narrowsrc to unsigned 8 bit into narrowdst
//   store                 st1 a row of pixels to [dst], dst += x1
// The row stride is hard-coded as x1 (dst_stride in the exported signature).
156*c0909341SAndroid Build Coastguard Worker.macro load_add_store load, shift, addsrc, adddst, narrowsrc, narrowdst, store, dst, src, shiftbits=4
157*c0909341SAndroid Build Coastguard Worker.ifnb \load
158*c0909341SAndroid Build Coastguard Worker        ld1             {\load},  [\src], x1
159*c0909341SAndroid Build Coastguard Worker.endif
160*c0909341SAndroid Build Coastguard Worker.ifnb \shift
161*c0909341SAndroid Build Coastguard Worker        srshr           \shift,  \shift,  #\shiftbits
162*c0909341SAndroid Build Coastguard Worker.endif
163*c0909341SAndroid Build Coastguard Worker.ifnb \addsrc
164*c0909341SAndroid Build Coastguard Worker        uaddw           \adddst, \adddst, \addsrc
165*c0909341SAndroid Build Coastguard Worker.endif
166*c0909341SAndroid Build Coastguard Worker.ifnb \narrowsrc
167*c0909341SAndroid Build Coastguard Worker        sqxtun          \narrowdst, \narrowsrc
168*c0909341SAndroid Build Coastguard Worker.endif
169*c0909341SAndroid Build Coastguard Worker.ifnb \store
170*c0909341SAndroid Build Coastguard Worker        st1             {\store},  [\dst], x1
171*c0909341SAndroid Build Coastguard Worker.endif
172*c0909341SAndroid Build Coastguard Worker.endm
// load_add_store_8x16: add 16 rows of 8 coefficients (v16-v31, one row per
// register) to the 8x16 pixel block at \dst and store the clamped result.
// Each coefficient row is first rounded and shifted right by 4 (the default
// shiftbits of load_add_store).
//   dst  pointer register to the first destination row; advanced by 16 rows
//   src  scratch pointer register; initialised from dst, advanced by 16 rows
// The stages are staggered across invocations: a row is loaded and shifted in
// step i, added in step i+2, narrowed in step i+3 and stored in step i+4.
// v2-v7 are used in rotation for the pixel rows.
// Clobbers v2-v7 and v16-v31.
173*c0909341SAndroid Build Coastguard Worker.macro load_add_store_8x16 dst, src
174*c0909341SAndroid Build Coastguard Worker        mov             \src, \dst
175*c0909341SAndroid Build Coastguard Worker        load_add_store  v2.8b, v16.8h,      ,       ,       ,      ,      , \dst, \src
176*c0909341SAndroid Build Coastguard Worker        load_add_store  v3.8b, v17.8h,      ,       ,       ,      ,      , \dst, \src
177*c0909341SAndroid Build Coastguard Worker        load_add_store  v4.8b, v18.8h, v2.8b, v16.8h,       ,      ,      , \dst, \src
178*c0909341SAndroid Build Coastguard Worker        load_add_store  v5.8b, v19.8h, v3.8b, v17.8h, v16.8h, v2.8b,      , \dst, \src
179*c0909341SAndroid Build Coastguard Worker        load_add_store  v6.8b, v20.8h, v4.8b, v18.8h, v17.8h, v3.8b, v2.8b, \dst, \src
180*c0909341SAndroid Build Coastguard Worker        load_add_store  v7.8b, v21.8h, v5.8b, v19.8h, v18.8h, v4.8b, v3.8b, \dst, \src
181*c0909341SAndroid Build Coastguard Worker        load_add_store  v2.8b, v22.8h, v6.8b, v20.8h, v19.8h, v5.8b, v4.8b, \dst, \src
182*c0909341SAndroid Build Coastguard Worker        load_add_store  v3.8b, v23.8h, v7.8b, v21.8h, v20.8h, v6.8b, v5.8b, \dst, \src
183*c0909341SAndroid Build Coastguard Worker        load_add_store  v4.8b, v24.8h, v2.8b, v22.8h, v21.8h, v7.8b, v6.8b, \dst, \src
184*c0909341SAndroid Build Coastguard Worker        load_add_store  v5.8b, v25.8h, v3.8b, v23.8h, v22.8h, v2.8b, v7.8b, \dst, \src
185*c0909341SAndroid Build Coastguard Worker        load_add_store  v6.8b, v26.8h, v4.8b, v24.8h, v23.8h, v3.8b, v2.8b, \dst, \src
186*c0909341SAndroid Build Coastguard Worker        load_add_store  v7.8b, v27.8h, v5.8b, v25.8h, v24.8h, v4.8b, v3.8b, \dst, \src
187*c0909341SAndroid Build Coastguard Worker        load_add_store  v2.8b, v28.8h, v6.8b, v26.8h, v25.8h, v5.8b, v4.8b, \dst, \src
188*c0909341SAndroid Build Coastguard Worker        load_add_store  v3.8b, v29.8h, v7.8b, v27.8h, v26.8h, v6.8b, v5.8b, \dst, \src
189*c0909341SAndroid Build Coastguard Worker        load_add_store  v4.8b, v30.8h, v2.8b, v28.8h, v27.8h, v7.8b, v6.8b, \dst, \src
190*c0909341SAndroid Build Coastguard Worker        load_add_store  v5.8b, v31.8h, v3.8b, v29.8h, v28.8h, v2.8b, v7.8b, \dst, \src
191*c0909341SAndroid Build Coastguard Worker        load_add_store       ,       , v4.8b, v30.8h, v29.8h, v3.8b, v2.8b, \dst, \src
192*c0909341SAndroid Build Coastguard Worker        load_add_store       ,       , v5.8b, v31.8h, v30.8h, v4.8b, v3.8b, \dst, \src
193*c0909341SAndroid Build Coastguard Worker        load_add_store       ,       ,      ,       , v31.8h, v5.8b, v4.8b, \dst, \src
194*c0909341SAndroid Build Coastguard Worker        load_add_store       ,       ,      ,       ,       ,      , v5.8b, \dst, \src
195*c0909341SAndroid Build Coastguard Worker.endm
// load_add_store_8x8: add 8 rows of 8 coefficients (v16-v23, one row per
// register) to the 8x8 pixel block at \dst and store the clamped result.
//   dst        pointer register to the first destination row; advanced by
//              8 rows
//   src        scratch pointer register; initialised from dst, advanced by
//              8 rows
//   shiftbits  rounding right shift applied to each coefficient row before
//              the add (default 4); forwarded to load_add_store
// Same staggering as load_add_store_8x16: load+shift in step i, add in i+2,
// narrow in i+3, store in i+4, with v2-v7 rotating as pixel temporaries.
// Clobbers v2-v7 and v16-v23.
196*c0909341SAndroid Build Coastguard Worker.macro load_add_store_8x8 dst, src, shiftbits=4
197*c0909341SAndroid Build Coastguard Worker        mov             \src, \dst
198*c0909341SAndroid Build Coastguard Worker        load_add_store  v2.8b, v16.8h,      ,       ,       ,      ,      , \dst, \src, \shiftbits
199*c0909341SAndroid Build Coastguard Worker        load_add_store  v3.8b, v17.8h,      ,       ,       ,      ,      , \dst, \src, \shiftbits
200*c0909341SAndroid Build Coastguard Worker        load_add_store  v4.8b, v18.8h, v2.8b, v16.8h,       ,      ,      , \dst, \src, \shiftbits
201*c0909341SAndroid Build Coastguard Worker        load_add_store  v5.8b, v19.8h, v3.8b, v17.8h, v16.8h, v2.8b,      , \dst, \src, \shiftbits
202*c0909341SAndroid Build Coastguard Worker        load_add_store  v6.8b, v20.8h, v4.8b, v18.8h, v17.8h, v3.8b, v2.8b, \dst, \src, \shiftbits
203*c0909341SAndroid Build Coastguard Worker        load_add_store  v7.8b, v21.8h, v5.8b, v19.8h, v18.8h, v4.8b, v3.8b, \dst, \src, \shiftbits
204*c0909341SAndroid Build Coastguard Worker        load_add_store  v2.8b, v22.8h, v6.8b, v20.8h, v19.8h, v5.8b, v4.8b, \dst, \src, \shiftbits
205*c0909341SAndroid Build Coastguard Worker        load_add_store  v3.8b, v23.8h, v7.8b, v21.8h, v20.8h, v6.8b, v5.8b, \dst, \src, \shiftbits
206*c0909341SAndroid Build Coastguard Worker        load_add_store       ,       , v2.8b, v22.8h, v21.8h, v7.8b, v6.8b, \dst, \src, \shiftbits
207*c0909341SAndroid Build Coastguard Worker        load_add_store       ,       , v3.8b, v23.8h, v22.8h, v2.8b, v7.8b, \dst, \src, \shiftbits
208*c0909341SAndroid Build Coastguard Worker        load_add_store       ,       ,      ,       , v23.8h, v3.8b, v2.8b, \dst, \src, \shiftbits
209*c0909341SAndroid Build Coastguard Worker        load_add_store       ,       ,      ,       ,       ,      , v3.8b, \dst, \src, \shiftbits
210*c0909341SAndroid Build Coastguard Worker.endm
// load_add_store_8x4: add 4 rows of 8 coefficients (v16-v19, one row per
// register) to the 8x4 pixel block at \dst and store the clamped result.
// Each coefficient row is first rounded and shifted right by 4 (the default
// shiftbits of load_add_store).
//   dst  pointer register to the first destination row; advanced by 4 rows
//   src  scratch pointer register; initialised from dst, advanced by 4 rows
// Same staggering as the larger variants: load+shift in step i, add in i+2,
// narrow in i+3, store in i+4.
// Clobbers v2-v5 and v16-v19.
211*c0909341SAndroid Build Coastguard Worker.macro load_add_store_8x4 dst, src
212*c0909341SAndroid Build Coastguard Worker        mov             \src, \dst
213*c0909341SAndroid Build Coastguard Worker        load_add_store  v2.8b, v16.8h,      ,       ,       ,      ,      , \dst, \src
214*c0909341SAndroid Build Coastguard Worker        load_add_store  v3.8b, v17.8h,      ,       ,       ,      ,      , \dst, \src
215*c0909341SAndroid Build Coastguard Worker        load_add_store  v4.8b, v18.8h, v2.8b, v16.8h,       ,      ,      , \dst, \src
216*c0909341SAndroid Build Coastguard Worker        load_add_store  v5.8b, v19.8h, v3.8b, v17.8h, v16.8h, v2.8b,      , \dst, \src
217*c0909341SAndroid Build Coastguard Worker        load_add_store       ,       , v4.8b, v18.8h, v17.8h, v3.8b, v2.8b, \dst, \src
218*c0909341SAndroid Build Coastguard Worker        load_add_store       ,       , v5.8b, v19.8h, v18.8h, v4.8b, v3.8b, \dst, \src
219*c0909341SAndroid Build Coastguard Worker        load_add_store       ,       ,      ,       , v19.8h, v5.8b, v4.8b, \dst, \src
220*c0909341SAndroid Build Coastguard Worker        load_add_store       ,       ,      ,       ,       ,      , v5.8b, \dst, \src
221*c0909341SAndroid Build Coastguard Worker.endm
// load_add_store4: one step of the "add residual to destination" sequence for
// 4 pixel wide rows. Each invocation handles two rows at once, and every
// stage is skipped when its argument is blank.
//   load                  register.s; lanes [0] and [1] are each loaded from
//                         [src], with src += x1 after every lane
//   inssrc, insdst        copy the low 64 bits of inssrc into the high
//                         64 bits of insdst, packing two 4-coefficient rows
//                         into one 8h vector
//   shift                 rounding right shift by 4, in place (fixed amount,
//                         unlike load_add_store)
//   addsrc, adddst        adddst += zero-extended pixels from addsrc
//   narrowsrc, narrowdst  saturate narrowsrc to unsigned 8 bit into narrowdst
//   store                 register.s; lanes [0] and [1] are each stored to
//                         [dst], with dst += x1 after every lane
// The two loads and the two stores are interleaved with the arithmetic
// instead of being emitted back to back. The row stride is hard-coded as x1.
222*c0909341SAndroid Build Coastguard Worker.macro load_add_store4 load, inssrc, insdst, shift, addsrc, adddst, narrowsrc, narrowdst, store, dst, src
223*c0909341SAndroid Build Coastguard Worker.ifnb \load
224*c0909341SAndroid Build Coastguard Worker        ld1             {\load}[0],  [\src], x1
225*c0909341SAndroid Build Coastguard Worker.endif
226*c0909341SAndroid Build Coastguard Worker.ifnb \inssrc
227*c0909341SAndroid Build Coastguard Worker        ins             \insdst\().d[1],   \inssrc\().d[0]
228*c0909341SAndroid Build Coastguard Worker.endif
229*c0909341SAndroid Build Coastguard Worker.ifnb \shift
230*c0909341SAndroid Build Coastguard Worker        srshr           \shift,  \shift,  #4
231*c0909341SAndroid Build Coastguard Worker.endif
232*c0909341SAndroid Build Coastguard Worker.ifnb \load
233*c0909341SAndroid Build Coastguard Worker        ld1             {\load}[1],  [\src], x1
234*c0909341SAndroid Build Coastguard Worker.endif
235*c0909341SAndroid Build Coastguard Worker.ifnb \addsrc
236*c0909341SAndroid Build Coastguard Worker        uaddw           \adddst, \adddst, \addsrc
237*c0909341SAndroid Build Coastguard Worker.endif
238*c0909341SAndroid Build Coastguard Worker.ifnb \store
239*c0909341SAndroid Build Coastguard Worker        st1             {\store}[0],  [\dst], x1
240*c0909341SAndroid Build Coastguard Worker.endif
241*c0909341SAndroid Build Coastguard Worker.ifnb \narrowsrc
242*c0909341SAndroid Build Coastguard Worker        sqxtun          \narrowdst, \narrowsrc
243*c0909341SAndroid Build Coastguard Worker.endif
244*c0909341SAndroid Build Coastguard Worker.ifnb \store
245*c0909341SAndroid Build Coastguard Worker        st1             {\store}[1],  [\dst], x1
246*c0909341SAndroid Build Coastguard Worker.endif
247*c0909341SAndroid Build Coastguard Worker.endm
// load_add_store_4x16: add 16 rows of 4 coefficients to the 4x16 pixel block
// at \dst and store the clamped result.
// On entry each of v16-v31 holds one row in its low 64 bits. The odd
// registers are merged into the high half of the preceding even register, so
// v16, v18, ..., v30 each carry two rows from then on.
//   dst  pointer register to the first destination row; advanced by 16 rows
//   src  scratch pointer register; initialised from dst, advanced by 16 rows
// Staggering per row pair: load+merge in step i, shift in i+2, add in i+3,
// narrow in i+4, store in i+5.
// Clobbers v0-v7 (including v0-v1, which the file header lists as the
// multiplication coefficient registers) and the even registers v16-v30.
248*c0909341SAndroid Build Coastguard Worker.macro load_add_store_4x16 dst, src
249*c0909341SAndroid Build Coastguard Worker        mov             \src, \dst
250*c0909341SAndroid Build Coastguard Worker        load_add_store4 v0.s, v17, v16,       ,      ,       ,       ,      ,     , \dst, \src
251*c0909341SAndroid Build Coastguard Worker        load_add_store4 v1.s, v19, v18,       ,      ,       ,       ,      ,     , \dst, \src
252*c0909341SAndroid Build Coastguard Worker        load_add_store4 v2.s, v21, v20, v16.8h,      ,       ,       ,      ,     , \dst, \src
253*c0909341SAndroid Build Coastguard Worker        load_add_store4 v3.s, v23, v22, v18.8h, v0.8b, v16.8h,       ,      ,     , \dst, \src
254*c0909341SAndroid Build Coastguard Worker        load_add_store4 v4.s, v25, v24, v20.8h, v1.8b, v18.8h, v16.8h, v0.8b,     , \dst, \src
255*c0909341SAndroid Build Coastguard Worker        load_add_store4 v5.s, v27, v26, v22.8h, v2.8b, v20.8h, v18.8h, v1.8b, v0.s, \dst, \src
256*c0909341SAndroid Build Coastguard Worker        load_add_store4 v6.s, v29, v28, v24.8h, v3.8b, v22.8h, v20.8h, v2.8b, v1.s, \dst, \src
257*c0909341SAndroid Build Coastguard Worker        load_add_store4 v7.s, v31, v30, v26.8h, v4.8b, v24.8h, v22.8h, v3.8b, v2.s, \dst, \src
258*c0909341SAndroid Build Coastguard Worker        load_add_store4     ,    ,    , v28.8h, v5.8b, v26.8h, v24.8h, v4.8b, v3.s, \dst, \src
259*c0909341SAndroid Build Coastguard Worker        load_add_store4     ,    ,    , v30.8h, v6.8b, v28.8h, v26.8h, v5.8b, v4.s, \dst, \src
260*c0909341SAndroid Build Coastguard Worker        load_add_store4     ,    ,    ,       , v7.8b, v30.8h, v28.8h, v6.8b, v5.s, \dst, \src
261*c0909341SAndroid Build Coastguard Worker        load_add_store4     ,    ,    ,       ,      ,       , v30.8h, v7.8b, v6.s, \dst, \src
262*c0909341SAndroid Build Coastguard Worker        load_add_store4     ,    ,    ,       ,      ,       ,       ,      , v7.s, \dst, \src
263*c0909341SAndroid Build Coastguard Worker.endm
// load_add_store_4x8: add 8 rows of 4 coefficients to the 4x8 pixel block at
// \dst and store the clamped result.
// On entry each of v16-v23 holds one row in its low 64 bits. The odd
// registers are merged into the high half of the preceding even register, so
// v16, v18, v20 and v22 each carry two rows from then on.
//   dst  pointer register to the first destination row; advanced by 8 rows
//   src  scratch pointer register; initialised from dst, advanced by 8 rows
// Staggering per row pair: load+merge in step i, shift in i+2, add in i+3,
// narrow in i+4, store in i+5.
// Clobbers v0-v3 (including v0-v1, which the file header lists as the
// multiplication coefficient registers) and v16, v18, v20, v22.
264*c0909341SAndroid Build Coastguard Worker.macro load_add_store_4x8 dst, src
265*c0909341SAndroid Build Coastguard Worker        mov             \src, \dst
266*c0909341SAndroid Build Coastguard Worker        load_add_store4 v0.s, v17, v16,       ,      ,       ,       ,      ,     , \dst, \src
267*c0909341SAndroid Build Coastguard Worker        load_add_store4 v1.s, v19, v18,       ,      ,       ,       ,      ,     , \dst, \src
268*c0909341SAndroid Build Coastguard Worker        load_add_store4 v2.s, v21, v20, v16.8h,      ,       ,       ,      ,     , \dst, \src
269*c0909341SAndroid Build Coastguard Worker        load_add_store4 v3.s, v23, v22, v18.8h, v0.8b, v16.8h,       ,      ,     , \dst, \src
270*c0909341SAndroid Build Coastguard Worker        load_add_store4     ,    ,    , v20.8h, v1.8b, v18.8h, v16.8h, v0.8b,     , \dst, \src
271*c0909341SAndroid Build Coastguard Worker        load_add_store4     ,    ,    , v22.8h, v2.8b, v20.8h, v18.8h, v1.8b, v0.s, \dst, \src
272*c0909341SAndroid Build Coastguard Worker        load_add_store4     ,    ,    ,       , v3.8b, v22.8h, v20.8h, v2.8b, v1.s, \dst, \src
273*c0909341SAndroid Build Coastguard Worker        load_add_store4     ,    ,    ,       ,      ,       , v22.8h, v3.8b, v2.s, \dst, \src
274*c0909341SAndroid Build Coastguard Worker        load_add_store4     ,    ,    ,       ,      ,       ,       ,      , v3.s, \dst, \src
275*c0909341SAndroid Build Coastguard Worker.endm
276*c0909341SAndroid Build Coastguard Worker
// idct_dc: fast path taken when eob (w3) is zero. Only the first coefficient
// at [x2] is used.
//   w      block width; selects the idct_dc_w<w>_neon tail function
//   h      block height; passed to that function in w4
//   shift  extra rounding right shift between the two scaling steps; nothing
//          is emitted for it when it is 0
// If w3 is nonzero nothing happens and execution continues after the macro
// (local label 1). Otherwise the first coefficient is broadcast to all lanes
// of v16.8h, cleared in memory, scaled, and the macro branches (without
// linking) to idct_dc_w<w>_neon, so that function returns to the caller.
// Each sqrdmulh by 2896*8 computes round(x * 2896*8 * 2 / 65536), i.e.
// round(x * 2896 / 4096). It is applied twice, or three times for 2:1 and
// 1:2 rectangular blocks.
// Clobbers w16, v0, v16 and w4.
277*c0909341SAndroid Build Coastguard Worker.macro idct_dc w, h, shift
278*c0909341SAndroid Build Coastguard Worker        cbnz            w3,  1f
279*c0909341SAndroid Build Coastguard Worker        mov             w16, #2896*8
280*c0909341SAndroid Build Coastguard Worker        ld1r            {v16.8h}, [x2]
281*c0909341SAndroid Build Coastguard Worker        dup             v0.4h,   w16
282*c0909341SAndroid Build Coastguard Worker        sqrdmulh        v16.8h,  v16.8h,  v0.h[0]
        // The coefficient has been read; zero it in the coefficient buffer.
283*c0909341SAndroid Build Coastguard Worker        strh            wzr, [x2]
284*c0909341SAndroid Build Coastguard Worker.if (\w == 2*\h) || (2*\w == \h)
285*c0909341SAndroid Build Coastguard Worker        sqrdmulh        v16.8h,  v16.8h,  v0.h[0]
286*c0909341SAndroid Build Coastguard Worker.endif
287*c0909341SAndroid Build Coastguard Worker.if \shift > 0
288*c0909341SAndroid Build Coastguard Worker        srshr           v16.8h,  v16.8h,  #\shift
289*c0909341SAndroid Build Coastguard Worker.endif
290*c0909341SAndroid Build Coastguard Worker        sqrdmulh        v16.8h,  v16.8h,  v0.h[0]
291*c0909341SAndroid Build Coastguard Worker        srshr           v16.8h,  v16.8h,  #4
292*c0909341SAndroid Build Coastguard Worker        mov             w4,  #\h
293*c0909341SAndroid Build Coastguard Worker        b               idct_dc_w\w\()_neon
294*c0909341SAndroid Build Coastguard Worker1:
295*c0909341SAndroid Build Coastguard Worker.endm
296*c0909341SAndroid Build Coastguard Worker
// idct_dc_w4_neon: add the value in v16.8h to a 4 pixel wide block of 8-bit
// pixels, clamping each result to 0..255.
// In:   x0  = dst, x1 = dst stride in bytes, w4 = number of rows,
//       v16 = per-lane value to add (all lanes equal when entered via the
//             idct_dc macro)
// Out:  x0 advanced past the rows written
// Clobbers v0, v1, w4 and the flags.
// Four rows are processed per iteration; the loop runs while w4 stays
// positive after subtracting 4.
// NOTE(review): w4 is presumably always a positive multiple of 4 -- confirm
// against the idct_dc call sites.
297*c0909341SAndroid Build Coastguard Workerfunction idct_dc_w4_neon
298*c0909341SAndroid Build Coastguard Worker1:
        // Gather four 4-byte rows into v0 (rows 0-1) and v1 (rows 2-3).
299*c0909341SAndroid Build Coastguard Worker        ld1             {v0.s}[0], [x0], x1
300*c0909341SAndroid Build Coastguard Worker        ld1             {v0.s}[1], [x0], x1
301*c0909341SAndroid Build Coastguard Worker        ld1             {v1.s}[0], [x0], x1
302*c0909341SAndroid Build Coastguard Worker        ld1             {v1.s}[1], [x0], x1
        // The flags set here are consumed by b.gt at the end of the loop;
        // no instruction in between modifies them.
303*c0909341SAndroid Build Coastguard Worker        subs            w4,  w4,  #4
        // Rewind x0 by four rows so the stores hit the rows just loaded.
304*c0909341SAndroid Build Coastguard Worker        sub             x0,  x0,  x1, lsl #2
305*c0909341SAndroid Build Coastguard Worker        uaddw           v0.8h,   v16.8h,  v0.8b
306*c0909341SAndroid Build Coastguard Worker        sqxtun          v0.8b,   v0.8h
307*c0909341SAndroid Build Coastguard Worker        uaddw           v1.8h,   v16.8h,  v1.8b
308*c0909341SAndroid Build Coastguard Worker        st1             {v0.s}[0], [x0], x1
309*c0909341SAndroid Build Coastguard Worker        sqxtun          v1.8b,   v1.8h
310*c0909341SAndroid Build Coastguard Worker        st1             {v0.s}[1], [x0], x1
311*c0909341SAndroid Build Coastguard Worker        st1             {v1.s}[0], [x0], x1
312*c0909341SAndroid Build Coastguard Worker        st1             {v1.s}[1], [x0], x1
313*c0909341SAndroid Build Coastguard Worker        b.gt            1b
314*c0909341SAndroid Build Coastguard Worker        ret
315*c0909341SAndroid Build Coastguard Workerendfunc
316*c0909341SAndroid Build Coastguard Worker
// idct_dc_w8_neon: add the value in v16.8h to an 8 pixel wide block of 8-bit
// pixels, clamping each result to 0..255.
// In:   x0  = dst, x1 = dst stride in bytes, w4 = number of rows,
//       v16 = per-lane value to add (all lanes equal when entered via the
//             idct_dc macro)
// Out:  x0 advanced past the rows written
// Clobbers v0-v3, v20-v23, w4 and the flags.
// Four rows are processed per iteration, with loads, adds, narrows and
// stores interleaved. The loop runs while w4 stays positive after
// subtracting 4.
// NOTE(review): w4 is presumably always a positive multiple of 4 -- confirm
// against the idct_dc call sites.
317*c0909341SAndroid Build Coastguard Workerfunction idct_dc_w8_neon
318*c0909341SAndroid Build Coastguard Worker1:
319*c0909341SAndroid Build Coastguard Worker        ld1             {v0.8b}, [x0], x1
320*c0909341SAndroid Build Coastguard Worker        ld1             {v1.8b}, [x0], x1
321*c0909341SAndroid Build Coastguard Worker        ld1             {v2.8b}, [x0], x1
322*c0909341SAndroid Build Coastguard Worker        uaddw           v20.8h,  v16.8h, v0.8b
323*c0909341SAndroid Build Coastguard Worker        ld1             {v3.8b}, [x0], x1
        // Rewind x0 by four rows so the stores hit the rows just loaded.
324*c0909341SAndroid Build Coastguard Worker        sub             x0,  x0,  x1, lsl #2
        // The flags set here are consumed by b.gt at the end of the loop;
        // no instruction in between modifies them.
325*c0909341SAndroid Build Coastguard Worker        subs            w4,  w4,  #4
326*c0909341SAndroid Build Coastguard Worker        uaddw           v21.8h,  v16.8h, v1.8b
327*c0909341SAndroid Build Coastguard Worker        sqxtun          v0.8b,   v20.8h
328*c0909341SAndroid Build Coastguard Worker        uaddw           v22.8h,  v16.8h, v2.8b
329*c0909341SAndroid Build Coastguard Worker        sqxtun          v1.8b,   v21.8h
330*c0909341SAndroid Build Coastguard Worker        uaddw           v23.8h,  v16.8h, v3.8b
331*c0909341SAndroid Build Coastguard Worker        st1             {v0.8b}, [x0], x1
332*c0909341SAndroid Build Coastguard Worker        sqxtun          v2.8b,   v22.8h
333*c0909341SAndroid Build Coastguard Worker        st1             {v1.8b}, [x0], x1
334*c0909341SAndroid Build Coastguard Worker        sqxtun          v3.8b,   v23.8h
335*c0909341SAndroid Build Coastguard Worker        st1             {v2.8b}, [x0], x1
336*c0909341SAndroid Build Coastguard Worker        st1             {v3.8b}, [x0], x1
337*c0909341SAndroid Build Coastguard Worker        b.gt            1b
338*c0909341SAndroid Build Coastguard Worker        ret
339*c0909341SAndroid Build Coastguard Workerendfunc
340*c0909341SAndroid Build Coastguard Worker
// idct_dc_w16_neon: add the value in v16.8h to a 16 pixel wide block of
// 8-bit pixels, clamping each result to 0..255.
// In:   x0  = dst, x1 = dst stride in bytes, w4 = number of rows,
//       v16 = per-lane value to add (all lanes equal when entered via the
//             idct_dc macro)
// Out:  x0 advanced past the rows written
// Clobbers v0-v3, v20-v27, w4 and the flags.
// Four rows are processed per iteration. Each 16-byte row is widened in two
// halves (uaddw for the low 8 bytes, uaddw2 for the high 8 bytes) and
// narrowed back with sqxtun/sqxtun2. The loop runs while w4 stays positive
// after subtracting 4.
// NOTE(review): w4 is presumably always a positive multiple of 4 -- confirm
// against the idct_dc call sites.
341*c0909341SAndroid Build Coastguard Workerfunction idct_dc_w16_neon
342*c0909341SAndroid Build Coastguard Worker1:
343*c0909341SAndroid Build Coastguard Worker        ld1             {v0.16b}, [x0], x1
344*c0909341SAndroid Build Coastguard Worker        ld1             {v1.16b}, [x0], x1
345*c0909341SAndroid Build Coastguard Worker        ld1             {v2.16b}, [x0], x1
        // The flags set here are consumed by b.gt at the end of the loop;
        // no instruction in between modifies them.
346*c0909341SAndroid Build Coastguard Worker        subs            w4,  w4,  #4
347*c0909341SAndroid Build Coastguard Worker        uaddw           v20.8h,  v16.8h, v0.8b
348*c0909341SAndroid Build Coastguard Worker        uaddw2          v21.8h,  v16.8h, v0.16b
349*c0909341SAndroid Build Coastguard Worker        ld1             {v3.16b}, [x0], x1
350*c0909341SAndroid Build Coastguard Worker        uaddw           v22.8h,  v16.8h, v1.8b
351*c0909341SAndroid Build Coastguard Worker        uaddw2          v23.8h,  v16.8h, v1.16b
        // Rewind x0 by four rows so the stores hit the rows just loaded.
352*c0909341SAndroid Build Coastguard Worker        sub             x0,  x0,  x1, lsl #2
353*c0909341SAndroid Build Coastguard Worker        uaddw           v24.8h,  v16.8h, v2.8b
354*c0909341SAndroid Build Coastguard Worker        uaddw2          v25.8h,  v16.8h, v2.16b
355*c0909341SAndroid Build Coastguard Worker        sqxtun          v0.8b,   v20.8h
356*c0909341SAndroid Build Coastguard Worker        sqxtun2         v0.16b,  v21.8h
357*c0909341SAndroid Build Coastguard Worker        uaddw           v26.8h,  v16.8h, v3.8b
358*c0909341SAndroid Build Coastguard Worker        uaddw2          v27.8h,  v16.8h, v3.16b
359*c0909341SAndroid Build Coastguard Worker        sqxtun          v1.8b,   v22.8h
360*c0909341SAndroid Build Coastguard Worker        sqxtun2         v1.16b,  v23.8h
361*c0909341SAndroid Build Coastguard Worker        sqxtun          v2.8b,   v24.8h
362*c0909341SAndroid Build Coastguard Worker        sqxtun2         v2.16b,  v25.8h
363*c0909341SAndroid Build Coastguard Worker        st1             {v0.16b}, [x0], x1
364*c0909341SAndroid Build Coastguard Worker        sqxtun          v3.8b,   v26.8h
365*c0909341SAndroid Build Coastguard Worker        sqxtun2         v3.16b,  v27.8h
366*c0909341SAndroid Build Coastguard Worker        st1             {v1.16b}, [x0], x1
367*c0909341SAndroid Build Coastguard Worker        st1             {v2.16b}, [x0], x1
368*c0909341SAndroid Build Coastguard Worker        st1             {v3.16b}, [x0], x1
369*c0909341SAndroid Build Coastguard Worker        b.gt            1b
370*c0909341SAndroid Build Coastguard Worker        ret
371*c0909341SAndroid Build Coastguard Workerendfunc
372*c0909341SAndroid Build Coastguard Worker
// idct_dc_w32_neon: add the eight halfwords held in v16 to every pixel of a
// 32-byte-wide block, in place, two rows per loop iteration.
//   x0  = pixel pointer (ends up just past the last row processed)
//   x1  = row stride in bytes
//   w4  = rows remaining; reduced by 2 per iteration, loop continues while > 0
//   v16 = per-lane value to add; set up by the caller, which is not visible
//         here (presumably the broadcast DC term - TODO confirm)
// Clobbers v0-v3, v20-v27 and the condition flags.
373*c0909341SAndroid Build Coastguard Workerfunction idct_dc_w32_neon
374*c0909341SAndroid Build Coastguard Worker1:
375*c0909341SAndroid Build Coastguard Worker        ld1             {v0.16b, v1.16b},  [x0], x1 // first row: 32 pixels
376*c0909341SAndroid Build Coastguard Worker        subs            w4,  w4,  #2 // sets the flags consumed by b.gt below
377*c0909341SAndroid Build Coastguard Worker        uaddw           v20.8h,  v16.8h, v0.8b
378*c0909341SAndroid Build Coastguard Worker        uaddw2          v21.8h,  v16.8h, v0.16b
379*c0909341SAndroid Build Coastguard Worker        ld1             {v2.16b, v3.16b},  [x0] // second row; no post-increment here
380*c0909341SAndroid Build Coastguard Worker        uaddw           v22.8h,  v16.8h, v1.8b
381*c0909341SAndroid Build Coastguard Worker        uaddw2          v23.8h,  v16.8h, v1.16b
382*c0909341SAndroid Build Coastguard Worker        sub             x0,  x0,  x1 // back to the first row for the stores
383*c0909341SAndroid Build Coastguard Worker        uaddw           v24.8h,  v16.8h, v2.8b
384*c0909341SAndroid Build Coastguard Worker        uaddw2          v25.8h,  v16.8h, v2.16b
385*c0909341SAndroid Build Coastguard Worker        sqxtun          v0.8b,   v20.8h // signed 16 bit -> unsigned 8 bit with saturation
386*c0909341SAndroid Build Coastguard Worker        sqxtun2         v0.16b,  v21.8h
387*c0909341SAndroid Build Coastguard Worker        uaddw           v26.8h,  v16.8h, v3.8b
388*c0909341SAndroid Build Coastguard Worker        uaddw2          v27.8h,  v16.8h, v3.16b
389*c0909341SAndroid Build Coastguard Worker        sqxtun          v1.8b,   v22.8h
390*c0909341SAndroid Build Coastguard Worker        sqxtun2         v1.16b,  v23.8h
391*c0909341SAndroid Build Coastguard Worker        sqxtun          v2.8b,   v24.8h
392*c0909341SAndroid Build Coastguard Worker        sqxtun2         v2.16b,  v25.8h
393*c0909341SAndroid Build Coastguard Worker        st1             {v0.16b, v1.16b},  [x0], x1
394*c0909341SAndroid Build Coastguard Worker        sqxtun          v3.8b,   v26.8h
395*c0909341SAndroid Build Coastguard Worker        sqxtun2         v3.16b,  v27.8h
396*c0909341SAndroid Build Coastguard Worker        st1             {v2.16b, v3.16b},  [x0], x1
397*c0909341SAndroid Build Coastguard Worker        b.gt            1b // nothing since the subs above has modified the flags
398*c0909341SAndroid Build Coastguard Worker        ret
399*c0909341SAndroid Build Coastguard Workerendfunc
400*c0909341SAndroid Build Coastguard Worker
// idct_dc_w64_neon: add the eight halfwords held in v16 to every pixel of a
// 64-byte-wide block, in place, one row per loop iteration.
//   x0  = pixel pointer (ends up just past the last row processed)
//   x1  = row stride in bytes
//   w4  = rows remaining; reduced by 1 per iteration, loop continues while > 0
//   v16 = per-lane value to add; set up by the caller, which is not visible
//         here (presumably the broadcast DC term - TODO confirm)
// Clobbers v0-v3, v20-v27 and the condition flags.
401*c0909341SAndroid Build Coastguard Workerfunction idct_dc_w64_neon
402*c0909341SAndroid Build Coastguard Worker1:
403*c0909341SAndroid Build Coastguard Worker        ld1             {v0.16b, v1.16b, v2.16b, v3.16b},  [x0] // whole row; x0 is advanced by the store instead
404*c0909341SAndroid Build Coastguard Worker        subs            w4,  w4,  #1 // sets the flags consumed by b.gt below
405*c0909341SAndroid Build Coastguard Worker        uaddw           v20.8h,  v16.8h, v0.8b
406*c0909341SAndroid Build Coastguard Worker        uaddw2          v21.8h,  v16.8h, v0.16b
407*c0909341SAndroid Build Coastguard Worker        uaddw           v22.8h,  v16.8h, v1.8b
408*c0909341SAndroid Build Coastguard Worker        uaddw2          v23.8h,  v16.8h, v1.16b
409*c0909341SAndroid Build Coastguard Worker        uaddw           v24.8h,  v16.8h, v2.8b
410*c0909341SAndroid Build Coastguard Worker        uaddw2          v25.8h,  v16.8h, v2.16b
411*c0909341SAndroid Build Coastguard Worker        sqxtun          v0.8b,   v20.8h // signed 16 bit -> unsigned 8 bit with saturation
412*c0909341SAndroid Build Coastguard Worker        sqxtun2         v0.16b,  v21.8h
413*c0909341SAndroid Build Coastguard Worker        uaddw           v26.8h,  v16.8h, v3.8b
414*c0909341SAndroid Build Coastguard Worker        uaddw2          v27.8h,  v16.8h, v3.16b
415*c0909341SAndroid Build Coastguard Worker        sqxtun          v1.8b,   v22.8h
416*c0909341SAndroid Build Coastguard Worker        sqxtun2         v1.16b,  v23.8h
417*c0909341SAndroid Build Coastguard Worker        sqxtun          v2.8b,   v24.8h
418*c0909341SAndroid Build Coastguard Worker        sqxtun2         v2.16b,  v25.8h
419*c0909341SAndroid Build Coastguard Worker        sqxtun          v3.8b,   v26.8h
420*c0909341SAndroid Build Coastguard Worker        sqxtun2         v3.16b,  v27.8h
421*c0909341SAndroid Build Coastguard Worker        st1             {v0.16b, v1.16b, v2.16b, v3.16b},  [x0], x1
422*c0909341SAndroid Build Coastguard Worker        b.gt            1b // nothing since the subs above has modified the flags
423*c0909341SAndroid Build Coastguard Worker        ret
424*c0909341SAndroid Build Coastguard Workerendfunc
425*c0909341SAndroid Build Coastguard Worker
// iwht4: in-place 4-point butterfly on v16-v19 (four 16-bit lanes each), built
// only from add, sub and one arithmetic shift right by 1. Used by the wht_wht
// 4x4 entry point; judging by the name this is the inverse Walsh-Hadamard step.
// With a, b, c, d the values of v16, v17, v18, v19 on entry:
//   t0 = a + b, t1 = c - d, e = (t0 - t1) >> 1
//   v18 = e - b, v17 = e - d, v19 = t1 + v18, v16 = t0 - v17
// Clobbers v20 and v21. The arithmetic is non-saturating.
426*c0909341SAndroid Build Coastguard Worker.macro iwht4
427*c0909341SAndroid Build Coastguard Worker        add             v16.4h,  v16.4h,  v17.4h // t0
428*c0909341SAndroid Build Coastguard Worker        sub             v21.4h,  v18.4h,  v19.4h // t1
429*c0909341SAndroid Build Coastguard Worker        sub             v20.4h,  v16.4h,  v21.4h
430*c0909341SAndroid Build Coastguard Worker        sshr            v20.4h,  v20.4h,  #1 // e
431*c0909341SAndroid Build Coastguard Worker        sub             v18.4h,  v20.4h,  v17.4h
432*c0909341SAndroid Build Coastguard Worker        sub             v17.4h,  v20.4h,  v19.4h
433*c0909341SAndroid Build Coastguard Worker        add             v19.4h,  v21.4h,  v18.4h
434*c0909341SAndroid Build Coastguard Worker        sub             v16.4h,  v16.4h,  v17.4h
435*c0909341SAndroid Build Coastguard Worker.endm
436*c0909341SAndroid Build Coastguard Worker
// idct_4: 4-point transform butterfly applied in place to \r0-\r3.
//   \r0-\r3 = register names without arrangement suffix (e.g. v16)
//   \sz     = arrangement suffix, .4h or .8h in the callers visible here
//   v0      = coefficients; lanes h[0], h[2] and h[3] are read, so the caller
//             must have loaded them first
// smull_smlal / smull_smlsl / sqrshrn_sz are helper macros defined outside this
// chunk; presumably they form a widening two-term multiply-accumulate and a
// rounding narrowing shift - see their definitions to confirm.
// Clobbers v2-v7. The final adds and subtracts are saturating.
437*c0909341SAndroid Build Coastguard Worker.macro idct_4 r0, r1, r2, r3, sz
438*c0909341SAndroid Build Coastguard Worker        smull_smlal     v6,  v7,  \r1, \r3, v0.h[3], v0.h[2], \sz
439*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v4,  v5,  \r1, \r3, v0.h[2], v0.h[3], \sz
440*c0909341SAndroid Build Coastguard Worker        smull_smlal     v2,  v3,  \r0, \r2, v0.h[0], v0.h[0], \sz
441*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v6,  v6,  v7,  #12, \sz
442*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v7,  v4,  v5,  #12, \sz
443*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v4,  v5,  \r0, \r2, v0.h[0], v0.h[0], \sz
444*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v2,  v2,  v3,  #12, \sz
445*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v3,  v4,  v5,  #12, \sz
446*c0909341SAndroid Build Coastguard Worker        sqadd           \r0\sz,  v2\sz,   v6\sz // outputs pair up as (r0, r3) and (r1, r2)
447*c0909341SAndroid Build Coastguard Worker        sqsub           \r3\sz,  v2\sz,   v6\sz
448*c0909341SAndroid Build Coastguard Worker        sqadd           \r1\sz,  v3\sz,   v7\sz
449*c0909341SAndroid Build Coastguard Worker        sqsub           \r2\sz,  v3\sz,   v7\sz
450*c0909341SAndroid Build Coastguard Worker.endm
451*c0909341SAndroid Build Coastguard Worker
// inv_dct_4h_x4_neon: run idct_4 on v16-v19 with four 16-bit lanes per
// register (.4h). Results replace v16-v19.
// Loads four halfwords from idct_coeffs into v0 first.
// Clobbers x16, v0 and the v2-v7 scratch registers used by idct_4.
452*c0909341SAndroid Build Coastguard Workerfunction inv_dct_4h_x4_neon, export=1
453*c0909341SAndroid Build Coastguard Worker        movrel          x16, idct_coeffs
454*c0909341SAndroid Build Coastguard Worker        ld1             {v0.4h}, [x16]
455*c0909341SAndroid Build Coastguard Worker        idct_4          v16, v17, v18, v19, .4h
456*c0909341SAndroid Build Coastguard Worker        ret
457*c0909341SAndroid Build Coastguard Workerendfunc
458*c0909341SAndroid Build Coastguard Worker
// inv_dct_8h_x4_neon: run idct_4 on v16-v19 with eight 16-bit lanes per
// register (.8h). Results replace v16-v19.
// Loads four halfwords from idct_coeffs into v0 first.
// Clobbers x16, v0 and the v2-v7 scratch registers used by idct_4.
459*c0909341SAndroid Build Coastguard Workerfunction inv_dct_8h_x4_neon, export=1
460*c0909341SAndroid Build Coastguard Worker        movrel          x16, idct_coeffs
461*c0909341SAndroid Build Coastguard Worker        ld1             {v0.4h}, [x16]
462*c0909341SAndroid Build Coastguard Worker        idct_4          v16, v17, v18, v19, .8h
463*c0909341SAndroid Build Coastguard Worker        ret
464*c0909341SAndroid Build Coastguard Workerendfunc
465*c0909341SAndroid Build Coastguard Worker
// iadst_4x4: 4-point transform on four 16-bit lanes, using the iadst4_coeffs
// table (defined outside this chunk).
//   inputs  = v16, v17, v18, v19 (.4h), hard-coded
//   outputs = \o0-\o3 (register names without suffix), written as .4h
// Every read of v16-v19 happens before the first write to an output register,
// so the outputs may name v16-v19 in any order (the flipadst entry point passes
// them reversed).
// The outputs are used as 32-bit accumulators first, then narrowed in place
// with a rounding, saturating shift right by 12.
// Clobbers x16, v0, v3, v4, v5, v7.
466*c0909341SAndroid Build Coastguard Worker.macro iadst_4x4 o0, o1, o2, o3
467*c0909341SAndroid Build Coastguard Worker        movrel          x16, iadst4_coeffs
468*c0909341SAndroid Build Coastguard Worker        ld1             {v0.8h}, [x16]
469*c0909341SAndroid Build Coastguard Worker
470*c0909341SAndroid Build Coastguard Worker        ssubl           v3.4s,   v16.4h,  v18.4h
471*c0909341SAndroid Build Coastguard Worker        smull           v4.4s,   v16.4h,  v0.h[0]
472*c0909341SAndroid Build Coastguard Worker        smlal           v4.4s,   v18.4h,  v0.h[1]
473*c0909341SAndroid Build Coastguard Worker        smlal           v4.4s,   v19.4h,  v0.h[2]
474*c0909341SAndroid Build Coastguard Worker        smull           v7.4s,   v17.4h,  v0.h[3]
475*c0909341SAndroid Build Coastguard Worker        saddw           v3.4s,   v3.4s,   v19.4h // v3 = in0 - in2 + in3, widened
476*c0909341SAndroid Build Coastguard Worker        smull           v5.4s,   v16.4h,  v0.h[2]
477*c0909341SAndroid Build Coastguard Worker        smlsl           v5.4s,   v18.4h,  v0.h[0]
478*c0909341SAndroid Build Coastguard Worker        smlsl           v5.4s,   v19.4h,  v0.h[1]
479*c0909341SAndroid Build Coastguard Worker
480*c0909341SAndroid Build Coastguard Worker        add             \o3\().4s, v4.4s,     v5.4s
481*c0909341SAndroid Build Coastguard Worker        mul             \o2\().4s, v3.4s,     v0.s[2] // 32-bit lane 2 of v0, i.e. halfwords 4-5 of the table
482*c0909341SAndroid Build Coastguard Worker        add             \o0\().4s, v4.4s,     v7.4s
483*c0909341SAndroid Build Coastguard Worker        add             \o1\().4s, v5.4s,     v7.4s
484*c0909341SAndroid Build Coastguard Worker        sub             \o3\().4s, \o3\().4s, v7.4s
485*c0909341SAndroid Build Coastguard Worker
486*c0909341SAndroid Build Coastguard Worker        sqrshrn         \o0\().4h, \o0\().4s, #12
487*c0909341SAndroid Build Coastguard Worker        sqrshrn         \o2\().4h, \o2\().4s, #12
488*c0909341SAndroid Build Coastguard Worker        sqrshrn         \o1\().4h, \o1\().4s, #12
489*c0909341SAndroid Build Coastguard Worker        sqrshrn         \o3\().4h, \o3\().4s, #12
490*c0909341SAndroid Build Coastguard Worker.endm
491*c0909341SAndroid Build Coastguard Worker
// inv_adst_4h_x4_neon: expand iadst_4x4 with outputs in natural order, so the
// results replace v16-v19 (.4h) in place.
// Clobbers whatever iadst_4x4 clobbers (x16, v0, v3, v4, v5, v7).
492*c0909341SAndroid Build Coastguard Workerfunction inv_adst_4h_x4_neon, export=1
493*c0909341SAndroid Build Coastguard Worker        iadst_4x4       v16, v17, v18, v19
494*c0909341SAndroid Build Coastguard Worker        ret
495*c0909341SAndroid Build Coastguard Workerendfunc
496*c0909341SAndroid Build Coastguard Worker
// inv_flipadst_4h_x4_neon: same computation as inv_adst_4h_x4_neon, but the
// output registers are passed in reverse, so out0 lands in v19 and out3 in v16.
// Clobbers whatever iadst_4x4 clobbers (x16, v0, v3, v4, v5, v7).
497*c0909341SAndroid Build Coastguard Workerfunction inv_flipadst_4h_x4_neon, export=1
498*c0909341SAndroid Build Coastguard Worker        iadst_4x4       v19, v18, v17, v16
499*c0909341SAndroid Build Coastguard Worker        ret
500*c0909341SAndroid Build Coastguard Workerendfunc
501*c0909341SAndroid Build Coastguard Worker
// iadst_8x4: eight-lane (.8h) counterpart of iadst_4x4. Each 32-bit
// intermediate is kept as a low/high register pair (plain and "2" forms of the
// instructions).
//   inputs  = v16, v17, v18, v19 (.8h), hard-coded
//   outputs = \o0-\o3 (register names without suffix), written as .8h
// Unlike iadst_4x4, v16-v19 are also used as fixed temporaries: out0 is built
// in v16/v17 and out2 in v18/v19. out2 is always narrowed into v18 and is only
// copied elsewhere when \o2 is v17, so the macro as written supports \o2 being
// v18 or v17 only (the two orderings used by the callers in this file).
// Clobbers x16, v0, v2-v7.
502*c0909341SAndroid Build Coastguard Worker.macro iadst_8x4 o0, o1, o2, o3
503*c0909341SAndroid Build Coastguard Worker        movrel          x16, iadst4_coeffs
504*c0909341SAndroid Build Coastguard Worker        ld1             {v0.8h}, [x16]
505*c0909341SAndroid Build Coastguard Worker
506*c0909341SAndroid Build Coastguard Worker        ssubl           v2.4s,   v16.4h,  v18.4h
507*c0909341SAndroid Build Coastguard Worker        ssubl2          v3.4s,   v16.8h,  v18.8h
508*c0909341SAndroid Build Coastguard Worker        smull           v4.4s,   v16.4h,  v0.h[0]
509*c0909341SAndroid Build Coastguard Worker        smlal           v4.4s,   v18.4h,  v0.h[1]
510*c0909341SAndroid Build Coastguard Worker        smlal           v4.4s,   v19.4h,  v0.h[2]
511*c0909341SAndroid Build Coastguard Worker        smull2          v5.4s,   v16.8h,  v0.h[0]
512*c0909341SAndroid Build Coastguard Worker        smlal2          v5.4s,   v18.8h,  v0.h[1]
513*c0909341SAndroid Build Coastguard Worker        smlal2          v5.4s,   v19.8h,  v0.h[2]
514*c0909341SAndroid Build Coastguard Worker        saddw           v2.4s,   v2.4s,   v19.4h // v2/v3 = in0 - in2 + in3, widened
515*c0909341SAndroid Build Coastguard Worker        saddw2          v3.4s,   v3.4s,   v19.8h
516*c0909341SAndroid Build Coastguard Worker        smull           v6.4s,   v16.4h,  v0.h[2]
517*c0909341SAndroid Build Coastguard Worker        smlsl           v6.4s,   v18.4h,  v0.h[0]
518*c0909341SAndroid Build Coastguard Worker        smlsl           v6.4s,   v19.4h,  v0.h[1]
519*c0909341SAndroid Build Coastguard Worker        smull2          v7.4s,   v16.8h,  v0.h[2]
520*c0909341SAndroid Build Coastguard Worker        smlsl2          v7.4s,   v18.8h,  v0.h[0]
521*c0909341SAndroid Build Coastguard Worker        smlsl2          v7.4s,   v19.8h,  v0.h[1]
522*c0909341SAndroid Build Coastguard Worker
        // Last reads of v18/v19 as inputs were above; they now hold out2 (32 bit).
523*c0909341SAndroid Build Coastguard Worker        mul             v18.4s,  v2.4s,   v0.s[2]
524*c0909341SAndroid Build Coastguard Worker        mul             v19.4s,  v3.4s,   v0.s[2]
525*c0909341SAndroid Build Coastguard Worker
        // v2/v3 are free again and are reused for the in1 product.
526*c0909341SAndroid Build Coastguard Worker        smull           v2.4s,   v17.4h,  v0.h[3]
527*c0909341SAndroid Build Coastguard Worker        smull2          v3.4s,   v17.8h,  v0.h[3]
528*c0909341SAndroid Build Coastguard Worker
529*c0909341SAndroid Build Coastguard Worker        add             v16.4s,  v4.4s,   v2.4s // out0
530*c0909341SAndroid Build Coastguard Worker        add             v17.4s,  v5.4s,   v3.4s
531*c0909341SAndroid Build Coastguard Worker
532*c0909341SAndroid Build Coastguard Worker        add             v4.4s,   v4.4s,   v6.4s // out3
533*c0909341SAndroid Build Coastguard Worker        add             v5.4s,   v5.4s,   v7.4s
534*c0909341SAndroid Build Coastguard Worker
535*c0909341SAndroid Build Coastguard Worker        add             v6.4s,   v6.4s,   v2.4s // out1
536*c0909341SAndroid Build Coastguard Worker        add             v7.4s,   v7.4s,   v3.4s
537*c0909341SAndroid Build Coastguard Worker
538*c0909341SAndroid Build Coastguard Worker        sub             v4.4s,   v4.4s,   v2.4s // out3
539*c0909341SAndroid Build Coastguard Worker        sub             v5.4s,   v5.4s,   v3.4s
540*c0909341SAndroid Build Coastguard Worker
        // Narrow out2 first, in place in v18.
541*c0909341SAndroid Build Coastguard Worker        sqrshrn         v18.4h,  v18.4s, #12
542*c0909341SAndroid Build Coastguard Worker        sqrshrn2        v18.8h,  v19.4s, #12
543*c0909341SAndroid Build Coastguard Worker
544*c0909341SAndroid Build Coastguard Worker        sqrshrn         \o0\().4h, v16.4s, #12
545*c0909341SAndroid Build Coastguard Worker        sqrshrn2        \o0\().8h, v17.4s, #12
546*c0909341SAndroid Build Coastguard Worker
        // v17 (high half of the 32-bit out0) has been consumed by the narrowing
        // above, so it can now receive out2 when the caller asked for it there.
547*c0909341SAndroid Build Coastguard Worker.ifc \o2, v17
548*c0909341SAndroid Build Coastguard Worker        mov             v17.16b,   v18.16b
549*c0909341SAndroid Build Coastguard Worker.endif
550*c0909341SAndroid Build Coastguard Worker
551*c0909341SAndroid Build Coastguard Worker        sqrshrn         \o1\().4h, v6.4s,  #12
552*c0909341SAndroid Build Coastguard Worker        sqrshrn2        \o1\().8h, v7.4s,  #12
553*c0909341SAndroid Build Coastguard Worker
554*c0909341SAndroid Build Coastguard Worker        sqrshrn         \o3\().4h, v4.4s,  #12
555*c0909341SAndroid Build Coastguard Worker        sqrshrn2        \o3\().8h, v5.4s,  #12
556*c0909341SAndroid Build Coastguard Worker.endm
557*c0909341SAndroid Build Coastguard Worker
// inv_adst_8h_x4_neon: expand iadst_8x4 with outputs in natural order, so the
// results replace v16-v19 (.8h) in place.
// Clobbers whatever iadst_8x4 clobbers (x16, v0, v2-v7).
558*c0909341SAndroid Build Coastguard Workerfunction inv_adst_8h_x4_neon, export=1
559*c0909341SAndroid Build Coastguard Worker        iadst_8x4       v16, v17, v18, v19
560*c0909341SAndroid Build Coastguard Worker        ret
561*c0909341SAndroid Build Coastguard Workerendfunc
562*c0909341SAndroid Build Coastguard Worker
// inv_flipadst_8h_x4_neon: same computation as inv_adst_8h_x4_neon, but the
// output registers are passed in reverse, so out0 lands in v19 and out3 in v16.
// Here \o2 is v17, which triggers the extra register copy inside iadst_8x4.
// Clobbers whatever iadst_8x4 clobbers (x16, v0, v2-v7).
563*c0909341SAndroid Build Coastguard Workerfunction inv_flipadst_8h_x4_neon, export=1
564*c0909341SAndroid Build Coastguard Worker        iadst_8x4       v19, v18, v17, v16
565*c0909341SAndroid Build Coastguard Worker        ret
566*c0909341SAndroid Build Coastguard Workerendfunc
567*c0909341SAndroid Build Coastguard Worker
// inv_identity_4h_x4_neon: scale v16-v19 (.4h) in place by 5793/4096, which is
// approximately sqrt(2).
// The factor is split as 1 + 1697/4096 so the multiplier fits in 16 bits:
// sqrdmulh by (5793-4096)*8 yields round(x * 1697/4096), and the saturating add
// puts the original x back.
// Clobbers w16, v0, v4-v7.
568*c0909341SAndroid Build Coastguard Workerfunction inv_identity_4h_x4_neon, export=1
569*c0909341SAndroid Build Coastguard Worker        mov             w16, #(5793-4096)*8
570*c0909341SAndroid Build Coastguard Worker        dup             v0.4h,   w16
571*c0909341SAndroid Build Coastguard Worker        sqrdmulh        v4.4h,   v16.4h,  v0.h[0]
572*c0909341SAndroid Build Coastguard Worker        sqrdmulh        v5.4h,   v17.4h,  v0.h[0]
573*c0909341SAndroid Build Coastguard Worker        sqrdmulh        v6.4h,   v18.4h,  v0.h[0]
574*c0909341SAndroid Build Coastguard Worker        sqrdmulh        v7.4h,   v19.4h,  v0.h[0]
575*c0909341SAndroid Build Coastguard Worker        sqadd           v16.4h,  v16.4h,  v4.4h
576*c0909341SAndroid Build Coastguard Worker        sqadd           v17.4h,  v17.4h,  v5.4h
577*c0909341SAndroid Build Coastguard Worker        sqadd           v18.4h,  v18.4h,  v6.4h
578*c0909341SAndroid Build Coastguard Worker        sqadd           v19.4h,  v19.4h,  v7.4h
579*c0909341SAndroid Build Coastguard Worker        ret
580*c0909341SAndroid Build Coastguard Workerendfunc
581*c0909341SAndroid Build Coastguard Worker
// inv_identity_8h_x4_neon: scale v16-v19 (.8h) in place by 5793/4096, which is
// approximately sqrt(2).
// The factor is split as 1 + 1697/4096 so the multiplier fits in 16 bits:
// sqrdmulh by (5793-4096)*8 yields round(x * 1697/4096), and the saturating add
// puts the original x back.
// Clobbers w16, v0, v4-v7.
582*c0909341SAndroid Build Coastguard Workerfunction inv_identity_8h_x4_neon, export=1
583*c0909341SAndroid Build Coastguard Worker        mov             w16, #(5793-4096)*8
584*c0909341SAndroid Build Coastguard Worker        dup             v0.4h,   w16 // only lane h[0] is read below, so .4h is enough
585*c0909341SAndroid Build Coastguard Worker        sqrdmulh        v4.8h,   v16.8h,  v0.h[0]
586*c0909341SAndroid Build Coastguard Worker        sqrdmulh        v5.8h,   v17.8h,  v0.h[0]
587*c0909341SAndroid Build Coastguard Worker        sqrdmulh        v6.8h,   v18.8h,  v0.h[0]
588*c0909341SAndroid Build Coastguard Worker        sqrdmulh        v7.8h,   v19.8h,  v0.h[0]
589*c0909341SAndroid Build Coastguard Worker        sqadd           v16.8h,  v16.8h,  v4.8h
590*c0909341SAndroid Build Coastguard Worker        sqadd           v17.8h,  v17.8h,  v5.8h
591*c0909341SAndroid Build Coastguard Worker        sqadd           v18.8h,  v18.8h,  v6.8h
592*c0909341SAndroid Build Coastguard Worker        sqadd           v19.8h,  v19.8h,  v7.8h
593*c0909341SAndroid Build Coastguard Worker        ret
594*c0909341SAndroid Build Coastguard Workerendfunc
595*c0909341SAndroid Build Coastguard Worker
// identity_8x4_shift1: for each of the four .8h registers \r0-\r3, replace x
// with srhadd(x, sqrdmulh(x, \c)), i.e. (x + sqrdmulh(x, \c) + 1) >> 1.
//   \r0-\r3 = register names without arrangement suffix
//   \c      = by-element multiplier operand supplied by the caller (e.g. a
//             v?.h[n] lane); its value is not visible in this chunk
// Clobbers v2, so v2 must not be one of \r0-\r3.
596*c0909341SAndroid Build Coastguard Worker.macro identity_8x4_shift1 r0, r1, r2, r3, c
597*c0909341SAndroid Build Coastguard Worker.irp i, \r0\().8h, \r1\().8h, \r2\().8h, \r3\().8h
598*c0909341SAndroid Build Coastguard Worker        sqrdmulh        v2.8h,  \i,  \c
599*c0909341SAndroid Build Coastguard Worker        srhadd          \i,     \i,  v2.8h // rounding halving add: the ">> 1" of the name
600*c0909341SAndroid Build Coastguard Worker.endr
601*c0909341SAndroid Build Coastguard Worker.endm
602*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_wht_wht_4x4_8bpc_neon: 4x4 transform-and-add using iwht4 for
// both passes.
//   x0 = destination pixels, x1 = destination stride in bytes
//   x2 = 16 halfword coefficients; the 32 bytes are overwritten with zeros
// The return address is parked in x15 because the shared tail
// L(itx_4x4_end), which this function branches to, returns with "ret x15".
// Unlike the generic 4x4 path, no rounding shift is applied after the second
// pass; the inputs are instead shifted right by 2 before the first pass.
603*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_wht_wht_4x4_8bpc_neon, export=1
604*c0909341SAndroid Build Coastguard Worker        mov             x15, x30
605*c0909341SAndroid Build Coastguard Worker        movi            v31.8h,  #0
606*c0909341SAndroid Build Coastguard Worker        ld1             {v16.4h,v17.4h,v18.4h,v19.4h}, [x2]
607*c0909341SAndroid Build Coastguard Worker        st1             {v31.8h}, [x2], #16 // clear the first 16 bytes of coefficients
608*c0909341SAndroid Build Coastguard Worker
609*c0909341SAndroid Build Coastguard Worker        sshr            v16.4h,  v16.4h,  #2
610*c0909341SAndroid Build Coastguard Worker        sshr            v17.4h,  v17.4h,  #2
611*c0909341SAndroid Build Coastguard Worker        sshr            v18.4h,  v18.4h,  #2
612*c0909341SAndroid Build Coastguard Worker        sshr            v19.4h,  v19.4h,  #2
613*c0909341SAndroid Build Coastguard Worker
614*c0909341SAndroid Build Coastguard Worker        iwht4
615*c0909341SAndroid Build Coastguard Worker
616*c0909341SAndroid Build Coastguard Worker        st1             {v31.8h}, [x2], #16 // clear the remaining 16 bytes
617*c0909341SAndroid Build Coastguard Worker        transpose_4x4h  v16, v17, v18, v19, v20, v21, v22, v23
618*c0909341SAndroid Build Coastguard Worker
619*c0909341SAndroid Build Coastguard Worker        iwht4
620*c0909341SAndroid Build Coastguard Worker
        // Load the four destination rows (4 pixels each) into v0/v1 and pack the
        // residual as two rows per register (v16 = rows 0-1, v18 = rows 2-3),
        // which is the layout the shared tail expects.
621*c0909341SAndroid Build Coastguard Worker        ld1             {v0.s}[0], [x0], x1
622*c0909341SAndroid Build Coastguard Worker        ld1             {v0.s}[1], [x0], x1
623*c0909341SAndroid Build Coastguard Worker        ins             v16.d[1], v17.d[0]
624*c0909341SAndroid Build Coastguard Worker        ins             v18.d[1], v19.d[0]
625*c0909341SAndroid Build Coastguard Worker        ld1             {v1.s}[0], [x0], x1
626*c0909341SAndroid Build Coastguard Worker        ld1             {v1.s}[1], [x0], x1
627*c0909341SAndroid Build Coastguard Worker
628*c0909341SAndroid Build Coastguard Worker        b               L(itx_4x4_end)
629*c0909341SAndroid Build Coastguard Workerendfunc
630*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_4x4_neon: shared body of the 4x4 transform-and-add entry points.
//   x0  = destination pixels, x1 = destination stride in bytes
//   x2  = 16 halfword coefficients; the 32 bytes are overwritten with zeros
//   x4  = address of the first-pass 1-D transform (operates on v16-v19)
//   x5  = address of the second-pass 1-D transform
//   x15 = return address, saved by the entry point that branched here
// Both passes are called with blr, which overwrites x30; that is why the
// function returns through x15 instead of a plain ret.
// L(itx_4x4_end) is also a branch target for other 4x4 entry points. It expects
// the residual in v16 (rows 0-1) and v18 (rows 2-3), the loaded pixels in v0
// (rows 0-1) and v1 (rows 2-3), and x0 already advanced by four rows.
631*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_4x4_neon
632*c0909341SAndroid Build Coastguard Worker        movi            v31.8h,  #0
633*c0909341SAndroid Build Coastguard Worker        ld1             {v16.4h,v17.4h,v18.4h,v19.4h}, [x2]
634*c0909341SAndroid Build Coastguard Worker        st1             {v31.8h}, [x2], #16 // clear the first 16 bytes of coefficients
635*c0909341SAndroid Build Coastguard Worker
636*c0909341SAndroid Build Coastguard Worker        blr             x4
637*c0909341SAndroid Build Coastguard Worker
638*c0909341SAndroid Build Coastguard Worker        st1             {v31.8h}, [x2], #16 // clear the remaining 16 bytes
639*c0909341SAndroid Build Coastguard Worker        transpose_4x4h  v16, v17, v18, v19, v20, v21, v22, v23
640*c0909341SAndroid Build Coastguard Worker
641*c0909341SAndroid Build Coastguard Worker        blr             x5
642*c0909341SAndroid Build Coastguard Worker
643*c0909341SAndroid Build Coastguard Worker        ld1             {v0.s}[0], [x0], x1
644*c0909341SAndroid Build Coastguard Worker        ld1             {v0.s}[1], [x0], x1
645*c0909341SAndroid Build Coastguard Worker        ins             v16.d[1], v17.d[0] // v16 = residual rows 0 and 1
646*c0909341SAndroid Build Coastguard Worker        ins             v18.d[1], v19.d[0] // v18 = residual rows 2 and 3
647*c0909341SAndroid Build Coastguard Worker        ld1             {v1.s}[0], [x0], x1
648*c0909341SAndroid Build Coastguard Worker        ld1             {v1.s}[1], [x0], x1
649*c0909341SAndroid Build Coastguard Worker        srshr           v16.8h,  v16.8h,  #4 // final rounding shift of the residual
650*c0909341SAndroid Build Coastguard Worker        srshr           v18.8h,  v18.8h,  #4
651*c0909341SAndroid Build Coastguard Worker
652*c0909341SAndroid Build Coastguard WorkerL(itx_4x4_end):
653*c0909341SAndroid Build Coastguard Worker        sub             x0,  x0,  x1, lsl #2 // rewind to the first row for the stores
654*c0909341SAndroid Build Coastguard Worker        uaddw           v16.8h,  v16.8h,  v0.8b
655*c0909341SAndroid Build Coastguard Worker        sqxtun          v0.8b,   v16.8h // saturate to unsigned 8 bit
656*c0909341SAndroid Build Coastguard Worker        uaddw           v18.8h,  v18.8h,  v1.8b
657*c0909341SAndroid Build Coastguard Worker        st1             {v0.s}[0], [x0], x1
658*c0909341SAndroid Build Coastguard Worker        sqxtun          v1.8b,   v18.8h
659*c0909341SAndroid Build Coastguard Worker        st1             {v0.s}[1], [x0], x1
660*c0909341SAndroid Build Coastguard Worker        st1             {v1.s}[0], [x0], x1
661*c0909341SAndroid Build Coastguard Worker        st1             {v1.s}[1], [x0], x1
662*c0909341SAndroid Build Coastguard Worker
663*c0909341SAndroid Build Coastguard Worker        ret             x15
664*c0909341SAndroid Build Coastguard Workerendfunc
665*c0909341SAndroid Build Coastguard Worker
// def_fn_4x4: emit the exported entry point
// inv_txfm_add_<txfm1>_<txfm2>_4x4_8bpc_neon.
// The entry point saves the return address in x15, loads the addresses of the
// two 1-D transforms (inv_<txfm>_4h_x4_neon) into x4/x5 and tail-branches to
// inv_txfm_add_4x4_neon, which returns through x15.
// For dct_dct only, a shortcut is emitted for w3 == 0: just the first
// coefficient is read, broadcast and scaled, and control goes straight to the
// shared tail L(itx_4x4_end). w3 is presumably the end-of-block index supplied
// by the C caller - TODO confirm.
666*c0909341SAndroid Build Coastguard Worker.macro def_fn_4x4 txfm1, txfm2
667*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_\txfm1\()_\txfm2\()_4x4_8bpc_neon, export=1
668*c0909341SAndroid Build Coastguard Worker        mov             x15, x30
669*c0909341SAndroid Build Coastguard Worker
670*c0909341SAndroid Build Coastguard Worker.ifc \txfm1\()_\txfm2, dct_dct
671*c0909341SAndroid Build Coastguard Worker        cbnz            w3,  1f // nonzero: take the general path below
672*c0909341SAndroid Build Coastguard Worker        mov             w16, #2896*8 // sqrdmulh by this scales by 2896/4096
673*c0909341SAndroid Build Coastguard Worker        ld1r            {v16.8h}, [x2] // broadcast the first coefficient to all lanes
674*c0909341SAndroid Build Coastguard Worker        dup             v4.8h,   w16
675*c0909341SAndroid Build Coastguard Worker        strh            wzr, [x2] // clear the one coefficient that was read
676*c0909341SAndroid Build Coastguard Worker        sqrdmulh        v16.8h,  v16.8h,  v4.h[0] // scaled once ...
677*c0909341SAndroid Build Coastguard Worker        ld1             {v0.s}[0], [x0], x1
678*c0909341SAndroid Build Coastguard Worker        sqrdmulh        v20.8h,  v16.8h,  v4.h[0] // ... and a second time
679*c0909341SAndroid Build Coastguard Worker        ld1             {v0.s}[1], [x0], x1
680*c0909341SAndroid Build Coastguard Worker        srshr           v16.8h,  v20.8h,  #4 // same value for rows 0-1 ...
681*c0909341SAndroid Build Coastguard Worker        ld1             {v1.s}[0], [x0], x1
682*c0909341SAndroid Build Coastguard Worker        srshr           v18.8h,  v20.8h,  #4 // ... and rows 2-3
683*c0909341SAndroid Build Coastguard Worker        ld1             {v1.s}[1], [x0], x1
684*c0909341SAndroid Build Coastguard Worker        b               L(itx_4x4_end)
685*c0909341SAndroid Build Coastguard Worker1:
686*c0909341SAndroid Build Coastguard Worker.endif
687*c0909341SAndroid Build Coastguard Worker        adr             x4,  inv_\txfm1\()_4h_x4_neon
688*c0909341SAndroid Build Coastguard Worker        adr             x5,  inv_\txfm2\()_4h_x4_neon
689*c0909341SAndroid Build Coastguard Worker        b               inv_txfm_add_4x4_neon
690*c0909341SAndroid Build Coastguard Workerendfunc
691*c0909341SAndroid Build Coastguard Worker.endm
692*c0909341SAndroid Build Coastguard Worker
// Instantiate the 4x4 entry points: all 16 ordered combinations of
// dct / adst / flipadst / identity for the first and second pass.
// Only the dct, dct instance gets the extra w3 == 0 shortcut (see def_fn_4x4).
693*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 dct, dct
694*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 identity, identity
695*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 dct, adst
696*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 dct, flipadst
697*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 dct, identity
698*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 adst, dct
699*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 adst, adst
700*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 adst, flipadst
701*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 flipadst, dct
702*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 flipadst, adst
703*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 flipadst, flipadst
704*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 identity, dct
705*c0909341SAndroid Build Coastguard Worker
706*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 adst, identity
707*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 flipadst, identity
708*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 identity, adst
709*c0909341SAndroid Build Coastguard Workerdef_fn_4x4 identity, flipadst
710*c0909341SAndroid Build Coastguard Worker
// idct_8: 8-point transform butterfly applied in place to \r0-\r7.
//   \r0-\r7 = register names without arrangement suffix
//   \sz     = halfword arrangement suffix (.4h or .8h in the callers here)
//   \szb    = matching byte arrangement (.8b or .16b), used only for the final
//             whole-register mov
//   v0      = coefficients; lanes h[0]-h[7] are read in total (h[0], h[2], h[3]
//             by the nested idct_4, h[4]-h[7] by the odd half below)
// The even-indexed registers go through idct_4; the odd-indexed ones are
// rotated with v0.h[4..7] and then combined with the even half.
// smull_smlal / smull_smlsl / sqrshrn_sz are helper macros defined outside this
// chunk. Clobbers v2-v7.
711*c0909341SAndroid Build Coastguard Worker.macro idct_8 r0, r1, r2, r3, r4, r5, r6, r7, sz, szb
712*c0909341SAndroid Build Coastguard Worker        idct_4          \r0, \r2, \r4, \r6, \sz
713*c0909341SAndroid Build Coastguard Worker
714*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v2,  v3,  \r1, \r7, v0.h[4], v0.h[5], \sz // -> t4a
715*c0909341SAndroid Build Coastguard Worker        smull_smlal     v4,  v5,  \r1, \r7, v0.h[5], v0.h[4], \sz // -> t7a
716*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v6,  v7,  \r5, \r3, v0.h[6], v0.h[7], \sz // -> t5a
717*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      \r1, v2,  v3,  #12, \sz                   // t4a
718*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      \r7, v4,  v5,  #12, \sz                   // t7a
719*c0909341SAndroid Build Coastguard Worker        smull_smlal     v2,  v3,  \r5, \r3, v0.h[7], v0.h[6], \sz // -> t6a
720*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      \r3, v6,  v7,  #12, \sz                   // t5a
721*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      \r5, v2,  v3,  #12, \sz                   // t6a
722*c0909341SAndroid Build Coastguard Worker
723*c0909341SAndroid Build Coastguard Worker        sqadd           v2\sz,   \r1\sz,  \r3\sz // t4
724*c0909341SAndroid Build Coastguard Worker        sqsub           \r1\sz,  \r1\sz,  \r3\sz // t5a
725*c0909341SAndroid Build Coastguard Worker        sqadd           v3\sz,   \r7\sz,  \r5\sz // t7
726*c0909341SAndroid Build Coastguard Worker        sqsub           \r3\sz,  \r7\sz,  \r5\sz // t6a
727*c0909341SAndroid Build Coastguard Worker
728*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v4,  v5,  \r3, \r1, v0.h[0], v0.h[0], \sz // -> t5
729*c0909341SAndroid Build Coastguard Worker        smull_smlal     v6,  v7,  \r3, \r1, v0.h[0], v0.h[0], \sz // -> t6
730*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v4,  v4,  v5,  #12, \sz // t5
731*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v5,  v6,  v7,  #12, \sz // t6
732*c0909341SAndroid Build Coastguard Worker
        // Final stage: \r0, \r2, \r4, \r6 still hold the idct_4 results. Each
        // one is read by two outputs, so the write order below matters; out6
        // is parked in v6 because \r6 is still needed for out3 and out4.
733*c0909341SAndroid Build Coastguard Worker        sqsub           \r7\sz,  \r0\sz,  v3\sz // out7
734*c0909341SAndroid Build Coastguard Worker        sqadd           \r0\sz,  \r0\sz,  v3\sz // out0
735*c0909341SAndroid Build Coastguard Worker        sqadd           \r1\sz,  \r2\sz,  v5\sz // out1
736*c0909341SAndroid Build Coastguard Worker        sqsub           v6\sz,   \r2\sz,  v5\sz // out6
737*c0909341SAndroid Build Coastguard Worker        sqadd           \r2\sz,  \r4\sz,  v4\sz // out2
738*c0909341SAndroid Build Coastguard Worker        sqsub           \r5\sz,  \r4\sz,  v4\sz // out5
739*c0909341SAndroid Build Coastguard Worker        sqadd           \r3\sz,  \r6\sz,  v2\sz // out3
740*c0909341SAndroid Build Coastguard Worker        sqsub           \r4\sz,  \r6\sz,  v2\sz // out4
741*c0909341SAndroid Build Coastguard Worker        mov             \r6\szb, v6\szb         // out6
742*c0909341SAndroid Build Coastguard Worker.endm
743*c0909341SAndroid Build Coastguard Worker
// inv_dct_8h_x8_neon: run idct_8 on v16-v23 with eight 16-bit lanes per
// register (.8h). Results replace v16-v23.
// Loads eight halfwords from idct_coeffs into v0 first.
// Clobbers x16, v0 and the v2-v7 scratch registers used by idct_8.
744*c0909341SAndroid Build Coastguard Workerfunction inv_dct_8h_x8_neon, export=1
745*c0909341SAndroid Build Coastguard Worker        movrel          x16, idct_coeffs
746*c0909341SAndroid Build Coastguard Worker        ld1             {v0.8h}, [x16]
747*c0909341SAndroid Build Coastguard Worker        idct_8          v16, v17, v18, v19, v20, v21, v22, v23, .8h, .16b
748*c0909341SAndroid Build Coastguard Worker        ret
749*c0909341SAndroid Build Coastguard Workerendfunc
750*c0909341SAndroid Build Coastguard Worker
// inv_dct_4h_x8_neon: run idct_8 on v16-v23 with four 16-bit lanes per
// register (.4h). Results replace the low 64 bits of v16-v23.
// Loads eight halfwords from idct_coeffs into v0 first.
// Clobbers x16, v0 and the v2-v7 scratch registers used by idct_8.
751*c0909341SAndroid Build Coastguard Workerfunction inv_dct_4h_x8_neon, export=1
752*c0909341SAndroid Build Coastguard Worker        movrel          x16, idct_coeffs
753*c0909341SAndroid Build Coastguard Worker        ld1             {v0.8h}, [x16]
754*c0909341SAndroid Build Coastguard Worker        idct_8          v16, v17, v18, v19, v20, v21, v22, v23, .4h, .8b
755*c0909341SAndroid Build Coastguard Worker        ret
756*c0909341SAndroid Build Coastguard Workerendfunc
757*c0909341SAndroid Build Coastguard Worker
// iadst_8: 8-point inverse ADST.
//
// Parameters:
//   o0-o7  names of the registers that receive out0-out7. The callers in this
//          file pass v16-v23 either in ascending order (adst) or in
//          descending order (flipadst).
//   sz     lane arrangement suffix (.8h or .4h in the callers in this file).
//
// Inputs are always read from v16-v23, whatever o0-o7 are.
// Coefficients: sixteen halfwords from iadst8_coeffs are loaded into v0/v1.
// Overwrites x16, v0-v7 and v16-v23.
//
// smull_smlal, smull_smlsl and sqrshrn_sz are helper macros defined outside
// this chunk. From their names they presumably form a widening two-term
// multiply-add / multiply-subtract into a register pair, and a saturating
// rounding narrowing shift (by 12 here) - confirm against their definitions.
//
// Because the output names can map onto v18-v21 in either order (see the
// "(vNN or vMM)" notes below), out3 and out5 are first formed in v2/v3 and
// only negated into their destinations at the very end.
758*c0909341SAndroid Build Coastguard Worker.macro iadst_8 o0, o1, o2, o3, o4, o5, o6, o7, sz
759*c0909341SAndroid Build Coastguard Worker        movrel          x16, iadst8_coeffs
760*c0909341SAndroid Build Coastguard Worker        ld1             {v0.8h, v1.8h}, [x16]
761*c0909341SAndroid Build Coastguard Worker
        // First stage: input pairs (v23,v16), (v21,v18), (v19,v20), (v17,v22)
        // are combined with coefficient pairs from v0; results replace the
        // inputs in v16-v23 as t0a-t7a.
762*c0909341SAndroid Build Coastguard Worker        smull_smlal     v2,  v3,  v23, v16, v0.h[0], v0.h[1], \sz
763*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v4,  v5,  v23, v16, v0.h[1], v0.h[0], \sz
764*c0909341SAndroid Build Coastguard Worker        smull_smlal     v6,  v7,  v21, v18, v0.h[2], v0.h[3], \sz
765*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v16, v2,  v3,  #12, \sz  // t0a
766*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v23, v4,  v5,  #12, \sz  // t1a
767*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v2,  v3,  v21, v18, v0.h[3], v0.h[2], \sz
768*c0909341SAndroid Build Coastguard Worker        smull_smlal     v4,  v5,  v19, v20, v0.h[4], v0.h[5], \sz
769*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v18, v6,  v7,  #12, \sz  // t2a
770*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v21, v2,  v3,  #12, \sz  // t3a
771*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v6,  v7,  v19, v20, v0.h[5], v0.h[4], \sz
772*c0909341SAndroid Build Coastguard Worker        smull_smlal     v2,  v3,  v17, v22, v0.h[6], v0.h[7], \sz
773*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v20, v4,  v5,  #12, \sz  // t4a
774*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v19, v6,  v7,  #12, \sz  // t5a
775*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v4,  v5,  v17, v22, v0.h[7], v0.h[6], \sz
776*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v22, v2,  v3,  #12, \sz  // t6a
777*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v17, v4,  v5,  #12, \sz  // t7a
778*c0909341SAndroid Build Coastguard Worker
        // Saturating butterflies; t0-t7 end up in v2-v7, v18 and v19.
779*c0909341SAndroid Build Coastguard Worker        sqadd           v2\sz,   v16\sz,  v20\sz // t0
780*c0909341SAndroid Build Coastguard Worker        sqsub           v3\sz,   v16\sz,  v20\sz // t4
781*c0909341SAndroid Build Coastguard Worker        sqadd           v4\sz,   v23\sz,  v19\sz // t1
782*c0909341SAndroid Build Coastguard Worker        sqsub           v5\sz,   v23\sz,  v19\sz // t5
783*c0909341SAndroid Build Coastguard Worker        sqadd           v6\sz,   v18\sz,  v22\sz // t2
784*c0909341SAndroid Build Coastguard Worker        sqsub           v7\sz,   v18\sz,  v22\sz // t6
785*c0909341SAndroid Build Coastguard Worker        sqadd           v18\sz,  v21\sz,  v17\sz // t3
786*c0909341SAndroid Build Coastguard Worker        sqsub           v19\sz,  v21\sz,  v17\sz // t7
787*c0909341SAndroid Build Coastguard Worker
        // Second stage on t4-t7 using v1.h[2] and v1.h[3]; v16/v17 and
        // v20-v23 serve as 32-bit accumulators here.
788*c0909341SAndroid Build Coastguard Worker        smull_smlal     v16, v17, v3,  v5,  v1.h[3], v1.h[2], \sz
789*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v20, v21, v3,  v5,  v1.h[2], v1.h[3], \sz
790*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v22, v23, v19, v7,  v1.h[3], v1.h[2], \sz
791*c0909341SAndroid Build Coastguard Worker
792*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v3,  v16, v17, #12, \sz  // t4a
793*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v5,  v20, v21, #12, \sz  // t5a
794*c0909341SAndroid Build Coastguard Worker
795*c0909341SAndroid Build Coastguard Worker        smull_smlal     v16, v17, v19, v7,  v1.h[2], v1.h[3], \sz
796*c0909341SAndroid Build Coastguard Worker
797*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v7,  v22, v23, #12, \sz  // t6a
798*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v19, v16, v17, #12, \sz  // t7a
799*c0909341SAndroid Build Coastguard Worker
        // out0/out7 and out1/out6 need no further multiply; out7 and out1
        // are negated with saturation.
800*c0909341SAndroid Build Coastguard Worker        sqadd           \o0\()\sz, v2\sz, v6\sz  // out0
801*c0909341SAndroid Build Coastguard Worker        sqsub           v2\sz,     v2\sz, v6\sz  // t2
802*c0909341SAndroid Build Coastguard Worker        sqadd           \o7\()\sz, v4\sz, v18\sz // out7
803*c0909341SAndroid Build Coastguard Worker        sqsub           v4\sz,     v4\sz, v18\sz // t3
804*c0909341SAndroid Build Coastguard Worker        sqneg           \o7\()\sz, \o7\()\sz     // out7
805*c0909341SAndroid Build Coastguard Worker
806*c0909341SAndroid Build Coastguard Worker        sqadd           \o1\()\sz, v3\sz, v7\sz  // out1
807*c0909341SAndroid Build Coastguard Worker        sqsub           v3\sz,     v3\sz, v7\sz  // t6
808*c0909341SAndroid Build Coastguard Worker        sqadd           \o6\()\sz, v5\sz, v19\sz // out6
809*c0909341SAndroid Build Coastguard Worker        sqsub           v5\sz,     v5\sz, v19\sz // t7
810*c0909341SAndroid Build Coastguard Worker        sqneg           \o1\()\sz, \o1\()\sz     // out1
811*c0909341SAndroid Build Coastguard Worker
        // Final stage: sum and difference of (t2,t3) and (t6,t7), each scaled
        // by v1.h[0]. Accumulators use v18-v21, which may also be output
        // registers, so narrowing into o2/o4 is delayed until every
        // accumulator that shares those registers has been consumed.
812*c0909341SAndroid Build Coastguard Worker        smull_smlal     v18, v19, v2,  v4,  v1.h[0], v1.h[0], \sz // -> out3 (v19 or v20)
813*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v6,  v7,  v2,  v4,  v1.h[0], v1.h[0], \sz // -> out4 (v20 or v19)
814*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v20, v21, v3,  v5,  v1.h[0], v1.h[0], \sz // -> out5 (v21 or v18)
815*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v2,  v18, v19, #12, \sz // out3
816*c0909341SAndroid Build Coastguard Worker        smull_smlal     v18, v19, v3,  v5,  v1.h[0], v1.h[0], \sz // -> out2 (v18 or v21)
817*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v3,  v20, v21, #12, \sz // out5
818*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      \o2, v18, v19, #12, \sz // out2 (v18 or v21)
819*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      \o4, v6,  v7,  #12, \sz // out4 (v20 or v19)
820*c0909341SAndroid Build Coastguard Worker
821*c0909341SAndroid Build Coastguard Worker        sqneg           \o3\()\sz, v2\sz     // out3
822*c0909341SAndroid Build Coastguard Worker        sqneg           \o5\()\sz, v3\sz     // out5
823*c0909341SAndroid Build Coastguard Worker.endm
824*c0909341SAndroid Build Coastguard Worker
// 8-point inverse ADST on v16-v23 with .8h lanes: out0-out7 are written to
// v16-v23 in ascending order. iadst_8 loads its own coefficients and
// overwrites x16 and v0-v7.
825*c0909341SAndroid Build Coastguard Workerfunction inv_adst_8h_x8_neon, export=1
826*c0909341SAndroid Build Coastguard Worker        iadst_8         v16, v17, v18, v19, v20, v21, v22, v23, .8h
827*c0909341SAndroid Build Coastguard Worker        ret
828*c0909341SAndroid Build Coastguard Workerendfunc
829*c0909341SAndroid Build Coastguard Worker
// Flipped 8-point inverse ADST with .8h lanes: same computation as
// inv_adst_8h_x8_neon, but the output register list is reversed, so out0
// lands in v23 and out7 in v16.
830*c0909341SAndroid Build Coastguard Workerfunction inv_flipadst_8h_x8_neon, export=1
831*c0909341SAndroid Build Coastguard Worker        iadst_8         v23, v22, v21, v20, v19, v18, v17, v16, .8h
832*c0909341SAndroid Build Coastguard Worker        ret
833*c0909341SAndroid Build Coastguard Workerendfunc
834*c0909341SAndroid Build Coastguard Worker
// 8-point inverse ADST on v16-v23 with .4h lanes (four 16-bit lanes per
// register); out0-out7 are written to v16-v23 in ascending order.
835*c0909341SAndroid Build Coastguard Workerfunction inv_adst_4h_x8_neon, export=1
836*c0909341SAndroid Build Coastguard Worker        iadst_8         v16, v17, v18, v19, v20, v21, v22, v23, .4h
837*c0909341SAndroid Build Coastguard Worker        ret
838*c0909341SAndroid Build Coastguard Workerendfunc
839*c0909341SAndroid Build Coastguard Worker
// Flipped 8-point inverse ADST with .4h lanes: the output register list is
// reversed, so out0 lands in v23 and out7 in v16.
840*c0909341SAndroid Build Coastguard Workerfunction inv_flipadst_4h_x8_neon, export=1
841*c0909341SAndroid Build Coastguard Worker        iadst_8         v23, v22, v21, v20, v19, v18, v17, v16, .4h
842*c0909341SAndroid Build Coastguard Worker        ret
843*c0909341SAndroid Build Coastguard Workerendfunc
844*c0909341SAndroid Build Coastguard Worker
// 8-point identity transform with .8h lanes: every signed 16-bit lane of
// v16-v23 is doubled in place with saturation (sqshl #1). No other register
// is touched.
845*c0909341SAndroid Build Coastguard Workerfunction inv_identity_8h_x8_neon, export=1
846*c0909341SAndroid Build Coastguard Worker        sqshl           v16.8h,  v16.8h,  #1
847*c0909341SAndroid Build Coastguard Worker        sqshl           v17.8h,  v17.8h,  #1
848*c0909341SAndroid Build Coastguard Worker        sqshl           v18.8h,  v18.8h,  #1
849*c0909341SAndroid Build Coastguard Worker        sqshl           v19.8h,  v19.8h,  #1
850*c0909341SAndroid Build Coastguard Worker        sqshl           v20.8h,  v20.8h,  #1
851*c0909341SAndroid Build Coastguard Worker        sqshl           v21.8h,  v21.8h,  #1
852*c0909341SAndroid Build Coastguard Worker        sqshl           v22.8h,  v22.8h,  #1
853*c0909341SAndroid Build Coastguard Worker        sqshl           v23.8h,  v23.8h,  #1
854*c0909341SAndroid Build Coastguard Worker        ret
855*c0909341SAndroid Build Coastguard Workerendfunc
856*c0909341SAndroid Build Coastguard Worker
// 8-point identity transform with .4h lanes: the four low signed 16-bit lanes
// of each of v16-v23 are doubled in place with saturation (sqshl #1).
857*c0909341SAndroid Build Coastguard Workerfunction inv_identity_4h_x8_neon, export=1
858*c0909341SAndroid Build Coastguard Worker        sqshl           v16.4h,  v16.4h,  #1
859*c0909341SAndroid Build Coastguard Worker        sqshl           v17.4h,  v17.4h,  #1
860*c0909341SAndroid Build Coastguard Worker        sqshl           v18.4h,  v18.4h,  #1
861*c0909341SAndroid Build Coastguard Worker        sqshl           v19.4h,  v19.4h,  #1
862*c0909341SAndroid Build Coastguard Worker        sqshl           v20.4h,  v20.4h,  #1
863*c0909341SAndroid Build Coastguard Worker        sqshl           v21.4h,  v21.4h,  #1
864*c0909341SAndroid Build Coastguard Worker        sqshl           v22.4h,  v22.4h,  #1
865*c0909341SAndroid Build Coastguard Worker        sqshl           v23.4h,  v23.4h,  #1
866*c0909341SAndroid Build Coastguard Worker        ret
867*c0909341SAndroid Build Coastguard Workerendfunc
868*c0909341SAndroid Build Coastguard Worker
// def_fn_8x8_base: emits the shared 8x8 body inv_txfm_<variant>add_8x8_neon.
//
// Parameter:
//   variant  empty for the generic body, or "identity_" for the body used
//            when the first pass is the identity transform.
//
// Register contract, as used by the def_fn_8x8 entry points in this file:
//   x2   coefficient buffer; 128 bytes are loaded into v16-v23 and the
//        buffer is overwritten with zeros (x2 is advanced by 64 on the way)
//   x4   address of the first-pass transform (generic variant only)
//   x5   address of the second-pass transform
//   x15  return address saved by the entry point; used by the final ret
//   x0, x7  passed to load_add_store_8x8, which is defined outside this chunk
//
// The identity_ variant emits only the load/clear prologue and branches to
// L(itx_8x8_epilog), a label emitted by the generic variant. Both variants
// must therefore be instantiated.
869*c0909341SAndroid Build Coastguard Worker.macro def_fn_8x8_base variant
870*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_\variant\()add_8x8_neon
871*c0909341SAndroid Build Coastguard Worker        movi            v28.8h,  #0
872*c0909341SAndroid Build Coastguard Worker        movi            v29.8h,  #0
873*c0909341SAndroid Build Coastguard Worker        movi            v30.8h,  #0
874*c0909341SAndroid Build Coastguard Worker        movi            v31.8h,  #0
        // Load all 64 coefficients and clear the buffer behind each load.
875*c0909341SAndroid Build Coastguard Worker        ld1             {v16.8h,v17.8h,v18.8h,v19.8h}, [x2]
876*c0909341SAndroid Build Coastguard Worker        st1             {v28.8h,v29.8h,v30.8h,v31.8h}, [x2], #64
877*c0909341SAndroid Build Coastguard Worker        ld1             {v20.8h,v21.8h,v22.8h,v23.8h}, [x2]
878*c0909341SAndroid Build Coastguard Worker        st1             {v28.8h,v29.8h,v30.8h,v31.8h}, [x2]
879*c0909341SAndroid Build Coastguard Worker
880*c0909341SAndroid Build Coastguard Worker.ifc \variant, identity_
881*c0909341SAndroid Build Coastguard Worker        // The identity shl #1 and downshift srshr #1 cancel out
882*c0909341SAndroid Build Coastguard Worker
883*c0909341SAndroid Build Coastguard Worker        b               L(itx_8x8_epilog)
884*c0909341SAndroid Build Coastguard Worker.else
        // First pass through the function pointer in x4, then a rounding
        // shift right by one on every row.
885*c0909341SAndroid Build Coastguard Worker        blr             x4
886*c0909341SAndroid Build Coastguard Worker
887*c0909341SAndroid Build Coastguard Worker        srshr           v16.8h,  v16.8h,  #1
888*c0909341SAndroid Build Coastguard Worker        srshr           v17.8h,  v17.8h,  #1
889*c0909341SAndroid Build Coastguard Worker        srshr           v18.8h,  v18.8h,  #1
890*c0909341SAndroid Build Coastguard Worker        srshr           v19.8h,  v19.8h,  #1
891*c0909341SAndroid Build Coastguard Worker        srshr           v20.8h,  v20.8h,  #1
892*c0909341SAndroid Build Coastguard Worker        srshr           v21.8h,  v21.8h,  #1
893*c0909341SAndroid Build Coastguard Worker        srshr           v22.8h,  v22.8h,  #1
894*c0909341SAndroid Build Coastguard Worker        srshr           v23.8h,  v23.8h,  #1
895*c0909341SAndroid Build Coastguard Worker
        // Shared tail, also the branch target of the identity_ variant:
        // transpose (v24/v25 as scratch), second pass through x5, then add
        // the result to the destination and return to the saved address.
896*c0909341SAndroid Build Coastguard WorkerL(itx_8x8_epilog):
897*c0909341SAndroid Build Coastguard Worker        transpose_8x8h  v16, v17, v18, v19, v20, v21, v22, v23, v24, v25
898*c0909341SAndroid Build Coastguard Worker
899*c0909341SAndroid Build Coastguard Worker        blr             x5
900*c0909341SAndroid Build Coastguard Worker
901*c0909341SAndroid Build Coastguard Worker        load_add_store_8x8 x0, x7
902*c0909341SAndroid Build Coastguard Worker        ret             x15
903*c0909341SAndroid Build Coastguard Worker.endif
904*c0909341SAndroid Build Coastguard Workerendfunc
905*c0909341SAndroid Build Coastguard Worker.endm
906*c0909341SAndroid Build Coastguard Worker
// Emit both shared 8x8 bodies. The identity_ body comes first and branches
// forward to the L(itx_8x8_epilog) label emitted by the generic body.
907*c0909341SAndroid Build Coastguard Workerdef_fn_8x8_base identity_
908*c0909341SAndroid Build Coastguard Workerdef_fn_8x8_base
909*c0909341SAndroid Build Coastguard Worker
// def_fn_8x8: emits the exported entry point
// inv_txfm_add_<txfm1>_<txfm2>_8x8_8bpc_neon.
//
// Parameters:
//   txfm1  first-pass transform name (dct, adst, flipadst or identity)
//   txfm2  second-pass transform name
//
// The entry point saves the link register in x15, because the shared body
// uses blr and returns with "ret x15". It puts the second-pass function in
// x5 and, unless txfm1 is identity, the first-pass function in x4, then
// tail-branches to the matching shared body.
// For dct_dct it first expands idct_dc 8, 8, 1; that macro is defined outside
// this chunk (presumably a DC-only fast path - confirm at its definition).
910*c0909341SAndroid Build Coastguard Worker.macro def_fn_8x8 txfm1, txfm2
911*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_\txfm1\()_\txfm2\()_8x8_8bpc_neon, export=1
912*c0909341SAndroid Build Coastguard Worker        mov             x15, x30
913*c0909341SAndroid Build Coastguard Worker
914*c0909341SAndroid Build Coastguard Worker.ifc \txfm1\()_\txfm2, dct_dct
915*c0909341SAndroid Build Coastguard Worker        idct_dc         8,   8,   1
916*c0909341SAndroid Build Coastguard Worker.endif
917*c0909341SAndroid Build Coastguard Worker        adr             x5,  inv_\txfm2\()_8h_x8_neon
918*c0909341SAndroid Build Coastguard Worker.ifc \txfm1, identity
        // The identity body never calls through x4, so it is left unset.
919*c0909341SAndroid Build Coastguard Worker        b               inv_txfm_identity_add_8x8_neon
920*c0909341SAndroid Build Coastguard Worker.else
921*c0909341SAndroid Build Coastguard Worker        adr             x4,  inv_\txfm1\()_8h_x8_neon
922*c0909341SAndroid Build Coastguard Worker        b               inv_txfm_add_8x8_neon
923*c0909341SAndroid Build Coastguard Worker.endif
924*c0909341SAndroid Build Coastguard Workerendfunc
925*c0909341SAndroid Build Coastguard Worker.endm
926*c0909341SAndroid Build Coastguard Worker
// Exported 8x8 entry points: all sixteen (first pass, second pass)
// combinations of dct, adst, flipadst and identity.
927*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 dct, dct
928*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 identity, identity
929*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 dct, adst
930*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 dct, flipadst
931*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 dct, identity
932*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 adst, dct
933*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 adst, adst
934*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 adst, flipadst
935*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 flipadst, dct
936*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 flipadst, adst
937*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 flipadst, flipadst
938*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 identity, dct
939*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 adst, identity
940*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 flipadst, identity
941*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 identity, adst
942*c0909341SAndroid Build Coastguard Workerdef_fn_8x8 identity, flipadst
943*c0909341SAndroid Build Coastguard Worker
// Shared body for the 8x4 entry points generated by def_fn_48.
//
// Register contract, as set up by def_fn_48:
//   x2   coefficient buffer; 64 bytes are loaded into the low halves of
//        v16-v23 and the buffer is overwritten with zeros (x2 advances by 32)
//   x4   first-pass transform, called on eight .4h rows
//   x5   second-pass transform, called on four .8h rows (v16-v19)
//   x15  return address saved by the entry point
//   x0, x7  passed to load_add_store_8x4, which is defined outside this chunk
// Overwrites w16, v0, v4-v7, v30, v31 and whatever the called transforms use.
944*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_8x4_neon
945*c0909341SAndroid Build Coastguard Worker        movi            v30.8h,  #0
946*c0909341SAndroid Build Coastguard Worker        movi            v31.8h,  #0
        // Scale factor for scale_input. 2896 is presumably 4096/sqrt(2), the
        // rectangular-block scale - confirm against scale_input, which is
        // defined outside this chunk.
947*c0909341SAndroid Build Coastguard Worker        mov             w16, #2896*8
948*c0909341SAndroid Build Coastguard Worker        dup             v0.4h,   w16
949*c0909341SAndroid Build Coastguard Worker        ld1             {v16.4h,v17.4h,v18.4h,v19.4h}, [x2]
950*c0909341SAndroid Build Coastguard Worker        st1             {v30.8h,v31.8h}, [x2], #32
951*c0909341SAndroid Build Coastguard Worker        ld1             {v20.4h,v21.4h,v22.4h,v23.4h}, [x2]
952*c0909341SAndroid Build Coastguard Worker        st1             {v30.8h,v31.8h}, [x2]
953*c0909341SAndroid Build Coastguard Worker
954*c0909341SAndroid Build Coastguard Worker        scale_input     .4h, v0.h[0], v16, v17, v18, v19, v20, v21, v22, v23
955*c0909341SAndroid Build Coastguard Worker
956*c0909341SAndroid Build Coastguard Worker        blr             x4
957*c0909341SAndroid Build Coastguard Worker
        // Transpose the two 4x4 halves separately, then pack the low halves
        // of v20-v23 into the high halves of v16-v19 to form four .8h rows.
958*c0909341SAndroid Build Coastguard Worker        transpose_4x4h  v16, v17, v18, v19, v4,  v5,  v6,  v7
959*c0909341SAndroid Build Coastguard Worker        transpose_4x4h  v20, v21, v22, v23, v4,  v5,  v6,  v7
960*c0909341SAndroid Build Coastguard Worker        ins             v16.d[1], v20.d[0]
961*c0909341SAndroid Build Coastguard Worker        ins             v17.d[1], v21.d[0]
962*c0909341SAndroid Build Coastguard Worker        ins             v18.d[1], v22.d[0]
963*c0909341SAndroid Build Coastguard Worker        ins             v19.d[1], v23.d[0]
964*c0909341SAndroid Build Coastguard Worker
965*c0909341SAndroid Build Coastguard Worker        blr             x5
966*c0909341SAndroid Build Coastguard Worker
967*c0909341SAndroid Build Coastguard Worker        load_add_store_8x4 x0, x7
968*c0909341SAndroid Build Coastguard Worker        ret             x15
969*c0909341SAndroid Build Coastguard Workerendfunc
970*c0909341SAndroid Build Coastguard Worker
// Shared body for the 4x8 entry points generated by def_fn_48.
//
// Register contract, as set up by def_fn_48:
//   x2   coefficient buffer; 64 bytes are loaded into v16-v19 and the
//        buffer is overwritten with zeros (x2 itself is not advanced)
//   x4   first-pass transform, called on four .8h rows (v16-v19)
//   x5   second-pass transform, called on eight .4h rows (v16-v23)
//   x15  return address saved by the entry point
//   x0, x7  passed to load_add_store_4x8, which is defined outside this chunk
// Overwrites w16, v0, v4-v7, v28-v31 and whatever the called transforms use.
971*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_4x8_neon
972*c0909341SAndroid Build Coastguard Worker        movi            v28.8h,  #0
973*c0909341SAndroid Build Coastguard Worker        movi            v29.8h,  #0
974*c0909341SAndroid Build Coastguard Worker        movi            v30.8h,  #0
975*c0909341SAndroid Build Coastguard Worker        movi            v31.8h,  #0
        // Scale factor for scale_input. 2896 is presumably 4096/sqrt(2), the
        // rectangular-block scale - confirm against scale_input, which is
        // defined outside this chunk.
976*c0909341SAndroid Build Coastguard Worker        mov             w16, #2896*8
977*c0909341SAndroid Build Coastguard Worker        dup             v0.4h,   w16
978*c0909341SAndroid Build Coastguard Worker        ld1             {v16.8h,v17.8h,v18.8h,v19.8h}, [x2]
979*c0909341SAndroid Build Coastguard Worker        st1             {v28.8h,v29.8h,v30.8h,v31.8h}, [x2]
980*c0909341SAndroid Build Coastguard Worker
981*c0909341SAndroid Build Coastguard Worker        scale_input     .8h, v0.h[0], v16, v17, v18, v19
982*c0909341SAndroid Build Coastguard Worker
983*c0909341SAndroid Build Coastguard Worker        blr             x4
984*c0909341SAndroid Build Coastguard Worker
        // After the transpose, split each .8h register: the high halves of
        // v16-v19 move to the low halves of v20-v23, giving eight .4h rows.
985*c0909341SAndroid Build Coastguard Worker        transpose_4x8h  v16, v17, v18, v19, v4,  v5,  v6,  v7
986*c0909341SAndroid Build Coastguard Worker        ins             v20.d[0], v16.d[1]
987*c0909341SAndroid Build Coastguard Worker        ins             v21.d[0], v17.d[1]
988*c0909341SAndroid Build Coastguard Worker        ins             v22.d[0], v18.d[1]
989*c0909341SAndroid Build Coastguard Worker        ins             v23.d[0], v19.d[1]
990*c0909341SAndroid Build Coastguard Worker
991*c0909341SAndroid Build Coastguard Worker        blr             x5
992*c0909341SAndroid Build Coastguard Worker
993*c0909341SAndroid Build Coastguard Worker        load_add_store_4x8 x0, x7
994*c0909341SAndroid Build Coastguard Worker        ret             x15
995*c0909341SAndroid Build Coastguard Workerendfunc
996*c0909341SAndroid Build Coastguard Worker
// def_fn_48: emits the exported entry point
// inv_txfm_add_<txfm1>_<txfm2>_<w>x<h>_8bpc_neon for a 4x8 or 8x4 block.
//
// Parameters:
//   w, h   block width and height (instantiated as 4,8 and 8,4 in this file)
//   txfm1  first-pass transform name
//   txfm2  second-pass transform name
//
// The entry point saves the link register in x15, then selects:
//   x4 = inv_<txfm1>_<h>h_x<w>_neon   (first pass: h lanes, w-point transform)
//   x5 = inv_<txfm2>_<w>h_x<h>_neon   (second pass: w lanes, h-point transform)
// and tail-branches to inv_txfm_add_<w>x<h>_neon, which returns via x15.
// For dct_dct it first expands idct_dc w, h, 0; that macro is defined outside
// this chunk.
997*c0909341SAndroid Build Coastguard Worker.macro def_fn_48 w, h, txfm1, txfm2
998*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_\txfm1\()_\txfm2\()_\w\()x\h\()_8bpc_neon, export=1
999*c0909341SAndroid Build Coastguard Worker        mov             x15, x30
1000*c0909341SAndroid Build Coastguard Worker
1001*c0909341SAndroid Build Coastguard Worker.ifc \txfm1\()_\txfm2, dct_dct
1002*c0909341SAndroid Build Coastguard Worker        idct_dc         \w,  \h,  0
1003*c0909341SAndroid Build Coastguard Worker.endif
1004*c0909341SAndroid Build Coastguard Worker        adr             x4,  inv_\txfm1\()_\h\()h_x\w\()_neon
1005*c0909341SAndroid Build Coastguard Worker        adr             x5,  inv_\txfm2\()_\w\()h_x\h\()_neon
1006*c0909341SAndroid Build Coastguard Worker        b               inv_txfm_add_\w\()x\h\()_neon
1007*c0909341SAndroid Build Coastguard Workerendfunc
1008*c0909341SAndroid Build Coastguard Worker.endm
1009*c0909341SAndroid Build Coastguard Worker
// def_fns_48: expands def_fn_48 for one block size (w x h) with all sixteen
// (first pass, second pass) combinations of dct, adst, flipadst and identity.
1010*c0909341SAndroid Build Coastguard Worker.macro def_fns_48 w, h
1011*c0909341SAndroid Build Coastguard Workerdef_fn_48 \w, \h, dct, dct
1012*c0909341SAndroid Build Coastguard Workerdef_fn_48 \w, \h, identity, identity
1013*c0909341SAndroid Build Coastguard Workerdef_fn_48 \w, \h, dct, adst
1014*c0909341SAndroid Build Coastguard Workerdef_fn_48 \w, \h, dct, flipadst
1015*c0909341SAndroid Build Coastguard Workerdef_fn_48 \w, \h, dct, identity
1016*c0909341SAndroid Build Coastguard Workerdef_fn_48 \w, \h, adst, dct
1017*c0909341SAndroid Build Coastguard Workerdef_fn_48 \w, \h, adst, adst
1018*c0909341SAndroid Build Coastguard Workerdef_fn_48 \w, \h, adst, flipadst
1019*c0909341SAndroid Build Coastguard Workerdef_fn_48 \w, \h, flipadst, dct
1020*c0909341SAndroid Build Coastguard Workerdef_fn_48 \w, \h, flipadst, adst
1021*c0909341SAndroid Build Coastguard Workerdef_fn_48 \w, \h, flipadst, flipadst
1022*c0909341SAndroid Build Coastguard Workerdef_fn_48 \w, \h, identity, dct
1023*c0909341SAndroid Build Coastguard Workerdef_fn_48 \w, \h, adst, identity
1024*c0909341SAndroid Build Coastguard Workerdef_fn_48 \w, \h, flipadst, identity
1025*c0909341SAndroid Build Coastguard Workerdef_fn_48 \w, \h, identity, adst
1026*c0909341SAndroid Build Coastguard Workerdef_fn_48 \w, \h, identity, flipadst
1027*c0909341SAndroid Build Coastguard Worker.endm
1028*c0909341SAndroid Build Coastguard Worker
// Exported entry points for the 4x8 and 8x4 block sizes.
1029*c0909341SAndroid Build Coastguard Workerdef_fns_48 4, 8
1030*c0909341SAndroid Build Coastguard Workerdef_fns_48 8, 4
1031*c0909341SAndroid Build Coastguard Worker
1032*c0909341SAndroid Build Coastguard Worker
// idct_16: 16-point inverse DCT on v16-v31.
//
// Parameters:
//   sz   lane arrangement suffix for arithmetic (.8h or .4h in the callers
//        in this file)
//   szb  matching byte arrangement for whole-register moves (.16b or .8b)
//
// Inputs are the sixteen rows in v16-v31; out0-out15 are left in v16-v31 in
// that order. The caller must have loaded the coefficients into v0 and v1
// (the callers in this file load sixteen halfwords from idct_coeffs).
// v2-v7 are overwritten as scratch.
//
// The even rows (v16, v18, ..., v30) go through idct_8. The odd rows
// (v17, v19, ..., v31) are processed below and combined with the idct_8
// results in the final butterfly.
//
// smull_smlal, smull_smlsl and sqrshrn_sz are helper macros defined outside
// this chunk; their semantics are inferred from their names here.
1033*c0909341SAndroid Build Coastguard Worker.macro idct_16 sz, szb
1034*c0909341SAndroid Build Coastguard Worker        idct_8          v16, v18, v20, v22, v24, v26, v28, v30, \sz, \szb
1035*c0909341SAndroid Build Coastguard Worker
        // Odd part, first stage: coefficient pairs come from v1.
1036*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v2,  v3,  v17, v31, v1.h[0], v1.h[1], \sz // -> t8a
1037*c0909341SAndroid Build Coastguard Worker        smull_smlal     v4,  v5,  v17, v31, v1.h[1], v1.h[0], \sz // -> t15a
1038*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v6,  v7,  v25, v23, v1.h[2], v1.h[3], \sz // -> t9a
1039*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v17, v2,  v3,  #12, \sz                   // t8a
1040*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v31, v4,  v5,  #12, \sz                   // t15a
1041*c0909341SAndroid Build Coastguard Worker        smull_smlal     v2,  v3,  v25, v23, v1.h[3], v1.h[2], \sz // -> t14a
1042*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v4,  v5,  v21, v27, v1.h[4], v1.h[5], \sz // -> t10a
1043*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v23, v6,  v7,  #12, \sz                   // t9a
1044*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v25, v2,  v3,  #12, \sz                   // t14a
1045*c0909341SAndroid Build Coastguard Worker        smull_smlal     v6,  v7,  v21, v27, v1.h[5], v1.h[4], \sz // -> t13a
1046*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v2,  v3,  v29, v19, v1.h[6], v1.h[7], \sz // -> t11a
1047*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v21, v4,  v5,  #12, \sz                   // t10a
1048*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v27, v6,  v7,  #12, \sz                   // t13a
1049*c0909341SAndroid Build Coastguard Worker        smull_smlal     v4,  v5,  v29, v19, v1.h[7], v1.h[6], \sz // -> t12a
1050*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v19, v2,  v3,  #12, \sz                   // t11a
1051*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v29, v4,  v5,  #12, \sz                   // t12a
1052*c0909341SAndroid Build Coastguard Worker
1053*c0909341SAndroid Build Coastguard Worker        sqsub           v2\sz,   v17\sz,  v23\sz  // t9
1054*c0909341SAndroid Build Coastguard Worker        sqadd           v17\sz,  v17\sz,  v23\sz  // t8
1055*c0909341SAndroid Build Coastguard Worker        sqsub           v3\sz,   v31\sz,  v25\sz  // t14
1056*c0909341SAndroid Build Coastguard Worker        sqadd           v31\sz,  v31\sz,  v25\sz  // t15
1057*c0909341SAndroid Build Coastguard Worker        sqsub           v23\sz,  v19\sz,  v21\sz  // t10
1058*c0909341SAndroid Build Coastguard Worker        sqadd           v19\sz,  v19\sz,  v21\sz  // t11
1059*c0909341SAndroid Build Coastguard Worker        sqadd           v25\sz,  v29\sz,  v27\sz  // t12
1060*c0909341SAndroid Build Coastguard Worker        sqsub           v29\sz,  v29\sz,  v27\sz  // t13
1061*c0909341SAndroid Build Coastguard Worker
        // Second stage: uses v0.h[2] and v0.h[3].
1062*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v4,  v5,  v3,  v2,  v0.h[2], v0.h[3], \sz // -> t9a
1063*c0909341SAndroid Build Coastguard Worker        smull_smlal     v6,  v7,  v3,  v2,  v0.h[3], v0.h[2], \sz // -> t14a
1064*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v21, v4,  v5,  #12, \sz                   // t9a
1065*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v27, v6,  v7,  #12, \sz                   // t14a
1066*c0909341SAndroid Build Coastguard Worker
1067*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v4,  v5,  v29, v23, v0.h[2], v0.h[3], \sz // -> t13a
1068*c0909341SAndroid Build Coastguard Worker        smull_smlal     v6,  v7,  v29, v23, v0.h[3], v0.h[2], \sz // -> t10a
1069*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v29, v4,  v5,  #12, \sz                   // t13a
        // t10a is the negated sum: negate the 32-bit accumulators before
        // narrowing. v7 is only negated for .8h, presumably because the .4h
        // form leaves the second accumulator unused - confirm against
        // smull_smlal.
1070*c0909341SAndroid Build Coastguard Worker        neg             v6.4s,   v6.4s
1071*c0909341SAndroid Build Coastguard Worker.ifc \sz, .8h
1072*c0909341SAndroid Build Coastguard Worker        neg             v7.4s,   v7.4s
1073*c0909341SAndroid Build Coastguard Worker.endif
1074*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v23, v6,  v7,  #12, \sz                   // t10a
1075*c0909341SAndroid Build Coastguard Worker
1076*c0909341SAndroid Build Coastguard Worker        sqsub           v2\sz,   v17\sz,  v19\sz  // t11a
1077*c0909341SAndroid Build Coastguard Worker        sqadd           v17\sz,  v17\sz,  v19\sz  // t8a
1078*c0909341SAndroid Build Coastguard Worker        sqsub           v3\sz,   v31\sz,  v25\sz  // t12a
1079*c0909341SAndroid Build Coastguard Worker        sqadd           v31\sz,  v31\sz,  v25\sz  // t15a
1080*c0909341SAndroid Build Coastguard Worker        sqadd           v19\sz,  v21\sz,  v23\sz  // t9
1081*c0909341SAndroid Build Coastguard Worker        sqsub           v21\sz,  v21\sz,  v23\sz  // t10
1082*c0909341SAndroid Build Coastguard Worker        sqsub           v25\sz,  v27\sz,  v29\sz  // t13
1083*c0909341SAndroid Build Coastguard Worker        sqadd           v27\sz,  v27\sz,  v29\sz  // t14
1084*c0909341SAndroid Build Coastguard Worker
        // Third stage: sum/difference pairs, both terms scaled by v0.h[0].
1085*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v4,  v5,  v3,  v2,  v0.h[0], v0.h[0], \sz // -> t11
1086*c0909341SAndroid Build Coastguard Worker        smull_smlal     v6,  v7,  v3,  v2,  v0.h[0], v0.h[0], \sz // -> t12
1087*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v2,  v3,  v25, v21, v0.h[0], v0.h[0], \sz // -> t10a
1088*c0909341SAndroid Build Coastguard Worker
1089*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v4,  v4,  v5,  #12, \sz   // t11
1090*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v5,  v6,  v7,  #12, \sz   // t12
1091*c0909341SAndroid Build Coastguard Worker        smull_smlal     v6,  v7,  v25, v21, v0.h[0], v0.h[0], \sz // -> t13a
1092*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v2,  v2,  v3,  #12, \sz   // t10a
1093*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v3,  v6,  v7,  #12, \sz   // t13a
1094*c0909341SAndroid Build Coastguard Worker
        // Final butterfly between the idct_8 results and the odd part.
        // out0, out8 and out6 are first formed in v6, v7 and v3, because
        // their destinations (v16, v24, v22) are still read by later
        // instructions; they are moved into place afterwards.
1095*c0909341SAndroid Build Coastguard Worker        sqadd           v6\sz,   v16\sz,  v31\sz  // out0
1096*c0909341SAndroid Build Coastguard Worker        sqsub           v31\sz,  v16\sz,  v31\sz  // out15
1097*c0909341SAndroid Build Coastguard Worker        mov             v16\szb, v6\szb
1098*c0909341SAndroid Build Coastguard Worker        sqadd           v23\sz,  v30\sz,  v17\sz  // out7
1099*c0909341SAndroid Build Coastguard Worker        sqsub           v7\sz,   v30\sz,  v17\sz  // out8
1100*c0909341SAndroid Build Coastguard Worker        sqadd           v17\sz,  v18\sz,  v27\sz  // out1
1101*c0909341SAndroid Build Coastguard Worker        sqsub           v30\sz,  v18\sz,  v27\sz  // out14
1102*c0909341SAndroid Build Coastguard Worker        sqadd           v18\sz,  v20\sz,  v3\sz   // out2
1103*c0909341SAndroid Build Coastguard Worker        sqsub           v29\sz,  v20\sz,  v3\sz   // out13
1104*c0909341SAndroid Build Coastguard Worker        sqadd           v3\sz,   v28\sz,  v19\sz  // out6
1105*c0909341SAndroid Build Coastguard Worker        sqsub           v25\sz,  v28\sz,  v19\sz  // out9
1106*c0909341SAndroid Build Coastguard Worker        sqadd           v19\sz,  v22\sz,  v5\sz   // out3
1107*c0909341SAndroid Build Coastguard Worker        sqsub           v28\sz,  v22\sz,  v5\sz   // out12
1108*c0909341SAndroid Build Coastguard Worker        sqadd           v20\sz,  v24\sz,  v4\sz   // out4
1109*c0909341SAndroid Build Coastguard Worker        sqsub           v27\sz,  v24\sz,  v4\sz   // out11
1110*c0909341SAndroid Build Coastguard Worker        sqadd           v21\sz,  v26\sz,  v2\sz   // out5
1111*c0909341SAndroid Build Coastguard Worker        sqsub           v26\sz,  v26\sz,  v2\sz   // out10
1112*c0909341SAndroid Build Coastguard Worker        mov             v24\szb, v7\szb
1113*c0909341SAndroid Build Coastguard Worker        mov             v22\szb, v3\szb
1114*c0909341SAndroid Build Coastguard Worker.endm
1115*c0909341SAndroid Build Coastguard Worker
// 16-point inverse DCT on v16-v31 with eight 16-bit lanes per register (.8h).
// Loads sixteen halfwords from idct_coeffs into v0/v1 (x16 holds the table
// address), then expands idct_16, which also overwrites v2-v7.
1116*c0909341SAndroid Build Coastguard Workerfunction inv_dct_8h_x16_neon, export=1
1117*c0909341SAndroid Build Coastguard Worker        movrel          x16, idct_coeffs
1118*c0909341SAndroid Build Coastguard Worker        ld1             {v0.8h, v1.8h}, [x16]
1119*c0909341SAndroid Build Coastguard Worker        idct_16         .8h, .16b
1120*c0909341SAndroid Build Coastguard Worker        ret
1121*c0909341SAndroid Build Coastguard Workerendfunc
1122*c0909341SAndroid Build Coastguard Worker
// 16-point inverse DCT on v16-v31 with four 16-bit lanes per register (.4h).
// The coefficient load is the same as in the .8h variant; only the lane
// arrangements passed to idct_16 differ.
1123*c0909341SAndroid Build Coastguard Workerfunction inv_dct_4h_x16_neon, export=1
1124*c0909341SAndroid Build Coastguard Worker        movrel          x16, idct_coeffs
1125*c0909341SAndroid Build Coastguard Worker        ld1             {v0.8h, v1.8h}, [x16]
1126*c0909341SAndroid Build Coastguard Worker        idct_16         .4h, .8b
1127*c0909341SAndroid Build Coastguard Worker        ret
1128*c0909341SAndroid Build Coastguard Workerendfunc
1129*c0909341SAndroid Build Coastguard Worker
// iadst_16: 16-point inverse ADST over the sixteen vectors v16-v31.
//
// Parameters:
//   o0-o15  register names (vN) that receive out0-out15. The instantiations
//           that follow this macro pass v16..v31 for the plain order and
//           v31..v16 for the flipped order.
//   sz      lane arrangement suffix appended to every arithmetic operand
//           (.8h or .4h in the instantiations).
//   szb     matching byte arrangement used for whole-register moves
//           (.16b or .8b in the instantiations).
//
// Register usage visible in this body: x16 is overwritten with table
// addresses, v0/v1 hold coefficients, v2-v7 are scratch, and v16-v31 are
// both the inputs and the working set. Every widened product pair is
// narrowed again with a shift of 12.
// NOTE(review): smull_smlal, smull_smlsl, sqrshrn_sz and movrel are helper
// macros defined outside this chunk; their exact expansion is not shown here.
1130*c0909341SAndroid Build Coastguard Worker.macro iadst_16 o0, o1, o2, o3, o4, o5, o6, o7, o8, o9, o10, o11, o12, o13, o14, o15, sz, szb
1131*c0909341SAndroid Build Coastguard Worker        movrel          x16, iadst16_coeffs
1132*c0909341SAndroid Build Coastguard Worker        ld1             {v0.8h, v1.8h}, [x16]
1133*c0909341SAndroid Build Coastguard Worker        movrel          x16, idct_coeffs
1134*c0909341SAndroid Build Coastguard Worker
// First stage: each input pair is combined with two adjacent iadst16
// coefficients from v0/v1, producing t0..t15 in place of the inputs.
1135*c0909341SAndroid Build Coastguard Worker        smull_smlal     v2,  v3,  v31, v16, v0.h[0], v0.h[1], \sz // -> t0
1136*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v4,  v5,  v31, v16, v0.h[1], v0.h[0], \sz // -> t1
1137*c0909341SAndroid Build Coastguard Worker        smull_smlal     v6,  v7,  v29, v18, v0.h[2], v0.h[3], \sz // -> t2
1138*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v16, v2,  v3,  #12, \sz   // t0
1139*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v31, v4,  v5,  #12, \sz   // t1
1140*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v2,  v3,  v29, v18, v0.h[3], v0.h[2], \sz // -> t3
1141*c0909341SAndroid Build Coastguard Worker        smull_smlal     v4,  v5,  v27, v20, v0.h[4], v0.h[5], \sz // -> t4
1142*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v18, v6,  v7,  #12, \sz   // t2
1143*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v29, v2,  v3,  #12, \sz   // t3
1144*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v6,  v7,  v27, v20, v0.h[5], v0.h[4], \sz // -> t5
1145*c0909341SAndroid Build Coastguard Worker        smull_smlal     v2,  v3,  v25, v22, v0.h[6], v0.h[7], \sz // -> t6
1146*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v20, v4,  v5,  #12, \sz   // t4
1147*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v27, v6,  v7,  #12, \sz   // t5
1148*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v4,  v5,  v25, v22, v0.h[7], v0.h[6], \sz // -> t7
1149*c0909341SAndroid Build Coastguard Worker        smull_smlal     v6,  v7,  v23, v24, v1.h[0], v1.h[1], \sz // -> t8
1150*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v22, v2,  v3,  #12, \sz   // t6
1151*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v25, v4,  v5,  #12, \sz   // t7
1152*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v2,  v3,  v23, v24, v1.h[1], v1.h[0], \sz // -> t9
1153*c0909341SAndroid Build Coastguard Worker        smull_smlal     v4,  v5,  v21, v26, v1.h[2], v1.h[3], \sz // -> t10
1154*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v23, v6,  v7,  #12, \sz   // t8
1155*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v24, v2,  v3,  #12, \sz   // t9
1156*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v6,  v7,  v21, v26, v1.h[3], v1.h[2], \sz // -> t11
1157*c0909341SAndroid Build Coastguard Worker        smull_smlal     v2,  v3,  v19, v28, v1.h[4], v1.h[5], \sz // -> t12
1158*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v21, v4,  v5,  #12, \sz   // t10
1159*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v26, v6,  v7,  #12, \sz   // t11
1160*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v4,  v5,  v19, v28, v1.h[5], v1.h[4], \sz // -> t13
1161*c0909341SAndroid Build Coastguard Worker        smull_smlal     v6,  v7,  v17, v30, v1.h[6], v1.h[7], \sz // -> t14
1162*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v19, v2,  v3,  #12, \sz   // t12
1163*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v28, v4,  v5,  #12, \sz   // t13
1164*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v2,  v3,  v17, v30, v1.h[7], v1.h[6], \sz // -> t15
1165*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v17, v6,  v7,  #12, \sz   // t14
1166*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v30, v2,  v3,  #12, \sz   // t15
1167*c0909341SAndroid Build Coastguard Worker
// The iadst16 coefficients are no longer read after this point: v0 is
// reloaded with the first eight halfwords of idct_coeffs (x16 was pointed
// at that table at the top of the macro) and v1 is not used again.
1168*c0909341SAndroid Build Coastguard Worker        ld1             {v0.8h}, [x16]
1169*c0909341SAndroid Build Coastguard Worker
1170*c0909341SAndroid Build Coastguard Worker        sqsub           v2\sz,   v16\sz,  v23\sz // t8a
1171*c0909341SAndroid Build Coastguard Worker        sqadd           v16\sz,  v16\sz,  v23\sz // t0a
1172*c0909341SAndroid Build Coastguard Worker        sqsub           v3\sz,   v31\sz,  v24\sz // t9a
1173*c0909341SAndroid Build Coastguard Worker        sqadd           v31\sz,  v31\sz,  v24\sz // t1a
1174*c0909341SAndroid Build Coastguard Worker        sqadd           v23\sz,  v18\sz,  v21\sz // t2a
1175*c0909341SAndroid Build Coastguard Worker        sqsub           v18\sz,  v18\sz,  v21\sz // t10a
1176*c0909341SAndroid Build Coastguard Worker        sqadd           v24\sz,  v29\sz,  v26\sz // t3a
1177*c0909341SAndroid Build Coastguard Worker        sqsub           v29\sz,  v29\sz,  v26\sz // t11a
1178*c0909341SAndroid Build Coastguard Worker        sqadd           v21\sz,  v20\sz,  v19\sz // t4a
1179*c0909341SAndroid Build Coastguard Worker        sqsub           v20\sz,  v20\sz,  v19\sz // t12a
1180*c0909341SAndroid Build Coastguard Worker        sqadd           v26\sz,  v27\sz,  v28\sz // t5a
1181*c0909341SAndroid Build Coastguard Worker        sqsub           v27\sz,  v27\sz,  v28\sz // t13a
1182*c0909341SAndroid Build Coastguard Worker        sqadd           v19\sz,  v22\sz,  v17\sz // t6a
1183*c0909341SAndroid Build Coastguard Worker        sqsub           v22\sz,  v22\sz,  v17\sz // t14a
1184*c0909341SAndroid Build Coastguard Worker        sqadd           v28\sz,  v25\sz,  v30\sz // t7a
1185*c0909341SAndroid Build Coastguard Worker        sqsub           v25\sz,  v25\sz,  v30\sz // t15a
1186*c0909341SAndroid Build Coastguard Worker
1187*c0909341SAndroid Build Coastguard Worker        smull_smlal     v4,  v5,  v2,  v3,  v0.h[5], v0.h[4], \sz // -> t8
1188*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v6,  v7,  v2,  v3,  v0.h[4], v0.h[5], \sz // -> t9
1189*c0909341SAndroid Build Coastguard Worker        smull_smlal     v2,  v3,  v18, v29, v0.h[7], v0.h[6], \sz // -> t10
1190*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v17, v4,  v5,  #12, \sz  // t8
1191*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v30, v6,  v7,  #12, \sz  // t9
1192*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v4,  v5,  v18, v29, v0.h[6], v0.h[7], \sz // -> t11
1193*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v6,  v7,  v27, v20, v0.h[5], v0.h[4], \sz // -> t12
1194*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v18, v2,  v3,  #12, \sz  // t10
1195*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v29, v4,  v5,  #12, \sz  // t11
1196*c0909341SAndroid Build Coastguard Worker        smull_smlal     v2,  v3,  v27, v20, v0.h[4], v0.h[5], \sz // -> t13
1197*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v4,  v5,  v25, v22, v0.h[7], v0.h[6], \sz // -> t14
1198*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v27, v6,  v7,  #12, \sz  // t12
1199*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v20, v2,  v3,  #12, \sz  // t13
1200*c0909341SAndroid Build Coastguard Worker        smull_smlal     v6,  v7,  v25, v22, v0.h[6], v0.h[7], \sz // -> t15
1201*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v25, v4,  v5,  #12, \sz  // t14
1202*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v22, v6,  v7,  #12, \sz  // t15
1203*c0909341SAndroid Build Coastguard Worker
1204*c0909341SAndroid Build Coastguard Worker        sqsub           v2\sz,   v16\sz,  v21\sz // t4
1205*c0909341SAndroid Build Coastguard Worker        sqadd           v16\sz,  v16\sz,  v21\sz // t0
1206*c0909341SAndroid Build Coastguard Worker        sqsub           v3\sz,   v31\sz,  v26\sz // t5
1207*c0909341SAndroid Build Coastguard Worker        sqadd           v31\sz,  v31\sz,  v26\sz // t1
1208*c0909341SAndroid Build Coastguard Worker        sqadd           v21\sz,  v23\sz,  v19\sz // t2
1209*c0909341SAndroid Build Coastguard Worker        sqsub           v23\sz,  v23\sz,  v19\sz // t6
1210*c0909341SAndroid Build Coastguard Worker        sqadd           v26\sz,  v24\sz,  v28\sz // t3
1211*c0909341SAndroid Build Coastguard Worker        sqsub           v24\sz,  v24\sz,  v28\sz // t7
1212*c0909341SAndroid Build Coastguard Worker        sqadd           v19\sz,  v17\sz,  v27\sz // t8a
1213*c0909341SAndroid Build Coastguard Worker        sqsub           v17\sz,  v17\sz,  v27\sz // t12a
1214*c0909341SAndroid Build Coastguard Worker        sqadd           v28\sz,  v30\sz,  v20\sz // t9a
1215*c0909341SAndroid Build Coastguard Worker        sqsub           v30\sz,  v30\sz,  v20\sz // t13a
1216*c0909341SAndroid Build Coastguard Worker        sqadd           v27\sz,  v18\sz,  v25\sz // t10a
1217*c0909341SAndroid Build Coastguard Worker        sqsub           v18\sz,  v18\sz,  v25\sz // t14a
1218*c0909341SAndroid Build Coastguard Worker        sqadd           v20\sz,  v29\sz,  v22\sz // t11a
1219*c0909341SAndroid Build Coastguard Worker        sqsub           v29\sz,  v29\sz,  v22\sz // t15a
1220*c0909341SAndroid Build Coastguard Worker
1221*c0909341SAndroid Build Coastguard Worker        smull_smlal     v4,  v5,  v2,  v3,  v0.h[3], v0.h[2], \sz // -> t4a
1222*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v6,  v7,  v2,  v3,  v0.h[2], v0.h[3], \sz // -> t5a
1223*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v2,  v3,  v24, v23, v0.h[3], v0.h[2], \sz // -> t6a
1224*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v22, v4,  v5,  #12, \sz // t4a
1225*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v25, v6,  v7,  #12, \sz // t5a
1226*c0909341SAndroid Build Coastguard Worker        smull_smlal     v4,  v5,  v24, v23, v0.h[2], v0.h[3], \sz // -> t7a
1227*c0909341SAndroid Build Coastguard Worker        smull_smlal     v6,  v7,  v17, v30, v0.h[3], v0.h[2], \sz // -> t12
1228*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v24, v2,  v3,  #12, \sz // t6a
1229*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v23, v4,  v5,  #12, \sz // t7a
1230*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v2,  v3,  v17, v30, v0.h[2], v0.h[3], \sz // -> t13
1231*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v4,  v5,  v29, v18, v0.h[3], v0.h[2], \sz // -> t14
1232*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v17, v6,  v7,  #12, \sz // t12
1233*c0909341SAndroid Build Coastguard Worker        smull_smlal     v6,  v7,  v29, v18, v0.h[2], v0.h[3], \sz // -> t15
1234*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v29, v2,  v3,  #12, \sz // t13
1235*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v30, v4,  v5,  #12, \sz // t14
1236*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v18, v6,  v7,  #12, \sz // t15
1237*c0909341SAndroid Build Coastguard Worker
1238*c0909341SAndroid Build Coastguard Worker        sqsub           v2\sz,   v16\sz,  v21\sz // t2a
// out0 may be written straight to its destination only when that
// destination is v16. Otherwise (the flipped order passes v31 as o0) it is
// staged in v4 and copied afterwards, because v31 is still read by the two
// instructions that follow the staging add.
1239*c0909341SAndroid Build Coastguard Worker.ifc \o0, v16
1240*c0909341SAndroid Build Coastguard Worker        sqadd           \o0\sz,  v16\sz,  v21\sz // out0
1241*c0909341SAndroid Build Coastguard Worker        sqsub           v21\sz,  v31\sz,  v26\sz // t3a
1242*c0909341SAndroid Build Coastguard Worker        sqadd           \o15\sz, v31\sz,  v26\sz // out15
1243*c0909341SAndroid Build Coastguard Worker.else
1244*c0909341SAndroid Build Coastguard Worker        sqadd           v4\sz,   v16\sz,  v21\sz // out0
1245*c0909341SAndroid Build Coastguard Worker        sqsub           v21\sz,  v31\sz,  v26\sz // t3a
1246*c0909341SAndroid Build Coastguard Worker        sqadd           \o15\sz, v31\sz,  v26\sz // out15
1247*c0909341SAndroid Build Coastguard Worker        mov             \o0\szb, v4\szb
1248*c0909341SAndroid Build Coastguard Worker.endif
1249*c0909341SAndroid Build Coastguard Worker        sqneg           \o15\sz, \o15\sz         // out15
1250*c0909341SAndroid Build Coastguard Worker
1251*c0909341SAndroid Build Coastguard Worker        sqsub           v3\sz,   v29\sz,  v18\sz // t15a
1252*c0909341SAndroid Build Coastguard Worker        sqadd           \o13\sz, v29\sz,  v18\sz // out13
1253*c0909341SAndroid Build Coastguard Worker        sqadd           \o2\sz,  v17\sz,  v30\sz // out2
1254*c0909341SAndroid Build Coastguard Worker        sqsub           v26\sz,  v17\sz,  v30\sz // t14a
1255*c0909341SAndroid Build Coastguard Worker        sqneg           \o13\sz, \o13\sz         // out13
1256*c0909341SAndroid Build Coastguard Worker
1257*c0909341SAndroid Build Coastguard Worker        sqadd           \o1\sz,  v19\sz,  v27\sz // out1
1258*c0909341SAndroid Build Coastguard Worker        sqsub           v27\sz,  v19\sz,  v27\sz // t10
1259*c0909341SAndroid Build Coastguard Worker        sqadd           \o14\sz, v28\sz,  v20\sz // out14
1260*c0909341SAndroid Build Coastguard Worker        sqsub           v20\sz,  v28\sz,  v20\sz // t11
1261*c0909341SAndroid Build Coastguard Worker        sqneg           \o1\sz,  \o1\sz          // out1
1262*c0909341SAndroid Build Coastguard Worker
1263*c0909341SAndroid Build Coastguard Worker        sqadd           \o3\sz,  v22\sz,  v24\sz // out3
1264*c0909341SAndroid Build Coastguard Worker        sqsub           v22\sz,  v22\sz,  v24\sz // t6
1265*c0909341SAndroid Build Coastguard Worker        sqadd           \o12\sz, v25\sz,  v23\sz // out12
1266*c0909341SAndroid Build Coastguard Worker        sqsub           v23\sz,  v25\sz,  v23\sz // t7
1267*c0909341SAndroid Build Coastguard Worker        sqneg           \o3\sz,  \o3\sz          // out3
1268*c0909341SAndroid Build Coastguard Worker
// Final stage: every remaining output is a sum or difference of two terms,
// both scaled by the same coefficient v0.h[0].
1269*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v24, v25, v2,  v21, v0.h[0], v0.h[0], \sz // -> out8 (v24 or v23)
1270*c0909341SAndroid Build Coastguard Worker        smull_smlal     v4,  v5,  v2,  v21, v0.h[0], v0.h[0], \sz // -> out7 (v23 or v24)
1271*c0909341SAndroid Build Coastguard Worker        smull_smlal     v6,  v7,  v26, v3,  v0.h[0], v0.h[0], \sz // -> out5 (v21 or v26)
1272*c0909341SAndroid Build Coastguard Worker
1273*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v24, v24, v25, #12, \sz // out8
1274*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v4,  v4,  v5,  #12, \sz // out7
1275*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v5,  v6,  v7,  #12, \sz // out5
1276*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v6,  v7,  v26, v3,  v0.h[0], v0.h[0], \sz // -> out10 (v26 or v21)
1277*c0909341SAndroid Build Coastguard Worker        smull_smlal     v2,  v3,  v22, v23, v0.h[0], v0.h[0], \sz // -> out4 (v20 or v27)
1278*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v26, v6,  v7,  #12, \sz // out10
1279*c0909341SAndroid Build Coastguard Worker
1280*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v6,  v7,  v22, v23, v0.h[0], v0.h[0], \sz // -> out11 (v27 or v20)
1281*c0909341SAndroid Build Coastguard Worker        smull_smlal     v22, v23, v27, v20, v0.h[0], v0.h[0], \sz // -> out6 (v22 or v25)
1282*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v21, v25, v27, v20, v0.h[0], v0.h[0], \sz // -> out9 (v25 or v22)
1283*c0909341SAndroid Build Coastguard Worker
1284*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      \o4, v2,  v3,  #12, \sz // out4
1285*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v6,  v6,  v7,  #12, \sz // out11
1286*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v7,  v21, v25, #12, \sz // out9
1287*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      \o6, v22, v23, #12, \sz // out6
1288*c0909341SAndroid Build Coastguard Worker
// out8 and out10 were narrowed into v24 and v26 above. In the plain order
// those registers already are o8 and o10; in the flipped order (o8 is v23,
// o10 is v21) they have to be copied into place.
1289*c0909341SAndroid Build Coastguard Worker.ifc \o8, v23
1290*c0909341SAndroid Build Coastguard Worker        mov             \o8\szb,  v24\szb
1291*c0909341SAndroid Build Coastguard Worker        mov             \o10\szb, v26\szb
1292*c0909341SAndroid Build Coastguard Worker.endif
1293*c0909341SAndroid Build Coastguard Worker
// out7, out5, out11 and out9 were staged in v4-v7 and are negated into
// their destination registers last.
1294*c0909341SAndroid Build Coastguard Worker        sqneg           \o7\sz,  v4\sz // out7
1295*c0909341SAndroid Build Coastguard Worker        sqneg           \o5\sz,  v5\sz // out5
1296*c0909341SAndroid Build Coastguard Worker        sqneg           \o11\sz, v6\sz // out11
1297*c0909341SAndroid Build Coastguard Worker        sqneg           \o9\sz,  v7\sz // out9
1298*c0909341SAndroid Build Coastguard Worker.endm
1299*c0909341SAndroid Build Coastguard Worker
// inv_adst_8h_x16_neon: expands iadst_16 on v16-v31 with .8h lanes and the
// plain output order (out0 in v16 up to out15 in v31).
// Also overwrites x16 and v0-v7, as the macro body does.
1300*c0909341SAndroid Build Coastguard Workerfunction inv_adst_8h_x16_neon, export=1
1301*c0909341SAndroid Build Coastguard Worker        iadst_16        v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, .8h, .16b
1302*c0909341SAndroid Build Coastguard Worker        ret
1303*c0909341SAndroid Build Coastguard Workerendfunc
1304*c0909341SAndroid Build Coastguard Worker
// inv_flipadst_8h_x16_neon: same iadst_16 expansion with .8h lanes, but the
// output registers are passed in reverse, so out0 lands in v31 and out15
// in v16. Also overwrites x16 and v0-v7, as the macro body does.
1305*c0909341SAndroid Build Coastguard Workerfunction inv_flipadst_8h_x16_neon, export=1
1306*c0909341SAndroid Build Coastguard Worker        iadst_16        v31, v30, v29, v28, v27, v26, v25, v24, v23, v22, v21, v20, v19, v18, v17, v16, .8h, .16b
1307*c0909341SAndroid Build Coastguard Worker        ret
1308*c0909341SAndroid Build Coastguard Workerendfunc
1309*c0909341SAndroid Build Coastguard Worker
// inv_adst_4h_x16_neon: expands iadst_16 on v16-v31 with .4h lanes (four
// halfwords per register) and the plain output order (out0 in v16 up to
// out15 in v31). Also overwrites x16 and v0-v7, as the macro body does.
1310*c0909341SAndroid Build Coastguard Workerfunction inv_adst_4h_x16_neon, export=1
1311*c0909341SAndroid Build Coastguard Worker        iadst_16        v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, .4h, .8b
1312*c0909341SAndroid Build Coastguard Worker        ret
1313*c0909341SAndroid Build Coastguard Workerendfunc
1314*c0909341SAndroid Build Coastguard Worker
// inv_flipadst_4h_x16_neon: iadst_16 expansion with .4h lanes and the
// output registers passed in reverse, so out0 lands in v31 and out15 in
// v16. Also overwrites x16 and v0-v7, as the macro body does.
1315*c0909341SAndroid Build Coastguard Workerfunction inv_flipadst_4h_x16_neon, export=1
1316*c0909341SAndroid Build Coastguard Worker        iadst_16        v31, v30, v29, v28, v27, v26, v25, v24, v23, v22, v21, v20, v19, v18, v17, v16, .4h, .8b
1317*c0909341SAndroid Build Coastguard Worker        ret
1318*c0909341SAndroid Build Coastguard Workerendfunc
1319*c0909341SAndroid Build Coastguard Worker
// inv_identity_8h_x16_neon: scales each of v16-v31 (.8h lanes) in place.
// Per lane: x becomes sat(sat(2*x) + sqrdmulh(x, c)) with
// c = 2*(5793-4096)*8. sqrdmulh yields the rounded high half of the doubled
// product, i.e. about x*1697/2048, so the result is about x*5793/2048.
// Clobbers w16, v0 and v2.
1320*c0909341SAndroid Build Coastguard Workerfunction inv_identity_8h_x16_neon, export=1
1321*c0909341SAndroid Build Coastguard Worker        mov             w16, #2*(5793-4096)*8
1322*c0909341SAndroid Build Coastguard Worker        dup             v0.4h,   w16
1323*c0909341SAndroid Build Coastguard Worker.irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
// The fractional part goes to v2 first, since the next add overwrites x.
1324*c0909341SAndroid Build Coastguard Worker        sqrdmulh        v2.8h,      v\i\().8h,  v0.h[0]
1325*c0909341SAndroid Build Coastguard Worker        sqadd           v\i\().8h,  v\i\().8h,  v\i\().8h
1326*c0909341SAndroid Build Coastguard Worker        sqadd           v\i\().8h,  v\i\().8h,  v2.8h
1327*c0909341SAndroid Build Coastguard Worker.endr
1328*c0909341SAndroid Build Coastguard Worker        ret
1329*c0909341SAndroid Build Coastguard Workerendfunc
1330*c0909341SAndroid Build Coastguard Worker
// inv_identity_4h_x16_neon: .4h-lane counterpart of the 8h identity above.
// Per lane of v16-v31: x becomes sat(sat(2*x) + sqrdmulh(x, c)) with
// c = 2*(5793-4096)*8, i.e. about x*5793/2048.
// Clobbers w16, v0 and v2.
1331*c0909341SAndroid Build Coastguard Workerfunction inv_identity_4h_x16_neon, export=1
1332*c0909341SAndroid Build Coastguard Worker        mov             w16, #2*(5793-4096)*8
1333*c0909341SAndroid Build Coastguard Worker        dup             v0.4h,   w16
1334*c0909341SAndroid Build Coastguard Worker.irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
// The fractional part goes to v2 first, since the next add overwrites x.
1335*c0909341SAndroid Build Coastguard Worker        sqrdmulh        v2.4h,      v\i\().4h,  v0.h[0]
1336*c0909341SAndroid Build Coastguard Worker        sqadd           v\i\().4h,  v\i\().4h,  v\i\().4h
1337*c0909341SAndroid Build Coastguard Worker        sqadd           v\i\().4h,  v\i\().4h,  v2.4h
1338*c0909341SAndroid Build Coastguard Worker.endr
1339*c0909341SAndroid Build Coastguard Worker        ret
1340*c0909341SAndroid Build Coastguard Workerendfunc
1341*c0909341SAndroid Build Coastguard Worker
// identity_8x16_shift2 c: in-place scaling of v16-v31 (.8h lanes).
//   c  by-element multiplier operand handed to sqrdmulh (the visible caller
//      passes v0.h[0]).
// Per lane: t = sqrdmulh(x, c) >> 1 (arithmetic shift, no rounding), then
// x = (x + t + 1) >> 1 via the rounding halving add. Clobbers v2.
1342*c0909341SAndroid Build Coastguard Worker.macro identity_8x16_shift2 c
1343*c0909341SAndroid Build Coastguard Worker.irp i, v16.8h, v17.8h, v18.8h, v19.8h, v20.8h, v21.8h, v22.8h, v23.8h, v24.8h, v25.8h, v26.8h, v27.8h, v28.8h, v29.8h, v30.8h, v31.8h
1344*c0909341SAndroid Build Coastguard Worker        sqrdmulh        v2.8h,   \i,      \c
1345*c0909341SAndroid Build Coastguard Worker        sshr            v2.8h,   v2.8h,   #1
1346*c0909341SAndroid Build Coastguard Worker        srhadd          \i,      \i,      v2.8h
1347*c0909341SAndroid Build Coastguard Worker.endr
1348*c0909341SAndroid Build Coastguard Worker.endm
1349*c0909341SAndroid Build Coastguard Worker
// identity_8x16_shift1 c: in-place scaling of v16-v31 (.8h lanes).
//   c  by-element multiplier operand handed to sqrdmulh.
// Per lane: t = rounding shift right by 1 of sqrdmulh(x, c), then
// x = sat(x + t). Clobbers v2.
1350*c0909341SAndroid Build Coastguard Worker.macro identity_8x16_shift1 c
1351*c0909341SAndroid Build Coastguard Worker.irp i, v16.8h, v17.8h, v18.8h, v19.8h, v20.8h, v21.8h, v22.8h, v23.8h, v24.8h, v25.8h, v26.8h, v27.8h, v28.8h, v29.8h, v30.8h, v31.8h
1352*c0909341SAndroid Build Coastguard Worker        sqrdmulh        v2.8h,   \i,      \c
1353*c0909341SAndroid Build Coastguard Worker        srshr           v2.8h,   v2.8h,   #1
1354*c0909341SAndroid Build Coastguard Worker        sqadd           \i,      \i,      v2.8h
1355*c0909341SAndroid Build Coastguard Worker.endr
1356*c0909341SAndroid Build Coastguard Worker.endm
1357*c0909341SAndroid Build Coastguard Worker
// identity_8x8_shift1 c: same per-lane operation as identity_8x16_shift1
// but applied only to the eight registers v16-v23 (.8h lanes).
//   c  by-element multiplier operand handed to sqrdmulh.
// Per lane: t = rounding shift right by 1 of sqrdmulh(x, c), then
// x = sat(x + t). Clobbers v2.
1358*c0909341SAndroid Build Coastguard Worker.macro identity_8x8_shift1 c
1359*c0909341SAndroid Build Coastguard Worker.irp i, v16.8h, v17.8h, v18.8h, v19.8h, v20.8h, v21.8h, v22.8h, v23.8h
1360*c0909341SAndroid Build Coastguard Worker        sqrdmulh        v2.8h,   \i,      \c
1361*c0909341SAndroid Build Coastguard Worker        srshr           v2.8h,   v2.8h,   #1
1362*c0909341SAndroid Build Coastguard Worker        sqadd           \i,      \i,      v2.8h
1363*c0909341SAndroid Build Coastguard Worker.endr
1364*c0909341SAndroid Build Coastguard Worker.endm
1365*c0909341SAndroid Build Coastguard Worker
// identity_8x8 c: unshifted in-place scaling of v16-v23 (.8h lanes).
//   c  by-element multiplier operand handed to sqrdmulh.
// Per lane: x = sat(sat(2*x) + sqrdmulh(x, c)). The product is taken
// before x is doubled, so it uses the original value. Clobbers v2.
1366*c0909341SAndroid Build Coastguard Worker.macro identity_8x8 c
1367*c0909341SAndroid Build Coastguard Worker.irp i, v16.8h, v17.8h, v18.8h, v19.8h, v20.8h, v21.8h, v22.8h, v23.8h
1368*c0909341SAndroid Build Coastguard Worker        sqrdmulh        v2.8h,   \i,      \c
1369*c0909341SAndroid Build Coastguard Worker        sqadd           \i,      \i,      \i
1370*c0909341SAndroid Build Coastguard Worker        sqadd           \i,      \i,      v2.8h
1371*c0909341SAndroid Build Coastguard Worker.endr
1372*c0909341SAndroid Build Coastguard Worker.endm
1373*c0909341SAndroid Build Coastguard Worker
// def_horz_16: emits one function inv_txfm_horz<suffix>_16x8_neon.
//
// Macro parameters:
//   scale     non-zero: multiply all inputs by 2896*8 through scale_input
//             before the transform.
//   identity  non-zero: use identity_8x16_shift2 instead of calling a
//             transform through x4; the shift parameter is then unused.
//   shift     rounding right shift applied to v16-v31 after the x4 call.
//   suffix    text inserted into the generated function name.
//
// Registers read by the generated function, as visible in this body:
//   x7  address the sixteen 8-halfword input vectors are loaded from; each
//       loaded location is overwritten with zeros as it is read
//   x8  byte step added to x7 after each vector
//   x4  transform called with blr (non-identity variants only)
//   x6  output address, advanced by 16 bytes per stored vector
// x30 is kept in x14 across the blr and the function returns with ret x14.
// Also clobbers w16, v0, v4, v5, v7 and whatever the x4 callee clobbers.
// NOTE(review): scale_input, transpose_8x8h, L() and AARCH64_VALID_CALL_TARGET
// are defined outside this chunk.
//
// Only the shift=2, non-identity instantiation emits the epilog label
// L(horz_16x8_epilog). The identity and shift=1 variants branch into that
// shared code, so all three must be instantiated together and all rely on
// x14 holding the return address.
1374*c0909341SAndroid Build Coastguard Worker.macro def_horz_16 scale=0, identity=0, shift=2, suffix
1375*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_horz\suffix\()_16x8_neon
1376*c0909341SAndroid Build Coastguard Worker        AARCH64_VALID_CALL_TARGET
1377*c0909341SAndroid Build Coastguard Worker        mov             x14, x30
1378*c0909341SAndroid Build Coastguard Worker        movi            v7.8h,  #0
1379*c0909341SAndroid Build Coastguard Worker.if \identity
1380*c0909341SAndroid Build Coastguard Worker        mov             w16, #2*(5793-4096)*8
1381*c0909341SAndroid Build Coastguard Worker        dup             v0.4h,   w16
1382*c0909341SAndroid Build Coastguard Worker.elseif \scale
1383*c0909341SAndroid Build Coastguard Worker        mov             w16, #2896*8
1384*c0909341SAndroid Build Coastguard Worker        dup             v0.4h,   w16
1385*c0909341SAndroid Build Coastguard Worker.endif
// Load each input vector, then clear the memory it came from.
1386*c0909341SAndroid Build Coastguard Worker.irp i, v16.8h, v17.8h, v18.8h, v19.8h, v20.8h, v21.8h, v22.8h, v23.8h, v24.8h, v25.8h, v26.8h, v27.8h, v28.8h, v29.8h, v30.8h, v31.8h
1387*c0909341SAndroid Build Coastguard Worker        ld1             {\i}, [x7]
1388*c0909341SAndroid Build Coastguard Worker        st1             {v7.8h}, [x7], x8
1389*c0909341SAndroid Build Coastguard Worker.endr
1390*c0909341SAndroid Build Coastguard Worker.if \scale
1391*c0909341SAndroid Build Coastguard Worker        scale_input     .8h, v0.h[0], v16, v17, v18, v19, v20, v21, v22, v23
1392*c0909341SAndroid Build Coastguard Worker        scale_input     .8h, v0.h[0], v24, v25, v26, v27, v28, v29, v30, v31
1393*c0909341SAndroid Build Coastguard Worker.endif
1394*c0909341SAndroid Build Coastguard Worker.if \identity
1395*c0909341SAndroid Build Coastguard Worker        identity_8x16_shift2 v0.h[0]
1396*c0909341SAndroid Build Coastguard Worker        b               L(horz_16x8_epilog)
1397*c0909341SAndroid Build Coastguard Worker.else
1398*c0909341SAndroid Build Coastguard Worker        blr             x4
1399*c0909341SAndroid Build Coastguard Worker.irp i, v16.8h, v17.8h, v18.8h, v19.8h, v20.8h, v21.8h, v22.8h, v23.8h, v24.8h, v25.8h, v26.8h, v27.8h, v28.8h, v29.8h, v30.8h, v31.8h
1400*c0909341SAndroid Build Coastguard Worker        srshr           \i,  \i,  #\shift
1401*c0909341SAndroid Build Coastguard Worker.endr
1402*c0909341SAndroid Build Coastguard Worker.if \shift == 1
1403*c0909341SAndroid Build Coastguard Worker        b               L(horz_16x8_epilog)
1404*c0909341SAndroid Build Coastguard Worker.else
// Shared epilog: transpose both 8x8 halves, then store them interleaved so
// that every 32 bytes written hold one vector from the first block followed
// by the matching vector from the second block.
1405*c0909341SAndroid Build Coastguard WorkerL(horz_16x8_epilog):
1406*c0909341SAndroid Build Coastguard Worker        transpose_8x8h  v16, v17, v18, v19, v20, v21, v22, v23, v4, v5
1407*c0909341SAndroid Build Coastguard Worker        transpose_8x8h  v24, v25, v26, v27, v28, v29, v30, v31, v4, v5
1408*c0909341SAndroid Build Coastguard Worker
1409*c0909341SAndroid Build Coastguard Worker.irp i, v16.8h, v24.8h, v17.8h, v25.8h, v18.8h, v26.8h, v19.8h, v27.8h, v20.8h, v28.8h, v21.8h, v29.8h, v22.8h, v30.8h, v23.8h, v31.8h
1410*c0909341SAndroid Build Coastguard Worker        st1             {\i}, [x6], #16
1411*c0909341SAndroid Build Coastguard Worker.endr
1412*c0909341SAndroid Build Coastguard Worker
1413*c0909341SAndroid Build Coastguard Worker        ret             x14
1414*c0909341SAndroid Build Coastguard Worker.endif
1415*c0909341SAndroid Build Coastguard Worker.endif
1416*c0909341SAndroid Build Coastguard Workerendfunc
1417*c0909341SAndroid Build Coastguard Worker.endm
1418*c0909341SAndroid Build Coastguard Worker
// Three variants: scaled (shift 1), identity (shift value unused) and the
// default (shift 2), which is the one that carries the shared epilog.
1419*c0909341SAndroid Build Coastguard Workerdef_horz_16 scale=1, identity=0, shift=1, suffix=_scale
1420*c0909341SAndroid Build Coastguard Workerdef_horz_16 scale=0, identity=1, shift=0, suffix=_identity
1421*c0909341SAndroid Build Coastguard Workerdef_horz_16 scale=0, identity=0, shift=2
1422*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_vert_8x16_neon: loads sixteen 8-halfword vectors into
// v16-v31 from x7 (advancing x7 by x8 bytes after each), calls the
// transform whose address is in x5, then expands load_add_store_8x16 with
// x6 and x7 as its arguments.
// x30 is kept in x14 across the blr, so x14 is clobbered.
// NOTE(review): load_add_store_8x16 is defined outside this chunk;
// presumably it adds the result to the pixels at x6 and stores them back.
1423*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_vert_8x16_neon
1424*c0909341SAndroid Build Coastguard Worker        mov             x14, x30
1425*c0909341SAndroid Build Coastguard Worker.irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
1426*c0909341SAndroid Build Coastguard Worker        ld1             {v\i\().8h}, [x7], x8
1427*c0909341SAndroid Build Coastguard Worker.endr
1428*c0909341SAndroid Build Coastguard Worker        blr             x5
1429*c0909341SAndroid Build Coastguard Worker        load_add_store_8x16 x6, x7
1430*c0909341SAndroid Build Coastguard Worker        ret             x14
1431*c0909341SAndroid Build Coastguard Workerendfunc
1432*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_16x16_neon: shared driver reached from the
// inv_txfm_add_*_16x16_8bpc_neon entry points.
//
// Registers consumed, as visible in this body:
//   x9   first-pass routine, called with blr once per 8-row half
//   x2   base of the first-pass input; x7 = x2 + 0 and x2 + 16 bytes
//   w3   compared (signed) against w13: if w3 < w13 the second first-pass
//        call is skipped and its 256-byte output area is zero-filled
//   x0   base passed to the second pass; x6 = x0 + 0 and x0 + 8
// A 512-byte stack buffer holds the intermediate result, and x8 is set to
// a 32-byte step for both passes. The return address is kept in x15.
// NOTE(review): presumably x0 is the destination, x2 the coefficient buffer
// and w3 the end-of-block index; confirm against the callers.
1433*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_16x16_neon
1434*c0909341SAndroid Build Coastguard Worker        mov             x15, x30
1435*c0909341SAndroid Build Coastguard Worker        sub             sp,  sp,  #512
1436*c0909341SAndroid Build Coastguard Worker        mov             x8,  #16*2
// First pass: each call writes 256 bytes, at sp and at sp + 256.
1437*c0909341SAndroid Build Coastguard Worker.irp i, 0, 8
1438*c0909341SAndroid Build Coastguard Worker        add             x6,  sp,  #(\i*16*2)
1439*c0909341SAndroid Build Coastguard Worker.if \i == 8
1440*c0909341SAndroid Build Coastguard Worker        cmp             w3,  w13
1441*c0909341SAndroid Build Coastguard Worker        b.lt            1f
1442*c0909341SAndroid Build Coastguard Worker.endif
1443*c0909341SAndroid Build Coastguard Worker        add             x7,  x2,  #(\i*2)
1444*c0909341SAndroid Build Coastguard Worker        blr             x9
1445*c0909341SAndroid Build Coastguard Worker.endr
1446*c0909341SAndroid Build Coastguard Worker        b               2f
1447*c0909341SAndroid Build Coastguard Worker1:
// Skipped half: x6 already points at sp + 256; clear 4 x 64 = 256 bytes.
1448*c0909341SAndroid Build Coastguard Worker        movi            v4.8h,  #0
1449*c0909341SAndroid Build Coastguard Worker        movi            v5.8h,  #0
1450*c0909341SAndroid Build Coastguard Worker        movi            v6.8h,  #0
1451*c0909341SAndroid Build Coastguard Worker        movi            v7.8h,  #0
1452*c0909341SAndroid Build Coastguard Worker.rept 4
1453*c0909341SAndroid Build Coastguard Worker        st1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x6], #64
1454*c0909341SAndroid Build Coastguard Worker.endr
1455*c0909341SAndroid Build Coastguard Worker2:
// Second pass: two 8-column slices, read from the stack buffer at byte
// offsets 0 and 16 using the 32-byte step in x8.
1456*c0909341SAndroid Build Coastguard Worker.irp i, 0, 8
1457*c0909341SAndroid Build Coastguard Worker        add             x6,  x0,  #(\i)
1458*c0909341SAndroid Build Coastguard Worker        add             x7,  sp,  #(\i*2)
1459*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_add_vert_8x16_neon
1460*c0909341SAndroid Build Coastguard Worker.endr
1461*c0909341SAndroid Build Coastguard Worker
1462*c0909341SAndroid Build Coastguard Worker        add             sp,  sp,  #512
1463*c0909341SAndroid Build Coastguard Worker        ret             x15
1464*c0909341SAndroid Build Coastguard Workerendfunc
1465*c0909341SAndroid Build Coastguard Worker
// def_fn_16x16: emits the exported entry point
// inv_txfm_add_<txfm1>_<txfm2>_16x16_8bpc_neon.
//
// Macro parameters:
//   txfm1     name of the first-pass transform; selects the routine put in
//             x4, or the identity horizontal routine when it is "identity"
//   txfm2     name of the second-pass transform; its address goes in x5
//   eob_half  immediate loaded into x13, which inv_txfm_add_16x16_neon
//             compares against w3 to decide whether the second half of the
//             first pass can be skipped
//
// The entry point only sets up x9, x4 (unless txfm1 is identity), x5 and
// x13, then tail-branches to inv_txfm_add_16x16_neon.
// NOTE(review): idct_dc is defined outside this chunk; presumably it is the
// DC-only shortcut for the dct_dct combination.
1466*c0909341SAndroid Build Coastguard Worker.macro def_fn_16x16 txfm1, txfm2, eob_half
1467*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_\txfm1\()_\txfm2\()_16x16_8bpc_neon, export=1
1468*c0909341SAndroid Build Coastguard Worker.ifc \txfm1\()_\txfm2, dct_dct
1469*c0909341SAndroid Build Coastguard Worker        idct_dc         16,  16,  2
1470*c0909341SAndroid Build Coastguard Worker.endif
1471*c0909341SAndroid Build Coastguard Worker.ifc \txfm1, identity
1472*c0909341SAndroid Build Coastguard Worker        adr             x9,  inv_txfm_horz_identity_16x8_neon
1473*c0909341SAndroid Build Coastguard Worker.else
1474*c0909341SAndroid Build Coastguard Worker        adr             x9,  inv_txfm_horz_16x8_neon
1475*c0909341SAndroid Build Coastguard Worker        adr             x4,  inv_\txfm1\()_8h_x16_neon
1476*c0909341SAndroid Build Coastguard Worker.endif
1477*c0909341SAndroid Build Coastguard Worker        adr             x5,  inv_\txfm2\()_8h_x16_neon
1478*c0909341SAndroid Build Coastguard Worker        mov             x13, #\eob_half
1479*c0909341SAndroid Build Coastguard Worker        b               inv_txfm_add_16x16_neon
1480*c0909341SAndroid Build Coastguard Workerendfunc
1481*c0909341SAndroid Build Coastguard Worker.endm
1482*c0909341SAndroid Build Coastguard Worker
// One entry point per transform pair. Pairs that mix identity with dct use
// a threshold of 8; every other pair, including identity/identity, uses 36.
1483*c0909341SAndroid Build Coastguard Workerdef_fn_16x16 dct, dct, 36
1484*c0909341SAndroid Build Coastguard Workerdef_fn_16x16 identity, identity, 36
1485*c0909341SAndroid Build Coastguard Workerdef_fn_16x16 dct, adst, 36
1486*c0909341SAndroid Build Coastguard Workerdef_fn_16x16 dct, flipadst, 36
1487*c0909341SAndroid Build Coastguard Workerdef_fn_16x16 dct, identity, 8
1488*c0909341SAndroid Build Coastguard Workerdef_fn_16x16 adst, dct, 36
1489*c0909341SAndroid Build Coastguard Workerdef_fn_16x16 adst, adst, 36
1490*c0909341SAndroid Build Coastguard Workerdef_fn_16x16 adst, flipadst, 36
1491*c0909341SAndroid Build Coastguard Workerdef_fn_16x16 flipadst, dct, 36
1492*c0909341SAndroid Build Coastguard Workerdef_fn_16x16 flipadst, adst, 36
1493*c0909341SAndroid Build Coastguard Workerdef_fn_16x16 flipadst, flipadst, 36
1494*c0909341SAndroid Build Coastguard Workerdef_fn_16x16 identity, dct, 8
1495*c0909341SAndroid Build Coastguard Worker
// def_fn_416_base variant
//
// Emits the two internal workers inv_txfm_<variant>add_16x4_neon and
// inv_txfm_<variant>add_4x16_neon. variant is either empty or "identity_".
//
// Registers, as used by the code in this macro:
//   x0       base address handed to the load_add_store_* macros
//            (presumably the destination pixels -- confirm in those macros)
//   x2       input buffer; every vector read from it is immediately
//            overwritten with zeros
//   w3, w13  4x16 only: when w3 < w13 the second half of the input is not
//            read and its registers are zeroed instead
//   x4, x5   first-pass / second-pass transform, called with blr
//            (x4 is not called by the identity_ variant)
//   x15      copy of x30, because blr overwrites the link register
//
// The identity_ variant does its first pass inline and then branches to the
// L(itx_16x4_epilog) / L(itx_4x16_epilog) labels. Those labels, and the code
// after them, are only emitted when variant is not identity_, so both
// variants have to be instantiated.
1496*c0909341SAndroid Build Coastguard Worker.macro def_fn_416_base variant
1497*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_\variant\()add_16x4_neon
1498*c0909341SAndroid Build Coastguard Worker        mov             x15, x30 // return address; restored by ret x15
1499*c0909341SAndroid Build Coastguard Worker        movi            v4.8h,  #0 // zero vector used to clear the input
1500*c0909341SAndroid Build Coastguard Worker
1501*c0909341SAndroid Build Coastguard Worker.ifc \variant, identity_
        // identity_: consecutive 8-byte groups of the input fill the low
        // halves of v16-v19, then their high halves; each group is cleared
        // right after it has been read.
1502*c0909341SAndroid Build Coastguard Worker.irp i, v16.4h, v17.4h, v18.4h, v19.4h
1503*c0909341SAndroid Build Coastguard Worker        ld1             {\i},    [x2]
1504*c0909341SAndroid Build Coastguard Worker        st1             {v4.4h}, [x2], #8
1505*c0909341SAndroid Build Coastguard Worker.endr
1506*c0909341SAndroid Build Coastguard Worker.irp i, v16.d, v17.d, v18.d, v19.d
1507*c0909341SAndroid Build Coastguard Worker        ld1             {\i}[1], [x2]
1508*c0909341SAndroid Build Coastguard Worker        st1             {v4.4h}, [x2], #8
1509*c0909341SAndroid Build Coastguard Worker.endr
        // Constant consumed by identity_8x16_shift1 below (as v0.h[0]).
1510*c0909341SAndroid Build Coastguard Worker        mov             w16, #2*(5793-4096)*8
1511*c0909341SAndroid Build Coastguard Worker        dup             v0.4h,   w16
        // Same load pattern for the next eight groups, into v20-v23.
1512*c0909341SAndroid Build Coastguard Worker.irp i, v20.4h, v21.4h, v22.4h, v23.4h
1513*c0909341SAndroid Build Coastguard Worker        ld1             {\i},    [x2]
1514*c0909341SAndroid Build Coastguard Worker        st1             {v4.4h}, [x2], #8
1515*c0909341SAndroid Build Coastguard Worker.endr
1516*c0909341SAndroid Build Coastguard Worker.irp i, v20.d, v21.d, v22.d, v23.d
1517*c0909341SAndroid Build Coastguard Worker        ld1             {\i}[1], [x2]
1518*c0909341SAndroid Build Coastguard Worker        st1             {v4.4h}, [x2], #8
1519*c0909341SAndroid Build Coastguard Worker.endr
1520*c0909341SAndroid Build Coastguard Worker
1521*c0909341SAndroid Build Coastguard Worker        identity_8x16_shift1 v0.h[0]
1522*c0909341SAndroid Build Coastguard Worker
        // Continue in the tail emitted by the non-identity instantiation.
1523*c0909341SAndroid Build Coastguard Worker        b               L(itx_16x4_epilog)
1524*c0909341SAndroid Build Coastguard Worker.else
        // Load sixteen 4h vectors into v16-v31, clearing the input behind
        // each load.
1525*c0909341SAndroid Build Coastguard Worker.irp i, v16.4h, v17.4h, v18.4h, v19.4h, v20.4h, v21.4h, v22.4h, v23.4h, v24.4h, v25.4h, v26.4h, v27.4h, v28.4h, v29.4h, v30.4h, v31.4h
1526*c0909341SAndroid Build Coastguard Worker        ld1             {\i},    [x2]
1527*c0909341SAndroid Build Coastguard Worker        st1             {v4.4h}, [x2], #8
1528*c0909341SAndroid Build Coastguard Worker.endr
1529*c0909341SAndroid Build Coastguard Worker
        // First pass.
1530*c0909341SAndroid Build Coastguard Worker        blr             x4
1531*c0909341SAndroid Build Coastguard Worker
        // Pack pairs of 4h results into full 8h vectors: the low halves of
        // v20-v23 become the high halves of v16-v19. Then apply a rounding
        // shift right by 1.
1532*c0909341SAndroid Build Coastguard Worker        ins             v16.d[1], v20.d[0]
1533*c0909341SAndroid Build Coastguard Worker        ins             v17.d[1], v21.d[0]
1534*c0909341SAndroid Build Coastguard Worker        ins             v18.d[1], v22.d[0]
1535*c0909341SAndroid Build Coastguard Worker        ins             v19.d[1], v23.d[0]
1536*c0909341SAndroid Build Coastguard Worker.irp i, v16.8h, v17.8h, v18.8h, v19.8h
1537*c0909341SAndroid Build Coastguard Worker        srshr           \i,  \i,  #1
1538*c0909341SAndroid Build Coastguard Worker.endr
1539*c0909341SAndroid Build Coastguard Worker
        // Same packing for v24-v27 / v28-v31; the shifted result is written
        // to v20-v23.
1540*c0909341SAndroid Build Coastguard Worker        ins             v24.d[1], v28.d[0]
1541*c0909341SAndroid Build Coastguard Worker        ins             v25.d[1], v29.d[0]
1542*c0909341SAndroid Build Coastguard Worker        ins             v26.d[1], v30.d[0]
1543*c0909341SAndroid Build Coastguard Worker        ins             v27.d[1], v31.d[0]
1544*c0909341SAndroid Build Coastguard Worker        srshr           v20.8h,  v24.8h,  #1
1545*c0909341SAndroid Build Coastguard Worker        srshr           v21.8h,  v25.8h,  #1
1546*c0909341SAndroid Build Coastguard Worker        srshr           v22.8h,  v26.8h,  #1
1547*c0909341SAndroid Build Coastguard Worker        srshr           v23.8h,  v27.8h,  #1
1548*c0909341SAndroid Build Coastguard Worker
        // Shared tail, also the branch target of the identity_ variant.
        // First group: transpose v16-v19 (v2-v5 are scratch operands of the
        // macro), run the second pass and hand the result to
        // load_add_store_8x4 at x0.
1549*c0909341SAndroid Build Coastguard WorkerL(itx_16x4_epilog):
1550*c0909341SAndroid Build Coastguard Worker        transpose_4x8h  v16, v17, v18, v19, v2,  v3,  v4,  v5
1551*c0909341SAndroid Build Coastguard Worker        blr             x5
1552*c0909341SAndroid Build Coastguard Worker        mov             x6,  x0
1553*c0909341SAndroid Build Coastguard Worker        load_add_store_8x4 x6, x7
1554*c0909341SAndroid Build Coastguard Worker
        // Second group: transpose v20-v23 into v16-v19, run the second pass
        // again and hand the result to load_add_store_8x4 at x0 + 8.
1555*c0909341SAndroid Build Coastguard Worker        transpose_4x8h_mov v20, v21, v22, v23, v2,  v3,  v4,  v5, v16, v17, v18, v19
1556*c0909341SAndroid Build Coastguard Worker        blr             x5
1557*c0909341SAndroid Build Coastguard Worker        add             x6,  x0,  #8
1558*c0909341SAndroid Build Coastguard Worker        load_add_store_8x4 x6, x7
1559*c0909341SAndroid Build Coastguard Worker
1560*c0909341SAndroid Build Coastguard Worker        ret             x15
1561*c0909341SAndroid Build Coastguard Worker.endif
1562*c0909341SAndroid Build Coastguard Workerendfunc
1563*c0909341SAndroid Build Coastguard Worker
1564*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_\variant\()add_4x16_neon
1565*c0909341SAndroid Build Coastguard Worker        mov             x15, x30 // return address; restored by ret x15
1566*c0909341SAndroid Build Coastguard Worker        movi            v2.8h,   #0 // zero vector used to clear the input
1567*c0909341SAndroid Build Coastguard Worker
        // x11 is the post-increment stride (32 bytes) of all loads below.
        // w13 is set to eob_half by def_fn_416; when w3 is below it, the
        // second half of the input (the vectors starting at x2 + 16) is
        // skipped and its registers are zeroed at label 1.
1568*c0909341SAndroid Build Coastguard Worker        mov             x11, #32
1569*c0909341SAndroid Build Coastguard Worker        cmp             w3,  w13
1570*c0909341SAndroid Build Coastguard Worker        b.lt            1f
1571*c0909341SAndroid Build Coastguard Worker
1572*c0909341SAndroid Build Coastguard Worker        add             x6,  x2,  #16
1573*c0909341SAndroid Build Coastguard Worker.ifc \variant, identity_
        // identity_: load straight into v24-v27 and scale in place.
1574*c0909341SAndroid Build Coastguard Worker.irp i, v24.8h, v25.8h, v26.8h, v27.8h
1575*c0909341SAndroid Build Coastguard Worker        ld1             {\i},    [x6]
1576*c0909341SAndroid Build Coastguard Worker        st1             {v2.8h}, [x6], x11
1577*c0909341SAndroid Build Coastguard Worker.endr
1578*c0909341SAndroid Build Coastguard Worker        mov             w16, #(5793-4096)*8
1579*c0909341SAndroid Build Coastguard Worker        dup             v0.4h,   w16
1580*c0909341SAndroid Build Coastguard Worker        identity_8x4_shift1 v24, v25, v26, v27, v0.h[0]
1581*c0909341SAndroid Build Coastguard Worker.else
        // Load into v16-v19, run the first pass, then write the result with
        // a rounding shift right by 1 to v24-v27.
1582*c0909341SAndroid Build Coastguard Worker.irp i, v16.8h, v17.8h, v18.8h, v19.8h
1583*c0909341SAndroid Build Coastguard Worker        ld1             {\i},    [x6]
1584*c0909341SAndroid Build Coastguard Worker        st1             {v2.8h}, [x6], x11
1585*c0909341SAndroid Build Coastguard Worker.endr
1586*c0909341SAndroid Build Coastguard Worker        blr             x4
1587*c0909341SAndroid Build Coastguard Worker        srshr           v24.8h,  v16.8h,  #1
1588*c0909341SAndroid Build Coastguard Worker        srshr           v25.8h,  v17.8h,  #1
1589*c0909341SAndroid Build Coastguard Worker        srshr           v26.8h,  v18.8h,  #1
1590*c0909341SAndroid Build Coastguard Worker        srshr           v27.8h,  v19.8h,  #1
1591*c0909341SAndroid Build Coastguard Worker.endif
        // Transpose, then move the high halves of v24-v27 into the low
        // halves of v28-v31.
1592*c0909341SAndroid Build Coastguard Worker        transpose_4x8h  v24, v25, v26, v27, v4,  v5,  v6,  v7
1593*c0909341SAndroid Build Coastguard Worker        ins             v28.d[0], v24.d[1]
1594*c0909341SAndroid Build Coastguard Worker        ins             v29.d[0], v25.d[1]
1595*c0909341SAndroid Build Coastguard Worker        ins             v30.d[0], v26.d[1]
1596*c0909341SAndroid Build Coastguard Worker        ins             v31.d[0], v27.d[1]
1597*c0909341SAndroid Build Coastguard Worker
1598*c0909341SAndroid Build Coastguard Worker        b               2f
1599*c0909341SAndroid Build Coastguard Worker1:
        // Second half skipped: its eight 4h registers are all zero.
1600*c0909341SAndroid Build Coastguard Worker.irp i, v24.4h, v25.4h, v26.4h, v27.4h, v28.4h, v29.4h, v30.4h, v31.4h
1601*c0909341SAndroid Build Coastguard Worker        movi            \i,  #0
1602*c0909341SAndroid Build Coastguard Worker.endr
1603*c0909341SAndroid Build Coastguard Worker2:
        // First half, read from x2 itself. v2 is set to zero again before it
        // is used as the store source (presumably because the code above may
        // have overwritten it -- confirm).
1604*c0909341SAndroid Build Coastguard Worker        movi            v2.8h,   #0
1605*c0909341SAndroid Build Coastguard Worker.irp i, v16.8h, v17.8h, v18.8h, v19.8h
1606*c0909341SAndroid Build Coastguard Worker        ld1             {\i},    [x2]
1607*c0909341SAndroid Build Coastguard Worker        st1             {v2.8h}, [x2], x11
1608*c0909341SAndroid Build Coastguard Worker.endr
1609*c0909341SAndroid Build Coastguard Worker.ifc \variant, identity_
1610*c0909341SAndroid Build Coastguard Worker        mov             w16, #(5793-4096)*8
1611*c0909341SAndroid Build Coastguard Worker        dup             v0.4h,   w16
1612*c0909341SAndroid Build Coastguard Worker        identity_8x4_shift1 v16, v17, v18, v19, v0.h[0]
1613*c0909341SAndroid Build Coastguard Worker
        // Continue in the tail emitted by the non-identity instantiation.
1614*c0909341SAndroid Build Coastguard Worker        b               L(itx_4x16_epilog)
1615*c0909341SAndroid Build Coastguard Worker.else
1616*c0909341SAndroid Build Coastguard Worker        blr             x4
1617*c0909341SAndroid Build Coastguard Worker.irp i, v16.8h, v17.8h, v18.8h, v19.8h
1618*c0909341SAndroid Build Coastguard Worker        srshr           \i,  \i,  #1
1619*c0909341SAndroid Build Coastguard Worker.endr
        // Shared tail, also the branch target of the identity_ variant:
        // transpose v16-v19, move their high halves into the low halves of
        // v20-v23, run the second pass and hand the result to
        // load_add_store_4x16 at x0.
1620*c0909341SAndroid Build Coastguard WorkerL(itx_4x16_epilog):
1621*c0909341SAndroid Build Coastguard Worker        transpose_4x8h  v16, v17, v18, v19, v4,  v5,  v6,  v7
1622*c0909341SAndroid Build Coastguard Worker        ins             v20.d[0], v16.d[1]
1623*c0909341SAndroid Build Coastguard Worker        ins             v21.d[0], v17.d[1]
1624*c0909341SAndroid Build Coastguard Worker        ins             v22.d[0], v18.d[1]
1625*c0909341SAndroid Build Coastguard Worker        ins             v23.d[0], v19.d[1]
1626*c0909341SAndroid Build Coastguard Worker
1627*c0909341SAndroid Build Coastguard Worker        blr             x5
1628*c0909341SAndroid Build Coastguard Worker
1629*c0909341SAndroid Build Coastguard Worker        load_add_store_4x16 x0, x6
1630*c0909341SAndroid Build Coastguard Worker
1631*c0909341SAndroid Build Coastguard Worker        ret             x15
1632*c0909341SAndroid Build Coastguard Worker.endif
1633*c0909341SAndroid Build Coastguard Workerendfunc
1634*c0909341SAndroid Build Coastguard Worker.endm
1635*c0909341SAndroid Build Coastguard Worker
// Emit both worker variants. The second (generic) instantiation is the one
// that defines L(itx_16x4_epilog) and L(itx_4x16_epilog), which the identity_
// workers from the first instantiation branch to.
1636*c0909341SAndroid Build Coastguard Workerdef_fn_416_base identity_
1637*c0909341SAndroid Build Coastguard Workerdef_fn_416_base
1638*c0909341SAndroid Build Coastguard Worker
// def_fn_416 w, h, txfm1, txfm2, eob_half
//
// Emits the exported entry point inv_txfm_add_<txfm1>_<txfm2>_<w>x<h>_8bpc_neon
// for the 4x16 and 16x4 sizes. The entry point only sets up registers and
// then branches (no link) to the matching worker from def_fn_416_base:
//   x4   first-pass function for txfm1; not set when txfm1 is identity,
//        because the identity_ worker never calls through x4
//   x5   second-pass function for txfm2
//   w13  eob_half, only when w == 4 (the 4x16 worker compares w3 with it)
// The helper width follows the block shape: 8h first pass / 4h second pass
// when w == 4, 4h first pass / 8h second pass otherwise.
// For the dct_dct combination the idct_dc macro is expanded first
// (presumably a DC-only shortcut -- confirm in the idct_dc definition).
1639*c0909341SAndroid Build Coastguard Worker.macro def_fn_416 w, h, txfm1, txfm2, eob_half
1640*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_\txfm1\()_\txfm2\()_\w\()x\h\()_8bpc_neon, export=1
1641*c0909341SAndroid Build Coastguard Worker.ifc \txfm1\()_\txfm2, dct_dct
1642*c0909341SAndroid Build Coastguard Worker        idct_dc         \w,  \h,  1
1643*c0909341SAndroid Build Coastguard Worker.endif
1644*c0909341SAndroid Build Coastguard Worker.if \w == 4
1645*c0909341SAndroid Build Coastguard Worker.ifnc \txfm1, identity
1646*c0909341SAndroid Build Coastguard Worker        adr             x4,  inv_\txfm1\()_8h_x\w\()_neon
1647*c0909341SAndroid Build Coastguard Worker.endif
1648*c0909341SAndroid Build Coastguard Worker        adr             x5,  inv_\txfm2\()_4h_x\h\()_neon
1649*c0909341SAndroid Build Coastguard Worker        mov             w13, #\eob_half
1650*c0909341SAndroid Build Coastguard Worker.else
1651*c0909341SAndroid Build Coastguard Worker.ifnc \txfm1, identity
1652*c0909341SAndroid Build Coastguard Worker        adr             x4,  inv_\txfm1\()_4h_x\w\()_neon
1653*c0909341SAndroid Build Coastguard Worker.endif
1654*c0909341SAndroid Build Coastguard Worker        adr             x5,  inv_\txfm2\()_8h_x\h\()_neon
1655*c0909341SAndroid Build Coastguard Worker.endif
        // Tail-branch to the worker; it returns to our caller through the
        // x30 value it saves on entry.
1656*c0909341SAndroid Build Coastguard Worker.ifc \txfm1, identity
1657*c0909341SAndroid Build Coastguard Worker        b               inv_txfm_identity_add_\w\()x\h\()_neon
1658*c0909341SAndroid Build Coastguard Worker.else
1659*c0909341SAndroid Build Coastguard Worker        b               inv_txfm_add_\w\()x\h\()_neon
1660*c0909341SAndroid Build Coastguard Worker.endif
1661*c0909341SAndroid Build Coastguard Workerendfunc
1662*c0909341SAndroid Build Coastguard Worker.endm
1663*c0909341SAndroid Build Coastguard Worker
// def_fns_416 w, h
//
// Instantiates def_fn_416 for all sixteen first/second transform combinations
// of one block size. The last argument is eob_half, which def_fn_416 only
// loads when w == 4. In this table it is 8 when the second transform is
// identity and the first is not, 32 when the first transform is identity and
// the second is not, and 29 for every other combination.
1664*c0909341SAndroid Build Coastguard Worker.macro def_fns_416 w, h
1665*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, dct, dct, 29
1666*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, identity, identity, 29
1667*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, dct, adst, 29
1668*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, dct, flipadst, 29
1669*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, dct, identity, 8
1670*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, adst, dct, 29
1671*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, adst, adst, 29
1672*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, adst, flipadst, 29
1673*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, flipadst, dct, 29
1674*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, flipadst, adst, 29
1675*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, flipadst, flipadst, 29
1676*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, identity, dct, 32
1677*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, adst, identity, 8
1678*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, flipadst, identity, 8
1679*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, identity, adst, 32
1680*c0909341SAndroid Build Coastguard Workerdef_fn_416 \w, \h, identity, flipadst, 32
1681*c0909341SAndroid Build Coastguard Worker.endm
1682*c0909341SAndroid Build Coastguard Worker
// Emit the exported entry points for both orientations (4 wide by 16 high,
// and 16 wide by 4 high).
1683*c0909341SAndroid Build Coastguard Workerdef_fns_416 4, 16
1684*c0909341SAndroid Build Coastguard Workerdef_fns_416 16, 4
1685*c0909341SAndroid Build Coastguard Worker
1686*c0909341SAndroid Build Coastguard Worker
// def_fn_816_base variant
//
// Emits the two internal workers inv_txfm_<variant>add_16x8_neon and
// inv_txfm_<variant>add_8x16_neon. variant is either empty or "identity_".
//
// Registers, as used by the code in this macro:
//   x0       base address handed to the load_add_store_* macros
//            (presumably the destination pixels -- confirm in those macros)
//   x2       input buffer; every vector read from it is immediately
//            overwritten with zeros
//   w3, w13  8x16 only: when w3 < w13 the second half of the input is not
//            read and v24-v31 are zeroed instead
//   x4, x5   first-pass / second-pass transform, called with blr
//            (x4 is not called by the identity_ variant)
//   x15      copy of x30, because blr overwrites the link register
//
// Every input vector goes through scale_input with v0.h[0] = 2896*8 before
// the first pass (presumably a multiply by 2896/4096, roughly 1/sqrt(2) --
// confirm in the scale_input definition).
//
// As in def_fn_416_base, the identity_ variant branches to the
// L(itx_16x8_epilog) / L(itx_8x16_epilog) labels that are only emitted when
// variant is not identity_, so both variants have to be instantiated.
1687*c0909341SAndroid Build Coastguard Worker.macro def_fn_816_base variant
1688*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_\variant\()add_16x8_neon
1689*c0909341SAndroid Build Coastguard Worker        mov             x15, x30 // return address; restored by ret x15
1690*c0909341SAndroid Build Coastguard Worker        movi            v4.8h,  #0 // zero vector used to clear the input
1691*c0909341SAndroid Build Coastguard Worker        mov             w16, #2896*8
1692*c0909341SAndroid Build Coastguard Worker        dup             v0.4h,   w16
1693*c0909341SAndroid Build Coastguard Worker
        // Load sixteen 8h vectors into v16-v31, clearing the input behind
        // each load, then scale all of them.
1694*c0909341SAndroid Build Coastguard Worker.irp i, v16.8h, v17.8h, v18.8h, v19.8h, v20.8h, v21.8h, v22.8h, v23.8h, v24.8h, v25.8h, v26.8h, v27.8h, v28.8h, v29.8h, v30.8h, v31.8h
1695*c0909341SAndroid Build Coastguard Worker        ld1             {\i},    [x2]
1696*c0909341SAndroid Build Coastguard Worker        st1             {v4.8h}, [x2], #16
1697*c0909341SAndroid Build Coastguard Worker.endr
1698*c0909341SAndroid Build Coastguard Worker
1699*c0909341SAndroid Build Coastguard Worker        scale_input     .8h, v0.h[0], v16, v17, v18, v19, v20, v21, v22, v23
1700*c0909341SAndroid Build Coastguard Worker        scale_input     .8h, v0.h[0], v24, v25, v26, v27, v28, v29, v30, v31
1701*c0909341SAndroid Build Coastguard Worker.ifc \variant, identity_
        // identity_: inline first pass, then continue in the tail emitted by
        // the non-identity instantiation.
1702*c0909341SAndroid Build Coastguard Worker        mov             w16, #2*(5793-4096)*8
1703*c0909341SAndroid Build Coastguard Worker        dup             v0.4h,   w16
1704*c0909341SAndroid Build Coastguard Worker        identity_8x16_shift1 v0.h[0]
1705*c0909341SAndroid Build Coastguard Worker
1706*c0909341SAndroid Build Coastguard Worker        b               L(itx_16x8_epilog)
1707*c0909341SAndroid Build Coastguard Worker.else
        // First pass, followed by a rounding shift right by 1 of all sixteen
        // result vectors.
1708*c0909341SAndroid Build Coastguard Worker        blr             x4
1709*c0909341SAndroid Build Coastguard Worker
1710*c0909341SAndroid Build Coastguard Worker.irp i, v16.8h, v17.8h, v18.8h, v19.8h, v20.8h, v21.8h, v22.8h, v23.8h, v24.8h, v25.8h, v26.8h, v27.8h, v28.8h, v29.8h, v30.8h, v31.8h
1711*c0909341SAndroid Build Coastguard Worker        srshr           \i,  \i,  #1
1712*c0909341SAndroid Build Coastguard Worker.endr
1713*c0909341SAndroid Build Coastguard Worker
        // Shared tail, also the branch target of the identity_ variant.
        // First group: transpose v16-v23, run the second pass and hand the
        // result to load_add_store_8x8 at x0.
1714*c0909341SAndroid Build Coastguard WorkerL(itx_16x8_epilog):
1715*c0909341SAndroid Build Coastguard Worker        transpose_8x8h  v16, v17, v18, v19, v20, v21, v22, v23, v2, v3
1716*c0909341SAndroid Build Coastguard Worker
1717*c0909341SAndroid Build Coastguard Worker        blr             x5
1718*c0909341SAndroid Build Coastguard Worker
1719*c0909341SAndroid Build Coastguard Worker        mov             x6,  x0
1720*c0909341SAndroid Build Coastguard Worker        load_add_store_8x8 x6, x7
1721*c0909341SAndroid Build Coastguard Worker
        // Second group: transpose v24-v31 into v16-v23, run the second pass
        // again and hand the result to load_add_store_8x8 at x0 + 8.
1722*c0909341SAndroid Build Coastguard Worker        transpose_8x8h_mov v24, v25, v26, v27, v28, v29, v30, v31, v2, v3, v16, v17, v18, v19, v20, v21, v22, v23
1723*c0909341SAndroid Build Coastguard Worker
1724*c0909341SAndroid Build Coastguard Worker        blr             x5
1725*c0909341SAndroid Build Coastguard Worker
1726*c0909341SAndroid Build Coastguard Worker        add             x0,  x0,  #8
1727*c0909341SAndroid Build Coastguard Worker        load_add_store_8x8 x0, x7
1728*c0909341SAndroid Build Coastguard Worker
1729*c0909341SAndroid Build Coastguard Worker        ret             x15
1730*c0909341SAndroid Build Coastguard Worker.endif
1731*c0909341SAndroid Build Coastguard Workerendfunc
1732*c0909341SAndroid Build Coastguard Worker
1733*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_\variant\()add_8x16_neon
1734*c0909341SAndroid Build Coastguard Worker        mov             x15, x30 // return address; restored by ret x15
1735*c0909341SAndroid Build Coastguard Worker        movi            v4.8h,   #0 // zero vector used to clear the input
1736*c0909341SAndroid Build Coastguard Worker        mov             w16, #2896*8
1737*c0909341SAndroid Build Coastguard Worker        dup             v0.4h,   w16
1738*c0909341SAndroid Build Coastguard Worker        mov             x11, #32 // post-increment stride of the loads below
1739*c0909341SAndroid Build Coastguard Worker
        // w13 is set to eob_half by def_fn_816; when w3 is below it, the
        // second half of the input (the vectors starting at x2 + 16) is
        // skipped and v24-v31 are zeroed at label 1.
1740*c0909341SAndroid Build Coastguard Worker        cmp             w3,  w13
1741*c0909341SAndroid Build Coastguard Worker        b.lt            1f
1742*c0909341SAndroid Build Coastguard Worker
1743*c0909341SAndroid Build Coastguard Worker        add             x6,  x2,  #16
1744*c0909341SAndroid Build Coastguard Worker.ifc \variant, identity_
        // identity_: load straight into v24-v31; only the input scaling is
        // applied.
1745*c0909341SAndroid Build Coastguard Worker.irp i, v24.8h, v25.8h, v26.8h, v27.8h, v28.8h, v29.8h, v30.8h, v31.8h
1746*c0909341SAndroid Build Coastguard Worker        ld1             {\i},    [x6]
1747*c0909341SAndroid Build Coastguard Worker        st1             {v4.8h}, [x6], x11
1748*c0909341SAndroid Build Coastguard Worker.endr
1749*c0909341SAndroid Build Coastguard Worker        scale_input     .8h, v0.h[0], v24, v25, v26, v27, v28, v29, v30, v31
1750*c0909341SAndroid Build Coastguard Worker        // The identity shl #1 and downshift srshr #1 cancel out
1751*c0909341SAndroid Build Coastguard Worker.else
        // Load into v16-v23, scale, run the first pass, then write the
        // result with a rounding shift right by 1 to v24-v31.
1752*c0909341SAndroid Build Coastguard Worker.irp i, v16.8h, v17.8h, v18.8h, v19.8h, v20.8h, v21.8h, v22.8h, v23.8h
1753*c0909341SAndroid Build Coastguard Worker        ld1             {\i},    [x6]
1754*c0909341SAndroid Build Coastguard Worker        st1             {v4.8h}, [x6], x11
1755*c0909341SAndroid Build Coastguard Worker.endr
1756*c0909341SAndroid Build Coastguard Worker        scale_input     .8h, v0.h[0], v16, v17, v18, v19, v20, v21, v22, v23
1757*c0909341SAndroid Build Coastguard Worker        blr             x4
1758*c0909341SAndroid Build Coastguard Worker
1759*c0909341SAndroid Build Coastguard Worker        srshr           v24.8h,  v16.8h,  #1
1760*c0909341SAndroid Build Coastguard Worker        srshr           v25.8h,  v17.8h,  #1
1761*c0909341SAndroid Build Coastguard Worker        srshr           v26.8h,  v18.8h,  #1
1762*c0909341SAndroid Build Coastguard Worker        srshr           v27.8h,  v19.8h,  #1
1763*c0909341SAndroid Build Coastguard Worker        srshr           v28.8h,  v20.8h,  #1
1764*c0909341SAndroid Build Coastguard Worker        srshr           v29.8h,  v21.8h,  #1
1765*c0909341SAndroid Build Coastguard Worker        srshr           v30.8h,  v22.8h,  #1
1766*c0909341SAndroid Build Coastguard Worker        srshr           v31.8h,  v23.8h,  #1
1767*c0909341SAndroid Build Coastguard Worker.endif
1768*c0909341SAndroid Build Coastguard Worker        transpose_8x8h  v24, v25, v26, v27, v28, v29, v30, v31, v2, v3
1769*c0909341SAndroid Build Coastguard Worker
1770*c0909341SAndroid Build Coastguard Worker        b               2f
1771*c0909341SAndroid Build Coastguard Worker
1772*c0909341SAndroid Build Coastguard Worker1:
        // Second half skipped: v24-v31 are all zero.
1773*c0909341SAndroid Build Coastguard Worker.irp i, v24.8h, v25.8h, v26.8h, v27.8h, v28.8h, v29.8h, v30.8h, v31.8h
1774*c0909341SAndroid Build Coastguard Worker        movi            \i,  #0
1775*c0909341SAndroid Build Coastguard Worker.endr
1776*c0909341SAndroid Build Coastguard Worker
1777*c0909341SAndroid Build Coastguard Worker2:
        // First half, read from x2 itself. The zero vector and the scaling
        // constant are set up again (presumably because the code above may
        // have overwritten v4 / v0 -- confirm).
1778*c0909341SAndroid Build Coastguard Worker        movi            v4.8h,   #0
1779*c0909341SAndroid Build Coastguard Worker        mov             w16, #2896*8
1780*c0909341SAndroid Build Coastguard Worker        dup             v0.4h,   w16
1781*c0909341SAndroid Build Coastguard Worker
1782*c0909341SAndroid Build Coastguard Worker.irp i, v16.8h, v17.8h, v18.8h, v19.8h, v20.8h, v21.8h, v22.8h, v23.8h
1783*c0909341SAndroid Build Coastguard Worker        ld1             {\i},    [x2]
1784*c0909341SAndroid Build Coastguard Worker        st1             {v4.8h}, [x2], x11
1785*c0909341SAndroid Build Coastguard Worker.endr
1786*c0909341SAndroid Build Coastguard Worker        scale_input     .8h, v0.h[0], v16, v17, v18, v19, v20, v21, v22, v23
1787*c0909341SAndroid Build Coastguard Worker.ifc \variant, identity_
1788*c0909341SAndroid Build Coastguard Worker        // The identity shl #1 and downshift srshr #1 cancel out
1789*c0909341SAndroid Build Coastguard Worker
        // Continue in the tail emitted by the non-identity instantiation.
1790*c0909341SAndroid Build Coastguard Worker        b               L(itx_8x16_epilog)
1791*c0909341SAndroid Build Coastguard Worker.else
1792*c0909341SAndroid Build Coastguard Worker        blr             x4
1793*c0909341SAndroid Build Coastguard Worker
1794*c0909341SAndroid Build Coastguard Worker.irp i, v16.8h, v17.8h, v18.8h, v19.8h, v20.8h, v21.8h, v22.8h, v23.8h
1795*c0909341SAndroid Build Coastguard Worker        srshr           \i,  \i,  #1
1796*c0909341SAndroid Build Coastguard Worker.endr
1797*c0909341SAndroid Build Coastguard Worker
        // Shared tail, also the branch target of the identity_ variant:
        // transpose v16-v23, run the second pass and hand the result to
        // load_add_store_8x16 at x0.
1798*c0909341SAndroid Build Coastguard WorkerL(itx_8x16_epilog):
1799*c0909341SAndroid Build Coastguard Worker        transpose_8x8h  v16, v17, v18, v19, v20, v21, v22, v23, v2, v3
1800*c0909341SAndroid Build Coastguard Worker
1801*c0909341SAndroid Build Coastguard Worker        blr             x5
1802*c0909341SAndroid Build Coastguard Worker
1803*c0909341SAndroid Build Coastguard Worker        load_add_store_8x16 x0, x6
1804*c0909341SAndroid Build Coastguard Worker
1805*c0909341SAndroid Build Coastguard Worker        ret             x15
1806*c0909341SAndroid Build Coastguard Worker.endif
1807*c0909341SAndroid Build Coastguard Workerendfunc
1808*c0909341SAndroid Build Coastguard Worker.endm
1809*c0909341SAndroid Build Coastguard Worker
// Emit both worker variants. The second (generic) instantiation is the one
// that defines L(itx_16x8_epilog) and L(itx_8x16_epilog), which the identity_
// workers from the first instantiation branch to.
1810*c0909341SAndroid Build Coastguard Workerdef_fn_816_base identity_
1811*c0909341SAndroid Build Coastguard Workerdef_fn_816_base
1812*c0909341SAndroid Build Coastguard Worker
// def_fn_816 w, h, txfm1, txfm2, eob_half
//
// Emits the exported entry point inv_txfm_add_<txfm1>_<txfm2>_<w>x<h>_8bpc_neon
// for the 8x16 and 16x8 sizes. The entry point only sets up registers and
// then branches (no link) to the matching worker from def_fn_816_base:
//   x4   first-pass function for txfm1; not set when txfm1 is identity,
//        because the identity_ worker never calls through x4
//   x5   second-pass function for txfm2
//   w13  eob_half, only when w == 8 (the 8x16 worker compares w3 with it)
// For the dct_dct combination the idct_dc macro is expanded first
// (presumably a DC-only shortcut -- confirm in the idct_dc definition).
1813*c0909341SAndroid Build Coastguard Worker.macro def_fn_816 w, h, txfm1, txfm2, eob_half
1814*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_\txfm1\()_\txfm2\()_\w\()x\h\()_8bpc_neon, export=1
1815*c0909341SAndroid Build Coastguard Worker.ifc \txfm1\()_\txfm2, dct_dct
1816*c0909341SAndroid Build Coastguard Worker        idct_dc         \w,  \h,  1
1817*c0909341SAndroid Build Coastguard Worker.endif
1818*c0909341SAndroid Build Coastguard Worker.ifnc \txfm1, identity
1819*c0909341SAndroid Build Coastguard Worker        adr             x4,  inv_\txfm1\()_8h_x\w\()_neon
1820*c0909341SAndroid Build Coastguard Worker.endif
1821*c0909341SAndroid Build Coastguard Worker        adr             x5,  inv_\txfm2\()_8h_x\h\()_neon
1822*c0909341SAndroid Build Coastguard Worker.if \w == 8
        // Written as a 32-bit register to match the 32-bit compare
        // (cmp w3, w13) in the 8x16 worker and the mov w13 in def_fn_416.
        // A write to w13 zero-extends into x13, so the register contents are
        // the same as with a 64-bit move of this small positive immediate.
1823*c0909341SAndroid Build Coastguard Worker        mov             w13, #\eob_half
1824*c0909341SAndroid Build Coastguard Worker.endif
        // Tail-branch to the worker; it returns to our caller through the
        // x30 value it saves on entry.
1825*c0909341SAndroid Build Coastguard Worker.ifc \txfm1, identity
1826*c0909341SAndroid Build Coastguard Worker        b               inv_txfm_identity_add_\w\()x\h\()_neon
1827*c0909341SAndroid Build Coastguard Worker.else
1828*c0909341SAndroid Build Coastguard Worker        b               inv_txfm_add_\w\()x\h\()_neon
1829*c0909341SAndroid Build Coastguard Worker.endif
1830*c0909341SAndroid Build Coastguard Workerendfunc
1831*c0909341SAndroid Build Coastguard Worker.endm
1832*c0909341SAndroid Build Coastguard Worker
// def_fns_816 w, h
//
// Instantiates def_fn_816 for all sixteen first/second transform combinations
// of one block size. The last argument is eob_half, which def_fn_816 only
// loads when w == 8. In this table it is 8 when the second transform is
// identity and the first is not, 64 when the first transform is identity and
// the second is not, and 43 for every other combination.
1833*c0909341SAndroid Build Coastguard Worker.macro def_fns_816 w, h
1834*c0909341SAndroid Build Coastguard Workerdef_fn_816 \w, \h, dct, dct, 43
1835*c0909341SAndroid Build Coastguard Workerdef_fn_816 \w, \h, identity, identity, 43
1836*c0909341SAndroid Build Coastguard Workerdef_fn_816 \w, \h, dct, adst, 43
1837*c0909341SAndroid Build Coastguard Workerdef_fn_816 \w, \h, dct, flipadst, 43
1838*c0909341SAndroid Build Coastguard Workerdef_fn_816 \w, \h, dct, identity, 8
1839*c0909341SAndroid Build Coastguard Workerdef_fn_816 \w, \h, adst, dct, 43
1840*c0909341SAndroid Build Coastguard Workerdef_fn_816 \w, \h, adst, adst, 43
1841*c0909341SAndroid Build Coastguard Workerdef_fn_816 \w, \h, adst, flipadst, 43
1842*c0909341SAndroid Build Coastguard Workerdef_fn_816 \w, \h, flipadst, dct, 43
1843*c0909341SAndroid Build Coastguard Workerdef_fn_816 \w, \h, flipadst, adst, 43
1844*c0909341SAndroid Build Coastguard Workerdef_fn_816 \w, \h, flipadst, flipadst, 43
1845*c0909341SAndroid Build Coastguard Workerdef_fn_816 \w, \h, identity, dct, 64
1846*c0909341SAndroid Build Coastguard Workerdef_fn_816 \w, \h, adst, identity, 8
1847*c0909341SAndroid Build Coastguard Workerdef_fn_816 \w, \h, flipadst, identity, 8
1848*c0909341SAndroid Build Coastguard Workerdef_fn_816 \w, \h, identity, adst, 64
1849*c0909341SAndroid Build Coastguard Workerdef_fn_816 \w, \h, identity, flipadst, 64
1850*c0909341SAndroid Build Coastguard Worker.endm
1851*c0909341SAndroid Build Coastguard Worker
// Emit the exported entry points for both orientations (8 wide by 16 high,
// and 16 wide by 8 high).
1852*c0909341SAndroid Build Coastguard Workerdef_fns_816 8, 16
1853*c0909341SAndroid Build Coastguard Workerdef_fns_816 16, 8
1854*c0909341SAndroid Build Coastguard Worker
1855*c0909341SAndroid Build Coastguard Workerfunction inv_dct32_odd_8h_x16_neon, export=1
1856*c0909341SAndroid Build Coastguard Worker        movrel          x16, idct_coeffs, 2*16
1857*c0909341SAndroid Build Coastguard Worker        ld1             {v0.8h, v1.8h}, [x16]
1858*c0909341SAndroid Build Coastguard Worker        sub             x16, x16, #2*16
1859*c0909341SAndroid Build Coastguard Worker
1860*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v2,  v3,  v16, v31, v0.h[0], v0.h[1], .8h // -> t16a
1861*c0909341SAndroid Build Coastguard Worker        smull_smlal     v4,  v5,  v16, v31, v0.h[1], v0.h[0], .8h // -> t31a
1862*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v6,  v7,  v24, v23, v0.h[2], v0.h[3], .8h // -> t17a
1863*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v16, v2,  v3,  #12, .8h                   // t16a
1864*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v31, v4,  v5,  #12, .8h                   // t31a
1865*c0909341SAndroid Build Coastguard Worker        smull_smlal     v2,  v3,  v24, v23, v0.h[3], v0.h[2], .8h // -> t30a
1866*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v4,  v5,  v20, v27, v0.h[4], v0.h[5], .8h // -> t18a
1867*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v24, v6,  v7,  #12, .8h                   // t17a
1868*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v23, v2,  v3,  #12, .8h                   // t30a
1869*c0909341SAndroid Build Coastguard Worker        smull_smlal     v6,  v7,  v20, v27, v0.h[5], v0.h[4], .8h // -> t29a
1870*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v2,  v3,  v28, v19, v0.h[6], v0.h[7], .8h // -> t19a
1871*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v20, v4,  v5,  #12, .8h                   // t18a
1872*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v27, v6,  v7,  #12, .8h                   // t29a
1873*c0909341SAndroid Build Coastguard Worker        smull_smlal     v4,  v5,  v28, v19, v0.h[7], v0.h[6], .8h // -> t28a
1874*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v6,  v7,  v18, v29, v1.h[0], v1.h[1], .8h // -> t20a
1875*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v28, v2,  v3,  #12, .8h                   // t19a
1876*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v19, v4,  v5,  #12, .8h                   // t28a
1877*c0909341SAndroid Build Coastguard Worker        smull_smlal     v2,  v3,  v18, v29, v1.h[1], v1.h[0], .8h // -> t27a
1878*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v4,  v5,  v26, v21, v1.h[2], v1.h[3], .8h // -> t21a
1879*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v18, v6,  v7,  #12, .8h                   // t20a
1880*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v29, v2,  v3,  #12, .8h                   // t27a
1881*c0909341SAndroid Build Coastguard Worker        smull_smlal     v6,  v7,  v26, v21, v1.h[3], v1.h[2], .8h // -> t26a
1882*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v2,  v3,  v22, v25, v1.h[4], v1.h[5], .8h // -> t22a
1883*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v26, v4,  v5,  #12, .8h                   // t21a
1884*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v21, v6,  v7,  #12, .8h                   // t26a
1885*c0909341SAndroid Build Coastguard Worker        smull_smlal     v4,  v5,  v22, v25, v1.h[5], v1.h[4], .8h // -> t25a
1886*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v6,  v7,  v30, v17, v1.h[6], v1.h[7], .8h // -> t23a
1887*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v22, v2,  v3,  #12, .8h                   // t22a
1888*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v25, v4,  v5,  #12, .8h                   // t25a
1889*c0909341SAndroid Build Coastguard Worker        smull_smlal     v2,  v3,  v30, v17, v1.h[7], v1.h[6], .8h // -> t24a
1890*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v30, v6,  v7,  #12, .8h                   // t23a
1891*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v17, v2,  v3,  #12, .8h                   // t24a
1892*c0909341SAndroid Build Coastguard Worker
1893*c0909341SAndroid Build Coastguard Worker        ld1             {v0.8h}, [x16]
1894*c0909341SAndroid Build Coastguard Worker
1895*c0909341SAndroid Build Coastguard Worker        sqsub           v2.8h,   v16.8h,  v24.8h // t17
1896*c0909341SAndroid Build Coastguard Worker        sqadd           v16.8h,  v16.8h,  v24.8h // t16
1897*c0909341SAndroid Build Coastguard Worker        sqsub           v3.8h,   v31.8h,  v23.8h // t30
1898*c0909341SAndroid Build Coastguard Worker        sqadd           v31.8h,  v31.8h,  v23.8h // t31
1899*c0909341SAndroid Build Coastguard Worker        sqsub           v24.8h,  v28.8h,  v20.8h // t18
1900*c0909341SAndroid Build Coastguard Worker        sqadd           v28.8h,  v28.8h,  v20.8h // t19
1901*c0909341SAndroid Build Coastguard Worker        sqadd           v23.8h,  v18.8h,  v26.8h // t20
1902*c0909341SAndroid Build Coastguard Worker        sqsub           v18.8h,  v18.8h,  v26.8h // t21
1903*c0909341SAndroid Build Coastguard Worker        sqsub           v20.8h,  v30.8h,  v22.8h // t22
1904*c0909341SAndroid Build Coastguard Worker        sqadd           v30.8h,  v30.8h,  v22.8h // t23
1905*c0909341SAndroid Build Coastguard Worker        sqadd           v26.8h,  v17.8h,  v25.8h // t24
1906*c0909341SAndroid Build Coastguard Worker        sqsub           v17.8h,  v17.8h,  v25.8h // t25
1907*c0909341SAndroid Build Coastguard Worker        sqsub           v22.8h,  v29.8h,  v21.8h // t26
1908*c0909341SAndroid Build Coastguard Worker        sqadd           v29.8h,  v29.8h,  v21.8h // t27
1909*c0909341SAndroid Build Coastguard Worker        sqadd           v25.8h,  v19.8h,  v27.8h // t28
1910*c0909341SAndroid Build Coastguard Worker        sqsub           v19.8h,  v19.8h,  v27.8h // t29
1911*c0909341SAndroid Build Coastguard Worker
1912*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v4,  v5,  v3,  v2,  v0.h[4], v0.h[5], .8h // -> t17a
1913*c0909341SAndroid Build Coastguard Worker        smull_smlal     v6,  v7,  v3,  v2,  v0.h[5], v0.h[4], .8h // -> t30a
1914*c0909341SAndroid Build Coastguard Worker        smull_smlal     v2,  v3,  v19, v24, v0.h[5], v0.h[4], .8h // -> t18a
1915*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v21, v4,  v5,  #12, .8h                   // t17a
1916*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v27, v6,  v7,  #12, .8h                   // t30a
1917*c0909341SAndroid Build Coastguard Worker        neg             v2.4s,   v2.4s                            // -> t18a
1918*c0909341SAndroid Build Coastguard Worker        neg             v3.4s,   v3.4s                            // -> t18a
1919*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v4,  v5,  v19, v24, v0.h[4], v0.h[5], .8h // -> t29a
1920*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v6,  v7,  v22, v18, v0.h[6], v0.h[7], .8h // -> t21a
1921*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v19, v2,  v3,  #12, .8h                   // t18a
1922*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v24, v4,  v5,  #12, .8h                   // t29a
1923*c0909341SAndroid Build Coastguard Worker        smull_smlal     v2,  v3,  v22, v18, v0.h[7], v0.h[6], .8h // -> t26a
1924*c0909341SAndroid Build Coastguard Worker        smull_smlal     v4,  v5,  v17, v20, v0.h[7], v0.h[6], .8h // -> t22a
1925*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v22, v6,  v7,  #12, .8h                   // t21a
1926*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v18, v2,  v3,  #12, .8h                   // t26a
1927*c0909341SAndroid Build Coastguard Worker        neg             v4.4s,   v4.4s                            // -> t22a
1928*c0909341SAndroid Build Coastguard Worker        neg             v5.4s,   v5.4s                            // -> t22a
1929*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v6,  v7,  v17, v20, v0.h[6], v0.h[7], .8h // -> t25a
1930*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v17, v4,  v5,  #12, .8h                   // t22a
1931*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v20, v6,  v7,  #12, .8h                   // t25a
1932*c0909341SAndroid Build Coastguard Worker
1933*c0909341SAndroid Build Coastguard Worker        sqsub           v2.8h,   v27.8h,  v24.8h // t29
1934*c0909341SAndroid Build Coastguard Worker        sqadd           v27.8h,  v27.8h,  v24.8h // t30
1935*c0909341SAndroid Build Coastguard Worker        sqsub           v3.8h,   v21.8h,  v19.8h // t18
1936*c0909341SAndroid Build Coastguard Worker        sqadd           v21.8h,  v21.8h,  v19.8h // t17
1937*c0909341SAndroid Build Coastguard Worker        sqsub           v24.8h,  v16.8h,  v28.8h // t19a
1938*c0909341SAndroid Build Coastguard Worker        sqadd           v16.8h,  v16.8h,  v28.8h // t16a
1939*c0909341SAndroid Build Coastguard Worker        sqsub           v19.8h,  v30.8h,  v23.8h // t20a
1940*c0909341SAndroid Build Coastguard Worker        sqadd           v30.8h,  v30.8h,  v23.8h // t23a
1941*c0909341SAndroid Build Coastguard Worker        sqsub           v28.8h,  v17.8h,  v22.8h // t21
1942*c0909341SAndroid Build Coastguard Worker        sqadd           v17.8h,  v17.8h,  v22.8h // t22
1943*c0909341SAndroid Build Coastguard Worker        sqadd           v23.8h,  v26.8h,  v29.8h // t24a
1944*c0909341SAndroid Build Coastguard Worker        sqsub           v26.8h,  v26.8h,  v29.8h // t27a
1945*c0909341SAndroid Build Coastguard Worker        sqadd           v22.8h,  v20.8h,  v18.8h // t25
1946*c0909341SAndroid Build Coastguard Worker        sqsub           v20.8h,  v20.8h,  v18.8h // t26
1947*c0909341SAndroid Build Coastguard Worker        sqsub           v29.8h,  v31.8h,  v25.8h // t28a
1948*c0909341SAndroid Build Coastguard Worker        sqadd           v31.8h,  v31.8h,  v25.8h // t31a
1949*c0909341SAndroid Build Coastguard Worker
1950*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v4,  v5,  v2,  v3,  v0.h[2], v0.h[3], .8h // -> t18a
1951*c0909341SAndroid Build Coastguard Worker        smull_smlal     v6,  v7,  v2,  v3,  v0.h[3], v0.h[2], .8h // -> t29a
1952*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v2,  v3,  v29, v24, v0.h[2], v0.h[3], .8h // -> t19
1953*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v18, v4,  v5,  #12, .8h                   // t18a
1954*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v25, v6,  v7,  #12, .8h                   // t29a
1955*c0909341SAndroid Build Coastguard Worker        smull_smlal     v4,  v5,  v29, v24, v0.h[3], v0.h[2], .8h // -> t28
1956*c0909341SAndroid Build Coastguard Worker        smull_smlal     v6,  v7,  v26, v19, v0.h[3], v0.h[2], .8h // -> t20
1957*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v29, v2,  v3,  #12, .8h                   // t19
1958*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v24, v4,  v5,  #12, .8h                   // t28
1959*c0909341SAndroid Build Coastguard Worker        neg             v6.4s,   v6.4s                            // -> t20
1960*c0909341SAndroid Build Coastguard Worker        neg             v7.4s,   v7.4s                            // -> t20
1961*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v2,  v3,  v26, v19, v0.h[2], v0.h[3], .8h // -> t27
1962*c0909341SAndroid Build Coastguard Worker        smull_smlal     v4,  v5,  v20, v28, v0.h[3], v0.h[2], .8h // -> t21a
1963*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v26, v6,  v7,  #12, .8h                   // t20
1964*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v19, v2,  v3,  #12, .8h                   // t27
1965*c0909341SAndroid Build Coastguard Worker        neg             v4.4s,   v4.4s                            // -> t21a
1966*c0909341SAndroid Build Coastguard Worker        neg             v5.4s,   v5.4s                            // -> t21a
1967*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v6,  v7,  v20, v28, v0.h[2], v0.h[3], .8h // -> t26a
1968*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v20, v4,  v5,  #12, .8h                   // t21a
1969*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v28, v6,  v7,  #12, .8h                   // t26a
1970*c0909341SAndroid Build Coastguard Worker
1971*c0909341SAndroid Build Coastguard Worker        sqsub           v2.8h,   v16.8h,  v30.8h // t23
1972*c0909341SAndroid Build Coastguard Worker        sqadd           v16.8h,  v16.8h,  v30.8h // t16 = out16
1973*c0909341SAndroid Build Coastguard Worker        sqsub           v3.8h,   v31.8h,  v23.8h // t24
1974*c0909341SAndroid Build Coastguard Worker        sqadd           v31.8h,  v31.8h,  v23.8h // t31 = out31
1975*c0909341SAndroid Build Coastguard Worker        sqsub           v23.8h,  v21.8h,  v17.8h // t22a
1976*c0909341SAndroid Build Coastguard Worker        sqadd           v17.8h,  v21.8h,  v17.8h // t17a = out17
1977*c0909341SAndroid Build Coastguard Worker        sqadd           v30.8h,  v27.8h,  v22.8h // t30a = out30
1978*c0909341SAndroid Build Coastguard Worker        sqsub           v21.8h,  v27.8h,  v22.8h // t25a
1979*c0909341SAndroid Build Coastguard Worker        sqsub           v27.8h,  v18.8h,  v20.8h // t21
1980*c0909341SAndroid Build Coastguard Worker        sqadd           v18.8h,  v18.8h,  v20.8h // t18 = out18
1981*c0909341SAndroid Build Coastguard Worker        sqadd           v4.8h,   v29.8h,  v26.8h // t19a = out19
1982*c0909341SAndroid Build Coastguard Worker        sqsub           v26.8h,  v29.8h,  v26.8h // t20a
1983*c0909341SAndroid Build Coastguard Worker        sqadd           v29.8h,  v25.8h,  v28.8h // t29 = out29
1984*c0909341SAndroid Build Coastguard Worker        sqsub           v25.8h,  v25.8h,  v28.8h // t26
1985*c0909341SAndroid Build Coastguard Worker        sqadd           v28.8h,  v24.8h,  v19.8h // t28a = out28
1986*c0909341SAndroid Build Coastguard Worker        sqsub           v24.8h,  v24.8h,  v19.8h // t27a
1987*c0909341SAndroid Build Coastguard Worker        mov             v19.16b, v4.16b          // out19
1988*c0909341SAndroid Build Coastguard Worker
1989*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v4,  v5,  v24, v26, v0.h[0], v0.h[0], .8h // -> t20
1990*c0909341SAndroid Build Coastguard Worker        smull_smlal     v6,  v7,  v24, v26, v0.h[0], v0.h[0], .8h // -> t27
1991*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v20, v4,  v5,  #12, .8h   // t20
1992*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v22, v6,  v7,  #12, .8h   // t27
1993*c0909341SAndroid Build Coastguard Worker
1994*c0909341SAndroid Build Coastguard Worker        smull_smlal     v4,  v5,  v25, v27, v0.h[0], v0.h[0], .8h // -> t26a
1995*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v6,  v7,  v25, v27, v0.h[0], v0.h[0], .8h // -> t21a
1996*c0909341SAndroid Build Coastguard Worker        mov             v27.16b,  v22.16b         // t27
1997*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v26, v4,  v5,  #12, .8h   // t26a
1998*c0909341SAndroid Build Coastguard Worker
1999*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v24, v25, v21, v23, v0.h[0], v0.h[0], .8h // -> t22
2000*c0909341SAndroid Build Coastguard Worker        smull_smlal     v4,  v5,  v21, v23, v0.h[0], v0.h[0], .8h // -> t25
2001*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v21, v6,  v7,  #12, .8h   // t21a
2002*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v22, v24, v25, #12, .8h   // t22
2003*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v25, v4,  v5,  #12, .8h   // t25
2004*c0909341SAndroid Build Coastguard Worker
2005*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v4,  v5,  v3,  v2,  v0.h[0], v0.h[0], .8h // -> t23a
2006*c0909341SAndroid Build Coastguard Worker        smull_smlal     v6,  v7,  v3,  v2,  v0.h[0], v0.h[0], .8h // -> t24a
2007*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v23, v4,  v5,  #12, .8h   // t23a
2008*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v24, v6,  v7,  #12, .8h   // t24a
2009*c0909341SAndroid Build Coastguard Worker
2010*c0909341SAndroid Build Coastguard Worker        ret
2011*c0909341SAndroid Build Coastguard Workerendfunc
2012*c0909341SAndroid Build Coastguard Worker
// def_horz_32 scale, shift, suffix
// Emits the function inv_txfm_horz<suffix>_dct_32x8_neon, which transforms
// 32 input vectors of 8 halfwords and writes 8 output rows of 32 halfwords.
//   scale:  when nonzero, both input halves are passed through scale_input
//           with the constant 2896*8 before being transformed.
//   shift:  rounding right shift (srshr) applied to the final sums and
//           differences in store2.
//   suffix: text spliced into the generated function name.
// Register usage visible in the body:
//   x7  input pointer; every vector that is loaded is overwritten with
//       zeros. It is not rewound before returning.
//   x8  distance between consecutive input vectors; doubled on entry and
//       left doubled on return.
//   x6  output pointer; 8 rows of 64 bytes are written, so it ends up
//       advanced by 64*8 bytes.
//   x14 keeps the return address across the two bl calls.
//   w16/v0 are written here only when scale is set; v4-v7 and v16-v31 are
//   always overwritten.
2013*c0909341SAndroid Build Coastguard Worker.macro def_horz_32 scale=0, shift=2, suffix
2014*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_horz\suffix\()_dct_32x8_neon
2015*c0909341SAndroid Build Coastguard Worker        mov             x14, x30
2016*c0909341SAndroid Build Coastguard Worker        movi            v7.8h,  #0
        // Double the stride so that one pass visits every second vector.
2017*c0909341SAndroid Build Coastguard Worker        lsl             x8,  x8,  #1
2018*c0909341SAndroid Build Coastguard Worker.if \scale
2019*c0909341SAndroid Build Coastguard Worker        mov             w16, #2896*8
2020*c0909341SAndroid Build Coastguard Worker        dup             v0.4h,   w16
2021*c0909341SAndroid Build Coastguard Worker.endif
2022*c0909341SAndroid Build Coastguard Worker
        // First pass: load the 16 vectors at even positions and clear them
        // in memory (v7 is zero).
2023*c0909341SAndroid Build Coastguard Worker.irp i, v16.8h, v17.8h, v18.8h, v19.8h, v20.8h, v21.8h, v22.8h, v23.8h, v24.8h, v25.8h, v26.8h, v27.8h, v28.8h, v29.8h, v30.8h, v31.8h
2024*c0909341SAndroid Build Coastguard Worker        ld1             {\i}, [x7]
2025*c0909341SAndroid Build Coastguard Worker        st1             {v7.8h}, [x7], x8
2026*c0909341SAndroid Build Coastguard Worker.endr
        // Rewind the 16 steps, then advance by half the doubled stride so
        // that x7 points at the first vector at an odd position.
2027*c0909341SAndroid Build Coastguard Worker        sub             x7,  x7,  x8, lsl #4
2028*c0909341SAndroid Build Coastguard Worker        add             x7,  x7,  x8, lsr #1
2029*c0909341SAndroid Build Coastguard Worker.if \scale
2030*c0909341SAndroid Build Coastguard Worker        scale_input     .8h, v0.h[0], v16, v17, v18, v19, v20, v21, v22, v23
2031*c0909341SAndroid Build Coastguard Worker        scale_input     .8h, v0.h[0], v24, v25, v26, v27, v28, v29, v30, v31
2032*c0909341SAndroid Build Coastguard Worker.endif
2033*c0909341SAndroid Build Coastguard Worker        bl              inv_dct_8h_x16_neon
2034*c0909341SAndroid Build Coastguard Worker        transpose_8x8h  v16, v17, v18, v19, v20, v21, v22, v23, v4, v5
2035*c0909341SAndroid Build Coastguard Worker        transpose_8x8h  v24, v25, v26, v27, v28, v29, v30, v31, v4, v5
2036*c0909341SAndroid Build Coastguard Worker
        // store1: write two vectors (32 bytes) into the first half of a
        // 64-byte output row and skip over the second half.
2037*c0909341SAndroid Build Coastguard Worker.macro store1 r0, r1
2038*c0909341SAndroid Build Coastguard Worker        st1             {\r0}, [x6], #16
2039*c0909341SAndroid Build Coastguard Worker        st1             {\r1}, [x6], #16
2040*c0909341SAndroid Build Coastguard Worker        add             x6,  x6,  #32
2041*c0909341SAndroid Build Coastguard Worker.endm
2042*c0909341SAndroid Build Coastguard Worker        store1          v16.8h,  v24.8h
2043*c0909341SAndroid Build Coastguard Worker        store1          v17.8h,  v25.8h
2044*c0909341SAndroid Build Coastguard Worker        store1          v18.8h,  v26.8h
2045*c0909341SAndroid Build Coastguard Worker        store1          v19.8h,  v27.8h
2046*c0909341SAndroid Build Coastguard Worker        store1          v20.8h,  v28.8h
2047*c0909341SAndroid Build Coastguard Worker        store1          v21.8h,  v29.8h
2048*c0909341SAndroid Build Coastguard Worker        store1          v22.8h,  v30.8h
2049*c0909341SAndroid Build Coastguard Worker        store1          v23.8h,  v31.8h
2050*c0909341SAndroid Build Coastguard Worker.purgem store1
        // Back to the first output row for the second pass.
2051*c0909341SAndroid Build Coastguard Worker        sub             x6,  x6,  #64*8
2052*c0909341SAndroid Build Coastguard Worker
        // Second pass: load and clear the 16 vectors at odd positions.
        // v7 has to be zeroed again here, as it is a scratch register.
2053*c0909341SAndroid Build Coastguard Worker        movi            v7.8h,  #0
2054*c0909341SAndroid Build Coastguard Worker.irp i, v16.8h, v17.8h, v18.8h, v19.8h, v20.8h, v21.8h, v22.8h, v23.8h, v24.8h, v25.8h, v26.8h, v27.8h, v28.8h, v29.8h, v30.8h, v31.8h
2055*c0909341SAndroid Build Coastguard Worker        ld1             {\i}, [x7]
2056*c0909341SAndroid Build Coastguard Worker        st1             {v7.8h}, [x7], x8
2057*c0909341SAndroid Build Coastguard Worker.endr
2058*c0909341SAndroid Build Coastguard Worker.if \scale
2059*c0909341SAndroid Build Coastguard Worker        // This relies on the fact that the idct also leaves the right coeff in v0.h[1]
2060*c0909341SAndroid Build Coastguard Worker        scale_input     .8h, v0.h[1], v16, v17, v18, v19, v20, v21, v22, v23
2061*c0909341SAndroid Build Coastguard Worker        scale_input     .8h, v0.h[1], v24, v25, v26, v27, v28, v29, v30, v31
2062*c0909341SAndroid Build Coastguard Worker.endif
2063*c0909341SAndroid Build Coastguard Worker        bl              inv_dct32_odd_8h_x16_neon
        // The odd-half results are transposed with the registers listed in
        // descending order (v31 down to v24, v23 down to v16).
2064*c0909341SAndroid Build Coastguard Worker        transpose_8x8h  v31, v30, v29, v28, v27, v26, v25, v24, v4, v5
2065*c0909341SAndroid Build Coastguard Worker        transpose_8x8h  v23, v22, v21, v20, v19, v18, v17, v16, v4, v5
        // store2: finish one 64-byte output row. The 32 bytes written by
        // store1 are reloaded as (a, b). The row becomes
        //   a + r0, b + r1, reversed(b - r1), reversed(a - r0)
        // using saturating adds/subtracts and a rounding right shift.
        // rev64 followed by ext #8 reverses all eight halfword lanes.
2066*c0909341SAndroid Build Coastguard Worker.macro store2 r0, r1, shift
2067*c0909341SAndroid Build Coastguard Worker        ld1             {v4.8h, v5.8h}, [x6]
2068*c0909341SAndroid Build Coastguard Worker        sqsub           v7.8h,   v4.8h,   \r0
2069*c0909341SAndroid Build Coastguard Worker        sqsub           v6.8h,   v5.8h,   \r1
2070*c0909341SAndroid Build Coastguard Worker        sqadd           v4.8h,   v4.8h,   \r0
2071*c0909341SAndroid Build Coastguard Worker        sqadd           v5.8h,   v5.8h,   \r1
2072*c0909341SAndroid Build Coastguard Worker        rev64           v6.8h,   v6.8h
2073*c0909341SAndroid Build Coastguard Worker        rev64           v7.8h,   v7.8h
2074*c0909341SAndroid Build Coastguard Worker        srshr           v4.8h,   v4.8h,   #\shift
2075*c0909341SAndroid Build Coastguard Worker        srshr           v5.8h,   v5.8h,   #\shift
2076*c0909341SAndroid Build Coastguard Worker        srshr           v6.8h,   v6.8h,   #\shift
2077*c0909341SAndroid Build Coastguard Worker        srshr           v7.8h,   v7.8h,   #\shift
2078*c0909341SAndroid Build Coastguard Worker        ext             v6.16b,  v6.16b,  v6.16b,  #8
2079*c0909341SAndroid Build Coastguard Worker        st1             {v4.8h, v5.8h}, [x6], #32
2080*c0909341SAndroid Build Coastguard Worker        ext             v7.16b,  v7.16b,  v7.16b,  #8
2081*c0909341SAndroid Build Coastguard Worker        st1             {v6.8h, v7.8h}, [x6], #32
2082*c0909341SAndroid Build Coastguard Worker.endm
2083*c0909341SAndroid Build Coastguard Worker
2084*c0909341SAndroid Build Coastguard Worker        store2          v31.8h,  v23.8h, \shift
2085*c0909341SAndroid Build Coastguard Worker        store2          v30.8h,  v22.8h, \shift
2086*c0909341SAndroid Build Coastguard Worker        store2          v29.8h,  v21.8h, \shift
2087*c0909341SAndroid Build Coastguard Worker        store2          v28.8h,  v20.8h, \shift
2088*c0909341SAndroid Build Coastguard Worker        store2          v27.8h,  v19.8h, \shift
2089*c0909341SAndroid Build Coastguard Worker        store2          v26.8h,  v18.8h, \shift
2090*c0909341SAndroid Build Coastguard Worker        store2          v25.8h,  v17.8h, \shift
2091*c0909341SAndroid Build Coastguard Worker        store2          v24.8h,  v16.8h, \shift
2092*c0909341SAndroid Build Coastguard Worker.purgem store2
        // Return through the address saved in x14 (x30 was overwritten by bl).
2093*c0909341SAndroid Build Coastguard Worker        ret             x14
2094*c0909341SAndroid Build Coastguard Workerendfunc
2095*c0909341SAndroid Build Coastguard Worker.endm
2096*c0909341SAndroid Build Coastguard Worker
// Two variants are generated: inv_txfm_horz_dct_32x8_neon (no input scaling,
// shift 2) and inv_txfm_horz_scale_dct_32x8_neon (input scaling, shift 1).
2097*c0909341SAndroid Build Coastguard Workerdef_horz_32 scale=0, shift=2
2098*c0909341SAndroid Build Coastguard Workerdef_horz_32 scale=1, shift=1, suffix=_scale
2099*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_vert_dct_8x32_neon
// Combines inv_dct_8h_x16_neon (applied to the input vectors at even
// positions) and inv_dct32_odd_8h_x16_neon (applied to those at odd
// positions) into 32 output rows of 8 values, and adds them to 8-bit pixels.
// Register usage visible in the body:
//   x7  pointer to 32 vectors of 8 halfwords, x8 bytes apart on entry. The
//       even-position slots are reused as scratch storage for the first
//       half of the transform. On return x7 is one doubled stride below
//       its entry value.
//   x8  input stride; doubled on entry and left doubled on return.
//   x6  pixel pointer used for stores, x1 the pixel stride; 32 rows of
//       8 bytes are written, so x6 ends up advanced by 32 rows.
//   x9  negated doubled stride, x10 pixel pointer used for loads.
//   x14 keeps the return address across the two bl calls.
//   v2-v7 are scratch here; v16-v31 carry the transform data.
2100*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_vert_dct_8x32_neon
2101*c0909341SAndroid Build Coastguard Worker        mov             x14, x30
2102*c0909341SAndroid Build Coastguard Worker        lsl             x8,  x8,  #1
2103*c0909341SAndroid Build Coastguard Worker
        // Load the 16 vectors at even positions, then rewind x7.
2104*c0909341SAndroid Build Coastguard Worker.irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
2105*c0909341SAndroid Build Coastguard Worker        ld1             {v\i\().8h}, [x7], x8
2106*c0909341SAndroid Build Coastguard Worker.endr
2107*c0909341SAndroid Build Coastguard Worker        sub             x7,  x7,  x8, lsl #4
2108*c0909341SAndroid Build Coastguard Worker
2109*c0909341SAndroid Build Coastguard Worker        bl              inv_dct_8h_x16_neon
2110*c0909341SAndroid Build Coastguard Worker
        // Park the 16 results in the slots they were loaded from, since
        // v16-v31 are needed again for the second half.
2111*c0909341SAndroid Build Coastguard Worker.irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
2112*c0909341SAndroid Build Coastguard Worker        st1             {v\i\().8h}, [x7], x8
2113*c0909341SAndroid Build Coastguard Worker.endr
        // Rewind, then step by half the doubled stride to the first vector
        // at an odd position.
2114*c0909341SAndroid Build Coastguard Worker        sub             x7,  x7,  x8, lsl #4
2115*c0909341SAndroid Build Coastguard Worker        add             x7,  x7,  x8, lsr #1
2116*c0909341SAndroid Build Coastguard Worker
2117*c0909341SAndroid Build Coastguard Worker.irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
2118*c0909341SAndroid Build Coastguard Worker        ld1             {v\i\().8h}, [x7], x8
2119*c0909341SAndroid Build Coastguard Worker.endr
        // Undo both the 16 steps and the half-stride offset, so x7 is back
        // at the first parked result.
2120*c0909341SAndroid Build Coastguard Worker        sub             x7,  x7,  x8, lsl #4
2121*c0909341SAndroid Build Coastguard Worker        sub             x7,  x7,  x8, lsr #1
2122*c0909341SAndroid Build Coastguard Worker        bl              inv_dct32_odd_8h_x16_neon
2123*c0909341SAndroid Build Coastguard Worker
2124*c0909341SAndroid Build Coastguard Worker        neg             x9,  x8
2125*c0909341SAndroid Build Coastguard Worker        mov             x10, x6
        // combine r0, r1, r2, r3, op, stride
        // Produces four output rows. For each row: load one parked result
        // from x7 (stepping by stride), apply op (sqadd or sqsub) with the
        // given register, round-shift right by 4, add the 8 pixels read
        // through x10, saturate to unsigned 8 bit and store through x6.
        // The four rows are interleaved; v5 and v2 are each reused for the
        // fourth row once their first-row value has been consumed.
2126*c0909341SAndroid Build Coastguard Worker.macro combine r0, r1, r2, r3, op, stride
2127*c0909341SAndroid Build Coastguard Worker        ld1             {v5.8h}, [x7],    \stride
2128*c0909341SAndroid Build Coastguard Worker        ld1             {v2.8b}, [x10],   x1
2129*c0909341SAndroid Build Coastguard Worker        ld1             {v6.8h}, [x7],    \stride
2130*c0909341SAndroid Build Coastguard Worker        ld1             {v3.8b}, [x10],   x1
2131*c0909341SAndroid Build Coastguard Worker        \op             v5.8h,   v5.8h,   \r0
2132*c0909341SAndroid Build Coastguard Worker        ld1             {v7.8h}, [x7],    \stride
2133*c0909341SAndroid Build Coastguard Worker        ld1             {v4.8b}, [x10],   x1
2134*c0909341SAndroid Build Coastguard Worker        srshr           v5.8h,   v5.8h,   #4
2135*c0909341SAndroid Build Coastguard Worker        \op             v6.8h,   v6.8h,   \r1
2136*c0909341SAndroid Build Coastguard Worker        uaddw           v5.8h,   v5.8h,   v2.8b
2137*c0909341SAndroid Build Coastguard Worker        srshr           v6.8h,   v6.8h,   #4
2138*c0909341SAndroid Build Coastguard Worker        \op             v7.8h,   v7.8h,   \r2
2139*c0909341SAndroid Build Coastguard Worker        sqxtun          v2.8b,   v5.8h
2140*c0909341SAndroid Build Coastguard Worker        ld1             {v5.8h}, [x7],    \stride
2141*c0909341SAndroid Build Coastguard Worker        uaddw           v6.8h,   v6.8h,   v3.8b
2142*c0909341SAndroid Build Coastguard Worker        srshr           v7.8h,   v7.8h,   #4
2143*c0909341SAndroid Build Coastguard Worker        \op             v5.8h,   v5.8h,   \r3
2144*c0909341SAndroid Build Coastguard Worker        st1             {v2.8b}, [x6],    x1
2145*c0909341SAndroid Build Coastguard Worker        ld1             {v2.8b}, [x10],   x1
2146*c0909341SAndroid Build Coastguard Worker        sqxtun          v3.8b,   v6.8h
2147*c0909341SAndroid Build Coastguard Worker        uaddw           v7.8h,   v7.8h,   v4.8b
2148*c0909341SAndroid Build Coastguard Worker        srshr           v5.8h,   v5.8h,   #4
2149*c0909341SAndroid Build Coastguard Worker        st1             {v3.8b}, [x6],    x1
2150*c0909341SAndroid Build Coastguard Worker        sqxtun          v4.8b,   v7.8h
2151*c0909341SAndroid Build Coastguard Worker        uaddw           v5.8h,   v5.8h,   v2.8b
2152*c0909341SAndroid Build Coastguard Worker        st1             {v4.8b}, [x6],    x1
2153*c0909341SAndroid Build Coastguard Worker        sqxtun          v2.8b,   v5.8h
2154*c0909341SAndroid Build Coastguard Worker        st1             {v2.8b}, [x6],    x1
2155*c0909341SAndroid Build Coastguard Worker.endm
        // Output rows 0-15: parked results walked forwards, plus the second
        // transform's registers taken from v31 down to v16.
2156*c0909341SAndroid Build Coastguard Worker        combine         v31.8h, v30.8h, v29.8h, v28.8h, sqadd, x8
2157*c0909341SAndroid Build Coastguard Worker        combine         v27.8h, v26.8h, v25.8h, v24.8h, sqadd, x8
2158*c0909341SAndroid Build Coastguard Worker        combine         v23.8h, v22.8h, v21.8h, v20.8h, sqadd, x8
2159*c0909341SAndroid Build Coastguard Worker        combine         v19.8h, v18.8h, v17.8h, v16.8h, sqadd, x8
        // x7 is one step past the last parked result; step back onto it and
        // walk backwards (stride x9) for output rows 16-31, which subtract
        // v16 up to v31.
2160*c0909341SAndroid Build Coastguard Worker        sub             x7,  x7,  x8
2161*c0909341SAndroid Build Coastguard Worker        combine         v16.8h, v17.8h, v18.8h, v19.8h, sqsub, x9
2162*c0909341SAndroid Build Coastguard Worker        combine         v20.8h, v21.8h, v22.8h, v23.8h, sqsub, x9
2163*c0909341SAndroid Build Coastguard Worker        combine         v24.8h, v25.8h, v26.8h, v27.8h, sqsub, x9
2164*c0909341SAndroid Build Coastguard Worker        combine         v28.8h, v29.8h, v30.8h, v31.8h, sqsub, x9
2165*c0909341SAndroid Build Coastguard Worker.purgem combine
2166*c0909341SAndroid Build Coastguard Worker
2167*c0909341SAndroid Build Coastguard Worker        ret             x14
2168*c0909341SAndroid Build Coastguard Workerendfunc
2169*c0909341SAndroid Build Coastguard Worker
// Threshold tables of 16-bit values. The identity functions below read them
// one entry at a time with a post-incremented ldrh and compare each entry
// against w3 to decide whether to keep looping.
// NOTE(review): w3 is presumably the end-of-block index of the coefficient
// block, going by the eob_ prefix - confirm against the callers.

// Read by inv_txfm_add_identity_identity_32x32_8bpc_neon, for both its inner
// and its outer loop.
2170*c0909341SAndroid Build Coastguard Workerconst eob_32x32
2171*c0909341SAndroid Build Coastguard Worker        .short 36, 136, 300, 1024
2172*c0909341SAndroid Build Coastguard Workerendconst
2173*c0909341SAndroid Build Coastguard Worker
// Selected by def_identity_1632 when the corresponding suffix argument is
// empty.
2174*c0909341SAndroid Build Coastguard Workerconst eob_16x32
2175*c0909341SAndroid Build Coastguard Worker        .short 36, 151, 279, 512
2176*c0909341SAndroid Build Coastguard Workerendconst
2177*c0909341SAndroid Build Coastguard Worker
// Selected by def_identity_1632 when the corresponding suffix argument is
// _shortside; it has two entries instead of four.
2178*c0909341SAndroid Build Coastguard Workerconst eob_16x32_shortside
2179*c0909341SAndroid Build Coastguard Worker        .short 36, 512
2180*c0909341SAndroid Build Coastguard Workerendconst
2181*c0909341SAndroid Build Coastguard Worker
// Read by the functions generated from def_identity_832.
2182*c0909341SAndroid Build Coastguard Workerconst eob_8x32
2183*c0909341SAndroid Build Coastguard Worker        .short 43, 107, 171, 256
2184*c0909341SAndroid Build Coastguard Workerendconst
2185*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_identity_identity_32x32_8bpc_neon (exported)
// Processes the block in 8x8 tiles: each tile is loaded from x2, cleared in
// memory, transposed and handed to load_add_store_8x8 with shiftbits=2.
// How many tiles are processed is decided by comparing w3 against the
// eob_32x32 thresholds.
// Register usage visible in the body:
//   x0  pointer passed to load_add_store_8x8, x1 the stride used to step it.
//   x2  input pointer; vectors are 64 bytes (2*32) apart.
//   w3  value compared against the thresholds.
//   x8  input stride, w9 number of columns handled in the current band,
//   w11 current threshold, x12/x13 inner/outer threshold pointers.
// NOTE(review): presumably x0/x1/x2/w3 are dst, dst stride, coefficients and
// eob - confirm against the C prototype of the itx functions.
2186*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_identity_identity_32x32_8bpc_neon, export=1
2187*c0909341SAndroid Build Coastguard Worker        movi            v0.8h,  #0
2188*c0909341SAndroid Build Coastguard Worker        movrel          x13, eob_32x32
2189*c0909341SAndroid Build Coastguard Worker
2190*c0909341SAndroid Build Coastguard Worker        mov             x8,  #2*32
        // Outer loop: one band of 8 output rows per iteration.
2191*c0909341SAndroid Build Coastguard Worker1:
2192*c0909341SAndroid Build Coastguard Worker        mov             w9,  #0
2193*c0909341SAndroid Build Coastguard Worker        movrel          x12, eob_32x32
        // Inner loop: one 8x8 tile per iteration.
2194*c0909341SAndroid Build Coastguard Worker2:
2195*c0909341SAndroid Build Coastguard Worker        add             w9,  w9,  #8
        // Load 8 vectors and overwrite each with zeros (v0).
2196*c0909341SAndroid Build Coastguard Worker.irp i, 16, 17, 18, 19, 20, 21, 22, 23
2197*c0909341SAndroid Build Coastguard Worker        ld1             {v\i\().8h}, [x2]
2198*c0909341SAndroid Build Coastguard Worker        st1             {v0.8h}, [x2], x8
2199*c0909341SAndroid Build Coastguard Worker.endr
2200*c0909341SAndroid Build Coastguard Worker        transpose_8x8h  v16, v17, v18, v19, v20, v21, v22, v23, v4, v5
2201*c0909341SAndroid Build Coastguard Worker
2202*c0909341SAndroid Build Coastguard Worker        load_add_store_8x8 x0, x7, shiftbits=2
2203*c0909341SAndroid Build Coastguard Worker        ldrh            w11, [x12], #2
        // Move x0 up 8 rows and right 8 bytes, i.e. to the next tile in
        // this band (assumes load_add_store_8x8 advanced x0 by 8 rows -
        // TODO confirm against the macro definition).
2204*c0909341SAndroid Build Coastguard Worker        sub             x0,  x0,  x1, lsl #3
2205*c0909341SAndroid Build Coastguard Worker        add             x0,  x0,  #8
2206*c0909341SAndroid Build Coastguard Worker        cmp             w3,  w11
2207*c0909341SAndroid Build Coastguard Worker        b.ge            2b
2208*c0909341SAndroid Build Coastguard Worker
        // Band finished; stop unless w3 reaches the next outer threshold.
2209*c0909341SAndroid Build Coastguard Worker        ldrh            w11, [x13], #2
2210*c0909341SAndroid Build Coastguard Worker        cmp             w3,  w11
2211*c0909341SAndroid Build Coastguard Worker        b.lt            9f
2212*c0909341SAndroid Build Coastguard Worker
        // Next band: undo the w9 bytes of horizontal movement and go down
        // 8 rows; rewind x2 by x8*w9 bytes (what the inner loop advanced)
        // and step 8 halfwords (16 bytes) further into the input.
2213*c0909341SAndroid Build Coastguard Worker        sub             x0,  x0,  w9, uxtw
2214*c0909341SAndroid Build Coastguard Worker        add             x0,  x0,  x1, lsl #3
2215*c0909341SAndroid Build Coastguard Worker        msub            x2,  x8,  x9,  x2
2216*c0909341SAndroid Build Coastguard Worker        add             x2,  x2,  #2*8
2217*c0909341SAndroid Build Coastguard Worker        b               1b
2218*c0909341SAndroid Build Coastguard Worker9:
2219*c0909341SAndroid Build Coastguard Worker        ret
2220*c0909341SAndroid Build Coastguard Workerendfunc
2221*c0909341SAndroid Build Coastguard Worker
// shift_8_regs op, shift
// Applies the immediate-shift instruction named by op, with shift amount
// shift, in place to each of the eight vectors v16.8h to v23.8h.
.macro shift_8_regs op, shift
        \op             v16.8h,  v16.8h,  #\shift
        \op             v17.8h,  v17.8h,  #\shift
        \op             v18.8h,  v18.8h,  #\shift
        \op             v19.8h,  v19.8h,  #\shift
        \op             v20.8h,  v20.8h,  #\shift
        \op             v21.8h,  v21.8h,  #\shift
        \op             v22.8h,  v22.8h,  #\shift
        \op             v23.8h,  v23.8h,  #\shift
.endm
2227*c0909341SAndroid Build Coastguard Worker
// def_identity_1632 w, h, wshort, hshort
// Emits the exported function inv_txfm_add_identity_identity_<w>x<h>_8bpc_neon.
//   w, h:   block width and height; w selects the 16x32 or 32x16 code path
//           and h sets the input stride (2*h bytes).
//   wshort: suffix appended to eob_16x32 for the inner-loop threshold table.
//   hshort: suffix appended to eob_16x32 for the outer-loop threshold table.
// The block is processed in 8x8 tiles: load and clear 8 vectors from x2,
// scale_input with v1.h[0], the identity step with v1.h[1], transpose, then
// load_add_store_8x8 on x0.
// Register usage visible in the body:
//   x0  pointer passed to load_add_store_8x8, x1 the stride used to step it.
//   x2  input pointer, w3 value compared against the thresholds.
//   x8  input stride, w9 number of columns handled in the current band,
//   w11 current threshold, x12/x13 inner/outer threshold pointers.
//   w16/w17 and v0/v1 hold constants.
// 2896/4096 is approximately 1/sqrt(2) and 5793/4096 approximately sqrt(2);
// how scale_input and identity_8x8 apply these constants is defined by those
// macros elsewhere in the file.
2228*c0909341SAndroid Build Coastguard Worker.macro def_identity_1632 w, h, wshort, hshort
2229*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_identity_identity_\w\()x\h\()_8bpc_neon, export=1
2230*c0909341SAndroid Build Coastguard Worker        mov             w16, #2896*8
2231*c0909341SAndroid Build Coastguard Worker        mov             w17, #2*(5793-4096)*8
        // v1.h[0] = 2896*8; lane 1 is replaced with the second constant
        // after the dup. v0 is the zero vector used to clear the input.
2232*c0909341SAndroid Build Coastguard Worker        dup             v1.4h,   w16
2233*c0909341SAndroid Build Coastguard Worker        movi            v0.8h,   #0
2234*c0909341SAndroid Build Coastguard Worker        mov             v1.h[1], w17
2235*c0909341SAndroid Build Coastguard Worker        movrel          x13, eob_16x32\hshort
2236*c0909341SAndroid Build Coastguard Worker
2237*c0909341SAndroid Build Coastguard Worker        mov             x8,  #2*\h
        // Outer loop: one band of 8 output rows per iteration.
2238*c0909341SAndroid Build Coastguard Worker1:
2239*c0909341SAndroid Build Coastguard Worker        mov             w9,  #0
2240*c0909341SAndroid Build Coastguard Worker        movrel          x12, eob_16x32\wshort
        // Inner loop: one 8x8 tile per iteration.
2241*c0909341SAndroid Build Coastguard Worker2:
2242*c0909341SAndroid Build Coastguard Worker        add             w9,  w9,  #8
        // Load 8 vectors and overwrite each with zeros (v0).
2243*c0909341SAndroid Build Coastguard Worker.irp i, v16.8h, v17.8h, v18.8h, v19.8h, v20.8h, v21.8h, v22.8h, v23.8h
2244*c0909341SAndroid Build Coastguard Worker        ld1             {\i}, [x2]
2245*c0909341SAndroid Build Coastguard Worker        st1             {v0.8h}, [x2], x8
2246*c0909341SAndroid Build Coastguard Worker.endr
2247*c0909341SAndroid Build Coastguard Worker        scale_input     .8h, v1.h[0], v16, v17, v18, v19, v20, v21, v22, v23
2248*c0909341SAndroid Build Coastguard Worker
2249*c0909341SAndroid Build Coastguard Worker.if \w == 16
2250*c0909341SAndroid Build Coastguard Worker        // 16x32
2251*c0909341SAndroid Build Coastguard Worker        identity_8x8_shift1 v1.h[1]
2252*c0909341SAndroid Build Coastguard Worker.else
2253*c0909341SAndroid Build Coastguard Worker        // 32x16
2254*c0909341SAndroid Build Coastguard Worker        shift_8_regs    sqshl, 1
2255*c0909341SAndroid Build Coastguard Worker        identity_8x8    v1.h[1]
2256*c0909341SAndroid Build Coastguard Worker.endif
2257*c0909341SAndroid Build Coastguard Worker
2258*c0909341SAndroid Build Coastguard Worker        transpose_8x8h  v16, v17, v18, v19, v20, v21, v22, v23, v4, v5
2259*c0909341SAndroid Build Coastguard Worker
        // The final shift differs per shape: 2 bits for 16x32, 4 for 32x16.
2260*c0909341SAndroid Build Coastguard Worker.if \w == 16
2261*c0909341SAndroid Build Coastguard Worker        load_add_store_8x8 x0, x7, shiftbits=2
2262*c0909341SAndroid Build Coastguard Worker.else
2263*c0909341SAndroid Build Coastguard Worker        load_add_store_8x8 x0, x7, shiftbits=4
2264*c0909341SAndroid Build Coastguard Worker.endif
2265*c0909341SAndroid Build Coastguard Worker        ldrh            w11, [x12], #2
        // Move x0 up 8 rows and right 8 bytes, i.e. to the next tile in
        // this band (assumes load_add_store_8x8 advanced x0 by 8 rows -
        // TODO confirm against the macro definition).
2266*c0909341SAndroid Build Coastguard Worker        sub             x0,  x0,  x1, lsl #3
2267*c0909341SAndroid Build Coastguard Worker        add             x0,  x0,  #8
2268*c0909341SAndroid Build Coastguard Worker        cmp             w3,  w11
2269*c0909341SAndroid Build Coastguard Worker        b.ge            2b
2270*c0909341SAndroid Build Coastguard Worker
        // Band finished; stop unless w3 reaches the next outer threshold.
2271*c0909341SAndroid Build Coastguard Worker        ldrh            w11, [x13], #2
2272*c0909341SAndroid Build Coastguard Worker        cmp             w3,  w11
2273*c0909341SAndroid Build Coastguard Worker        b.lt            9f
2274*c0909341SAndroid Build Coastguard Worker
        // Next band: undo the w9 bytes of horizontal movement and go down
        // 8 rows; rewind x2 by x8*w9 bytes (what the inner loop advanced)
        // and step 8 halfwords (16 bytes) further into the input.
2275*c0909341SAndroid Build Coastguard Worker        sub             x0,  x0,  w9, uxtw
2276*c0909341SAndroid Build Coastguard Worker        add             x0,  x0,  x1, lsl #3
2277*c0909341SAndroid Build Coastguard Worker        msub            x2,  x8,  x9,  x2
2278*c0909341SAndroid Build Coastguard Worker        add             x2,  x2,  #2*8
2279*c0909341SAndroid Build Coastguard Worker        b               1b
2280*c0909341SAndroid Build Coastguard Worker9:
2281*c0909341SAndroid Build Coastguard Worker        ret
2282*c0909341SAndroid Build Coastguard Workerendfunc
2283*c0909341SAndroid Build Coastguard Worker.endm
2284*c0909341SAndroid Build Coastguard Worker
// 16x32: inner loop uses eob_16x32_shortside, outer loop uses eob_16x32.
// 32x16: inner loop uses eob_16x32, outer loop uses eob_16x32_shortside.
2285*c0909341SAndroid Build Coastguard Workerdef_identity_1632 16, 32, _shortside,
2286*c0909341SAndroid Build Coastguard Workerdef_identity_1632 32, 16, , _shortside
2287*c0909341SAndroid Build Coastguard Worker
// def_identity_832 w, h
// Emits the exported function inv_txfm_add_identity_identity_<w>x<h>_8bpc_neon.
// Each loop iteration handles one 8x8 block of 16-bit coefficients: eight
// vectors are loaded from [x2] (stepping by 2*h bytes, held in x8) and the
// memory just read is overwritten with zeros, the block is transposed and
// then passed to load_add_store_8x8 together with x0. The loop exits as soon
// as w3 is lower than the threshold read from eob_8x32 for that iteration.
// NOTE(review): x0/x1 look like the destination pointer and its stride, x2
// the coefficient buffer and w3 the eob value - confirm against the caller.
// NOTE(review): there is no separate block counter; termination relies on the
// last eob_8x32 entry being larger than any w3 passed in (table not shown here).
2288*c0909341SAndroid Build Coastguard Worker.macro def_identity_832 w, h
2289*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_identity_identity_\w\()x\h\()_8bpc_neon, export=1
2290*c0909341SAndroid Build Coastguard Worker        movi            v0.8h,  #0 // zeros written back over every coefficient row that is read
2291*c0909341SAndroid Build Coastguard Worker        movrel          x13, eob_8x32 // x13 walks the threshold table, one halfword per 8x8 block
2292*c0909341SAndroid Build Coastguard Worker
2293*c0909341SAndroid Build Coastguard Worker        mov             w8,  #2*\h // x8 = byte step between the eight loads of one block
2294*c0909341SAndroid Build Coastguard Worker1:
2295*c0909341SAndroid Build Coastguard Worker        ldrh            w12, [x13], #2 // w12 = threshold for this block
2296*c0909341SAndroid Build Coastguard Worker.irp i, v16.8h, v17.8h, v18.8h, v19.8h, v20.8h, v21.8h, v22.8h, v23.8h
2297*c0909341SAndroid Build Coastguard Worker        ld1             {\i}, [x2]
2298*c0909341SAndroid Build Coastguard Worker        st1             {v0.8h}, [x2], x8 // clear what was just loaded, then step x2 by x8
2299*c0909341SAndroid Build Coastguard Worker.endr
2300*c0909341SAndroid Build Coastguard Worker
2301*c0909341SAndroid Build Coastguard Worker.if \w == 8
2302*c0909341SAndroid Build Coastguard Worker        // 8x32 (shift_8_regs is defined elsewhere; presumably applies srshr by 1 to v16-v23)
2303*c0909341SAndroid Build Coastguard Worker        shift_8_regs    srshr, 1
2304*c0909341SAndroid Build Coastguard Worker.endif
2305*c0909341SAndroid Build Coastguard Worker
2306*c0909341SAndroid Build Coastguard Worker        transpose_8x8h  v16, v17, v18, v19, v20, v21, v22, v23, v4, v5
2307*c0909341SAndroid Build Coastguard Worker
2308*c0909341SAndroid Build Coastguard Worker        cmp             w3,  w12 // tested by the b.lt after load_add_store_8x8, which therefore must leave the flags alone
2309*c0909341SAndroid Build Coastguard Worker.if \w == 8
2310*c0909341SAndroid Build Coastguard Worker        load_add_store_8x8 x0, x7, shiftbits=2
2311*c0909341SAndroid Build Coastguard Worker.else
2312*c0909341SAndroid Build Coastguard Worker        load_add_store_8x8 x0, x7, shiftbits=3
2313*c0909341SAndroid Build Coastguard Worker.endif
2314*c0909341SAndroid Build Coastguard Worker
2315*c0909341SAndroid Build Coastguard Worker        b.lt            9f // w3 < threshold: this was the last block to process
2316*c0909341SAndroid Build Coastguard Worker.if \w == 8
2317*c0909341SAndroid Build Coastguard Worker        sub             x2,  x2,  x8, lsl #3 // undo the eight steps of x8 taken by the loads
2318*c0909341SAndroid Build Coastguard Worker        add             x2,  x2,  #2*8 // and move on by eight 16-bit coefficients
2319*c0909341SAndroid Build Coastguard Worker.else
2320*c0909341SAndroid Build Coastguard Worker        sub             x0,  x0,  x1, lsl #3 // x0 -= 8*x1
2321*c0909341SAndroid Build Coastguard Worker        add             x0,  x0,  #8 // x0 += 8 bytes
2322*c0909341SAndroid Build Coastguard Worker.endif
2323*c0909341SAndroid Build Coastguard Worker        b               1b
2324*c0909341SAndroid Build Coastguard Worker
2325*c0909341SAndroid Build Coastguard Worker9:
2326*c0909341SAndroid Build Coastguard Worker        ret
2327*c0909341SAndroid Build Coastguard Workerendfunc
2328*c0909341SAndroid Build Coastguard Worker.endm
2329*c0909341SAndroid Build Coastguard Worker
// Instantiate the macro for the 8x32 and 32x8 sizes.
2330*c0909341SAndroid Build Coastguard Workerdef_identity_832 8, 32
2331*c0909341SAndroid Build Coastguard Workerdef_identity_832 32, 8
2332*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_dct_dct_32x32_8bpc_neon
// Two-pass 32x32 DCT/DCT using a 2048-byte scratch buffer on the stack.
// Pass 1 calls inv_txfm_horz_dct_32x8_neon for up to four slices (i = 0, 8,
// 16, 24), reading from x2 + i*2 and writing to sp + i*64. Before every slice
// but the first, w3 is compared with the next eob_32x32 threshold; when it is
// lower, the rest of the scratch buffer is zero-filled instead.
// Pass 2 calls inv_txfm_add_vert_dct_8x32_neon four times with x6 = x0 + i and
// x7 = sp + i*2.
// x30 is parked in x15 because the nested bl calls overwrite it.
// NOTE(review): x0/x2/w3 look like dst, coefficient buffer and eob - confirm.
2333*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_dct_dct_32x32_8bpc_neon, export=1
2334*c0909341SAndroid Build Coastguard Worker        idct_dc         32,  32,  2 // macro defined elsewhere; presumably a DC-only shortcut - confirm
2335*c0909341SAndroid Build Coastguard Worker
2336*c0909341SAndroid Build Coastguard Worker        mov             x15, x30 // keep the return address for the final ret x15
2337*c0909341SAndroid Build Coastguard Worker        sub             sp,  sp,  #2048 // 32*32 halfwords of scratch
2338*c0909341SAndroid Build Coastguard Worker        movrel          x13, eob_32x32
2339*c0909341SAndroid Build Coastguard Worker        ldrh            w12, [x13], #2 // w12 = first threshold, x13 now points at the second
2340*c0909341SAndroid Build Coastguard Worker
2341*c0909341SAndroid Build Coastguard Worker.irp i, 0, 8, 16, 24
2342*c0909341SAndroid Build Coastguard Worker        add             x6,  sp,  #(\i*32*2) // x6 = scratch + i rows of 32 halfwords
2343*c0909341SAndroid Build Coastguard Worker.if \i > 0
2344*c0909341SAndroid Build Coastguard Worker        mov             w8,  #(32 - \i) // rows still unwritten; only used by the fill loop at 2:
2345*c0909341SAndroid Build Coastguard Worker        cmp             w3,  w12
2346*c0909341SAndroid Build Coastguard Worker        b.lt            1f // w3 below threshold: zero-fill from x6 onwards instead
2347*c0909341SAndroid Build Coastguard Worker.if \i < 24
2348*c0909341SAndroid Build Coastguard Worker        ldrh            w12, [x13], #2 // threshold for the following slice
2349*c0909341SAndroid Build Coastguard Worker.endif
2350*c0909341SAndroid Build Coastguard Worker.endif
2351*c0909341SAndroid Build Coastguard Worker        add             x7,  x2,  #(\i*2) // x7 = x2 + i halfwords
2352*c0909341SAndroid Build Coastguard Worker        mov             x8,  #32*2 // 64, handed to the helper in x8 (looks like a byte stride - confirm)
2353*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_horz_dct_32x8_neon
2354*c0909341SAndroid Build Coastguard Worker.endr
2355*c0909341SAndroid Build Coastguard Worker        b               3f // all four slices transformed, nothing to clear
2356*c0909341SAndroid Build Coastguard Worker
2357*c0909341SAndroid Build Coastguard Worker1:
2358*c0909341SAndroid Build Coastguard Worker        movi            v4.8h,  #0
2359*c0909341SAndroid Build Coastguard Worker        movi            v5.8h,  #0
2360*c0909341SAndroid Build Coastguard Worker        movi            v6.8h,  #0
2361*c0909341SAndroid Build Coastguard Worker        movi            v7.8h,  #0
2362*c0909341SAndroid Build Coastguard Worker2:
2363*c0909341SAndroid Build Coastguard Worker        subs            w8,  w8,  #4 // each pass clears 4*64 bytes = 4 rows of 32 halfwords
2364*c0909341SAndroid Build Coastguard Worker.rept 4
2365*c0909341SAndroid Build Coastguard Worker        st1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x6], #64
2366*c0909341SAndroid Build Coastguard Worker.endr
2367*c0909341SAndroid Build Coastguard Worker        b.gt            2b // flags come from the subs above; st1 does not change them
2368*c0909341SAndroid Build Coastguard Worker
2369*c0909341SAndroid Build Coastguard Worker3:
2370*c0909341SAndroid Build Coastguard Worker.irp i, 0, 8, 16, 24
2371*c0909341SAndroid Build Coastguard Worker        add             x6,  x0,  #(\i) // x6 = x0 + i bytes
2372*c0909341SAndroid Build Coastguard Worker        add             x7,  sp,  #(\i*2) // x7 = scratch + i halfwords
2373*c0909341SAndroid Build Coastguard Worker        mov             x8,  #32*2 // 64 = bytes per 32-halfword scratch row
2374*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_add_vert_dct_8x32_neon
2375*c0909341SAndroid Build Coastguard Worker.endr
2376*c0909341SAndroid Build Coastguard Worker
2377*c0909341SAndroid Build Coastguard Worker        add             sp,  sp,  #2048 // release the scratch buffer
2378*c0909341SAndroid Build Coastguard Worker        ret             x15
2379*c0909341SAndroid Build Coastguard Workerendfunc
2380*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_dct_dct_16x32_8bpc_neon
// Two-pass 16x32 DCT/DCT using a 1024-byte scratch buffer on the stack.
// Pass 1 calls inv_txfm_horz_scale_16x8_neon for up to four slices (i = 0, 8,
// 16, 24), reading from x2 + i*2 and writing to sp + i*32, with the address of
// inv_dct_8h_x16_neon held in x4. Before every slice but the first, w3 is
// compared with the next eob_16x32 threshold; when it is lower, the rest of
// the scratch buffer is zero-filled instead.
// Pass 2 calls inv_txfm_add_vert_dct_8x32_neon twice (i = 0, 8) with
// x6 = x0 + i and x7 = sp + i*2.
// x30 is parked in x15 because the nested bl calls overwrite it.
// NOTE(review): x0/x2/w3 look like dst, coefficient buffer and eob - confirm.
2381*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_dct_dct_16x32_8bpc_neon, export=1
2382*c0909341SAndroid Build Coastguard Worker        idct_dc         16,  32,  1 // macro defined elsewhere; presumably a DC-only shortcut - confirm
2383*c0909341SAndroid Build Coastguard Worker
2384*c0909341SAndroid Build Coastguard Worker        mov             x15, x30 // keep the return address for the final ret x15
2385*c0909341SAndroid Build Coastguard Worker        sub             sp,  sp,  #1024 // 16*32 halfwords of scratch
2386*c0909341SAndroid Build Coastguard Worker        movrel          x13, eob_16x32
2387*c0909341SAndroid Build Coastguard Worker        ldrh            w12, [x13], #2 // w12 = first threshold, x13 now points at the second
2388*c0909341SAndroid Build Coastguard Worker        adr             x4,  inv_dct_8h_x16_neon // x4 = transform routine; presumably called indirectly by the helper below - confirm
2389*c0909341SAndroid Build Coastguard Worker
2390*c0909341SAndroid Build Coastguard Worker.irp i, 0, 8, 16, 24
2391*c0909341SAndroid Build Coastguard Worker        add             x6,  sp,  #(\i*16*2) // x6 = scratch + i rows of 16 halfwords
2392*c0909341SAndroid Build Coastguard Worker        add             x7,  x2,  #(\i*2) // x7 = x2 + i halfwords
2393*c0909341SAndroid Build Coastguard Worker.if \i > 0
2394*c0909341SAndroid Build Coastguard Worker        mov             w8,  #(32 - \i) // rows still unwritten; only used by the fill loop at 2:
2395*c0909341SAndroid Build Coastguard Worker        cmp             w3,  w12
2396*c0909341SAndroid Build Coastguard Worker        b.lt            1f // w3 below threshold: zero-fill from x6 onwards instead
2397*c0909341SAndroid Build Coastguard Worker.if \i < 24
2398*c0909341SAndroid Build Coastguard Worker        ldrh            w12, [x13], #2 // threshold for the following slice
2399*c0909341SAndroid Build Coastguard Worker.endif
2400*c0909341SAndroid Build Coastguard Worker.endif
2401*c0909341SAndroid Build Coastguard Worker        mov             x8,  #2*32 // 64, handed to the helper in x8 (looks like a byte stride - confirm)
2402*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_horz_scale_16x8_neon
2403*c0909341SAndroid Build Coastguard Worker.endr
2404*c0909341SAndroid Build Coastguard Worker        b               3f // all four slices transformed, nothing to clear
2405*c0909341SAndroid Build Coastguard Worker
2406*c0909341SAndroid Build Coastguard Worker1:
2407*c0909341SAndroid Build Coastguard Worker        movi            v4.8h,  #0
2408*c0909341SAndroid Build Coastguard Worker        movi            v5.8h,  #0
2409*c0909341SAndroid Build Coastguard Worker        movi            v6.8h,  #0
2410*c0909341SAndroid Build Coastguard Worker        movi            v7.8h,  #0
2411*c0909341SAndroid Build Coastguard Worker2:
2412*c0909341SAndroid Build Coastguard Worker        subs            w8,  w8,  #8 // each pass clears 4*64 bytes = 8 rows of 16 halfwords
2413*c0909341SAndroid Build Coastguard Worker.rept 4
2414*c0909341SAndroid Build Coastguard Worker        st1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x6], #64
2415*c0909341SAndroid Build Coastguard Worker.endr
2416*c0909341SAndroid Build Coastguard Worker        b.gt            2b // flags come from the subs above; st1 does not change them
2417*c0909341SAndroid Build Coastguard Worker
2418*c0909341SAndroid Build Coastguard Worker3:
2419*c0909341SAndroid Build Coastguard Worker.irp i, 0, 8
2420*c0909341SAndroid Build Coastguard Worker        add             x6,  x0,  #(\i) // x6 = x0 + i bytes
2421*c0909341SAndroid Build Coastguard Worker        add             x7,  sp,  #(\i*2) // x7 = scratch + i halfwords
2422*c0909341SAndroid Build Coastguard Worker        mov             x8,  #16*2 // 32 = bytes per 16-halfword scratch row
2423*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_add_vert_dct_8x32_neon
2424*c0909341SAndroid Build Coastguard Worker.endr
2425*c0909341SAndroid Build Coastguard Worker
2426*c0909341SAndroid Build Coastguard Worker        add             sp,  sp,  #1024 // release the scratch buffer
2427*c0909341SAndroid Build Coastguard Worker        ret             x15
2428*c0909341SAndroid Build Coastguard Workerendfunc
2429*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_dct_dct_32x16_8bpc_neon
// Two-pass 32x16 DCT/DCT using a 1024-byte scratch buffer on the stack.
// Pass 1 calls inv_txfm_horz_scale_dct_32x8_neon for up to two slices
// (i = 0, 8), reading from x2 + i*2 and writing to sp + i*64. The second slice
// is skipped and its scratch area zero-filled when w3 is below the immediate
// threshold 36.
// Pass 2 calls inv_txfm_add_vert_8x16_neon four times with x6 = x0 + i and
// x7 = sp + i*2, with the address of inv_dct_8h_x16_neon held in x5.
// x30 is parked in x15 because the nested bl calls overwrite it.
// NOTE(review): x0/x2/w3 look like dst, coefficient buffer and eob - confirm.
2430*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_dct_dct_32x16_8bpc_neon, export=1
2431*c0909341SAndroid Build Coastguard Worker        idct_dc         32,  16,  1 // macro defined elsewhere; presumably a DC-only shortcut - confirm
2432*c0909341SAndroid Build Coastguard Worker
2433*c0909341SAndroid Build Coastguard Worker        mov             x15, x30 // keep the return address for the final ret x15
2434*c0909341SAndroid Build Coastguard Worker        sub             sp,  sp,  #1024 // 32*16 halfwords of scratch
2435*c0909341SAndroid Build Coastguard Worker
2436*c0909341SAndroid Build Coastguard Worker        adr             x5,  inv_dct_8h_x16_neon // x5 = transform routine; presumably called indirectly by the pass 2 helper - confirm
2437*c0909341SAndroid Build Coastguard Worker
2438*c0909341SAndroid Build Coastguard Worker.irp i, 0, 8
2439*c0909341SAndroid Build Coastguard Worker        add             x6,  sp,  #(\i*32*2) // x6 = scratch + i rows of 32 halfwords
2440*c0909341SAndroid Build Coastguard Worker        add             x7,  x2,  #(\i*2) // x7 = x2 + i halfwords
2441*c0909341SAndroid Build Coastguard Worker.if \i > 0
2442*c0909341SAndroid Build Coastguard Worker        mov             w8,  #(16 - \i) // rows still unwritten; only used by the fill loop at 2:
2443*c0909341SAndroid Build Coastguard Worker        cmp             w3,  #36
2444*c0909341SAndroid Build Coastguard Worker        b.lt            1f // w3 below 36: zero-fill from x6 onwards instead
2445*c0909341SAndroid Build Coastguard Worker.endif
2446*c0909341SAndroid Build Coastguard Worker        mov             x8,  #2*16 // 32, handed to the helper in x8 (looks like a byte stride - confirm)
2447*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_horz_scale_dct_32x8_neon
2448*c0909341SAndroid Build Coastguard Worker.endr
2449*c0909341SAndroid Build Coastguard Worker        b               3f // both slices transformed, nothing to clear
2450*c0909341SAndroid Build Coastguard Worker
2451*c0909341SAndroid Build Coastguard Worker1:
2452*c0909341SAndroid Build Coastguard Worker        movi            v4.8h,  #0
2453*c0909341SAndroid Build Coastguard Worker        movi            v5.8h,  #0
2454*c0909341SAndroid Build Coastguard Worker        movi            v6.8h,  #0
2455*c0909341SAndroid Build Coastguard Worker        movi            v7.8h,  #0
2456*c0909341SAndroid Build Coastguard Worker2:
2457*c0909341SAndroid Build Coastguard Worker        subs            w8,  w8,  #4 // each pass clears 4*64 bytes = 4 rows of 32 halfwords
2458*c0909341SAndroid Build Coastguard Worker.rept 4
2459*c0909341SAndroid Build Coastguard Worker        st1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x6], #64
2460*c0909341SAndroid Build Coastguard Worker.endr
2461*c0909341SAndroid Build Coastguard Worker        b.gt            2b // flags come from the subs above; st1 does not change them
2462*c0909341SAndroid Build Coastguard Worker
2463*c0909341SAndroid Build Coastguard Worker3:
2464*c0909341SAndroid Build Coastguard Worker        mov             x8,  #32*2 // 64 = bytes per 32-halfword scratch row; set once, so the helper is expected to preserve x8
2465*c0909341SAndroid Build Coastguard Worker.irp i, 0, 8, 16, 24
2466*c0909341SAndroid Build Coastguard Worker        add             x6,  x0,  #(\i) // x6 = x0 + i bytes
2467*c0909341SAndroid Build Coastguard Worker        add             x7,  sp,  #(\i*2) // x7 = scratch + i halfwords
2468*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_add_vert_8x16_neon
2469*c0909341SAndroid Build Coastguard Worker.endr
2470*c0909341SAndroid Build Coastguard Worker
2471*c0909341SAndroid Build Coastguard Worker        add             sp,  sp,  #1024 // release the scratch buffer
2472*c0909341SAndroid Build Coastguard Worker        ret             x15
2473*c0909341SAndroid Build Coastguard Workerendfunc
2474*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_dct_dct_8x32_8bpc_neon
// Two-pass 8x32 DCT/DCT using a 512-byte scratch buffer on the stack.
// Pass 1 works on 8x8 blocks: eight vectors are loaded from x2 (64-byte step)
// and the source is zeroed, inv_dct_8h_x8_neon is applied, the result is
// rounded with srshr by 2, transposed and stored as 128 bytes of scratch.
// The loop repeats while w3 is at least the eob_8x32 threshold read for that
// block; any scratch blocks not produced (w9 tracks them) are zero-filled.
// Pass 2 is a single call to inv_txfm_add_vert_dct_8x32_neon with x6 = x0,
// x7 = sp, x8 = 16.
// x30 is parked in x15 because the nested bl calls overwrite it.
// NOTE(review): x0/x2/w3 look like dst, coefficient buffer and eob - confirm.
// NOTE(review): loop 1 has no check on w9; it relies on the last eob_8x32
// entry being larger than any w3 passed in (table not shown here).
2475*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_dct_dct_8x32_8bpc_neon, export=1
2476*c0909341SAndroid Build Coastguard Worker        idct_dc         8,   32, 2 // macro defined elsewhere; presumably a DC-only shortcut - confirm
2477*c0909341SAndroid Build Coastguard Worker
2478*c0909341SAndroid Build Coastguard Worker        mov             x15, x30 // keep the return address for the final ret x15
2479*c0909341SAndroid Build Coastguard Worker        sub             sp,  sp,  #512 // 8*32 halfwords of scratch
2480*c0909341SAndroid Build Coastguard Worker
2481*c0909341SAndroid Build Coastguard Worker        movrel          x13, eob_8x32
2482*c0909341SAndroid Build Coastguard Worker
2483*c0909341SAndroid Build Coastguard Worker        movi            v28.8h,  #0 // zeros for clearing the input, reused by the fill loop at 2:
2484*c0909341SAndroid Build Coastguard Worker        mov             x8,  #2*32 // 64-byte step between the eight loads of one block
2485*c0909341SAndroid Build Coastguard Worker        mov             w9,  #32 // counts down by 8 per block written to scratch
2486*c0909341SAndroid Build Coastguard Worker        mov             x6,  sp // x6 = scratch write pointer
2487*c0909341SAndroid Build Coastguard Worker1:
2488*c0909341SAndroid Build Coastguard Worker.irp i, 16, 17, 18, 19, 20, 21, 22, 23
2489*c0909341SAndroid Build Coastguard Worker        ld1             {v\i\().8h}, [x2]
2490*c0909341SAndroid Build Coastguard Worker        st1             {v28.8h}, [x2], x8 // clear what was just loaded, then step x2 by x8
2491*c0909341SAndroid Build Coastguard Worker.endr
2492*c0909341SAndroid Build Coastguard Worker        ldrh            w12, [x13], #2 // w12 = threshold for this block
2493*c0909341SAndroid Build Coastguard Worker        sub             x2,  x2,  x8, lsl #3 // undo the eight steps of x8 taken by the loads
2494*c0909341SAndroid Build Coastguard Worker        sub             w9,  w9,  #8
2495*c0909341SAndroid Build Coastguard Worker        add             x2,  x2,  #2*8 // move on by eight 16-bit coefficients
2496*c0909341SAndroid Build Coastguard Worker
2497*c0909341SAndroid Build Coastguard Worker        bl              inv_dct_8h_x8_neon
2498*c0909341SAndroid Build Coastguard Worker
2499*c0909341SAndroid Build Coastguard Worker.irp i, 16, 17, 18, 19, 20, 21, 22, 23
2500*c0909341SAndroid Build Coastguard Worker        srshr           v\i\().8h,  v\i\().8h,  #2 // rounding shift right by 2
2501*c0909341SAndroid Build Coastguard Worker.endr
2502*c0909341SAndroid Build Coastguard Worker
2503*c0909341SAndroid Build Coastguard Worker        transpose_8x8h  v16, v17, v18, v19, v20, v21, v22, v23, v24, v25
2504*c0909341SAndroid Build Coastguard Worker
2505*c0909341SAndroid Build Coastguard Worker        st1             {v16.8h, v17.8h, v18.8h, v19.8h}, [x6], #64
2506*c0909341SAndroid Build Coastguard Worker        cmp             w3,  w12 // tested by b.ge below; the st1 in between leaves the flags alone
2507*c0909341SAndroid Build Coastguard Worker        st1             {v20.8h, v21.8h, v22.8h, v23.8h}, [x6], #64
2508*c0909341SAndroid Build Coastguard Worker
2509*c0909341SAndroid Build Coastguard Worker        b.ge            1b // w3 >= threshold: process the next block too
2510*c0909341SAndroid Build Coastguard Worker        cbz             w9,  3f // all four blocks written, nothing to clear
2511*c0909341SAndroid Build Coastguard Worker
2512*c0909341SAndroid Build Coastguard Worker        movi            v29.8h,  #0
2513*c0909341SAndroid Build Coastguard Worker        movi            v30.8h,  #0
2514*c0909341SAndroid Build Coastguard Worker        movi            v31.8h,  #0
2515*c0909341SAndroid Build Coastguard Worker2:
2516*c0909341SAndroid Build Coastguard Worker        subs            w9,  w9,  #8 // each pass clears 2*64 bytes = one 8x8 block of scratch
2517*c0909341SAndroid Build Coastguard Worker.rept 2
2518*c0909341SAndroid Build Coastguard Worker        st1             {v28.8h,v29.8h,v30.8h,v31.8h}, [x6], #64
2519*c0909341SAndroid Build Coastguard Worker.endr
2520*c0909341SAndroid Build Coastguard Worker        b.gt            2b // flags come from the subs above; st1 does not change them
2521*c0909341SAndroid Build Coastguard Worker
2522*c0909341SAndroid Build Coastguard Worker3:
2523*c0909341SAndroid Build Coastguard Worker        mov             x6,  x0
2524*c0909341SAndroid Build Coastguard Worker        mov             x7,  sp
2525*c0909341SAndroid Build Coastguard Worker        mov             x8,  #8*2 // 16 = bytes per 8-halfword scratch row
2526*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_add_vert_dct_8x32_neon
2527*c0909341SAndroid Build Coastguard Worker
2528*c0909341SAndroid Build Coastguard Worker        add             sp,  sp,  #512 // release the scratch buffer
2529*c0909341SAndroid Build Coastguard Worker        ret             x15
2530*c0909341SAndroid Build Coastguard Workerendfunc
2531*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_dct_dct_32x8_8bpc_neon
// Two-pass 32x8 DCT/DCT using a 512-byte scratch buffer on the stack.
// Pass 1 is a single call to inv_txfm_horz_dct_32x8_neon with x6 = sp,
// x7 = x2, x8 = 16.
// Pass 2 loops four times (w9 = 0, 8, 16, 24): eight vectors are loaded from
// sp + 2*w9 with a 64-byte step, inv_dct_8h_x8_neon is applied, and the block
// is passed to load_add_store_8x8 with x6 = x0 + w9.
// x30 is parked in x15 because the nested bl calls overwrite it.
// NOTE(review): x0/x2 look like dst and coefficient buffer - confirm. Unlike
// its siblings, this function does not consult w3 after idct_dc.
2532*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_dct_dct_32x8_8bpc_neon, export=1
2533*c0909341SAndroid Build Coastguard Worker        idct_dc         32,  8,   2 // macro defined elsewhere; presumably a DC-only shortcut - confirm
2534*c0909341SAndroid Build Coastguard Worker
2535*c0909341SAndroid Build Coastguard Worker        mov             x15, x30 // keep the return address for the final ret x15
2536*c0909341SAndroid Build Coastguard Worker        sub             sp,  sp,  #512 // 32*8 halfwords of scratch
2537*c0909341SAndroid Build Coastguard Worker
2538*c0909341SAndroid Build Coastguard Worker        mov             x6,  sp
2539*c0909341SAndroid Build Coastguard Worker        mov             x7,  x2
2540*c0909341SAndroid Build Coastguard Worker        mov             x8,  #8*2 // 16, handed to the helper in x8 (looks like a byte stride - confirm)
2541*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_horz_dct_32x8_neon
2542*c0909341SAndroid Build Coastguard Worker
2543*c0909341SAndroid Build Coastguard Worker        mov             x8,  #2*32 // 64 = bytes per 32-halfword scratch row
2544*c0909341SAndroid Build Coastguard Worker        mov             w9,  #0 // offset of the current group of eight; writing w9 also zeroes the top half of x9
2545*c0909341SAndroid Build Coastguard Worker1:
2546*c0909341SAndroid Build Coastguard Worker        add             x6,  x0,  x9 // x6 = x0 + offset in bytes
2547*c0909341SAndroid Build Coastguard Worker        add             x7,  sp,  x9, lsl #1 // #(\i*2)
2548*c0909341SAndroid Build Coastguard Worker
2549*c0909341SAndroid Build Coastguard Worker.irp i, 16, 17, 18, 19, 20, 21, 22, 23
2550*c0909341SAndroid Build Coastguard Worker        ld1             {v\i\().8h}, [x7], x8
2551*c0909341SAndroid Build Coastguard Worker.endr
2552*c0909341SAndroid Build Coastguard Worker        add             w9,  w9,  #8
2553*c0909341SAndroid Build Coastguard Worker
2554*c0909341SAndroid Build Coastguard Worker        bl              inv_dct_8h_x8_neon
2555*c0909341SAndroid Build Coastguard Worker
2556*c0909341SAndroid Build Coastguard Worker        cmp             w9,  #32 // tested by b.lt after load_add_store_8x8, which therefore must leave the flags alone
2557*c0909341SAndroid Build Coastguard Worker
2558*c0909341SAndroid Build Coastguard Worker        load_add_store_8x8 x6, x7
2559*c0909341SAndroid Build Coastguard Worker
2560*c0909341SAndroid Build Coastguard Worker        b.lt            1b // more groups remain while w9 < 32
2561*c0909341SAndroid Build Coastguard Worker
2562*c0909341SAndroid Build Coastguard Worker        add             sp,  sp,  #512 // release the scratch buffer
2563*c0909341SAndroid Build Coastguard Worker        ret             x15
2564*c0909341SAndroid Build Coastguard Workerendfunc
2565*c0909341SAndroid Build Coastguard Worker
// inv_dct64_step1_neon
// In:  v16-v19 = four input vectors (8 x 16-bit lanes each)
//      x17     = pointer to 16 halfword constants, advanced by 32 bytes
//      x6      = output pointer, advanced by 128 bytes
// Out: v16-v23 stored to [x6] in register order.
// Uses v0/v1 for the constants, v2-v7 as 32-bit accumulators and v24-v31 as
// intermediates. The t-numbers in the comments are for the first of the four
// input groups listed below; the same code serves the other three.
// smull_smlal, smull_smlsl and sqrshrn_sz are macros defined elsewhere;
// presumably they form a widening multiply-accumulate pair and narrow it
// back with a rounding shift - confirm against their definitions.
2566*c0909341SAndroid Build Coastguard Workerfunction inv_dct64_step1_neon
2567*c0909341SAndroid Build Coastguard Worker        // in1/31/17/15 -> t32a/33/34a/35/60/61a/62/63a
2568*c0909341SAndroid Build Coastguard Worker        // in7/25/23/ 9 -> t56a/57/58a/59/36/37a/38/39a
2569*c0909341SAndroid Build Coastguard Worker        // in5/27/21/11 -> t40a/41/42a/43/52/53a/54/55a
2570*c0909341SAndroid Build Coastguard Worker        // in3/29/19/13 -> t48a/49/50a/51/44/45a/46/47a
2571*c0909341SAndroid Build Coastguard Worker
2572*c0909341SAndroid Build Coastguard Worker        ld1             {v0.8h, v1.8h}, [x17], #32
2573*c0909341SAndroid Build Coastguard Worker
2574*c0909341SAndroid Build Coastguard Worker        sqrdmulh        v23.8h,  v16.8h,  v0.h[1]   // t63a
2575*c0909341SAndroid Build Coastguard Worker        sqrdmulh        v16.8h,  v16.8h,  v0.h[0]   // t32a
2576*c0909341SAndroid Build Coastguard Worker        sqrdmulh        v22.8h,  v17.8h,  v0.h[2]   // t62a
2577*c0909341SAndroid Build Coastguard Worker        sqrdmulh        v17.8h,  v17.8h,  v0.h[3]   // t33a
2578*c0909341SAndroid Build Coastguard Worker        sqrdmulh        v21.8h,  v18.8h,  v0.h[5]   // t61a
2579*c0909341SAndroid Build Coastguard Worker        sqrdmulh        v18.8h,  v18.8h,  v0.h[4]   // t34a
2580*c0909341SAndroid Build Coastguard Worker        sqrdmulh        v20.8h,  v19.8h,  v0.h[6]   // t60a
2581*c0909341SAndroid Build Coastguard Worker        sqrdmulh        v19.8h,  v19.8h,  v0.h[7]   // t35a
2582*c0909341SAndroid Build Coastguard Worker
2583*c0909341SAndroid Build Coastguard Worker        sqadd           v24.8h,  v16.8h,  v17.8h    // t32
2584*c0909341SAndroid Build Coastguard Worker        sqsub           v25.8h,  v16.8h,  v17.8h    // t33
2585*c0909341SAndroid Build Coastguard Worker        sqsub           v26.8h,  v19.8h,  v18.8h    // t34
2586*c0909341SAndroid Build Coastguard Worker        sqadd           v27.8h,  v19.8h,  v18.8h    // t35
2587*c0909341SAndroid Build Coastguard Worker        sqadd           v28.8h,  v20.8h,  v21.8h    // t60
2588*c0909341SAndroid Build Coastguard Worker        sqsub           v29.8h,  v20.8h,  v21.8h    // t61
2589*c0909341SAndroid Build Coastguard Worker        sqsub           v30.8h,  v23.8h,  v22.8h    // t62
2590*c0909341SAndroid Build Coastguard Worker        sqadd           v31.8h,  v23.8h,  v22.8h    // t63
2591*c0909341SAndroid Build Coastguard Worker
2592*c0909341SAndroid Build Coastguard Worker        smull_smlal     v2,  v3,  v29, v26, v1.h[0], v1.h[1], .8h // -> t34a
2593*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v4,  v5,  v29, v26, v1.h[1], v1.h[0], .8h // -> t61a
2594*c0909341SAndroid Build Coastguard Worker        neg             v2.4s,   v2.4s              // t34a
2595*c0909341SAndroid Build Coastguard Worker        neg             v3.4s,   v3.4s              // t34a
2596*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v6,  v7,  v30, v25, v1.h[1], v1.h[0], .8h // -> t33a
2597*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v26, v2,  v3,  #12, .8h     // t34a
2598*c0909341SAndroid Build Coastguard Worker        smull_smlal     v2,  v3,  v30, v25, v1.h[0], v1.h[1], .8h // -> t62a
2599*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v29, v4,  v5,  #12, .8h     // t61a
2600*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v25, v6,  v7,  #12, .8h     // t33a
2601*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v30, v2,  v3,  #12, .8h     // t62a
2602*c0909341SAndroid Build Coastguard Worker
2603*c0909341SAndroid Build Coastguard Worker        sqadd           v16.8h,  v24.8h,  v27.8h    // t32a
2604*c0909341SAndroid Build Coastguard Worker        sqsub           v19.8h,  v24.8h,  v27.8h    // t35a
2605*c0909341SAndroid Build Coastguard Worker        sqadd           v17.8h,  v25.8h,  v26.8h    // t33
2606*c0909341SAndroid Build Coastguard Worker        sqsub           v18.8h,  v25.8h,  v26.8h    // t34
2607*c0909341SAndroid Build Coastguard Worker        sqsub           v20.8h,  v31.8h,  v28.8h    // t60a
2608*c0909341SAndroid Build Coastguard Worker        sqadd           v23.8h,  v31.8h,  v28.8h    // t63a
2609*c0909341SAndroid Build Coastguard Worker        sqsub           v21.8h,  v30.8h,  v29.8h    // t61
2610*c0909341SAndroid Build Coastguard Worker        sqadd           v22.8h,  v30.8h,  v29.8h    // t62
2611*c0909341SAndroid Build Coastguard Worker
2612*c0909341SAndroid Build Coastguard Worker        smull_smlal     v2,  v3,  v21, v18, v1.h[2], v1.h[3], .8h // -> t61a
2613*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v4,  v5,  v21, v18, v1.h[3], v1.h[2], .8h // -> t34a
2614*c0909341SAndroid Build Coastguard Worker        smull_smlal     v6,  v7,  v20, v19, v1.h[2], v1.h[3], .8h // -> t60
2615*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v21, v2,  v3,  #12, .8h     // t61a
2616*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v18, v4,  v5,  #12, .8h     // t34a
2617*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v2,  v3,  v20, v19, v1.h[3], v1.h[2], .8h // -> t35
2618*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v20, v6,  v7,  #12, .8h     // t60
2619*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v19, v2,  v3,  #12, .8h     // t35
2620*c0909341SAndroid Build Coastguard Worker
2621*c0909341SAndroid Build Coastguard Worker        st1             {v16.8h, v17.8h, v18.8h, v19.8h}, [x6], #64
2622*c0909341SAndroid Build Coastguard Worker        st1             {v20.8h, v21.8h, v22.8h, v23.8h}, [x6], #64
2623*c0909341SAndroid Build Coastguard Worker
2624*c0909341SAndroid Build Coastguard Worker        ret
2625*c0909341SAndroid Build Coastguard Workerendfunc
2626*c0909341SAndroid Build Coastguard Worker
// inv_dct64_step2_neon
// In:  x6 = low pointer, x9 = high pointer into the same buffer of 16-byte
//      vectors (NOTE(review): presumably the buffer written by
//      inv_dct64_step1_neon - confirm at the call site).
// Each iteration loads four vectors relative to x6 and four relative to x9
// (byte offsets 0, 128, 256 and 384), combines them and stores eight results
// back to the same offsets. x6 then moves up and x9 down by 16 bytes; the
// loop repeats while x6 < x9 (signed compare).
// Loads the first four halfwords of idct_coeffs into v0; clobbers x16,
// v2-v7 and v16-v31.
// smull_smlal, smull_smlsl and sqrshrn_sz are macros defined elsewhere.
2627*c0909341SAndroid Build Coastguard Workerfunction inv_dct64_step2_neon
2628*c0909341SAndroid Build Coastguard Worker        movrel          x16, idct_coeffs
2629*c0909341SAndroid Build Coastguard Worker        ld1             {v0.4h}, [x16]
2630*c0909341SAndroid Build Coastguard Worker1:
2631*c0909341SAndroid Build Coastguard Worker        // t32a/33/34a/35/60/61a/62/63a
2632*c0909341SAndroid Build Coastguard Worker        // t56a/57/58a/59/36/37a/38/39a
2633*c0909341SAndroid Build Coastguard Worker        // t40a/41/42a/43/52/53a/54/55a
2634*c0909341SAndroid Build Coastguard Worker        // t48a/49/50a/51/44/45a/46/47a
2635*c0909341SAndroid Build Coastguard Worker        ldr             q16, [x6, #2*8*0]  // t32a
2636*c0909341SAndroid Build Coastguard Worker        ldr             q17, [x9, #2*8*8]  // t39a
2637*c0909341SAndroid Build Coastguard Worker        ldr             q18, [x9, #2*8*0]  // t63a
2638*c0909341SAndroid Build Coastguard Worker        ldr             q19, [x6, #2*8*8]  // t56a
2639*c0909341SAndroid Build Coastguard Worker        ldr             q20, [x6, #2*8*16] // t40a
2640*c0909341SAndroid Build Coastguard Worker        ldr             q21, [x9, #2*8*24] // t47a
2641*c0909341SAndroid Build Coastguard Worker        ldr             q22, [x9, #2*8*16] // t55a
2642*c0909341SAndroid Build Coastguard Worker        ldr             q23, [x6, #2*8*24] // t48a
2643*c0909341SAndroid Build Coastguard Worker
2644*c0909341SAndroid Build Coastguard Worker        sqadd           v24.8h,  v16.8h, v17.8h // t32
2645*c0909341SAndroid Build Coastguard Worker        sqsub           v25.8h,  v16.8h, v17.8h // t39
2646*c0909341SAndroid Build Coastguard Worker        sqadd           v26.8h,  v18.8h, v19.8h // t63
2647*c0909341SAndroid Build Coastguard Worker        sqsub           v27.8h,  v18.8h, v19.8h // t56
2648*c0909341SAndroid Build Coastguard Worker        sqsub           v28.8h,  v21.8h, v20.8h // t40
2649*c0909341SAndroid Build Coastguard Worker        sqadd           v29.8h,  v21.8h, v20.8h // t47
2650*c0909341SAndroid Build Coastguard Worker        sqadd           v30.8h,  v23.8h, v22.8h // t48
2651*c0909341SAndroid Build Coastguard Worker        sqsub           v31.8h,  v23.8h, v22.8h // t55
2652*c0909341SAndroid Build Coastguard Worker
2653*c0909341SAndroid Build Coastguard Worker        smull_smlal     v2,  v3,  v27, v25, v0.h[3], v0.h[2], .8h // -> t56a
2654*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v4,  v5,  v27, v25, v0.h[2], v0.h[3], .8h // -> t39a
2655*c0909341SAndroid Build Coastguard Worker        smull_smlal     v6,  v7,  v31, v28, v0.h[3], v0.h[2], .8h // -> t40a
2656*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v25, v2,  v3,  #12, .8h     // t56a
2657*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v27, v4,  v5,  #12, .8h     // t39a
2658*c0909341SAndroid Build Coastguard Worker        neg             v6.4s,   v6.4s              // t40a
2659*c0909341SAndroid Build Coastguard Worker        neg             v7.4s,   v7.4s              // t40a
2660*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v2,  v3,  v31, v28, v0.h[2], v0.h[3], .8h // -> t55a
2661*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v31, v6,  v7,  #12, .8h     // t40a
2662*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v28, v2,  v3,  #12, .8h     // t55a
2663*c0909341SAndroid Build Coastguard Worker
2664*c0909341SAndroid Build Coastguard Worker        sqadd           v16.8h,  v24.8h,  v29.8h    // t32a
2665*c0909341SAndroid Build Coastguard Worker        sqsub           v19.8h,  v24.8h,  v29.8h    // t47a
2666*c0909341SAndroid Build Coastguard Worker        sqadd           v17.8h,  v27.8h,  v31.8h    // t39
2667*c0909341SAndroid Build Coastguard Worker        sqsub           v18.8h,  v27.8h,  v31.8h    // t40
2668*c0909341SAndroid Build Coastguard Worker        sqsub           v20.8h,  v26.8h,  v30.8h    // t48a
2669*c0909341SAndroid Build Coastguard Worker        sqadd           v23.8h,  v26.8h,  v30.8h    // t63a
2670*c0909341SAndroid Build Coastguard Worker        sqsub           v21.8h,  v25.8h,  v28.8h    // t55
2671*c0909341SAndroid Build Coastguard Worker        sqadd           v22.8h,  v25.8h,  v28.8h    // t56
2672*c0909341SAndroid Build Coastguard Worker
2673*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v2,  v3,  v21, v18, v0.h[0], v0.h[0], .8h // -> t40a
2674*c0909341SAndroid Build Coastguard Worker        smull_smlal     v4,  v5,  v21, v18, v0.h[0], v0.h[0], .8h // -> t55a
2675*c0909341SAndroid Build Coastguard Worker        smull_smlsl     v6,  v7,  v20, v19, v0.h[0], v0.h[0], .8h // -> t47
2676*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v18, v2,  v3,  #12, .8h     // t40a
2677*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v21, v4,  v5,  #12, .8h     // t55a
2678*c0909341SAndroid Build Coastguard Worker        smull_smlal     v2,  v3,  v20, v19, v0.h[0], v0.h[0], .8h // -> t48
2679*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v19, v6,  v7,  #12, .8h     // t47
2680*c0909341SAndroid Build Coastguard Worker        sqrshrn_sz      v20, v2,  v3,  #12, .8h     // t48
2681*c0909341SAndroid Build Coastguard Worker
2682*c0909341SAndroid Build Coastguard Worker        str             q16, [x6, #2*8*0]  // t32a
2683*c0909341SAndroid Build Coastguard Worker        str             q17, [x9, #2*8*0]  // t39
2684*c0909341SAndroid Build Coastguard Worker        str             q18, [x6, #2*8*8]  // t40a
2685*c0909341SAndroid Build Coastguard Worker        str             q19, [x9, #2*8*8]  // t47
2686*c0909341SAndroid Build Coastguard Worker        str             q20, [x6, #2*8*16] // t48
2687*c0909341SAndroid Build Coastguard Worker        str             q21, [x9, #2*8*16] // t55a
2688*c0909341SAndroid Build Coastguard Worker        str             q22, [x6, #2*8*24] // t56
2689*c0909341SAndroid Build Coastguard Worker        str             q23, [x9, #2*8*24] // t63a
2690*c0909341SAndroid Build Coastguard Worker
2691*c0909341SAndroid Build Coastguard Worker        add             x6,  x6,  #2*8 // low pointer up by one vector
2692*c0909341SAndroid Build Coastguard Worker        sub             x9,  x9,  #2*8 // high pointer down by one vector
2693*c0909341SAndroid Build Coastguard Worker        cmp             x6,  x9
2694*c0909341SAndroid Build Coastguard Worker        b.lt            1b // continue until the two pointers meet or cross
2695*c0909341SAndroid Build Coastguard Worker        ret
2696*c0909341SAndroid Build Coastguard Workerendfunc
2697*c0909341SAndroid Build Coastguard Worker
// load8 src, strd, zero, clear
// Loads v16.8h .. v23.8h from [src], advancing src by strd after each vector.
// When clear is nonzero at assembly time, every location is overwritten with
// the register given as zero right after it has been read; otherwise the
// zero argument is unused.
2698*c0909341SAndroid Build Coastguard Worker.macro load8 src, strd, zero, clear
2699*c0909341SAndroid Build Coastguard Worker.irp i, v16.8h, v17.8h, v18.8h, v19.8h, v20.8h, v21.8h, v22.8h, v23.8h
2700*c0909341SAndroid Build Coastguard Worker.if \clear
2701*c0909341SAndroid Build Coastguard Worker        ld1             {\i}, [\src]
2702*c0909341SAndroid Build Coastguard Worker        st1             {\zero}, [\src], \strd
2703*c0909341SAndroid Build Coastguard Worker.else
2704*c0909341SAndroid Build Coastguard Worker        ld1             {\i}, [\src], \strd
2705*c0909341SAndroid Build Coastguard Worker.endif
2706*c0909341SAndroid Build Coastguard Worker.endr
2707*c0909341SAndroid Build Coastguard Worker.endm
2708*c0909341SAndroid Build Coastguard Worker
// store16 dst
// Store v16-v31 (as .8h) to consecutive 16 byte slots starting at [dst].
// dst is post-incremented by 16 per register, i.e. by 256 bytes in total.
2709*c0909341SAndroid Build Coastguard Worker.macro store16 dst
2710*c0909341SAndroid Build Coastguard Worker.irp i, v16.8h, v17.8h, v18.8h, v19.8h, v20.8h, v21.8h, v22.8h, v23.8h, v24.8h, v25.8h, v26.8h, v27.8h, v28.8h, v29.8h, v30.8h, v31.8h
2711*c0909341SAndroid Build Coastguard Worker        st1             {\i}, [\dst], #16
2712*c0909341SAndroid Build Coastguard Worker.endr
2713*c0909341SAndroid Build Coastguard Worker.endm
2714*c0909341SAndroid Build Coastguard Worker
// clear_upper8
// Set v24-v31 to zero. Used after load8 so that a 16 input transform sees
// only the eight vectors in v16-v23 as nonzero inputs.
2715*c0909341SAndroid Build Coastguard Worker.macro clear_upper8
2716*c0909341SAndroid Build Coastguard Worker.irp i, v24.8h, v25.8h, v26.8h, v27.8h, v28.8h, v29.8h, v30.8h, v31.8h
2717*c0909341SAndroid Build Coastguard Worker        movi            \i,  #0
2718*c0909341SAndroid Build Coastguard Worker.endr
2719*c0909341SAndroid Build Coastguard Worker.endm
2720*c0909341SAndroid Build Coastguard Worker
// movi_if reg, val, cond
// Emit "movi reg, val" only when cond is nonzero at assembly time;
// otherwise emit nothing.
2721*c0909341SAndroid Build Coastguard Worker.macro movi_if reg, val, cond
2722*c0909341SAndroid Build Coastguard Worker.if \cond
2723*c0909341SAndroid Build Coastguard Worker        movi            \reg, \val
2724*c0909341SAndroid Build Coastguard Worker.endif
2725*c0909341SAndroid Build Coastguard Worker.endm
2726*c0909341SAndroid Build Coastguard Worker
// movdup_if reg, gpr, val, cond
// When cond is nonzero at assembly time, broadcast the constant val into the
// vector operand reg by way of the general purpose register gpr
// (mov gpr, val; dup reg, gpr). gpr is clobbered in that case.
// Emits nothing when cond is zero.
2727*c0909341SAndroid Build Coastguard Worker.macro movdup_if reg, gpr, val, cond
2728*c0909341SAndroid Build Coastguard Worker.if \cond
2729*c0909341SAndroid Build Coastguard Worker        mov             \gpr, \val
2730*c0909341SAndroid Build Coastguard Worker        dup             \reg, \gpr
2731*c0909341SAndroid Build Coastguard Worker.endif
2732*c0909341SAndroid Build Coastguard Worker.endm
2733*c0909341SAndroid Build Coastguard Worker
// st1_if regs, dst, cond
// Emit "st1 regs, dst" only when cond is nonzero at assembly time.
// regs is a braced register list and dst a bracketed address, as in the
// callers below, e.g. st1_if {v7.8h}, [x7], cond.
2734*c0909341SAndroid Build Coastguard Worker.macro st1_if regs, dst, cond
2735*c0909341SAndroid Build Coastguard Worker.if \cond
2736*c0909341SAndroid Build Coastguard Worker        st1             \regs, \dst
2737*c0909341SAndroid Build Coastguard Worker.endif
2738*c0909341SAndroid Build Coastguard Worker.endm
2739*c0909341SAndroid Build Coastguard Worker
// str_if reg, dst, cond
// Emit "str reg, dst" only when cond is nonzero at assembly time.
// For addresses without a comma, e.g. str_if q7, [x11], cond; use stroff_if
// for the register-offset form.
2740*c0909341SAndroid Build Coastguard Worker.macro str_if reg, dst, cond
2741*c0909341SAndroid Build Coastguard Worker.if \cond
2742*c0909341SAndroid Build Coastguard Worker        str             \reg, \dst
2743*c0909341SAndroid Build Coastguard Worker.endif
2744*c0909341SAndroid Build Coastguard Worker.endm
2745*c0909341SAndroid Build Coastguard Worker
// stroff_if reg, dst, dstoff, cond
// Emit "str reg, dst, dstoff" only when cond is nonzero at assembly time.
// Callers write a register-offset address such as [x10, x8]; the comma inside
// the brackets splits it into the two macro arguments dst ("[x10") and dstoff
// ("x8]"), which are joined again with a comma here.
2746*c0909341SAndroid Build Coastguard Worker.macro stroff_if reg, dst, dstoff, cond
2747*c0909341SAndroid Build Coastguard Worker.if \cond
2748*c0909341SAndroid Build Coastguard Worker        str             \reg, \dst, \dstoff
2749*c0909341SAndroid Build Coastguard Worker.endif
2750*c0909341SAndroid Build Coastguard Worker.endm
2751*c0909341SAndroid Build Coastguard Worker
// scale_if cond, c, r0, r1, r2, r3, r4, r5, r6, r7
// When cond is nonzero at assembly time, invoke scale_input (defined
// elsewhere in this file) with the .8h size suffix, the coefficient operand c
// and the given registers. Callers in this file pass either eight registers
// or only r0-r3, leaving r4-r7 empty.
2752*c0909341SAndroid Build Coastguard Worker.macro scale_if cond, c, r0, r1, r2, r3, r4, r5, r6, r7
2753*c0909341SAndroid Build Coastguard Worker.if \cond
2754*c0909341SAndroid Build Coastguard Worker        scale_input     .8h, \c, \r0, \r1, \r2, \r3, \r4, \r5, \r6, \r7
2755*c0909341SAndroid Build Coastguard Worker.endif
2756*c0909341SAndroid Build Coastguard Worker.endm
2757*c0909341SAndroid Build Coastguard Worker
// def_dct64_func suffix, clear=0, scale=0
// Emits the function inv_txfm_dct<suffix>_8h_x64_neon. It processes 8 lanes of
// int16 at a time and chains inv_dct_8h_x16_neon, inv_dct32_odd_8h_x16_neon,
// four calls of inv_dct64_step1_neon and one call of inv_dct64_step2_neon.
//
// Interface, as read from the body:
//   x7  in   pointer to input vector in0; every load reads 8 x int16
//   x8  in   byte distance between input vectors in(k) and in(k+1)
//   sp  out  work area; 32 vectors are written at sp..sp+16*31 by the code
//            here, and x6 (moved past them) is handed to the step1/step2
//            helpers
//   x14      keeps the return address while x30 is reused by bl
// Only in0..in31 are read. Modified here: x6-x11, x14, x17, v2-v7, v16-v31,
// plus w16 and v0 when scale=1; the called helpers may clobber more.
//
// clear=1  every input vector is overwritten with zeros right after it is read
// scale=1  inputs go through scale_input with the constant 2896*8 before each
//          sub-transform
2758*c0909341SAndroid Build Coastguard Worker.macro def_dct64_func suffix, clear=0, scale=0
2759*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_dct\suffix\()_8h_x64_neon, export=1
2760*c0909341SAndroid Build Coastguard Worker        mov             x14, x30
        // x6 = output cursor, starting at sp; x8 = 4 * input stride
2761*c0909341SAndroid Build Coastguard Worker        mov             x6,  sp
2762*c0909341SAndroid Build Coastguard Worker        lsl             x8,  x8,  #2
2763*c0909341SAndroid Build Coastguard Worker
        // First group: in0, in4, ..., in28 go to v16-v23, v24-v31 are zero.
2764*c0909341SAndroid Build Coastguard Worker        movdup_if       v0.4h, w16, #2896*8, \scale
2765*c0909341SAndroid Build Coastguard Worker        movi_if         v7.8h,  #0, \clear
2766*c0909341SAndroid Build Coastguard Worker        load8           x7,  x8,  v7.8h, \clear
2767*c0909341SAndroid Build Coastguard Worker        clear_upper8
        // Undo the 8 post-increments, then step forward by half of x8
        // (2 * input stride) so that x7 points at in2.
2768*c0909341SAndroid Build Coastguard Worker        sub             x7,  x7,  x8, lsl #3
2769*c0909341SAndroid Build Coastguard Worker        add             x7,  x7,  x8, lsr #1
2770*c0909341SAndroid Build Coastguard Worker        scale_if        \scale, v0.h[0], v16, v17, v18, v19, v20, v21, v22, v23
2771*c0909341SAndroid Build Coastguard Worker
2772*c0909341SAndroid Build Coastguard Worker        bl              inv_dct_8h_x16_neon
2773*c0909341SAndroid Build Coastguard Worker
        // Park the 16 results at [x6]; x6 advances by 16 vectors.
2774*c0909341SAndroid Build Coastguard Worker        store16         x6
2775*c0909341SAndroid Build Coastguard Worker
        // Second group: in2, in6, ..., in30 go to v16-v23, v24-v31 are zero.
2776*c0909341SAndroid Build Coastguard Worker        movdup_if       v0.4h, w16, #2896*8, \scale
2777*c0909341SAndroid Build Coastguard Worker        movi_if         v7.8h,  #0, \clear
2778*c0909341SAndroid Build Coastguard Worker        load8           x7,  x8,  v7.8h, \clear
2779*c0909341SAndroid Build Coastguard Worker        clear_upper8
        // Undo the post-increments (x7 = in2), halve x8 to 2 * input stride,
        // then step back by one input stride so that x7 points at in1.
2780*c0909341SAndroid Build Coastguard Worker        sub             x7,  x7,  x8, lsl #3
2781*c0909341SAndroid Build Coastguard Worker        lsr             x8,  x8,  #1
2782*c0909341SAndroid Build Coastguard Worker        sub             x7,  x7,  x8, lsr #1
2783*c0909341SAndroid Build Coastguard Worker        scale_if        \scale, v0.h[0], v16, v17, v18, v19, v20, v21, v22, v23
2784*c0909341SAndroid Build Coastguard Worker
2785*c0909341SAndroid Build Coastguard Worker        bl              inv_dct32_odd_8h_x16_neon
2786*c0909341SAndroid Build Coastguard Worker
        // x6 still points just past the 16 parked vectors (assuming the
        // helper above leaves x6 alone): x10 = slot 31 of a 32 slot area,
        // x6 = slot 0, and x9 = -16 makes x10 walk downwards.
2787*c0909341SAndroid Build Coastguard Worker        add             x10, x6,  #16*15
2788*c0909341SAndroid Build Coastguard Worker        sub             x6,  x6,  #16*16
2789*c0909341SAndroid Build Coastguard Worker
2790*c0909341SAndroid Build Coastguard Worker        mov             x9,  #-16
2791*c0909341SAndroid Build Coastguard Worker
        // store_addsub r0, r1, r2, r3
        // Load four parked vectors a0..a3 from [x6], write a(i) + r(i) back
        // into the same four slots (x6 ends four slots further on) and write
        // a(i) - r(i) to [x10], which moves down one slot per store.
        // v2-v7 are temporaries; r0-r3 are overwritten with the differences.
2792*c0909341SAndroid Build Coastguard Worker.macro store_addsub r0, r1, r2, r3
2793*c0909341SAndroid Build Coastguard Worker        ld1             {v2.8h}, [x6], #16
2794*c0909341SAndroid Build Coastguard Worker        ld1             {v3.8h}, [x6], #16
2795*c0909341SAndroid Build Coastguard Worker        sqadd           v6.8h,  v2.8h,  \r0
2796*c0909341SAndroid Build Coastguard Worker        sqsub           \r0,    v2.8h,  \r0
2797*c0909341SAndroid Build Coastguard Worker        ld1             {v4.8h}, [x6], #16
2798*c0909341SAndroid Build Coastguard Worker        sqadd           v7.8h,  v3.8h,  \r1
2799*c0909341SAndroid Build Coastguard Worker        sqsub           \r1,    v3.8h,  \r1
2800*c0909341SAndroid Build Coastguard Worker        ld1             {v5.8h}, [x6], #16
2801*c0909341SAndroid Build Coastguard Worker        sqadd           v2.8h,  v4.8h,  \r2
2802*c0909341SAndroid Build Coastguard Worker        sub             x6,  x6,  #16*4
2803*c0909341SAndroid Build Coastguard Worker        sqsub           \r2,    v4.8h,  \r2
2804*c0909341SAndroid Build Coastguard Worker        st1             {v6.8h}, [x6], #16
2805*c0909341SAndroid Build Coastguard Worker        st1             {\r0},   [x10], x9
2806*c0909341SAndroid Build Coastguard Worker        sqadd           v3.8h,  v5.8h,  \r3
2807*c0909341SAndroid Build Coastguard Worker        sqsub           \r3,    v5.8h,  \r3
2808*c0909341SAndroid Build Coastguard Worker        st1             {v7.8h}, [x6], #16
2809*c0909341SAndroid Build Coastguard Worker        st1             {\r1},   [x10], x9
2810*c0909341SAndroid Build Coastguard Worker        st1             {v2.8h}, [x6], #16
2811*c0909341SAndroid Build Coastguard Worker        st1             {\r2},   [x10], x9
2812*c0909341SAndroid Build Coastguard Worker        st1             {v3.8h}, [x6], #16
2813*c0909341SAndroid Build Coastguard Worker        st1             {\r3},   [x10], x9
2814*c0909341SAndroid Build Coastguard Worker.endm
        // v31 is combined with slot 0, v30 with slot 1, ... v16 with slot 15;
        // the sums stay in slots 0-15 and the differences fill slots 31
        // down to 16.
2815*c0909341SAndroid Build Coastguard Worker        store_addsub    v31.8h, v30.8h, v29.8h, v28.8h
2816*c0909341SAndroid Build Coastguard Worker        store_addsub    v27.8h, v26.8h, v25.8h, v24.8h
2817*c0909341SAndroid Build Coastguard Worker        store_addsub    v23.8h, v22.8h, v21.8h, v20.8h
2818*c0909341SAndroid Build Coastguard Worker        store_addsub    v19.8h, v18.8h, v17.8h, v16.8h
2819*c0909341SAndroid Build Coastguard Worker.purgem store_addsub
2820*c0909341SAndroid Build Coastguard Worker
        // x6 is 16 slots in after the four store_addsub calls; skip the other
        // 16 so that it points just past the 32 slots filled above.
2821*c0909341SAndroid Build Coastguard Worker        add             x6,  x6,  #2*8*16
2822*c0909341SAndroid Build Coastguard Worker
        // Odd inputs, four per inv_dct64_step1_neon call. x7 points at in1
        // and x8 is 2 * input stride, so "offset k" below addresses in(2k+1).
        // x9, x10 and x11 are extra pointers into the same input column.
2823*c0909341SAndroid Build Coastguard Worker        movrel          x17, idct64_coeffs
2824*c0909341SAndroid Build Coastguard Worker        movdup_if       v0.4h, w16, #2896*8, \scale
2825*c0909341SAndroid Build Coastguard Worker        movi_if         v7.8h,  #0, \clear
2826*c0909341SAndroid Build Coastguard Worker        add             x9,  x7,  x8, lsl #4 // offset 16
2827*c0909341SAndroid Build Coastguard Worker        add             x10, x7,  x8, lsl #3 // offset 8
2828*c0909341SAndroid Build Coastguard Worker        sub             x9,  x9,  x8         // offset 15
2829*c0909341SAndroid Build Coastguard Worker        sub             x11, x10, x8         // offset 7
2830*c0909341SAndroid Build Coastguard Worker        ld1             {v16.8h}, [x7]  // in1  (offset 0)
2831*c0909341SAndroid Build Coastguard Worker        ld1             {v17.8h}, [x9]  // in31 (offset 15)
2832*c0909341SAndroid Build Coastguard Worker        ld1             {v18.8h}, [x10] // in17 (offset 8)
2833*c0909341SAndroid Build Coastguard Worker        ld1             {v19.8h}, [x11] // in15 (offset 7)
2834*c0909341SAndroid Build Coastguard Worker        st1_if          {v7.8h}, [x7],  \clear
2835*c0909341SAndroid Build Coastguard Worker        st1_if          {v7.8h}, [x9],  \clear
2836*c0909341SAndroid Build Coastguard Worker        st1_if          {v7.8h}, [x10], \clear
2837*c0909341SAndroid Build Coastguard Worker        st1_if          {v7.8h}, [x11], \clear
2838*c0909341SAndroid Build Coastguard Worker        scale_if        \scale, v0.h[0], v16, v17, v18, v19
2839*c0909341SAndroid Build Coastguard Worker        bl              inv_dct64_step1_neon
        // v0 and v7 are set up again before every group, since the helper
        // call in between is not assumed to preserve them.
2840*c0909341SAndroid Build Coastguard Worker        movdup_if       v0.4h, w16, #2896*8, \scale
2841*c0909341SAndroid Build Coastguard Worker        movi_if         v7.8h,  #0, \clear
2842*c0909341SAndroid Build Coastguard Worker        add             x7,  x7,  x8, lsl #2 // offset 4
2843*c0909341SAndroid Build Coastguard Worker        sub             x9,  x9,  x8, lsl #2 // offset 11
2844*c0909341SAndroid Build Coastguard Worker        sub             x10, x7,  x8         // offset 3
2845*c0909341SAndroid Build Coastguard Worker        add             x11, x9,  x8         // offset 12
2846*c0909341SAndroid Build Coastguard Worker        ld1             {v16.8h}, [x10] // in7  (offset 3)
2847*c0909341SAndroid Build Coastguard Worker        ld1             {v17.8h}, [x11] // in25 (offset 12)
2848*c0909341SAndroid Build Coastguard Worker        ld1             {v18.8h}, [x9]  // in23 (offset 11)
2849*c0909341SAndroid Build Coastguard Worker        ld1             {v19.8h}, [x7]  // in9  (offset 4)
2850*c0909341SAndroid Build Coastguard Worker        st1_if          {v7.8h}, [x7],  \clear
2851*c0909341SAndroid Build Coastguard Worker        st1_if          {v7.8h}, [x9],  \clear
2852*c0909341SAndroid Build Coastguard Worker        st1_if          {v7.8h}, [x10], \clear
2853*c0909341SAndroid Build Coastguard Worker        st1_if          {v7.8h}, [x11], \clear
2854*c0909341SAndroid Build Coastguard Worker        scale_if        \scale, v0.h[0], v16, v17, v18, v19
2855*c0909341SAndroid Build Coastguard Worker        bl              inv_dct64_step1_neon
        // The pointers set up here serve both remaining groups: this group
        // reads offsets 2, 13, 10 and 5, the last one 1, 14, 9 and 6, using
        // the register-offset form [xN, x8] for the "+1" neighbours.
2856*c0909341SAndroid Build Coastguard Worker        movdup_if       v0.4h, w16, #2896*8, \scale
2857*c0909341SAndroid Build Coastguard Worker        movi_if         v7.8h,  #0, \clear
2858*c0909341SAndroid Build Coastguard Worker        sub             x10, x10, x8, lsl #1 // offset 1
2859*c0909341SAndroid Build Coastguard Worker        sub             x9,  x9,  x8, lsl #1 // offset 9
2860*c0909341SAndroid Build Coastguard Worker        add             x7,  x7,  x8         // offset 5
2861*c0909341SAndroid Build Coastguard Worker        add             x11, x11, x8         // offset 13
2862*c0909341SAndroid Build Coastguard Worker        ldr             q16, [x10, x8] // in5  (offset 2)
2863*c0909341SAndroid Build Coastguard Worker        ldr             q17, [x11]     // in27 (offset 13)
2864*c0909341SAndroid Build Coastguard Worker        ldr             q18, [x9,  x8] // in21 (offset 10)
2865*c0909341SAndroid Build Coastguard Worker        ldr             q19, [x7]      // in11 (offset 5)
2866*c0909341SAndroid Build Coastguard Worker        stroff_if       q7,  [x10, x8], \clear
2867*c0909341SAndroid Build Coastguard Worker        str_if          q7,  [x11],     \clear
2868*c0909341SAndroid Build Coastguard Worker        stroff_if       q7,  [x9,  x8], \clear
2869*c0909341SAndroid Build Coastguard Worker        str_if          q7,  [x7],      \clear
2870*c0909341SAndroid Build Coastguard Worker        scale_if        \scale, v0.h[0], v16, v17, v18, v19
2871*c0909341SAndroid Build Coastguard Worker        bl              inv_dct64_step1_neon
2872*c0909341SAndroid Build Coastguard Worker        movdup_if       v0.4h, w16, #2896*8, \scale
2873*c0909341SAndroid Build Coastguard Worker        movi_if         v7.8h,  #0, \clear
2874*c0909341SAndroid Build Coastguard Worker        ldr             q16, [x10]     // in3  (offset 1)
2875*c0909341SAndroid Build Coastguard Worker        ldr             q17, [x11, x8] // in29 (offset 14)
2876*c0909341SAndroid Build Coastguard Worker        ldr             q18, [x9]      // in19 (offset 9)
2877*c0909341SAndroid Build Coastguard Worker        ldr             q19, [x7,  x8] // in13 (offset 6)
2878*c0909341SAndroid Build Coastguard Worker        str_if          q7,  [x10],     \clear
2879*c0909341SAndroid Build Coastguard Worker        stroff_if       q7,  [x11, x8], \clear
2880*c0909341SAndroid Build Coastguard Worker        str_if          q7,  [x9],      \clear
2881*c0909341SAndroid Build Coastguard Worker        stroff_if       q7,  [x7,  x8], \clear
2882*c0909341SAndroid Build Coastguard Worker        scale_if        \scale, v0.h[0], v16, v17, v18, v19
2883*c0909341SAndroid Build Coastguard Worker        bl              inv_dct64_step1_neon
2884*c0909341SAndroid Build Coastguard Worker
        // Move x6 back by 32 vectors and set x9 seven vectors above it as the
        // two pointers for inv_dct64_step2_neon.
        // NOTE(review): this presumes the four step1 calls advanced x6 by 32
        // vectors in total - confirm against inv_dct64_step1_neon.
2885*c0909341SAndroid Build Coastguard Worker        sub             x6,  x6,  #2*8*32
2886*c0909341SAndroid Build Coastguard Worker        add             x9,  x6,  #2*8*7
2887*c0909341SAndroid Build Coastguard Worker
2888*c0909341SAndroid Build Coastguard Worker        bl              inv_dct64_step2_neon
2889*c0909341SAndroid Build Coastguard Worker
2890*c0909341SAndroid Build Coastguard Worker        ret             x14
2891*c0909341SAndroid Build Coastguard Workerendfunc
2892*c0909341SAndroid Build Coastguard Worker.endm
2893*c0909341SAndroid Build Coastguard Worker
// Instantiate the three variants used in this file:
//   inv_txfm_dct_8h_x64_neon              inputs kept, no scaling
//   inv_txfm_dct_clear_8h_x64_neon        inputs zeroed after reading
//   inv_txfm_dct_clear_scale_8h_x64_neon  inputs zeroed and scaled
2894*c0909341SAndroid Build Coastguard Workerdef_dct64_func
2895*c0909341SAndroid Build Coastguard Workerdef_dct64_func _clear, clear=1
2896*c0909341SAndroid Build Coastguard Workerdef_dct64_func _clear_scale, clear=1, scale=1
2897*c0909341SAndroid Build Coastguard Worker
2898*c0909341SAndroid Build Coastguard Worker
// inv_txfm_horz_dct_64x8_neon
// Takes the 64 vectors (8 x int16 each) in the work area at sp, forms the
// sum and difference of vector k and vector 63-k, transposes, applies a
// rounding shift and writes an 8 row x 64 column block of int16.
//   x6  in: address of the first output row; rows are 2*64 bytes apart
//   w12 in: shift count for srshl, broadcast to all lanes; callers in this
//           file pass -2 or -1, which srshl treats as a rounding right shift
//   sp  in: the 64 input vectors
// Sums are written left to right starting at x6; differences are written
// lane-reversed, right to left, starting at column 56 of the same rows.
// Modifies x6-x11, x14, v0-v3, v7 and v16-v31; v4/v5 are handed to
// transpose_8x8h (presumably as temporaries). Returns through x14.
2899*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_horz_dct_64x8_neon
2900*c0909341SAndroid Build Coastguard Worker        mov             x14, x30
2901*c0909341SAndroid Build Coastguard Worker
        // x7 reads vectors upwards from slot 0, x8 reads downwards starting
        // with slots 60-63 (x11 = -4 slots). x9 = column 56 of the first
        // output row, x10 = output row pitch in bytes.
2902*c0909341SAndroid Build Coastguard Worker        mov             x7,  sp
2903*c0909341SAndroid Build Coastguard Worker        add             x8,  sp,  #2*8*(64 - 4)
2904*c0909341SAndroid Build Coastguard Worker        add             x9,  x6,  #2*56
2905*c0909341SAndroid Build Coastguard Worker        mov             x10, #2*64
2906*c0909341SAndroid Build Coastguard Worker        mov             x11, #-2*8*4
2907*c0909341SAndroid Build Coastguard Worker
2908*c0909341SAndroid Build Coastguard Worker        dup             v7.8h,  w12
2909*c0909341SAndroid Build Coastguard Worker1:
        // Eight vectors from the front into v16-v23 and the eight matching
        // ones from the back into v24-v31; the second transpose lists its
        // registers from v31 down so that it starts with the topmost slot.
2910*c0909341SAndroid Build Coastguard Worker        ld1             {v16.8h, v17.8h, v18.8h, v19.8h}, [x7], #64
2911*c0909341SAndroid Build Coastguard Worker        ld1             {v28.8h, v29.8h, v30.8h, v31.8h}, [x8], x11
2912*c0909341SAndroid Build Coastguard Worker        ld1             {v20.8h, v21.8h, v22.8h, v23.8h}, [x7], #64
2913*c0909341SAndroid Build Coastguard Worker        ld1             {v24.8h, v25.8h, v26.8h, v27.8h}, [x8], x11
2914*c0909341SAndroid Build Coastguard Worker        transpose_8x8h  v16, v17, v18, v19, v20, v21, v22, v23, v4, v5
2915*c0909341SAndroid Build Coastguard Worker        transpose_8x8h  v31, v30, v29, v28, v27, v26, v25, v24, v4, v5
2916*c0909341SAndroid Build Coastguard Worker
        // store_addsub src0, src1, src2, src3
        // Handles two output rows: (src0, src1) and (src2, src3).
        // For each pair the saturating sum and difference are shifted by v7.
        // The sum is stored at [x6]; the difference has its eight lanes
        // reversed (rev64 followed by ext #8) and is stored at [x9].
        // Both pointers move down one row per store.
2917*c0909341SAndroid Build Coastguard Worker.macro store_addsub src0, src1, src2, src3
2918*c0909341SAndroid Build Coastguard Worker        sqsub           v1.8h,   \src0,   \src1
2919*c0909341SAndroid Build Coastguard Worker        sqadd           v0.8h,   \src0,   \src1
2920*c0909341SAndroid Build Coastguard Worker        sqsub           v3.8h,   \src2,   \src3
2921*c0909341SAndroid Build Coastguard Worker        srshl           v1.8h,   v1.8h,   v7.8h
2922*c0909341SAndroid Build Coastguard Worker        sqadd           v2.8h,   \src2,   \src3
2923*c0909341SAndroid Build Coastguard Worker        srshl           v0.8h,   v0.8h,   v7.8h
2924*c0909341SAndroid Build Coastguard Worker        srshl           v3.8h,   v3.8h,   v7.8h
2925*c0909341SAndroid Build Coastguard Worker        rev64           v1.8h,   v1.8h
2926*c0909341SAndroid Build Coastguard Worker        srshl           v2.8h,   v2.8h,   v7.8h
2927*c0909341SAndroid Build Coastguard Worker        rev64           v3.8h,   v3.8h
2928*c0909341SAndroid Build Coastguard Worker        ext             v1.16b,  v1.16b,  v1.16b,  #8
2929*c0909341SAndroid Build Coastguard Worker        st1             {v0.8h},  [x6], x10
2930*c0909341SAndroid Build Coastguard Worker        ext             v3.16b,  v3.16b,  v3.16b,  #8
2931*c0909341SAndroid Build Coastguard Worker        st1             {v1.8h},  [x9], x10
2932*c0909341SAndroid Build Coastguard Worker        st1             {v2.8h},  [x6], x10
2933*c0909341SAndroid Build Coastguard Worker        st1             {v3.8h},  [x9], x10
2934*c0909341SAndroid Build Coastguard Worker.endm
2935*c0909341SAndroid Build Coastguard Worker        store_addsub    v16.8h,  v31.8h,  v17.8h,  v30.8h
2936*c0909341SAndroid Build Coastguard Worker        store_addsub    v18.8h,  v29.8h,  v19.8h,  v28.8h
2937*c0909341SAndroid Build Coastguard Worker        store_addsub    v20.8h,  v27.8h,  v21.8h,  v26.8h
2938*c0909341SAndroid Build Coastguard Worker        store_addsub    v22.8h,  v25.8h,  v23.8h,  v24.8h
2939*c0909341SAndroid Build Coastguard Worker.purgem store_addsub
        // Back up the 8 rows just written; move the left pointer 8 columns
        // to the right and the right pointer 8 columns to the left.
2940*c0909341SAndroid Build Coastguard Worker        sub             x6,  x6,  x10, lsl #3
2941*c0909341SAndroid Build Coastguard Worker        sub             x9,  x9,  x10, lsl #3
2942*c0909341SAndroid Build Coastguard Worker        add             x6,  x6,  #16
2943*c0909341SAndroid Build Coastguard Worker        sub             x9,  x9,  #16
2944*c0909341SAndroid Build Coastguard Worker
        // The read pointers cross after four iterations, when all 64
        // vectors have been consumed.
2945*c0909341SAndroid Build Coastguard Worker        cmp             x7,  x8
2946*c0909341SAndroid Build Coastguard Worker        b.lt            1b
2947*c0909341SAndroid Build Coastguard Worker        ret             x14
2948*c0909341SAndroid Build Coastguard Workerendfunc
2949*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_vert_dct_8x64_neon
// Final butterfly of a 64 point column transform combined with the add to
// the destination, for a strip of 8 bit pixels that is 8 wide and 64 high.
//   x6  in: address of the top row of the strip
//   x1  in: destination stride in bytes
//   sp  in: 64 vectors of 8 x int16; vector k is paired with vector 63-k
// Row k receives vec[k] + vec[63-k] and row 63-k receives vec[k] - vec[63-k]
// (both saturating). Each value is rounding-shifted right by 4, added to the
// existing pixel and saturated to unsigned 8 bit.
// The value of x8 on entry is not used; x8 serves as the descending read
// pointer. Modifies x6-x11, x14, v0-v7 and v16-v31. Returns through x14.
2950*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_vert_dct_8x64_neon
2951*c0909341SAndroid Build Coastguard Worker        mov             x14, x30
2953*c0909341SAndroid Build Coastguard Worker
        // x7 reads vectors upwards from slot 0, x8 reads downwards starting
        // with slots 60-63 (x11 = -4 slots). x9 = last row of the strip
        // (x6 + 63 * stride), x10 = -stride so that x9 walks upwards.
2954*c0909341SAndroid Build Coastguard Worker        mov             x7,  sp
2955*c0909341SAndroid Build Coastguard Worker        add             x8,  sp,  #2*8*(64 - 4)
2956*c0909341SAndroid Build Coastguard Worker        add             x9,  x6,  x1, lsl #6
2957*c0909341SAndroid Build Coastguard Worker        sub             x9,  x9,  x1
2958*c0909341SAndroid Build Coastguard Worker        neg             x10, x1
2959*c0909341SAndroid Build Coastguard Worker        mov             x11, #-2*8*4
2960*c0909341SAndroid Build Coastguard Worker
2961*c0909341SAndroid Build Coastguard Worker1:
2962*c0909341SAndroid Build Coastguard Worker        ld1             {v16.8h, v17.8h, v18.8h, v19.8h}, [x7], #64
2963*c0909341SAndroid Build Coastguard Worker        ld1             {v28.8h, v29.8h, v30.8h, v31.8h}, [x8], x11
2964*c0909341SAndroid Build Coastguard Worker        ld1             {v20.8h, v21.8h, v22.8h, v23.8h}, [x7], #64
2965*c0909341SAndroid Build Coastguard Worker        ld1             {v24.8h, v25.8h, v26.8h, v27.8h}, [x8], x11
2966*c0909341SAndroid Build Coastguard Worker
        // add_dest_addsub src0, src1, src2, src3
        // Handles two rows from the top (through x6) and two from the bottom
        // (through x9). The pixel loads step each pointer one row and the
        // sub instructions step it back, so that the four stores at the end
        // hit the rows that were loaded and leave x6 two rows lower and x9
        // two rows higher. v0-v7 are temporaries.
2967*c0909341SAndroid Build Coastguard Worker.macro add_dest_addsub src0, src1, src2, src3
2968*c0909341SAndroid Build Coastguard Worker        ld1             {v0.8b}, [x6], x1
2969*c0909341SAndroid Build Coastguard Worker        ld1             {v1.8b}, [x9], x10
2970*c0909341SAndroid Build Coastguard Worker        sqadd           v4.8h,   \src0,   \src1
2971*c0909341SAndroid Build Coastguard Worker        ld1             {v2.8b}, [x6]
2972*c0909341SAndroid Build Coastguard Worker        sqsub           v5.8h,   \src0,   \src1
2973*c0909341SAndroid Build Coastguard Worker        ld1             {v3.8b}, [x9]
2974*c0909341SAndroid Build Coastguard Worker        sqadd           v6.8h,   \src2,   \src3
2975*c0909341SAndroid Build Coastguard Worker        sqsub           v7.8h,   \src2,   \src3
2976*c0909341SAndroid Build Coastguard Worker        sub             x6,  x6,  x1
2977*c0909341SAndroid Build Coastguard Worker        sub             x9,  x9,  x10
2978*c0909341SAndroid Build Coastguard Worker        srshr           v4.8h,   v4.8h,   #4
2979*c0909341SAndroid Build Coastguard Worker        srshr           v5.8h,   v5.8h,   #4
2980*c0909341SAndroid Build Coastguard Worker        srshr           v6.8h,   v6.8h,   #4
2981*c0909341SAndroid Build Coastguard Worker        uaddw           v4.8h,   v4.8h,   v0.8b
2982*c0909341SAndroid Build Coastguard Worker        srshr           v7.8h,   v7.8h,   #4
2983*c0909341SAndroid Build Coastguard Worker        uaddw           v5.8h,   v5.8h,   v1.8b
2984*c0909341SAndroid Build Coastguard Worker        uaddw           v6.8h,   v6.8h,   v2.8b
2985*c0909341SAndroid Build Coastguard Worker        sqxtun          v0.8b,   v4.8h
2986*c0909341SAndroid Build Coastguard Worker        uaddw           v7.8h,   v7.8h,   v3.8b
2987*c0909341SAndroid Build Coastguard Worker        sqxtun          v1.8b,   v5.8h
2988*c0909341SAndroid Build Coastguard Worker        st1             {v0.8b}, [x6], x1
2989*c0909341SAndroid Build Coastguard Worker        sqxtun          v2.8b,   v6.8h
2990*c0909341SAndroid Build Coastguard Worker        st1             {v1.8b}, [x9], x10
2991*c0909341SAndroid Build Coastguard Worker        sqxtun          v3.8b,   v7.8h
2992*c0909341SAndroid Build Coastguard Worker        st1             {v2.8b}, [x6], x1
2993*c0909341SAndroid Build Coastguard Worker        st1             {v3.8b}, [x9], x10
2994*c0909341SAndroid Build Coastguard Worker.endm
2995*c0909341SAndroid Build Coastguard Worker        add_dest_addsub v16.8h,  v31.8h,  v17.8h,  v30.8h
2996*c0909341SAndroid Build Coastguard Worker        add_dest_addsub v18.8h,  v29.8h,  v19.8h,  v28.8h
2997*c0909341SAndroid Build Coastguard Worker        add_dest_addsub v20.8h,  v27.8h,  v21.8h,  v26.8h
2998*c0909341SAndroid Build Coastguard Worker        add_dest_addsub v22.8h,  v25.8h,  v23.8h,  v24.8h
2999*c0909341SAndroid Build Coastguard Worker.purgem add_dest_addsub
        // The read pointers cross after four iterations, when all 64
        // vectors have been consumed.
3000*c0909341SAndroid Build Coastguard Worker        cmp             x7,  x8
3001*c0909341SAndroid Build Coastguard Worker        b.lt            1b
3002*c0909341SAndroid Build Coastguard Worker
3003*c0909341SAndroid Build Coastguard Worker        ret             x14
3004*c0909341SAndroid Build Coastguard Workerendfunc
3005*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_dct_dct_64x64_8bpc_neon
// Exported entry point for the 64x64 DCT/DCT inverse transform with add to
// destination, 8 bit pixels.
// Register roles as used below:
//   x0  destination pixels; column group i starts at x0 + i
//   x1  destination stride (consumed by inv_txfm_add_vert_dct_8x64_neon)
//   x2  coefficient buffer, read in groups of 8 int16 with a pitch of 32*2
//       bytes; the _clear helper zeroes what it reads
//   w3  compared against thresholds from the eob_32x32 table (presumably the
//       end-of-block position) to skip groups of 8 intermediate rows
// idct_dc is a macro defined elsewhere; presumably it handles the DC-only
// case - confirm at its definition.
// Stack: sub_sp is asked for 64*32*2 + 64*8*2 bytes. The low 64*8*2 bytes at
// sp are the 64 vector work area used by the helpers; x5 points above it at
// the intermediate buffer of 32 rows x 64 int16.
// x15 keeps the return address because the helpers use x14 and x30.
3006*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_dct_dct_64x64_8bpc_neon, export=1
3007*c0909341SAndroid Build Coastguard Worker        idct_dc         64,  64,  2
3008*c0909341SAndroid Build Coastguard Worker
3009*c0909341SAndroid Build Coastguard Worker        mov             x15, x30
3010*c0909341SAndroid Build Coastguard Worker
3011*c0909341SAndroid Build Coastguard Worker        sub_sp          64*32*2+64*8*2
3012*c0909341SAndroid Build Coastguard Worker        add             x5,  sp, #64*8*2
3013*c0909341SAndroid Build Coastguard Worker
3014*c0909341SAndroid Build Coastguard Worker        movrel          x13, eob_32x32
3015*c0909341SAndroid Build Coastguard Worker
        // First pass, 8 intermediate rows per step (i = 0, 8, 16, 24).
        // x6 = row i of the intermediate buffer. For i > 0, w12 holds the
        // threshold loaded at the end of the previous step; if w3 is below
        // it, branch to 1: with w8 = number of rows still to be zero-filled.
        // Otherwise x12 is reused to carry the shift (-2) into the
        // horizontal helper.
3016*c0909341SAndroid Build Coastguard Worker.irp i, 0, 8, 16, 24
3017*c0909341SAndroid Build Coastguard Worker        add             x6,  x5,  #(\i*64*2)
3018*c0909341SAndroid Build Coastguard Worker.if \i > 0
3019*c0909341SAndroid Build Coastguard Worker        mov             w8,  #(32 - \i)
3020*c0909341SAndroid Build Coastguard Worker        cmp             w3,  w12
3021*c0909341SAndroid Build Coastguard Worker        b.lt            1f
3022*c0909341SAndroid Build Coastguard Worker.endif
3023*c0909341SAndroid Build Coastguard Worker        add             x7,  x2,  #(\i*2)
3024*c0909341SAndroid Build Coastguard Worker        mov             x8,  #32*2
3025*c0909341SAndroid Build Coastguard Worker        mov             x12, #-2 // shift
3026*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_dct_clear_8h_x64_neon
3027*c0909341SAndroid Build Coastguard Worker        add             x6,  x5,  #(\i*64*2)
3028*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_horz_dct_64x8_neon
3029*c0909341SAndroid Build Coastguard Worker.if \i < 24
3030*c0909341SAndroid Build Coastguard Worker        ldrh            w12, [x13], #2
3031*c0909341SAndroid Build Coastguard Worker.endif
3032*c0909341SAndroid Build Coastguard Worker.endr
3033*c0909341SAndroid Build Coastguard Worker        b               3f
3034*c0909341SAndroid Build Coastguard Worker
        // Zero-fill the rest of the intermediate buffer from x6 onwards.
        // Each loop iteration writes 4*64 bytes, i.e. two rows of 64 int16,
        // and w8 counts the remaining rows down by two.
3035*c0909341SAndroid Build Coastguard Worker1:
3036*c0909341SAndroid Build Coastguard Worker        movi            v4.8h,  #0
3037*c0909341SAndroid Build Coastguard Worker        movi            v5.8h,  #0
3038*c0909341SAndroid Build Coastguard Worker        movi            v6.8h,  #0
3039*c0909341SAndroid Build Coastguard Worker        movi            v7.8h,  #0
3040*c0909341SAndroid Build Coastguard Worker2:
3041*c0909341SAndroid Build Coastguard Worker        subs            w8,  w8,  #2
3042*c0909341SAndroid Build Coastguard Worker.rept 4
3043*c0909341SAndroid Build Coastguard Worker        st1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x6], #64
3044*c0909341SAndroid Build Coastguard Worker.endr
3045*c0909341SAndroid Build Coastguard Worker        b.gt            2b
3046*c0909341SAndroid Build Coastguard Worker
        // Second pass, 8 columns per step: x7 = column i of the intermediate
        // buffer with a pitch of 64*2 bytes, then x6 = destination column i.
3047*c0909341SAndroid Build Coastguard Worker3:
3048*c0909341SAndroid Build Coastguard Worker.irp i, 0, 8, 16, 24, 32, 40, 48, 56
3049*c0909341SAndroid Build Coastguard Worker        add             x7,  x5,  #(\i*2)
3050*c0909341SAndroid Build Coastguard Worker        mov             x8,  #64*2
3051*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_dct_8h_x64_neon
3052*c0909341SAndroid Build Coastguard Worker        add             x6,  x0,  #(\i)
3053*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_add_vert_dct_8x64_neon
3054*c0909341SAndroid Build Coastguard Worker.endr
3055*c0909341SAndroid Build Coastguard Worker
        // x5 + 64*32*2 is the top of the area requested from sub_sp above.
3056*c0909341SAndroid Build Coastguard Worker        add             sp,  x5,  #64*32*2
3057*c0909341SAndroid Build Coastguard Worker        ret             x15
3058*c0909341SAndroid Build Coastguard Workerendfunc
3059*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_dct_dct_64x32_8bpc_neon
// Exported entry point for the 64x32 DCT/DCT inverse transform with add to
// destination, 8 bit pixels.
// Register roles as used below:
//   x0  destination pixels; column group i starts at x0 + i
//   x1  not touched here; presumably the destination stride read by
//       inv_txfm_add_vert_dct_8x32_neon - confirm at its definition
//   x2  coefficient buffer, read in groups of 8 int16 with a pitch of 32*2
//       bytes; the _clear_scale helper zeroes what it reads
//   w3  compared against thresholds from the eob_32x32 table (presumably the
//       end-of-block position) to skip groups of 8 intermediate rows
// idct_dc is a macro defined elsewhere; presumably it handles the DC-only
// case - confirm at its definition.
// Stack: sub_sp is asked for 64*32*2 + 64*8*2 bytes. The low 64*8*2 bytes at
// sp are the 64 vector work area used by the helpers; x5 points above it at
// the intermediate buffer of 32 rows x 64 int16.
// x15 keeps the return address because the helpers use x14 and x30.
3060*c0909341SAndroid Build Coastguard Workerfunction inv_txfm_add_dct_dct_64x32_8bpc_neon, export=1
3061*c0909341SAndroid Build Coastguard Worker        idct_dc         64,  32,  1
3062*c0909341SAndroid Build Coastguard Worker
3063*c0909341SAndroid Build Coastguard Worker        mov             x15, x30
3064*c0909341SAndroid Build Coastguard Worker
3065*c0909341SAndroid Build Coastguard Worker        sub_sp          64*32*2+64*8*2
3066*c0909341SAndroid Build Coastguard Worker        add             x5,  sp, #64*8*2
3067*c0909341SAndroid Build Coastguard Worker
3068*c0909341SAndroid Build Coastguard Worker        movrel          x13, eob_32x32
3069*c0909341SAndroid Build Coastguard Worker
        // First pass, 8 intermediate rows per step (i = 0, 8, 16, 24).
        // x6 = row i of the intermediate buffer. For i > 0, w12 holds the
        // threshold loaded at the end of the previous step; if w3 is below
        // it, branch to 1: with w8 = number of rows still to be zero-filled.
        // Otherwise x12 is reused to carry the shift (-1) into the
        // horizontal helper; the scaled variant of the 64 point helper is
        // used for this block size.
3070*c0909341SAndroid Build Coastguard Worker.irp i, 0, 8, 16, 24
3071*c0909341SAndroid Build Coastguard Worker        add             x6,  x5,  #(\i*64*2)
3072*c0909341SAndroid Build Coastguard Worker.if \i > 0
3073*c0909341SAndroid Build Coastguard Worker        mov             w8,  #(32 - \i)
3074*c0909341SAndroid Build Coastguard Worker        cmp             w3,  w12
3075*c0909341SAndroid Build Coastguard Worker        b.lt            1f
3076*c0909341SAndroid Build Coastguard Worker.endif
3077*c0909341SAndroid Build Coastguard Worker        add             x7,  x2,  #(\i*2)
3078*c0909341SAndroid Build Coastguard Worker        mov             x8,  #32*2
3079*c0909341SAndroid Build Coastguard Worker        mov             x12, #-1 // shift
3080*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_dct_clear_scale_8h_x64_neon
3081*c0909341SAndroid Build Coastguard Worker        add             x6,  x5,  #(\i*64*2)
3082*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_horz_dct_64x8_neon
3083*c0909341SAndroid Build Coastguard Worker.if \i < 24
3084*c0909341SAndroid Build Coastguard Worker        ldrh            w12, [x13], #2
3085*c0909341SAndroid Build Coastguard Worker.endif
3086*c0909341SAndroid Build Coastguard Worker.endr
3087*c0909341SAndroid Build Coastguard Worker        b               3f
3088*c0909341SAndroid Build Coastguard Worker
        // Zero-fill the rest of the intermediate buffer from x6 onwards.
        // Each loop iteration writes 4*64 bytes, i.e. two rows of 64 int16,
        // and w8 counts the remaining rows down by two.
3089*c0909341SAndroid Build Coastguard Worker1:
3090*c0909341SAndroid Build Coastguard Worker        movi            v4.8h,  #0
3091*c0909341SAndroid Build Coastguard Worker        movi            v5.8h,  #0
3092*c0909341SAndroid Build Coastguard Worker        movi            v6.8h,  #0
3093*c0909341SAndroid Build Coastguard Worker        movi            v7.8h,  #0
3094*c0909341SAndroid Build Coastguard Worker2:
3095*c0909341SAndroid Build Coastguard Worker        subs            w8,  w8,  #2
3096*c0909341SAndroid Build Coastguard Worker.rept 4
3097*c0909341SAndroid Build Coastguard Worker        st1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x6], #64
3098*c0909341SAndroid Build Coastguard Worker.endr
3099*c0909341SAndroid Build Coastguard Worker        b.gt            2b
3100*c0909341SAndroid Build Coastguard Worker
        // Second pass, 8 columns per step: x6 = destination column i,
        // x7 = column i of the intermediate buffer, x8 = its pitch of 64*2
        // bytes; the 32 row vertical helper does the transform and the add.
3101*c0909341SAndroid Build Coastguard Worker3:
3102*c0909341SAndroid Build Coastguard Worker.irp i, 0, 8, 16, 24, 32, 40, 48, 56
3103*c0909341SAndroid Build Coastguard Worker        add             x6,  x0,  #(\i)
3104*c0909341SAndroid Build Coastguard Worker        add             x7,  x5,  #(\i*2)
3105*c0909341SAndroid Build Coastguard Worker        mov             x8,  #64*2
3106*c0909341SAndroid Build Coastguard Worker        bl              inv_txfm_add_vert_dct_8x32_neon
3107*c0909341SAndroid Build Coastguard Worker.endr
3108*c0909341SAndroid Build Coastguard Worker
        // x5 + 64*32*2 is the top of the area requested from sub_sp above.
3109*c0909341SAndroid Build Coastguard Worker        add             sp,  x5,  #64*32*2
3110*c0909341SAndroid Build Coastguard Worker        ret             x15
3111*c0909341SAndroid Build Coastguard Workerendfunc
3112*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_dct_dct_32x64_8bpc_neon
//
// Two-pass driver assembled from helpers defined elsewhere in this file.
// Register roles as used below:
//   x0       base of the second-pass output address (x6 = x0 + i)
//   x2       base of the first-pass input address   (x7 = x2 + 2*i)
//   w3       value compared against the eob_32x32 thresholds
//   x5       intermediate buffer of 32*32*2 bytes on the stack
//   x13/w12  cursor into eob_32x32 / threshold currently being tested
//   x15      copy of the return address (every bl overwrites x30)
// NOTE(review): x0/x2/w3 are presumably dst, coeff and eob - confirm against
// the C prototype. idct_dc presumably handles the DC-only case - confirm
// against the macro definition.
function inv_txfm_add_dct_dct_32x64_8bpc_neon, export=1
        idct_dc         32,  64,  1

        mov             x15, x30

        // Stack frame: 64*8*2 bytes at sp, followed by the intermediate
        // buffer that x5 points to.
        // NOTE(review): the low 64*8*2 bytes are presumably scratch for the
        // 64-point helpers - confirm.
        sub_sp          32*32*2+64*8*2
        add             x5,  sp, #64*8*2

        movrel          x13, eob_32x32
        ldrh            w12, [x13], #2

        // First pass, steps i = 0, 8, 16, 24. From the second step on, bail
        // out to the zero fill at 1: as soon as w3 is below the threshold.
.irp i, 0, 8, 16, 24
        add             x6,  x5,  #(\i*32*2)
.if \i > 0
        cmp             w3,  w12
        mov             w8,  #(32 - \i)         // fill count; mov leaves the flags alone
        b.lt            1f
.if \i < 24
        ldrh            w12, [x13], #2          // threshold for the next step
.endif
.endif
        mov             x8,  #32*2
        add             x7,  x2,  #(\i*2)
        bl              inv_txfm_horz_scale_dct_32x8_neon
.endr
        b               3f

1:
        // Zero fill: x6 is the first byte not written by the first pass and
        // w8 = 32 - i. Each trip clears 4*64 = 256 bytes and consumes four
        // units of w8, i.e. 32*2 bytes per unit, which ends exactly at
        // x5 + 32*32*2.
        movi            v4.8h,  #0
        movi            v5.8h,  #0
        movi            v6.8h,  #0
        movi            v7.8h,  #0
2:
        st1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x6], #64
        st1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x6], #64
        st1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x6], #64
        st1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x6], #64
        subs            w8,  w8,  #4
        b.gt            2b

3:
        // Second pass, steps i = 0, 8, 16, 24: read from the intermediate
        // buffer at x5 + 2*i with a 32*2-byte stride, write at x0 + i.
.irp i, 0, 8, 16, 24
        mov             x8,  #32*2
        add             x7,  x5,  #(\i*2)
        bl              inv_txfm_dct_8h_x64_neon
        add             x6,  x0,  #(\i)
        bl              inv_txfm_add_vert_dct_8x64_neon
.endr

        // x5 sits 64*8*2 above the sp set up by sub_sp, so stepping past the
        // 32*32*2-byte buffer releases the whole frame.
        add             sp,  x5,  #32*32*2
        ret             x15
endfunc
3164*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_dct_dct_64x16_8bpc_neon
//
// Two-pass driver assembled from helpers defined elsewhere in this file.
// Register roles as used below:
//   x0       base of the second-pass output address (x6 = x0 + i)
//   x2       base of the first-pass input address   (x7 = x2 + 2*i)
//   w3       value compared against the eob_16x32 threshold
//   x4       intermediate buffer of 64*16*2 bytes on the stack
//   x5       address of inv_dct_8h_x16_neon, set up before the second pass
//   x13/w12  cursor into eob_16x32 / threshold currently being tested
//   x15      copy of the return address (every bl overwrites x30)
// NOTE(review): x0/x2/w3 are presumably dst, coeff and eob - confirm against
// the C prototype. idct_dc presumably handles the DC-only case - confirm
// against the macro definition.
function inv_txfm_add_dct_dct_64x16_8bpc_neon, export=1
        idct_dc         64,  16,  2

        mov             x15, x30

        // Stack frame: 64*8*2 bytes at sp, followed by the intermediate
        // buffer that x4 points to.
        // NOTE(review): the low 64*8*2 bytes are presumably scratch for the
        // 64-point helpers - confirm.
        sub_sp          64*16*2+64*8*2
        add             x4,  sp, #64*8*2

        movrel          x13, eob_16x32

        // First pass, steps i = 0 and 8. x12 carries the shift argument into
        // the helper, so the threshold is only loaded into w12 after the
        // calls of step 0, in time for the comparison in step 8.
.irp i, 0, 8
        add             x6,  x4,  #(\i*64*2)
.if \i > 0
        mov             w8,  #(16 - \i)         // fill count for the path at 1:
        cmp             w3,  w12
        b.lt            1f
.endif
        add             x7,  x2,  #(\i*2)
        mov             x8,  #16*2
        mov             x12, #-2 // shift
        bl              inv_txfm_dct_clear_8h_x64_neon
        add             x6,  x4,  #(\i*64*2)    // set again for the second helper
        bl              inv_txfm_horz_dct_64x8_neon
.if \i < 8
        ldrh            w12, [x13], #2
.endif
.endr
        b               3f

1:
        // Zero fill: x6 is the first byte not written by the first pass and
        // w8 = 16 - i. Each trip clears 4*64 = 256 bytes and consumes two
        // units of w8, i.e. 64*2 bytes per unit, which ends exactly at
        // x4 + 64*16*2.
        movi            v4.8h,  #0
        movi            v5.8h,  #0
        movi            v6.8h,  #0
        movi            v7.8h,  #0
2:
        subs            w8,  w8,  #2
.rept 4
        st1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x6], #64
.endr
        b.gt            2b

3:
        // Second pass, steps i = 0, 8, ..., 56: read from the intermediate
        // buffer at x4 + 2*i with a 64*2-byte stride, write at x0 + i.
        adr             x5,  inv_dct_8h_x16_neon
.irp i, 0, 8, 16, 24, 32, 40, 48, 56
        add             x6,  x0,  #(\i)
        add             x7,  x4,  #(\i*2)
        // x8 is a caller-saved scratch register; reload the stride before
        // every call, as the other entry points here do, instead of relying
        // on the helper to leave it untouched.
        mov             x8,  #64*2
        bl              inv_txfm_add_vert_8x16_neon
.endr

        // x4 sits 64*8*2 above the sp set up by sub_sp, so stepping past the
        // 64*16*2-byte buffer releases the whole frame.
        add             sp,  x4,  #64*16*2
        ret             x15
endfunc
3218*c0909341SAndroid Build Coastguard Worker
// inv_txfm_add_dct_dct_16x64_8bpc_neon
//
// Two-pass driver assembled from helpers defined elsewhere in this file.
// Register roles as used below:
//   x0       base of the second-pass output address (x6 = x0 + i)
//   x2       base of the first-pass input address   (x7 = x2 + 2*i)
//   w3       value compared against the eob_16x32 thresholds
//   x4       address of inv_dct_8h_x16_neon, set up before the first pass
//   x5       intermediate buffer of 16*32*2 bytes on the stack
//   x13/w12  cursor into eob_16x32 / threshold currently being tested
//   x15      copy of the return address (every bl overwrites x30)
// NOTE(review): x0/x2/w3 are presumably dst, coeff and eob - confirm against
// the C prototype. idct_dc presumably handles the DC-only case - confirm
// against the macro definition.
function inv_txfm_add_dct_dct_16x64_8bpc_neon, export=1
        idct_dc         16,  64,  2

        mov             x15, x30

        // Stack frame: 64*8*2 bytes at sp, followed by the intermediate
        // buffer that x5 points to.
        // NOTE(review): the low 64*8*2 bytes are presumably scratch for the
        // 64-point helpers - confirm.
        sub_sp          16*32*2+64*8*2
        add             x5,  sp, #64*8*2

        adr             x4,  inv_dct_8h_x16_neon
        movrel          x13, eob_16x32
        ldrh            w12, [x13], #2

        // First pass, steps i = 0, 8, 16, 24. From the second step on, bail
        // out to the zero fill at 1: as soon as w3 is below the threshold.
.irp i, 0, 8, 16, 24
        add             x6,  x5,  #(\i*16*2)
.if \i > 0
        cmp             w3,  w12
        mov             w8,  #(32 - \i)         // fill count; mov leaves the flags alone
        b.lt            1f
.if \i < 24
        ldrh            w12, [x13], #2          // threshold for the next step
.endif
.endif
        mov             x8,  #32*2
        add             x7,  x2,  #(\i*2)
        bl              inv_txfm_horz_16x8_neon
.endr
        b               3f

1:
        // Zero fill: x6 is the first byte not written by the first pass and
        // w8 = 32 - i. Each trip clears 4*64 = 256 bytes and consumes eight
        // units of w8, i.e. 16*2 bytes per unit, which ends exactly at
        // x5 + 16*32*2.
        movi            v4.8h,  #0
        movi            v5.8h,  #0
        movi            v6.8h,  #0
        movi            v7.8h,  #0
2:
        st1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x6], #64
        st1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x6], #64
        st1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x6], #64
        st1             {v4.8h, v5.8h, v6.8h, v7.8h}, [x6], #64
        subs            w8,  w8,  #8
        b.gt            2b

3:
        // Second pass, steps i = 0 and 8: read from the intermediate buffer
        // at x5 + 2*i with a 16*2-byte stride, write at x0 + i.
.irp i, 0, 8
        mov             x8,  #16*2
        add             x7,  x5,  #(\i*2)
        bl              inv_txfm_dct_8h_x64_neon
        add             x6,  x0,  #(\i)
        bl              inv_txfm_add_vert_dct_8x64_neon
.endr

        // x5 sits 64*8*2 above the sp set up by sub_sp, so stepping past the
        // 16*32*2-byte buffer releases the whole frame.
        add             sp,  x5,  #16*32*2
        ret             x15
endfunc
3271