xref: /aosp_15_r20/external/libdav1d/src/x86/itx16_avx2.asm (revision c09093415860a1c2373dacd84c4fde00c507cdfd)
1*c0909341SAndroid Build Coastguard Worker; Copyright © 2021, VideoLAN and dav1d authors
2*c0909341SAndroid Build Coastguard Worker; Copyright © 2021, Two Orioles, LLC
3*c0909341SAndroid Build Coastguard Worker; Copyright © 2021, Matthias Dressel
4*c0909341SAndroid Build Coastguard Worker; All rights reserved.
5*c0909341SAndroid Build Coastguard Worker;
6*c0909341SAndroid Build Coastguard Worker; Redistribution and use in source and binary forms, with or without
7*c0909341SAndroid Build Coastguard Worker; modification, are permitted provided that the following conditions are met:
8*c0909341SAndroid Build Coastguard Worker;
9*c0909341SAndroid Build Coastguard Worker; 1. Redistributions of source code must retain the above copyright notice, this
10*c0909341SAndroid Build Coastguard Worker;    list of conditions and the following disclaimer.
11*c0909341SAndroid Build Coastguard Worker;
12*c0909341SAndroid Build Coastguard Worker; 2. Redistributions in binary form must reproduce the above copyright notice,
13*c0909341SAndroid Build Coastguard Worker;    this list of conditions and the following disclaimer in the documentation
14*c0909341SAndroid Build Coastguard Worker;    and/or other materials provided with the distribution.
15*c0909341SAndroid Build Coastguard Worker;
16*c0909341SAndroid Build Coastguard Worker; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17*c0909341SAndroid Build Coastguard Worker; ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18*c0909341SAndroid Build Coastguard Worker; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19*c0909341SAndroid Build Coastguard Worker; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20*c0909341SAndroid Build Coastguard Worker; ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21*c0909341SAndroid Build Coastguard Worker; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22*c0909341SAndroid Build Coastguard Worker; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23*c0909341SAndroid Build Coastguard Worker; ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24*c0909341SAndroid Build Coastguard Worker; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25*c0909341SAndroid Build Coastguard Worker; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26*c0909341SAndroid Build Coastguard Worker
27*c0909341SAndroid Build Coastguard Worker%include "config.asm"
28*c0909341SAndroid Build Coastguard Worker%include "ext/x86/x86inc.asm"
29*c0909341SAndroid Build Coastguard Worker
30*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64
31*c0909341SAndroid Build Coastguard Worker
; Read-only lookup tables. SECTION_RODATA is an x86inc macro; the argument 32
; is presumably the section alignment in bytes -- confirm against x86inc.asm.
; In the part of the file visible here only two of these tables are referenced:
;   itx4_shuf  - loaded by iadst_4x4_internal_10bpc.pass1_end, where it is used
;                first as the vpermd index vector and then, after every dword
;                has been shifted right by 4, as the pshufb mask.
;   idct4_shuf - 16-byte pshufb mask, broadcast to both 128-bit lanes at the
;                end of pass 1 of idct_4x4_internal_10bpc.
; The remaining tables are not referenced in this part of the file.
32*c0909341SAndroid Build Coastguard WorkerSECTION_RODATA 32
33*c0909341SAndroid Build Coastguard Workeritx4_shuf:       dd 0x50401600, 0xd0c09284, 0x70603422, 0xf0e0b0a6
34*c0909341SAndroid Build Coastguard Worker                 dd 0x50401701, 0xd0c09385, 0x70603523, 0xf0e0b1a7
35*c0909341SAndroid Build Coastguard Workeridct4_12_shuf:   dd 0, 2, 4, 6, 1, 3, 5, 7
36*c0909341SAndroid Build Coastguard Workeridct4_12_shuf2:  dd 2, 0, 6, 4, 3, 1, 7, 5
37*c0909341SAndroid Build Coastguard Workeriadst8_12_shuf:  dd 0, 4, 1, 5, 2, 6, 3, 7
38*c0909341SAndroid Build Coastguard Workeridct16_12_shuf:  dd 0, 4, 1, 5, 3, 7, 2, 6
39*c0909341SAndroid Build Coastguard Workeriadst16_12_shuf: dd 3, 7, 0, 4, 2, 6, 1, 5
40*c0909341SAndroid Build Coastguard Workerpw_2048_m2048:   dw  2048,  2048,  2048,  2048, -2048, -2048, -2048, -2048
41*c0909341SAndroid Build Coastguard Workeridct4_shuf:   db  0,  1,  4,  5, 12, 13,  8,  9,  2,  3,  6,  7, 14, 15, 10, 11
42*c0909341SAndroid Build Coastguard Workeridct32_shuf:  db  0,  1,  8,  9,  4,  5, 12, 13,  2,  3, 10, 11,  6,  7, 14, 15
43*c0909341SAndroid Build Coastguard Worker
; COEF_PAIR a, b [, emit_negated = 0]
; Emits one 16-byte table entry labelled pd_a_b that holds the dwords
; a, a, b, b, and defines two aliases into it:
;   pd_a -> first dword  (offset 0)
;   pd_b -> third dword  (offset 8)
; When the optional third argument is non-zero, the dwords -b, -b are emitted
; immediately after the entry and pd_b_mb is defined as an alias of pd_b, so
; that 16 bytes read from pd_b_mb yield b, b, -b, -b.
; Note: the aliases are plain %defines, so invoking the macro twice with the
; same value redefines the corresponding pd_<value> alias.
44*c0909341SAndroid Build Coastguard Worker%macro COEF_PAIR 2-3 0
45*c0909341SAndroid Build Coastguard Workerpd_%1_%2: dd %1, %1, %2, %2
46*c0909341SAndroid Build Coastguard Worker%define pd_%1 (pd_%1_%2 + 4*0)
47*c0909341SAndroid Build Coastguard Worker%define pd_%2 (pd_%1_%2 + 4*2)
48*c0909341SAndroid Build Coastguard Worker%if %3
49*c0909341SAndroid Build Coastguard Workerdd -%2, -%2
50*c0909341SAndroid Build Coastguard Worker%define pd_%2_m%2 pd_%2
51*c0909341SAndroid Build Coastguard Worker%endif
52*c0909341SAndroid Build Coastguard Worker%endmacro
53*c0909341SAndroid Build Coastguard Worker
; Coefficient table built with COEF_PAIR. Every line creates pd_<a>_<b> plus
; the single-value aliases pd_<a> and pd_<b>.
; The two entries that pass 1 as third argument additionally emit the negated
; second coefficient and define pd_1567_m1567 and pd_3784_m3784.
; 2896 appears as first value in two entries, so pd_2896 is defined twice;
; both definitions point at a dword holding 2896.
54*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR  201,  995
55*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR  401, 1931
56*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR  799, 3406
57*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 1380,  601
58*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 1751, 2440
59*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 2598, 1189
60*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 2751, 2106
61*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 2896, 1567, 1
62*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 2896, 3784, 1
63*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 3035, 3513
64*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 3166, 3920
65*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 3703, 3290
66*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 3857, 4052
67*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 4017, 2276
68*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 4076, 3612
69*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 4091, 3973
70*c0909341SAndroid Build Coastguard Worker
; Stand-alone dword constants (one dword each, so they are only suitable for
; scalar loads or dword broadcasts such as vpbroadcastd).
; In the part of the file visible here, IADST4_1D broadcasts pd_1321, pd_2482
; and pd_m3344; it obtains 3803 (= 1321 + 2482) by adding the first two in a
; register rather than loading pd_3803.
; The sums noted on some lines show how the value is composed; the individual
; terms are presumably rounding offsets for different shift amounts -- confirm
; at the use sites, which are outside this part of the file.
71*c0909341SAndroid Build Coastguard Workerpd_8:      dd     8
72*c0909341SAndroid Build Coastguard Workerpd_m601:   dd  -601
73*c0909341SAndroid Build Coastguard Workerpd_m1189:  dd -1189
74*c0909341SAndroid Build Coastguard Workerpd_m1380:  dd -1380
75*c0909341SAndroid Build Coastguard Workerpd_m2106:  dd -2106
76*c0909341SAndroid Build Coastguard Workerpd_m2598:  dd -2598
77*c0909341SAndroid Build Coastguard Workerpd_m2751:  dd -2751
78*c0909341SAndroid Build Coastguard Workerpd_m3344:  dd -3344
79*c0909341SAndroid Build Coastguard Workerpd_1024:   dd  1024
80*c0909341SAndroid Build Coastguard Workerpd_1321:   dd  1321
81*c0909341SAndroid Build Coastguard Workerpd_1448:   dd  1448
82*c0909341SAndroid Build Coastguard Workerpd_1697:   dd  1697
83*c0909341SAndroid Build Coastguard Workerpd_2482:   dd  2482
84*c0909341SAndroid Build Coastguard Workerpd_3072:   dd  3072 ; 1024 + 2048
85*c0909341SAndroid Build Coastguard Workerpd_3803:   dd  3803
86*c0909341SAndroid Build Coastguard Workerpd_5119:   dd  5119 ; 1024 + 4096 - 1
87*c0909341SAndroid Build Coastguard Workerpd_5120:   dd  5120 ; 1024 + 4096
88*c0909341SAndroid Build Coastguard Workerpd_5793:   dd  5793
89*c0909341SAndroid Build Coastguard Workerpd_6144:   dd  6144 ; 2048 + 4096
90*c0909341SAndroid Build Coastguard Workerpd_17408:  dd 17408 ; 1024 + 16384
91*c0909341SAndroid Build Coastguard Worker
; Pixel-range and clipping constants.
; pixel_*bpc_max: largest pixel value (0x03ff = 2^10 - 1, 0x0fff = 2^12 - 1),
;   stored as two words so a dword broadcast fills every word lane.
; dconly_*bpc: bias used by the DC-only path in INV_TXFM_4X4_FN. The values
;   equal 0x7fff - pixel max (0x7c00 = 0x7fff - 0x03ff, 0x7000 = 0x7fff -
;   0x0fff): the bias is added with signed saturation (paddsw) and removed
;   again with unsigned saturation (psubusw), which limits the result to
;   0 .. pixel max without separate min/max instructions.
; clip_18b_* / clip_20b_*: limits of a signed 18-bit (-2^17 .. 2^17 - 1) and
;   signed 20-bit (-2^19 .. 2^19 - 1) range; not referenced in this part of
;   the file.
92*c0909341SAndroid Build Coastguard Workerpixel_10bpc_max: times 2 dw 0x03ff
93*c0909341SAndroid Build Coastguard Workerpixel_12bpc_max: times 2 dw 0x0fff
94*c0909341SAndroid Build Coastguard Workerdconly_10bpc:    times 2 dw 0x7c00
95*c0909341SAndroid Build Coastguard Workerdconly_12bpc:    times 2 dw 0x7000
96*c0909341SAndroid Build Coastguard Workerclip_18b_min:  dd -0x20000
97*c0909341SAndroid Build Coastguard Workerclip_18b_max:  dd  0x1ffff
98*c0909341SAndroid Build Coastguard Workerclip_20b_min:  dd -0x80000
99*c0909341SAndroid Build Coastguard Workerclip_20b_max:  dd  0x7ffff
100*c0909341SAndroid Build Coastguard Worker
; idct64_mul_16bpc: 4 rows of 12 dword multipliers (48 dwords in total).
; It is declared with the x86inc `const` macro instead of a plain label,
; presumably so that the symbol is visible to other object files -- confirm
; against x86inc.asm. The table is not referenced in this part of the file;
; judging by the name it holds the 64-point IDCT multipliers as dwords
; (TODO confirm at the use site).
101*c0909341SAndroid Build Coastguard Workerconst idct64_mul_16bpc
102*c0909341SAndroid Build Coastguard Workerdd 4095,  101, 2967, -2824,  3745, 1660, 3822, -1474,   401,  4076,   799,  4017
103*c0909341SAndroid Build Coastguard Workerdd -700, 4036, 2359,  3349, -2191, 3461,  897,  3996, -2598, -3166, -4017,  -799
104*c0909341SAndroid Build Coastguard Workerdd 4065,  501, 3229, -2520,  3564, 2019, 3948, -1092,  1931,  3612,  3406,  2276
105*c0909341SAndroid Build Coastguard Workerdd -301, 4085, 2675,  3102, -1842, 3659, 1285,  3889, -1189, -3920, -2276, -3406
106*c0909341SAndroid Build Coastguard Worker
107*c0909341SAndroid Build Coastguard Workercextern deint_shuf
108*c0909341SAndroid Build Coastguard Workercextern idct64_mul
109*c0909341SAndroid Build Coastguard Workercextern pw_1697x8
110*c0909341SAndroid Build Coastguard Workercextern pw_1697x16
111*c0909341SAndroid Build Coastguard Workercextern pw_1567_3784
112*c0909341SAndroid Build Coastguard Workercextern pw_m1567_m3784
113*c0909341SAndroid Build Coastguard Workercextern pw_m3784_1567
114*c0909341SAndroid Build Coastguard Workercextern pw_2896_2896
115*c0909341SAndroid Build Coastguard Workercextern pw_m2896_2896
116*c0909341SAndroid Build Coastguard Workercextern pw_5
117*c0909341SAndroid Build Coastguard Workercextern pw_2048
118*c0909341SAndroid Build Coastguard Workercextern pw_4096
119*c0909341SAndroid Build Coastguard Workercextern pw_8192
120*c0909341SAndroid Build Coastguard Workercextern pw_16384
121*c0909341SAndroid Build Coastguard Workercextern pw_2896x8
122*c0909341SAndroid Build Coastguard Workercextern pd_2048
123*c0909341SAndroid Build Coastguard Worker
124*c0909341SAndroid Build Coastguard Workercextern idct_4x8_internal_8bpc_avx2.main
125*c0909341SAndroid Build Coastguard Workercextern idct_4x16_internal_8bpc_avx2.main
126*c0909341SAndroid Build Coastguard Workercextern idct_8x8_internal_8bpc_avx2.main
127*c0909341SAndroid Build Coastguard Workercextern idct_8x16_internal_8bpc_avx2.main
128*c0909341SAndroid Build Coastguard Workercextern idct_16x4_internal_8bpc_avx2.main
129*c0909341SAndroid Build Coastguard Workercextern idct_16x8_internal_8bpc_avx2.main
130*c0909341SAndroid Build Coastguard Workercextern idct_16x16_internal_8bpc_avx2.main
131*c0909341SAndroid Build Coastguard Workercextern inv_txfm_add_dct_dct_8x32_8bpc_avx2.main
132*c0909341SAndroid Build Coastguard Workercextern inv_txfm_add_dct_dct_8x32_8bpc_avx2.main_fast
133*c0909341SAndroid Build Coastguard Workercextern inv_txfm_add_dct_dct_16x32_8bpc_avx2.main_oddhalf
134*c0909341SAndroid Build Coastguard Workercextern inv_txfm_add_dct_dct_16x32_8bpc_avx2.main_oddhalf_fast
135*c0909341SAndroid Build Coastguard Workercextern inv_txfm_add_dct_dct_16x64_8bpc_avx2.main_part1
136*c0909341SAndroid Build Coastguard Workercextern inv_txfm_add_dct_dct_16x64_8bpc_avx2.main_part2_internal
137*c0909341SAndroid Build Coastguard Worker
138*c0909341SAndroid Build Coastguard Workercextern iadst_4x4_internal_8bpc_avx2.main
139*c0909341SAndroid Build Coastguard Workercextern iadst_4x8_internal_8bpc_avx2.main_pass2
140*c0909341SAndroid Build Coastguard Workercextern iadst_4x16_internal_8bpc_avx2.main2
141*c0909341SAndroid Build Coastguard Workercextern iadst_8x4_internal_8bpc_avx2.main
142*c0909341SAndroid Build Coastguard Workercextern iadst_8x8_internal_8bpc_avx2.main_pass2
143*c0909341SAndroid Build Coastguard Workercextern iadst_8x16_internal_8bpc_avx2.main
144*c0909341SAndroid Build Coastguard Workercextern iadst_8x16_internal_8bpc_avx2.main_pass2_end
145*c0909341SAndroid Build Coastguard Workercextern iadst_16x4_internal_8bpc_avx2.main
146*c0909341SAndroid Build Coastguard Workercextern iadst_16x8_internal_8bpc_avx2.main
147*c0909341SAndroid Build Coastguard Workercextern iadst_16x8_internal_8bpc_avx2.main_pass2_end
148*c0909341SAndroid Build Coastguard Workercextern iadst_16x16_internal_8bpc_avx2.main
149*c0909341SAndroid Build Coastguard Workercextern iadst_16x16_internal_8bpc_avx2.main_pass2_end
150*c0909341SAndroid Build Coastguard Worker
151*c0909341SAndroid Build Coastguard WorkerSECTION .text
152*c0909341SAndroid Build Coastguard Worker
; m(x): expands to mangle() applied to the concatenation of private_prefix,
; "_", x and SUFFIX. It is used below to refer to other functions of this file
; by their short name (e.g. the pass-1 target %%p1 in INV_TXFM_FN).
153*c0909341SAndroid Build Coastguard Worker%define m(x) mangle(private_prefix %+ _ %+ x %+ SUFFIX)
154*c0909341SAndroid Build Coastguard Worker
; WRAP_XMM <statement>
; Assembles the given statement (an instruction or a macro invocation together
; with all of its operands, hence the greedy "1+" parameter) while the x86inc
; register mapping is switched to XMM, then switches the mapping back to YMM.
; The surrounding code is therefore expected to be in INIT_YMM mode.
155*c0909341SAndroid Build Coastguard Worker%macro WRAP_XMM 1+
156*c0909341SAndroid Build Coastguard Worker    INIT_XMM cpuname
157*c0909341SAndroid Build Coastguard Worker    %1
158*c0909341SAndroid Build Coastguard Worker    INIT_YMM cpuname
159*c0909341SAndroid Build Coastguard Worker%endmacro
160*c0909341SAndroid Build Coastguard Worker
; IWHT4_1D_PACKED
; One 1-D pass of the 4-point inverse Walsh-Hadamard transform on dword data,
; with two inputs packed per register (one per 128-bit lane).
; In:      m0 = in0 in2, m1 = in1 in3
; Out:     as marked on the last three instructions: m3 = t1 t3,
;          m0 = ____ out0, m2 = out3 ____ (____ = lane not meaningful)
; Written: m0, m1, m2, m3. No constants are loaded and no other register is
;          touched.
; The three vpermq instructions rearrange 64-bit quarters so that the values
; needed together end up in matching positions for the subtract/add chain.
161*c0909341SAndroid Build Coastguard Worker%macro IWHT4_1D_PACKED 0
162*c0909341SAndroid Build Coastguard Worker    ; m0 = in0 in2, m1 = in1 in3
163*c0909341SAndroid Build Coastguard Worker    psubd                m2, m0, m1 ; t2
164*c0909341SAndroid Build Coastguard Worker    paddd               xm0, xm1    ; t0
165*c0909341SAndroid Build Coastguard Worker    vpermq               m2, m2, q3322
166*c0909341SAndroid Build Coastguard Worker    vpermq               m0, m0, q1100
167*c0909341SAndroid Build Coastguard Worker    vpermq               m1, m1, q3120
168*c0909341SAndroid Build Coastguard Worker    psubd                m3, m0, m2
169*c0909341SAndroid Build Coastguard Worker    psrad                m3, 1
170*c0909341SAndroid Build Coastguard Worker    psubd                m3, m1     ; t1 t3
171*c0909341SAndroid Build Coastguard Worker    psubd                m0, m3     ; ____ out0
172*c0909341SAndroid Build Coastguard Worker    paddd                m2, m3     ; out3 ____
173*c0909341SAndroid Build Coastguard Worker%endmacro
174*c0909341SAndroid Build Coastguard Worker
; inv_txfm_add_wht_wht_4x4_16bpc(dst, stride, c, eob, bdmax)
; 4x4 inverse Walsh-Hadamard transform, added to a block of 16-bit pixels.
;   dst/stride - destination block; four rows of 4 words are read and written
;   c          - 64 bytes of dword coefficients; cleared by this function
;   eob        - not read by this function
;   bdmax      - largest allowed pixel value, used as the upper clamp
; The cglobal numbers (3, 7, 6) follow the x86inc convention: arguments to
; load into registers, general-purpose registers used, vector registers used.
175*c0909341SAndroid Build Coastguard WorkerINIT_YMM avx2
176*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_wht_wht_4x4_16bpc, 3, 7, 6, dst, stride, c, eob, bdmax
    ; Load the four 16-byte coefficient rows: rows 0 and 2 into m0, rows 1 and
    ; 3 into m1 (one row per 128-bit lane).
177*c0909341SAndroid Build Coastguard Worker    mova                xm0, [cq+16*0]
178*c0909341SAndroid Build Coastguard Worker    vinserti128          m0, [cq+16*2], 1
179*c0909341SAndroid Build Coastguard Worker    mova                xm1, [cq+16*1]
180*c0909341SAndroid Build Coastguard Worker    vinserti128          m1, [cq+16*3], 1
    ; m4 = 0: used to clear the coefficient buffer now and as the lower clamp
    ; bound at the end.
181*c0909341SAndroid Build Coastguard Worker    pxor                 m4, m4
182*c0909341SAndroid Build Coastguard Worker    mova          [cq+32*0], m4
183*c0909341SAndroid Build Coastguard Worker    mova          [cq+32*1], m4
    ; r6 = address of destination row 2
184*c0909341SAndroid Build Coastguard Worker    lea                  r6, [dstq+strideq*2]
    ; Arithmetic shift of the inputs by 2 before the first pass.
185*c0909341SAndroid Build Coastguard Worker    psrad                m0, 2
186*c0909341SAndroid Build Coastguard Worker    psrad                m1, 2
187*c0909341SAndroid Build Coastguard Worker    IWHT4_1D_PACKED
    ; Re-interleave the first-pass results into the packed layout expected by
    ; the second pass (the transpose between the two passes).
188*c0909341SAndroid Build Coastguard Worker    punpckhdq            m0, m3
189*c0909341SAndroid Build Coastguard Worker    punpckldq            m3, m2
190*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m1, m0, m3
191*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m0, m3
192*c0909341SAndroid Build Coastguard Worker    IWHT4_1D_PACKED
    ; Merge the valid parts of m0 and m2, then narrow the dwords to words.
193*c0909341SAndroid Build Coastguard Worker    vpblendd             m0, m2, 0x33
194*c0909341SAndroid Build Coastguard Worker    packssdw             m0, m3
195*c0909341SAndroid Build Coastguard Worker    vextracti128        xm2, m0, 1
196*c0909341SAndroid Build Coastguard Worker    punpckhdq           xm1, xm0, xm2 ; out2 out1
197*c0909341SAndroid Build Coastguard Worker    punpckldq           xm0, xm2      ; out3 out0
    ; Load the destination rows in the same order as the outputs:
    ; xm2 = row3 (low half) row0 (high half), xm3 = row2 (low) row1 (high).
198*c0909341SAndroid Build Coastguard Worker    movq                xm2, [r6  +strideq*1]
199*c0909341SAndroid Build Coastguard Worker    movhps              xm2, [dstq+strideq*0]
200*c0909341SAndroid Build Coastguard Worker    movq                xm3, [r6  +strideq*0]
201*c0909341SAndroid Build Coastguard Worker    movhps              xm3, [dstq+strideq*1]
    ; xm5 = bdmax broadcast to all words. If the argument already lives in a
    ; register it is moved over first, otherwise it is broadcast from memory.
202*c0909341SAndroid Build Coastguard Worker%ifidn bdmaxd, bdmaxm
203*c0909341SAndroid Build Coastguard Worker    movd                xm5, bdmaxd
204*c0909341SAndroid Build Coastguard Worker    vpbroadcastw        xm5, xm5
205*c0909341SAndroid Build Coastguard Worker%else   ; win64: load from stack
206*c0909341SAndroid Build Coastguard Worker    vpbroadcastw        xm5, bdmaxm
207*c0909341SAndroid Build Coastguard Worker%endif
    ; Add the residual to the pixels (signed saturating add) and clamp the
    ; result to 0 .. bdmax.
208*c0909341SAndroid Build Coastguard Worker    paddsw              xm0, xm2
209*c0909341SAndroid Build Coastguard Worker    paddsw              xm1, xm3
210*c0909341SAndroid Build Coastguard Worker    pmaxsw              xm0, xm4
211*c0909341SAndroid Build Coastguard Worker    pmaxsw              xm1, xm4
212*c0909341SAndroid Build Coastguard Worker    pminsw              xm0, xm5
213*c0909341SAndroid Build Coastguard Worker    pminsw              xm1, xm5
    ; Store using the same row/half assignment as the loads above.
214*c0909341SAndroid Build Coastguard Worker    movhps [dstq+strideq*0], xm0
215*c0909341SAndroid Build Coastguard Worker    movhps [dstq+strideq*1], xm1
216*c0909341SAndroid Build Coastguard Worker    movq   [r6  +strideq*0], xm1
217*c0909341SAndroid Build Coastguard Worker    movq   [r6  +strideq*1], xm0
218*c0909341SAndroid Build Coastguard Worker    RET
219*c0909341SAndroid Build Coastguard Worker
220*c0909341SAndroid Build Coastguard Worker; dst1 = (src1 * coef1 - src2 * coef2 + rnd) >> 12
221*c0909341SAndroid Build Coastguard Worker; dst2 = (src1 * coef2 + src2 * coef1 + rnd) >> 12
222*c0909341SAndroid Build Coastguard Worker; flags: 1 = packed, 2 = inv_dst2
223*c0909341SAndroid Build Coastguard Worker; skip round/shift if rnd is not a number
;
; Parameters (register numbers unless noted otherwise):
;   %1, %2  src1/src2 on entry, dst1/dst2 on exit
;   %3, %4  scratch registers, always overwritten
;   %5      scratch register, overwritten only when coef1 is loaded from memory
;   %6      register holding the rounding constant; if this argument is not
;           numeric, no rounding constant is added and the >> 12 is skipped
;   %7, %8  coef1/coef2. A value below 32 is taken as the number of a register
;           that already holds the coefficient; anything else is used as the
;           suffix of a pd_<coef> constant that is loaded from memory.
;           (A token such as 2896_1567 also takes the memory path; NASM
;           presumably evaluates it as a number >= 32 because underscores are
;           allowed inside numeric literals.)
;   %9      flags. Bit 0 (packed): memory coefficients are loaded as a 128-bit
;           broadcast (16 bytes repeated in both lanes) instead of a dword
;           broadcast. Bit 1 (inv_dst2): dst2 is computed as
;           rnd - src1*coef2 - src2*coef1, i.e. negated; this path uses m%6
;           unconditionally, so %6 must be a register there.
224*c0909341SAndroid Build Coastguard Worker%macro ITX_MULSUB_2D 8-9 0 ; dst/src[1-2], tmp[1-3], rnd, coef[1-2], flags
    ; m%4 = src1 * coef2, m%3 = src2 * coef2
225*c0909341SAndroid Build Coastguard Worker%if %8 < 32
226*c0909341SAndroid Build Coastguard Worker    pmulld              m%4, m%1, m%8
227*c0909341SAndroid Build Coastguard Worker    pmulld              m%3, m%2, m%8
228*c0909341SAndroid Build Coastguard Worker%else
229*c0909341SAndroid Build Coastguard Worker%if %9 & 1
230*c0909341SAndroid Build Coastguard Worker    vbroadcasti128      m%3, [pd_%8]
231*c0909341SAndroid Build Coastguard Worker%else
232*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m%3, [pd_%8]
233*c0909341SAndroid Build Coastguard Worker%endif
234*c0909341SAndroid Build Coastguard Worker    pmulld              m%4, m%1, m%3
235*c0909341SAndroid Build Coastguard Worker    pmulld              m%3, m%2
236*c0909341SAndroid Build Coastguard Worker%endif
    ; m%1 = src1 * coef1, m%2 = src2 * coef1
237*c0909341SAndroid Build Coastguard Worker%if %7 < 32
238*c0909341SAndroid Build Coastguard Worker    pmulld              m%1, m%7
239*c0909341SAndroid Build Coastguard Worker    pmulld              m%2, m%7
240*c0909341SAndroid Build Coastguard Worker%else
241*c0909341SAndroid Build Coastguard Worker%if %9 & 1
242*c0909341SAndroid Build Coastguard Worker    vbroadcasti128      m%5, [pd_%7]
243*c0909341SAndroid Build Coastguard Worker%else
244*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m%5, [pd_%7]
245*c0909341SAndroid Build Coastguard Worker%endif
246*c0909341SAndroid Build Coastguard Worker    pmulld              m%1, m%5
247*c0909341SAndroid Build Coastguard Worker    pmulld              m%2, m%5
248*c0909341SAndroid Build Coastguard Worker%endif
    ; dst2: either rnd - src1*coef2 - src2*coef1 (inv_dst2)
    ;       or     src1*coef2 + src2*coef1 (+ rnd)
249*c0909341SAndroid Build Coastguard Worker%if %9 & 2
250*c0909341SAndroid Build Coastguard Worker    psubd               m%4, m%6, m%4
251*c0909341SAndroid Build Coastguard Worker    psubd               m%2, m%4, m%2
252*c0909341SAndroid Build Coastguard Worker%else
253*c0909341SAndroid Build Coastguard Worker%ifnum %6
254*c0909341SAndroid Build Coastguard Worker    paddd               m%4, m%6
255*c0909341SAndroid Build Coastguard Worker%endif
256*c0909341SAndroid Build Coastguard Worker    paddd               m%2, m%4
257*c0909341SAndroid Build Coastguard Worker%endif
    ; dst1: src1*coef1 (+ rnd) - src2*coef2
258*c0909341SAndroid Build Coastguard Worker%ifnum %6
259*c0909341SAndroid Build Coastguard Worker    paddd               m%1, m%6
260*c0909341SAndroid Build Coastguard Worker%endif
261*c0909341SAndroid Build Coastguard Worker    psubd               m%1, m%3
262*c0909341SAndroid Build Coastguard Worker%ifnum %6
263*c0909341SAndroid Build Coastguard Worker    psrad               m%2, 12
264*c0909341SAndroid Build Coastguard Worker    psrad               m%1, 12
265*c0909341SAndroid Build Coastguard Worker%endif
266*c0909341SAndroid Build Coastguard Worker%endmacro
267*c0909341SAndroid Build Coastguard Worker
; INV_TXFM_FN type1, type2, eob_offset, size [, bitdepth = 10]
; Emits the public entry point inv_txfm_add_<type1>_<type2>_<size>_<bd>bpc.
; The entry point
;   1. stores the address of the second-pass routine
;      (i<type2>_<size>_internal_<bd>bpc.pass2) in tx2q, and
;   2. transfers control to the first-pass routine
;      %%p1 = i<type1>_<size>_internal_<bd>bpc.
; dct_dct: control goes to %%p1 only when eob is non-zero. For eob == 0
;   execution continues with whatever follows the macro invocation, so the
;   invoking macro must supply that code (see the DC-only path emitted by
;   INV_TXFM_4X4_FN).
; other type pairs: eob_offset, if non-zero, is added to eobd first. The
;   `times` expression then emits `jmp %%p1` exactly once when %%p1 lies at a
;   higher address than %%end (negative difference, sign bit set) and emits
;   nothing when %%p1 == %%end, in which case execution falls through into the
;   first-pass routine. A %%p1 located before %%end would also yield no jump,
;   so the first-pass routine has to be placed after its wrappers.
268*c0909341SAndroid Build Coastguard Worker%macro INV_TXFM_FN 4-5 10 ; type1, type2, eob_offset, size, bitdepth
269*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_%1_%2_%4_%5bpc, 4, 5, 0, dst, stride, c, eob, tx2
270*c0909341SAndroid Build Coastguard Worker    %define %%p1 m(i%1_%4_internal_%5bpc)
271*c0909341SAndroid Build Coastguard Worker    ; Jump to the 1st txfm function if we're not taking the fast path, which
272*c0909341SAndroid Build Coastguard Worker    ; in turn performs an indirect jump to the 2nd txfm function.
273*c0909341SAndroid Build Coastguard Worker    lea tx2q, [m(i%2_%4_internal_%5bpc).pass2]
274*c0909341SAndroid Build Coastguard Worker%ifidn %1_%2, dct_dct
275*c0909341SAndroid Build Coastguard Worker    test               eobd, eobd
276*c0909341SAndroid Build Coastguard Worker    jnz %%p1
277*c0909341SAndroid Build Coastguard Worker%else
278*c0909341SAndroid Build Coastguard Worker%if %3
279*c0909341SAndroid Build Coastguard Worker    add                eobd, %3
280*c0909341SAndroid Build Coastguard Worker%endif
281*c0909341SAndroid Build Coastguard Worker    ; jump to the 1st txfm function unless it's located directly after this
282*c0909341SAndroid Build Coastguard Worker    times ((%%end - %%p1) >> 31) & 1 jmp %%p1
283*c0909341SAndroid Build Coastguard WorkerALIGN function_align
284*c0909341SAndroid Build Coastguard Worker%%end:
285*c0909341SAndroid Build Coastguard Worker%endif
286*c0909341SAndroid Build Coastguard Worker%endmacro
287*c0909341SAndroid Build Coastguard Worker
; INV_TXFM_4X4_FN type1, type2 [, bitdepth = 10]
; Emits the 4x4 entry point through INV_TXFM_FN (eob offset 0). For dct_dct it
; also emits the DC-only path that INV_TXFM_FN falls into when eob == 0:
;   - xm2 is loaded with the dconly_<bd>bpc bias;
;   - for 10 bpc the full DC-only code follows, including the labels .dconly,
;     .dconly2, .dconly3 and .dconly_loop (the extra labels are presumably
;     entry points for other block sizes -- they are not used in this part of
;     the file);
;   - for any other bit depth the code jumps to the 10 bpc .dconly label, with
;     its own bias already in xm2.
; The loop handles two rows of 4 pixels (8 bytes each) per iteration.
288*c0909341SAndroid Build Coastguard Worker%macro INV_TXFM_4X4_FN 2-3 10 ; type1, type2, bitdepth
289*c0909341SAndroid Build Coastguard Worker    INV_TXFM_FN          %1, %2, 0, 4x4, %3
290*c0909341SAndroid Build Coastguard Worker%ifidn %1_%2, dct_dct
291*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        xm2, [dconly_%3bpc]
292*c0909341SAndroid Build Coastguard Worker%if %3 = 10
293*c0909341SAndroid Build Coastguard Worker.dconly:
    ; r6d = dc * 181; the dc coefficient in memory is then overwritten with
    ; eobd, which is 0 on this path. r3d is the same register as eobd, so the
    ; `or` sets the row counter to 4.
294*c0909341SAndroid Build Coastguard Worker    imul                r6d, [cq], 181
295*c0909341SAndroid Build Coastguard Worker    mov                [cq], eobd ; 0
296*c0909341SAndroid Build Coastguard Worker    or                  r3d, 4
297*c0909341SAndroid Build Coastguard Worker.dconly2:
    ; rounded shift: (r6d + 128) >> 8
298*c0909341SAndroid Build Coastguard Worker    add                 r6d, 128
299*c0909341SAndroid Build Coastguard Worker    sar                 r6d, 8
300*c0909341SAndroid Build Coastguard Worker.dconly3:
    ; second scaling: (r6d * 181 + 2176) >> 12, where 2176 = 2048 + 128
301*c0909341SAndroid Build Coastguard Worker    imul                r6d, 181
302*c0909341SAndroid Build Coastguard Worker    add                 r6d, 2176
303*c0909341SAndroid Build Coastguard Worker    sar                 r6d, 12
    ; xm0 = (dc + bias) in every word lane, added with signed saturation
304*c0909341SAndroid Build Coastguard Worker    movd                xm0, r6d
305*c0909341SAndroid Build Coastguard Worker    paddsw              xm0, xm2
306*c0909341SAndroid Build Coastguard Worker    vpbroadcastw        xm0, xm0
307*c0909341SAndroid Build Coastguard Worker.dconly_loop:
    ; pixel + dc + bias saturates at 0x7fff; subtracting the bias with
    ; unsigned saturation afterwards floors the result at 0.
308*c0909341SAndroid Build Coastguard Worker    movq                xm1, [dstq+strideq*0]
309*c0909341SAndroid Build Coastguard Worker    movhps              xm1, [dstq+strideq*1]
310*c0909341SAndroid Build Coastguard Worker    paddsw              xm1, xm0
311*c0909341SAndroid Build Coastguard Worker    psubusw             xm1, xm2
312*c0909341SAndroid Build Coastguard Worker    movq   [dstq+strideq*0], xm1
313*c0909341SAndroid Build Coastguard Worker    movhps [dstq+strideq*1], xm1
314*c0909341SAndroid Build Coastguard Worker    lea                dstq, [dstq+strideq*2]
    ; two rows done; loop while rows remain
315*c0909341SAndroid Build Coastguard Worker    sub                 r3d, 2
316*c0909341SAndroid Build Coastguard Worker    jg .dconly_loop
317*c0909341SAndroid Build Coastguard Worker    WRAP_XMM RET
318*c0909341SAndroid Build Coastguard Worker%else
319*c0909341SAndroid Build Coastguard Worker    jmp m(inv_txfm_add_dct_dct_4x4_10bpc).dconly
320*c0909341SAndroid Build Coastguard Worker%endif
321*c0909341SAndroid Build Coastguard Worker%endif
322*c0909341SAndroid Build Coastguard Worker%endmacro
323*c0909341SAndroid Build Coastguard Worker
; IDCT4_1D_PACKED dst/src1, dst/src2, tmp1, tmp2, tmp3, rnd
; One 1-D pass of the 4-point inverse DCT on dword data, two values packed per
; 128-bit lane.
; The rotation is done by ITX_MULSUB_2D in packed mode (flag 1), so both
; coefficient arguments are loaded as 16-byte vectors: pd_2896_1567 holds
; 2896, 2896, 1567, 1567 and pd_2896_3784 holds 2896, 2896, 3784, 3784.
; The 64-bit halves are then regrouped into "t0 t1" / "t3 t2" and combined by
; one add and one subtract.
; Out:     m%1 = out0 out1, m%2 = out3 out2
; Written: m%1 .. m%5. m%6 must hold the rounding constant and is only read.
324*c0909341SAndroid Build Coastguard Worker%macro IDCT4_1D_PACKED 6 ; dst/src[1-2], tmp[1-3], rnd
325*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D        %1, %2, %3, %4, %5, %6, 2896_1567, 2896_3784, 1
326*c0909341SAndroid Build Coastguard Worker    punpckhqdq          m%3, m%2, m%1 ; t3 t2
327*c0909341SAndroid Build Coastguard Worker    punpcklqdq          m%2, m%1      ; t0 t1
328*c0909341SAndroid Build Coastguard Worker    paddd               m%1, m%2, m%3 ; out0 out1
329*c0909341SAndroid Build Coastguard Worker    psubd               m%2, m%3      ; out3 out2
330*c0909341SAndroid Build Coastguard Worker%endmacro
331*c0909341SAndroid Build Coastguard Worker
; IDCT4_1D_PACKED_WORD dst/src1, dst/src2, tmp1, tmp2, tmp3, rnd
; Word (16-bit) variant of the 4-point inverse DCT pass. The inputs are
; interleaved as word pairs and multiplied with the word-pair constants
; pw_m3784_1567, pw_1567_3784, pw_m2896_2896 and pw_2896_2896 (declared cextern
; in this file) using pmaddwd. The four dword products are rounded with m%6,
; shifted right by 12 and packed back to words with signed saturation.
; The final butterfly uses saturating word add/subtract.
; Out:     m%1 = out0 out1, m%2 = out3 out2
; Written: m%1 .. m%5. m%6 is only read, so the caller can keep using the
;          rounding constant afterwards.
332*c0909341SAndroid Build Coastguard Worker%macro IDCT4_1D_PACKED_WORD 6 ; dst/src[1-2], tmp[1-3], rnd
333*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m%5, [pw_m3784_1567]
334*c0909341SAndroid Build Coastguard Worker    punpckhwd           m%3, m%2, m%1
335*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m%4, [pw_1567_3784]
336*c0909341SAndroid Build Coastguard Worker    punpcklwd           m%2, m%1
337*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m%1, [pw_m2896_2896]
338*c0909341SAndroid Build Coastguard Worker    pmaddwd             m%5, m%3
339*c0909341SAndroid Build Coastguard Worker    pmaddwd             m%3, m%4
340*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m%4, [pw_2896_2896]
341*c0909341SAndroid Build Coastguard Worker    pmaddwd             m%1, m%2
342*c0909341SAndroid Build Coastguard Worker    pmaddwd             m%2, m%4
343*c0909341SAndroid Build Coastguard Worker    REPX     {paddd x, m%6}, m%5, m%3, m%1, m%2
344*c0909341SAndroid Build Coastguard Worker    REPX     {psrad x, 12 }, m%5, m%3, m%1, m%2
345*c0909341SAndroid Build Coastguard Worker    packssdw            m%3, m%5      ; t3 t2
346*c0909341SAndroid Build Coastguard Worker    packssdw            m%2, m%1      ; t0 t1
347*c0909341SAndroid Build Coastguard Worker    paddsw              m%1, m%2, m%3 ; out0 out1
348*c0909341SAndroid Build Coastguard Worker    psubsw              m%2, m%3      ; out3 out2
349*c0909341SAndroid Build Coastguard Worker%endmacro
350*c0909341SAndroid Build Coastguard Worker
; Public 10 bpc 4x4 entry points whose first pass is the DCT. Each one stores
; the address of its second-pass routine in tx2q and then continues with
; idct_4x4_internal_10bpc below; the dct_dct variant also contains the DC-only
; path used when eob == 0.
351*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X4_FN dct, dct
352*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X4_FN dct, identity
353*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X4_FN dct, adst
354*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X4_FN dct, flipadst
355*c0909341SAndroid Build Coastguard Worker
; idct_4x4_internal_10bpc
; Entry (pass 1): runs the dword IDCT on the coefficients at cq, packs the
;   result to words in m0, applies the idct4_shuf byte shuffle and jumps to
;   the second-pass routine whose address is in tx2q.
; .pass2: second-pass IDCT on the words in m0, followed by rounding, adding
;   to the destination pixels, clamping to 0 .. pixel_10bpc_max and clearing
;   the 64-byte coefficient buffer. It reads m5 as rounding constant before
;   loading anything into it, so m5 must still hold the broadcast pd_2048 set
;   up by the first pass (.main here; iadst_4x4 .pass1_end does the same).
; .main / .main2: dword 1-D IDCT helper, called with `call` and ending in a
;   plain `ret`. .main loads both coefficient vectors (with their 64-bit
;   quarters reordered by vpermq q3120) and pd_2048; .main2 skips the loads.
356*c0909341SAndroid Build Coastguard Workercglobal idct_4x4_internal_10bpc, 0, 7, 6, dst, stride, c, eob, tx2
357*c0909341SAndroid Build Coastguard Worker    call .main
358*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m2, [idct4_shuf]
359*c0909341SAndroid Build Coastguard Worker    packssdw             m0, m1
360*c0909341SAndroid Build Coastguard Worker    pshufb               m0, m2
361*c0909341SAndroid Build Coastguard Worker    jmp                tx2q
362*c0909341SAndroid Build Coastguard Worker.pass2:
    ; Split m0 into two 128-bit halves and run the word IDCT on them.
363*c0909341SAndroid Build Coastguard Worker    vextracti128        xm1, m0, 1
364*c0909341SAndroid Build Coastguard Worker    WRAP_XMM IDCT4_1D_PACKED_WORD 0, 1, 2, 3, 4, 5
    ; Packing the dwords 2048 to words gives 2048 in every word lane;
    ; pmulhrsw by 2048 computes (x*2048 + 0x4000) >> 15, i.e. (x + 8) >> 4.
365*c0909341SAndroid Build Coastguard Worker    packssdw            xm5, xm5 ; pw_2048
366*c0909341SAndroid Build Coastguard Worker    pmulhrsw            xm0, xm5
367*c0909341SAndroid Build Coastguard Worker    pmulhrsw            xm1, xm5
    ; xm2 = row0 (low half) row1 (high half); xm3 = row3 (low) row2 (high),
    ; matching the "out0 out1" / "out3 out2" layout of xm0 / xm1.
368*c0909341SAndroid Build Coastguard Worker    movq                xm2, [dstq+strideq*0]
369*c0909341SAndroid Build Coastguard Worker    movhps              xm2, [dstq+strideq*1]
370*c0909341SAndroid Build Coastguard Worker    lea                  r6, [dstq+strideq*2]
371*c0909341SAndroid Build Coastguard Worker    movq                xm3, [r6  +strideq*1]
372*c0909341SAndroid Build Coastguard Worker    movhps              xm3, [r6  +strideq*0]
373*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        xm5, [pixel_10bpc_max]
    ; m4 = 0: clears the coefficient buffer and is the lower clamp bound.
374*c0909341SAndroid Build Coastguard Worker    pxor                 m4, m4
375*c0909341SAndroid Build Coastguard Worker    mova          [cq+32*0], m4
376*c0909341SAndroid Build Coastguard Worker    mova          [cq+32*1], m4
377*c0909341SAndroid Build Coastguard Worker    paddw               xm0, xm2
378*c0909341SAndroid Build Coastguard Worker    paddw               xm1, xm3
379*c0909341SAndroid Build Coastguard Worker    pmaxsw              xm0, xm4
380*c0909341SAndroid Build Coastguard Worker    pmaxsw              xm1, xm4
381*c0909341SAndroid Build Coastguard Worker    pminsw              xm0, xm5
382*c0909341SAndroid Build Coastguard Worker    pminsw              xm1, xm5
383*c0909341SAndroid Build Coastguard Worker    movq   [dstq+strideq*0], xm0
384*c0909341SAndroid Build Coastguard Worker    movhps [dstq+strideq*1], xm0
385*c0909341SAndroid Build Coastguard Worker    movhps [r6  +strideq*0], xm1
386*c0909341SAndroid Build Coastguard Worker    movq   [r6  +strideq*1], xm1
387*c0909341SAndroid Build Coastguard Worker    RET
388*c0909341SAndroid Build Coastguard WorkerALIGN function_align
389*c0909341SAndroid Build Coastguard Worker.main:
390*c0909341SAndroid Build Coastguard Worker    vpermq               m0, [cq+32*0], q3120
391*c0909341SAndroid Build Coastguard Worker    vpermq               m1, [cq+32*1], q3120
392*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m5, [pd_2048]
393*c0909341SAndroid Build Coastguard Worker.main2:
394*c0909341SAndroid Build Coastguard Worker    IDCT4_1D_PACKED       0, 1, 2, 3, 4, 5
395*c0909341SAndroid Build Coastguard Worker    ret
396*c0909341SAndroid Build Coastguard Worker
; Public 10 bpc 4x4 entry points whose first pass is the ADST. None of them
; is dct_dct, so no DC-only path is emitted; each wrapper only sets tx2q to
; its second-pass routine and passes control to iadst_4x4_internal_10bpc
; (by an explicit jmp, or by falling through when that routine starts exactly
; at the wrapper's aligned end).
397*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X4_FN adst, dct
398*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X4_FN adst, adst
399*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X4_FN adst, flipadst
400*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X4_FN adst, identity
401*c0909341SAndroid Build Coastguard Worker
; IADST4_1D
; One 1-D pass of the 4-point inverse ADST on dword data, without the final
; rounding and shift (the caller has to apply them).
; In:      m0 = in0, m2 = in2, m3 = in3 (per the inline comments); m1 is the
;          remaining input (presumably in1) and is only scaled by -3344.
; Out:     m4 = out0, m6 = out1, m2 = out2, m3 = out3 (all unrounded)
; Written: m0 .. m7; the inputs in m0 and m1 are destroyed.
; Constants are broadcast from pd_1321, pd_2482 and pd_m3344; 3803 is formed
; in a register as 1321 + 2482 instead of being loaded.
; Note that m6 and m7 are used here, which are callee-saved on Win64 (see the
; Microsoft x64 ABI); a function using this macro has to account for that.
402*c0909341SAndroid Build Coastguard Worker%macro IADST4_1D 0
403*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m5, [pd_1321]
404*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m7, [pd_2482]
405*c0909341SAndroid Build Coastguard Worker    pmulld               m4, m0, m5    ; 1321*in0
406*c0909341SAndroid Build Coastguard Worker    pmulld               m6, m3, m7    ; 2482*in3
407*c0909341SAndroid Build Coastguard Worker    paddd                m4, m6        ; 1321*in0 + 2482*in3
408*c0909341SAndroid Build Coastguard Worker    pmulld               m6, m0, m7    ; 2482*in0
409*c0909341SAndroid Build Coastguard Worker    paddd                m0, m3        ; in0 + in3
410*c0909341SAndroid Build Coastguard Worker    paddd                m7, m5        ; pd_3803
411*c0909341SAndroid Build Coastguard Worker    pmulld               m5, m2        ; 1321*in2
412*c0909341SAndroid Build Coastguard Worker    pmulld               m3, m7        ; 3803*in3
413*c0909341SAndroid Build Coastguard Worker    pmulld               m7, m2        ; 3803*in2
414*c0909341SAndroid Build Coastguard Worker    psubd                m2, m0        ; in2 - in0 - in3
415*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m0, [pd_m3344]
416*c0909341SAndroid Build Coastguard Worker    pmulld               m1, m0        ; -t3
417*c0909341SAndroid Build Coastguard Worker    pmulld               m2, m0        ; out2 (unrounded)
418*c0909341SAndroid Build Coastguard Worker    psubd                m6, m5        ; 2482*in0 - 1321*in2
419*c0909341SAndroid Build Coastguard Worker    paddd                m4, m7        ;  t0
420*c0909341SAndroid Build Coastguard Worker    psubd                m6, m3        ;  t1
    ; m1 holds -t3, so subtracting it adds t3 and adding it subtracts t3:
    ; out0 = t0 + t3, out1 = t1 + t3, out3 = t0 + t1 - t3.
421*c0909341SAndroid Build Coastguard Worker    paddd                m3, m4, m6
422*c0909341SAndroid Build Coastguard Worker    psubd                m4, m1        ; out0 (unrounded)
423*c0909341SAndroid Build Coastguard Worker    psubd                m6, m1        ; out1 (unrounded)
424*c0909341SAndroid Build Coastguard Worker    paddd                m3, m1        ; out3 (unrounded)
425*c0909341SAndroid Build Coastguard Worker%endmacro
426*c0909341SAndroid Build Coastguard Worker
; iadst_4x4_internal_10bpc: 4x4 inverse ADST, 10-bit pixels.
;   Named operands: dst, stride, c (coefficient buffer), eob, tx2 (address
;   that the first pass jumps to when it is done).
;   Fall-through entry = first pass; .pass2 = second pass + add to dst.
;   .pass1_end, .main and .main2 are also entered from other routines in this
;   file (the flipadst and 12bpc variants).
427*c0909341SAndroid Build Coastguard Workercglobal iadst_4x4_internal_10bpc, 0, 7, 6, dst, stride, c, eob, tx2
428*c0909341SAndroid Build Coastguard Worker    call .main
    ; Glue the four 128-bit results into two 256-bit registers.
429*c0909341SAndroid Build Coastguard Worker    vinserti128          m0, m4, xm6, 1    ; m0 = out0 | out1
430*c0909341SAndroid Build Coastguard Worker    vinserti128          m1, m2, xm3, 1    ; m1 = out2 | out3
431*c0909341SAndroid Build Coastguard Worker.pass1_end:
    ; in: m0, m1 = unrounded 32-bit first-pass results.
    ; Round (+2048, >>12), narrow to 16 bits with signed saturation, then
    ; reorder: itx4_shuf is used as the vpermd dword index vector and, after
    ; being shifted right by 4, as the pshufb byte control.
432*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m5, [pd_2048]
433*c0909341SAndroid Build Coastguard Worker    mova                 m2, [itx4_shuf]
434*c0909341SAndroid Build Coastguard Worker    paddd                m0, m5
435*c0909341SAndroid Build Coastguard Worker    paddd                m1, m5
436*c0909341SAndroid Build Coastguard Worker    psrad                m0, 12
437*c0909341SAndroid Build Coastguard Worker    psrad                m1, 12
438*c0909341SAndroid Build Coastguard Worker    packssdw             m0, m1
439*c0909341SAndroid Build Coastguard Worker    vpermd               m0, m2, m0
440*c0909341SAndroid Build Coastguard Worker    psrld                m2, 4
441*c0909341SAndroid Build Coastguard Worker    pshufb               m0, m2
    ; Win64 only: reload xmm6/xmm7, which .main saved before IADST4_1D
    ; overwrote them. .main stored them at [rsp+16]/[rsp+32] while its 8-byte
    ; return address was still on the stack, so the same slots are at
    ; [rsp+8]/[rsp+24] here.
442*c0909341SAndroid Build Coastguard Worker%if WIN64
443*c0909341SAndroid Build Coastguard Worker    movaps             xmm6, [rsp+ 8]
444*c0909341SAndroid Build Coastguard Worker    movaps             xmm7, [rsp+24]
445*c0909341SAndroid Build Coastguard Worker%endif
446*c0909341SAndroid Build Coastguard Worker    jmp                tx2q                 ; continue with the second pass
447*c0909341SAndroid Build Coastguard Worker.pass2:
    ; in: m0 = packed 16-bit first-pass output.
    ; The second pass reuses the 8bpc ADST kernel on 16-bit data. r6 is set
    ; up before the call -- presumably the constant base the 8bpc code
    ; addresses relative to; confirm against the 8bpc source.
448*c0909341SAndroid Build Coastguard Worker    lea                  r6, [deint_shuf+128]
449*c0909341SAndroid Build Coastguard Worker    vextracti128        xm1, m0, 1          ; xm1 = upper half of m0
450*c0909341SAndroid Build Coastguard Worker    call m(iadst_4x4_internal_8bpc).main
451*c0909341SAndroid Build Coastguard Worker.end:
    ; in: xm0, xm1 = 16-bit residuals, two rows of four pixels each.
    ; Scale with pmulhrsw (a rounded >>4 if pw_2048 holds 2048 as named), add
    ; to the four destination rows, clamp to [0, pixel_10bpc_max], store.
452*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        xm4, [pw_2048]
453*c0909341SAndroid Build Coastguard Worker    movq                xm2, [dstq+strideq*0]
454*c0909341SAndroid Build Coastguard Worker    movhps              xm2, [dstq+strideq*1]   ; xm2 = row0 | row1
455*c0909341SAndroid Build Coastguard Worker    lea                  r6, [dstq+strideq*2]
456*c0909341SAndroid Build Coastguard Worker    movq                xm3, [r6  +strideq*0]
457*c0909341SAndroid Build Coastguard Worker    movhps              xm3, [r6  +strideq*1]   ; xm3 = row2 | row3
458*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        xm5, [pixel_10bpc_max]
459*c0909341SAndroid Build Coastguard Worker    pmulhrsw            xm0, xm4
460*c0909341SAndroid Build Coastguard Worker    pmulhrsw            xm1, xm4
    ; m4 = 0: used both to clear the 64-byte coefficient buffer and as the
    ; lower clamp bound.
461*c0909341SAndroid Build Coastguard Worker    pxor                 m4, m4
462*c0909341SAndroid Build Coastguard Worker    mova          [cq+32*0], m4
463*c0909341SAndroid Build Coastguard Worker    mova          [cq+32*1], m4
464*c0909341SAndroid Build Coastguard Worker    paddw               xm0, xm2
465*c0909341SAndroid Build Coastguard Worker    paddw               xm1, xm3
466*c0909341SAndroid Build Coastguard Worker    pmaxsw              xm0, xm4
467*c0909341SAndroid Build Coastguard Worker    pmaxsw              xm1, xm4
468*c0909341SAndroid Build Coastguard Worker    pminsw              xm0, xm5
469*c0909341SAndroid Build Coastguard Worker    pminsw              xm1, xm5
470*c0909341SAndroid Build Coastguard Worker    movq   [dstq+strideq*0], xm0
471*c0909341SAndroid Build Coastguard Worker    movhps [dstq+strideq*1], xm0
472*c0909341SAndroid Build Coastguard Worker    movq   [r6  +strideq*0], xm1
473*c0909341SAndroid Build Coastguard Worker    movhps [r6  +strideq*1], xm1
474*c0909341SAndroid Build Coastguard Worker    RET
475*c0909341SAndroid Build Coastguard WorkerALIGN function_align
476*c0909341SAndroid Build Coastguard Worker.main:
    ; Load the four 16-byte coefficient rows into xm0-xm3, then run the
    ; butterfly. Results are left in m4/m6/m2/m3 (see IADST4_1D).
477*c0909341SAndroid Build Coastguard Worker    mova                xm0, [cq+16*0]
478*c0909341SAndroid Build Coastguard Worker    mova                xm1, [cq+16*1]
479*c0909341SAndroid Build Coastguard Worker    mova                xm2, [cq+16*2]
480*c0909341SAndroid Build Coastguard Worker    mova                xm3, [cq+16*3]
    ; Win64 only: IADST4_1D writes m6 and m7, and xmm6/xmm7 are callee-saved
    ; in the Microsoft x64 ABI, so they are spilled to the caller's stack
    ; area here (presumably the shadow space -- confirm) and reloaded in
    ; .pass1_end.
481*c0909341SAndroid Build Coastguard Worker%if WIN64
482*c0909341SAndroid Build Coastguard Worker    movaps         [rsp+16], xmm6
483*c0909341SAndroid Build Coastguard Worker    movaps         [rsp+32], xmm7
484*c0909341SAndroid Build Coastguard Worker%endif
485*c0909341SAndroid Build Coastguard Worker.main2:
    ; Entry used when the inputs are already in xm0-xm3 (the 12bpc second
    ; pass jumps here). The butterfly is expanded under WRAP_XMM.
486*c0909341SAndroid Build Coastguard Worker    WRAP_XMM IADST4_1D
487*c0909341SAndroid Build Coastguard Worker    ret
488*c0909341SAndroid Build Coastguard Worker
; Instantiate the 4x4 entry points whose first transform type is flipadst
; (no bitdepth argument; presumably the 10bpc default -- confirm against the
; INV_TXFM_4X4_FN definition).
489*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X4_FN flipadst, dct
490*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X4_FN flipadst, adst
491*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X4_FN flipadst, flipadst
492*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X4_FN flipadst, identity
493*c0909341SAndroid Build Coastguard Worker
; iflipadst_4x4_internal_10bpc: 4x4 inverse flipped ADST, 10-bit pixels.
;   Uses the same butterfly as iadst_4x4_internal_10bpc; the flip is done by
;   reversing the output order (pass 1) or the destination row order (pass 2).
494*c0909341SAndroid Build Coastguard Workercglobal iflipadst_4x4_internal_10bpc, 0, 7, 6, dst, stride, c, eob, tx2
495*c0909341SAndroid Build Coastguard Worker    call m(iadst_4x4_internal_10bpc).main
    ; Same results as the plain ADST, combined in reverse order.
496*c0909341SAndroid Build Coastguard Worker    vinserti128          m0, m3, xm2, 1    ; m0 = out3 | out2
497*c0909341SAndroid Build Coastguard Worker    vinserti128          m1, m6, xm4, 1    ; m1 = out1 | out0
498*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_4x4_internal_10bpc).pass1_end
499*c0909341SAndroid Build Coastguard Worker.pass2:
    ; Second pass: 8bpc ADST kernel on the packed 16-bit data, as in the
    ; plain ADST second pass.
500*c0909341SAndroid Build Coastguard Worker    lea                  r6, [deint_shuf+128]
501*c0909341SAndroid Build Coastguard Worker    vextracti128        xm1, m0, 1
502*c0909341SAndroid Build Coastguard Worker    call m(iadst_4x4_internal_8bpc).main
    ; Add to dst with the row order reversed: each register holds its row
    ; pair swapped (odd row in the low half), and xm0 is paired with rows 3/2
    ; while xm1 is paired with rows 1/0.
503*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        xm4, [pw_2048]
504*c0909341SAndroid Build Coastguard Worker    movq                xm3, [dstq+strideq*1]
505*c0909341SAndroid Build Coastguard Worker    movhps              xm3, [dstq+strideq*0]   ; xm3 = row1 | row0
506*c0909341SAndroid Build Coastguard Worker    lea                  r6, [dstq+strideq*2]
507*c0909341SAndroid Build Coastguard Worker    movq                xm2, [r6  +strideq*1]
508*c0909341SAndroid Build Coastguard Worker    movhps              xm2, [r6  +strideq*0]   ; xm2 = row3 | row2
509*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        xm5, [pixel_10bpc_max]
510*c0909341SAndroid Build Coastguard Worker    pmulhrsw            xm0, xm4
511*c0909341SAndroid Build Coastguard Worker    pmulhrsw            xm1, xm4
    ; m4 = 0: clears the 64-byte coefficient buffer and is the lower clamp.
512*c0909341SAndroid Build Coastguard Worker    pxor                 m4, m4
513*c0909341SAndroid Build Coastguard Worker    mova          [cq+32*0], m4
514*c0909341SAndroid Build Coastguard Worker    mova          [cq+32*1], m4
515*c0909341SAndroid Build Coastguard Worker    paddw               xm0, xm2
516*c0909341SAndroid Build Coastguard Worker    paddw               xm1, xm3
517*c0909341SAndroid Build Coastguard Worker    pmaxsw              xm0, xm4
518*c0909341SAndroid Build Coastguard Worker    pmaxsw              xm1, xm4
519*c0909341SAndroid Build Coastguard Worker    pminsw              xm0, xm5
520*c0909341SAndroid Build Coastguard Worker    pminsw              xm1, xm5
521*c0909341SAndroid Build Coastguard Worker    movhps [dstq+strideq*0], xm1
522*c0909341SAndroid Build Coastguard Worker    movq   [dstq+strideq*1], xm1
523*c0909341SAndroid Build Coastguard Worker    movhps [r6  +strideq*0], xm0
524*c0909341SAndroid Build Coastguard Worker    movq   [r6  +strideq*1], xm0
525*c0909341SAndroid Build Coastguard Worker    RET
526*c0909341SAndroid Build Coastguard Worker
; Instantiate the 4x4 entry points whose first transform type is identity
; (no bitdepth argument; presumably the 10bpc default -- confirm against the
; INV_TXFM_4X4_FN definition).
527*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X4_FN identity, dct
528*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X4_FN identity, adst
529*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X4_FN identity, flipadst
530*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X4_FN identity, identity
531*c0909341SAndroid Build Coastguard Worker
; iidentity_4x4_internal_10bpc: 4x4 identity transform, 10-bit pixels.
;   Pass 1 scales the 32-bit coefficients by 5793/4096 (about 1.4143) and
;   packs them to 16 bits; pass 2 applies a 16-bit scale and adds to dst.
532*c0909341SAndroid Build Coastguard Workercglobal iidentity_4x4_internal_10bpc, 0, 7, 6, dst, stride, c, eob, tx2
533*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m1, [pd_5793]
534*c0909341SAndroid Build Coastguard Worker    pmulld               m0, m1, [cq+32*0]     ; 5793 * first 32 bytes of c
535*c0909341SAndroid Build Coastguard Worker    pmulld               m1,     [cq+32*1]     ; 5793 * second 32 bytes of c
536*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m5, [pd_2048]
537*c0909341SAndroid Build Coastguard Worker    mova                 m3, [itx4_shuf]
    ; Round (+2048, >>12), pack to 16 bits with signed saturation, then
    ; reorder: itx4_shuf serves as the vpermd index vector and, shifted right
    ; by 4, as the pshufb control.
538*c0909341SAndroid Build Coastguard Worker    paddd                m0, m5
539*c0909341SAndroid Build Coastguard Worker    paddd                m1, m5
540*c0909341SAndroid Build Coastguard Worker    psrad                m0, 12
541*c0909341SAndroid Build Coastguard Worker    psrad                m1, 12
542*c0909341SAndroid Build Coastguard Worker    packssdw             m0, m1
543*c0909341SAndroid Build Coastguard Worker    vpermd               m0, m3, m0
544*c0909341SAndroid Build Coastguard Worker    psrld                m3, 4
545*c0909341SAndroid Build Coastguard Worker    pshufb               m0, m3
546*c0909341SAndroid Build Coastguard Worker    jmp                tx2q                    ; continue with the second pass
547*c0909341SAndroid Build Coastguard Worker.pass2:
    ; in: m0 = packed 16-bit first-pass output; m5 = pd_2048 left over from
    ; the first pass (see the packssdw below).
    ; m0 += pmulhrsw(m0, pw_1697x8), with a saturating add. Going by the
    ; constant's name (1697*8 / 32768, about 0.4143) this is a scale by
    ; roughly 1.4143 -- the constant's value is defined outside this chunk.
548*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m1, [pw_1697x8]
549*c0909341SAndroid Build Coastguard Worker    movq                xm2, [dstq+strideq*0]
550*c0909341SAndroid Build Coastguard Worker    movhps              xm2, [dstq+strideq*1]   ; xm2 = row0 | row1
551*c0909341SAndroid Build Coastguard Worker    lea                  r6, [dstq+strideq*2]
552*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m0
553*c0909341SAndroid Build Coastguard Worker    paddsw               m0, m1
554*c0909341SAndroid Build Coastguard Worker    movq                xm3, [r6  +strideq*0]
555*c0909341SAndroid Build Coastguard Worker    movhps              xm3, [r6  +strideq*1]   ; xm3 = row2 | row3
556*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        xm4, [pixel_10bpc_max]
    ; m5 is not reloaded here: it must still hold the pd_2048 broadcast from
    ; whichever first pass ran (both first-pass tails visible in this part of
    ; the file load it). Packing the dwords to words yields pw_2048.
557*c0909341SAndroid Build Coastguard Worker    packssdw             m5, m5 ; pw_2048
558*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m5
    ; m5 = 0: clears the 64-byte coefficient buffer and is the lower clamp.
559*c0909341SAndroid Build Coastguard Worker    pxor                 m5, m5
560*c0909341SAndroid Build Coastguard Worker    mova          [cq+32*0], m5
561*c0909341SAndroid Build Coastguard Worker    mova          [cq+32*1], m5
562*c0909341SAndroid Build Coastguard Worker    vextracti128        xm1, m0, 1              ; xm1 = upper half of m0
563*c0909341SAndroid Build Coastguard Worker    paddw               xm0, xm2
564*c0909341SAndroid Build Coastguard Worker    paddw               xm1, xm3
565*c0909341SAndroid Build Coastguard Worker    pmaxsw              xm0, xm5
566*c0909341SAndroid Build Coastguard Worker    pmaxsw              xm1, xm5
567*c0909341SAndroid Build Coastguard Worker    pminsw              xm0, xm4
568*c0909341SAndroid Build Coastguard Worker    pminsw              xm1, xm4
569*c0909341SAndroid Build Coastguard Worker    movq   [dstq+strideq*0], xm0
570*c0909341SAndroid Build Coastguard Worker    movhps [dstq+strideq*1], xm0
571*c0909341SAndroid Build Coastguard Worker    movq   [r6  +strideq*0], xm1
572*c0909341SAndroid Build Coastguard Worker    movhps [r6  +strideq*1], xm1
573*c0909341SAndroid Build Coastguard Worker    RET
574*c0909341SAndroid Build Coastguard Worker
; Instantiate the 12-bit 4x4 entry points whose first transform type is dct
; (third macro argument = bitdepth).
575*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X4_FN dct, dct,      12
576*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X4_FN dct, identity, 12
577*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X4_FN dct, adst,     12
578*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X4_FN dct, flipadst, 12
579*c0909341SAndroid Build Coastguard Worker
; idct_4x4_internal_12bpc: 4x4 inverse DCT, 12-bit pixels.
;   Both passes reuse the 10bpc DCT kernel (.main / .main2, defined outside
;   this chunk) and the shared 12bpc tails in iadst_4x4_internal_12bpc.
580*c0909341SAndroid Build Coastguard Workercglobal idct_4x4_internal_12bpc, 0, 7, 8, dst, stride, c, eob, tx2
581*c0909341SAndroid Build Coastguard Worker    call m(idct_4x4_internal_10bpc).main
    ; Reorder the kernel output (m0, m1) into the m1/m2 pair that
    ; .pass1_end2 expects; that tail interleaves, clamps and jumps to tx2.
582*c0909341SAndroid Build Coastguard Worker    mova                 m3, [idct4_12_shuf]
583*c0909341SAndroid Build Coastguard Worker    mova                 m4, [idct4_12_shuf2]
584*c0909341SAndroid Build Coastguard Worker    vpermd               m2, m4, m1
585*c0909341SAndroid Build Coastguard Worker    vpermd               m1, m3, m0
586*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_4x4_internal_12bpc).pass1_end2
587*c0909341SAndroid Build Coastguard Worker.pass2:
    ; m5 = pd_2048 is loaded before calling .main2 -- presumably the rounding
    ; constant that entry point expects; confirm against the 10bpc kernel.
588*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m5, [pd_2048]
    ; Swap the two middle qwords of each register before the kernel ...
589*c0909341SAndroid Build Coastguard Worker    vpermq               m0, m0, q3120
590*c0909341SAndroid Build Coastguard Worker    vpermq               m1, m1, q3120
591*c0909341SAndroid Build Coastguard Worker    call m(idct_4x4_internal_10bpc).main2
    ; ... and reorder the results into the layout consumed by the shared
    ; .end tail (note the different permutation used for m1).
592*c0909341SAndroid Build Coastguard Worker    vpermq               m0, m0, q3120
593*c0909341SAndroid Build Coastguard Worker    vpermq               m1, m1, q2031
594*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_4x4_internal_12bpc).end
595*c0909341SAndroid Build Coastguard Worker
; Instantiate the 12-bit 4x4 entry points whose first transform type is adst
; (third macro argument = bitdepth).
596*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X4_FN adst, dct,      12
597*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X4_FN adst, adst,     12
598*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X4_FN adst, flipadst, 12
599*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X4_FN adst, identity, 12
600*c0909341SAndroid Build Coastguard Worker
; iadst_4x4_internal_12bpc: 4x4 inverse ADST, 12-bit pixels.
;   Both passes stay in 32-bit precision and reuse the 10bpc butterfly.
;   This routine also hosts the tails shared by the other 12bpc 4x4
;   transforms in this file: .pass1_end, .pass1_end2, .pass2_end, .end and
;   .main_pass2 are jumped to or called from the dct/flipadst/identity code.
601*c0909341SAndroid Build Coastguard Workercglobal iadst_4x4_internal_12bpc, 0, 7, 8, dst, stride, c, eob, tx2
602*c0909341SAndroid Build Coastguard Worker    call m(iadst_4x4_internal_10bpc).main
603*c0909341SAndroid Build Coastguard Worker    vinserti128          m1, m4, xm6, 1    ; m1 = out0 | out1
604*c0909341SAndroid Build Coastguard Worker    vinserti128          m2, xm3, 1        ; m2 = out2 | out3
605*c0909341SAndroid Build Coastguard Worker.pass1_end:
    ; in: m1, m2 = unrounded first-pass results.
    ; The shift is split in two: >>1 first, then (after the itx4_shuf dword
    ; permutation) +1024 and >>11.
606*c0909341SAndroid Build Coastguard Worker    mova                 m3, [itx4_shuf]
607*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m5, [pd_1024]
608*c0909341SAndroid Build Coastguard Worker    psrad                m1, 1
609*c0909341SAndroid Build Coastguard Worker    psrad                m2, 1
610*c0909341SAndroid Build Coastguard Worker    vpermd               m1, m3, m1
611*c0909341SAndroid Build Coastguard Worker    vpermd               m2, m3, m2
612*c0909341SAndroid Build Coastguard Worker    paddd                m1, m5
613*c0909341SAndroid Build Coastguard Worker    paddd                m2, m5
614*c0909341SAndroid Build Coastguard Worker    psrad                m1, 11
615*c0909341SAndroid Build Coastguard Worker    psrad                m2, 11
616*c0909341SAndroid Build Coastguard Worker.pass1_end2:
    ; in: m1, m2 = rounded first-pass results.
    ; Interleave their qwords into m0 (low qwords) and m1 (high qwords),
    ; clamp every lane to [clip_18b_min, clip_18b_max], then hand over to the
    ; second pass.
617*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m3, [clip_18b_min]
618*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m4, [clip_18b_max]
619*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m0, m1, m2
620*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m1, m2
621*c0909341SAndroid Build Coastguard Worker    pmaxsd               m0, m3
622*c0909341SAndroid Build Coastguard Worker    pmaxsd               m1, m3
623*c0909341SAndroid Build Coastguard Worker    pminsd               m0, m4
624*c0909341SAndroid Build Coastguard Worker    pminsd               m1, m4
625*c0909341SAndroid Build Coastguard Worker    jmp                tx2q
626*c0909341SAndroid Build Coastguard Worker.pass2:
627*c0909341SAndroid Build Coastguard Worker    call .main_pass2
628*c0909341SAndroid Build Coastguard Worker    vinserti128          m0, m4, xm6, 1    ; m0 = out0 | out1
629*c0909341SAndroid Build Coastguard Worker    vinserti128          m1, m2, xm3, 1    ; m1 = out2 | out3
630*c0909341SAndroid Build Coastguard Worker.pass2_end:
    ; in: m0, m1 = unrounded second-pass results; round with +2048, >>12.
631*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m5, [pd_2048]
632*c0909341SAndroid Build Coastguard Worker    paddd                m0, m5
633*c0909341SAndroid Build Coastguard Worker    paddd                m1, m5
634*c0909341SAndroid Build Coastguard Worker    psrad                m0, 12
635*c0909341SAndroid Build Coastguard Worker    psrad                m1, 12
636*c0909341SAndroid Build Coastguard Worker.end:
    ; Win64 only: restore the saved xmm registers now, then lower
    ; xmm_regs_used to 6 -- presumably so the RET below does not restore
    ; xmm6/xmm7 a second time; confirm against the x86inc RET/epilogue macros.
    ; Only m0-m5 are used from here on.
637*c0909341SAndroid Build Coastguard Worker%if WIN64
638*c0909341SAndroid Build Coastguard Worker    WIN64_RESTORE_XMM_INTERNAL
639*c0909341SAndroid Build Coastguard Worker    %assign xmm_regs_used 6
640*c0909341SAndroid Build Coastguard Worker%endif
641*c0909341SAndroid Build Coastguard Worker.end2:
    ; in: m0, m1 = 32-bit residuals.
    ; >>3, pack to 16 bits, then pmulhrsw with pw_16384 (a rounded >>1 if the
    ; constant holds 16384 as named). Add to dst, clamp to
    ; [0, pixel_12bpc_max], store. Rows are paired even/odd across the two
    ; 128-bit lanes, as the existing lane comments show.
642*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m4, [pw_16384]
643*c0909341SAndroid Build Coastguard Worker    movq                xm2, [dstq+strideq*0]
644*c0909341SAndroid Build Coastguard Worker    movq                xm3, [dstq+strideq*1]
645*c0909341SAndroid Build Coastguard Worker    lea                  r6, [dstq+strideq*2]
646*c0909341SAndroid Build Coastguard Worker    movhps              xm2, [r6  +strideq*0]   ; dst0 dst2
647*c0909341SAndroid Build Coastguard Worker    movhps              xm3, [r6  +strideq*1]   ; dst1 dst3
648*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m5, [pixel_12bpc_max]
649*c0909341SAndroid Build Coastguard Worker    vinserti128          m2, xm3, 1
650*c0909341SAndroid Build Coastguard Worker    psrad                m0, 3
651*c0909341SAndroid Build Coastguard Worker    psrad                m1, 3
652*c0909341SAndroid Build Coastguard Worker    packssdw             m0, m1     ; t0 t2 t1 t3
653*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m4
    ; m4 = 0: clears the 64-byte coefficient buffer and is the lower clamp.
654*c0909341SAndroid Build Coastguard Worker    pxor                 m4, m4
655*c0909341SAndroid Build Coastguard Worker    mova          [cq+32*0], m4
656*c0909341SAndroid Build Coastguard Worker    mova          [cq+32*1], m4
657*c0909341SAndroid Build Coastguard Worker    paddw                m0, m2     ; out0 out2 out1 out3
658*c0909341SAndroid Build Coastguard Worker    pmaxsw               m0, m4
659*c0909341SAndroid Build Coastguard Worker    pminsw               m0, m5
660*c0909341SAndroid Build Coastguard Worker    vextracti128        xm1, m0, 1  ; out1 out3
661*c0909341SAndroid Build Coastguard Worker    movq   [dstq+strideq*0], xm0
662*c0909341SAndroid Build Coastguard Worker    movq   [dstq+strideq*1], xm1
663*c0909341SAndroid Build Coastguard Worker    movhps [r6  +strideq*0], xm0
664*c0909341SAndroid Build Coastguard Worker    movhps [r6  +strideq*1], xm1
665*c0909341SAndroid Build Coastguard Worker    RET
666*c0909341SAndroid Build Coastguard Worker.main_pass2:
    ; Split m0/m1 into the four xmm inputs of the butterfly (xm0 = low m0,
    ; xm1 = high m0, xm2 = low m1, xm3 = high m1) and tail-jump into the
    ; 10bpc kernel; its ret returns to this helper's caller. Entering at
    ; .main2 skips the coefficient loads and the Win64 spill done in .main.
667*c0909341SAndroid Build Coastguard Worker    vextracti128        xm3, m1, 1
668*c0909341SAndroid Build Coastguard Worker    mova                xm2, xm1
669*c0909341SAndroid Build Coastguard Worker    vextracti128        xm1, m0, 1
670*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_4x4_internal_10bpc).main2
671*c0909341SAndroid Build Coastguard Worker
; Instantiate the 12-bit 4x4 entry points whose first transform type is
; flipadst (third macro argument = bitdepth).
672*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X4_FN flipadst, dct,      12
673*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X4_FN flipadst, adst,     12
674*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X4_FN flipadst, flipadst, 12
675*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X4_FN flipadst, identity, 12
676*c0909341SAndroid Build Coastguard Worker
; iflipadst_4x4_internal_12bpc: 4x4 inverse flipped ADST, 12-bit pixels.
;   Same butterfly and shared tails as iadst_4x4_internal_12bpc; the flip is
;   obtained by combining the four outputs in reverse order in both passes.
677*c0909341SAndroid Build Coastguard Workercglobal iflipadst_4x4_internal_12bpc, 0, 7, 8, dst, stride, c, eob, tx2
678*c0909341SAndroid Build Coastguard Worker    call m(iadst_4x4_internal_10bpc).main
679*c0909341SAndroid Build Coastguard Worker    vinserti128          m1, m3, xm2, 1    ; m1 = out3 | out2
680*c0909341SAndroid Build Coastguard Worker    vinserti128          m2, m6, xm4, 1    ; m2 = out1 | out0
681*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_4x4_internal_12bpc).pass1_end
682*c0909341SAndroid Build Coastguard Worker.pass2:
683*c0909341SAndroid Build Coastguard Worker    call m(iadst_4x4_internal_12bpc).main_pass2
684*c0909341SAndroid Build Coastguard Worker    vinserti128          m0, m3, xm2, 1    ; m0 = out3 | out2
685*c0909341SAndroid Build Coastguard Worker    vinserti128          m1, m6, xm4, 1    ; m1 = out1 | out0
686*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_4x4_internal_12bpc).pass2_end
687*c0909341SAndroid Build Coastguard Worker
; Instantiate the 12-bit 4x4 entry points whose first transform type is
; identity (third macro argument = bitdepth).
688*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X4_FN identity, dct,      12
689*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X4_FN identity, adst,     12
690*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X4_FN identity, flipadst, 12
691*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X4_FN identity, identity, 12
692*c0909341SAndroid Build Coastguard Worker
; iidentity_4x4_internal_12bpc: 4x4 identity transform, 12-bit pixels.
;   Pass 1 computes x + ((x*1697 + 2048) >> 12), about 1.4143*x; pass 2
;   computes (x*5793 + 2048) >> 12, also about 1.4143*x. Both finish in the
;   shared tails of iadst_4x4_internal_12bpc.
693*c0909341SAndroid Build Coastguard Workercglobal iidentity_4x4_internal_12bpc, 0, 7, 8, dst, stride, c, eob, tx2
694*c0909341SAndroid Build Coastguard Worker    mova                 m2, [itx4_shuf]
695*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m3, [pd_1697]
    ; Load the coefficients through the itx4_shuf dword permutation.
696*c0909341SAndroid Build Coastguard Worker    vpermd               m0, m2, [cq+32*0]
697*c0909341SAndroid Build Coastguard Worker    vpermd               m2, m2, [cq+32*1]
698*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m5, [pd_2048]
699*c0909341SAndroid Build Coastguard Worker    pmulld               m1, m3, m0
700*c0909341SAndroid Build Coastguard Worker    pmulld               m3, m2
701*c0909341SAndroid Build Coastguard Worker    paddd                m1, m5
702*c0909341SAndroid Build Coastguard Worker    paddd                m3, m5
703*c0909341SAndroid Build Coastguard Worker    psrad                m1, 12
704*c0909341SAndroid Build Coastguard Worker    psrad                m3, 12
    ; Add the scaled term back onto the original value; the results land in
    ; m1/m2, the register pair that .pass1_end2 expects.
705*c0909341SAndroid Build Coastguard Worker    paddd                m1, m0
706*c0909341SAndroid Build Coastguard Worker    paddd                m2, m3
707*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_4x4_internal_12bpc).pass1_end2
708*c0909341SAndroid Build Coastguard Worker.pass2:
709*c0909341SAndroid Build Coastguard Worker    ; m0 = in0 in1
710*c0909341SAndroid Build Coastguard Worker    ; m1 = in2 in3
711*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m3, [pd_5793]
712*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m5, [pd_2048]
713*c0909341SAndroid Build Coastguard Worker    pmulld               m0, m3
714*c0909341SAndroid Build Coastguard Worker    pmulld               m1, m3
715*c0909341SAndroid Build Coastguard Worker    paddd                m0, m5 ; 2048
716*c0909341SAndroid Build Coastguard Worker    paddd                m1, m5
717*c0909341SAndroid Build Coastguard Worker    psrad                m0, 12
718*c0909341SAndroid Build Coastguard Worker    psrad                m1, 12
    ; The shared tail applies the final shift, adds to dst and clamps.
719*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_4x4_internal_12bpc).end
720*c0909341SAndroid Build Coastguard Worker
; INV_TXFM_4X8_FN type1, type2[, bitdepth]: 4x8 entry-point generator.
;   The bitdepth argument defaults to 10. The macro forwards to INV_TXFM_FN
;   (defined elsewhere in the file) and, for the dct_dct combination only,
;   appends the DC-only path that follows it.
721*c0909341SAndroid Build Coastguard Worker%macro INV_TXFM_4X8_FN 2-3 10 ; type1, type2, bitdepth
722*c0909341SAndroid Build Coastguard Worker    INV_TXFM_FN          %1, %2, 0, 4x8, %3
723*c0909341SAndroid Build Coastguard Worker%ifidn %1_%2, dct_dct
    ; xm2 = dconly_<bitdepth>bpc constant, consumed by the shared DC-only code.
724*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        xm2, [dconly_%3bpc]
725*c0909341SAndroid Build Coastguard Worker%if %3 = 10
    ; The 10-bit expansion defines .dconly; other bitdepths jump to it.
    ; r6d = ((dc*181 + 128) >> 8) * 181, where 181/256 is about 0.707. The
    ; second product is left unshifted for the 4x4 .dconly2 tail, which is
    ; defined outside this chunk.
726*c0909341SAndroid Build Coastguard Worker.dconly:
727*c0909341SAndroid Build Coastguard Worker    imul                r6d, [cq], 181
728*c0909341SAndroid Build Coastguard Worker    mov                [cq], eobd ; 0
    ; NOTE(review): r3d looks like the eob register, which is 0 on this path
    ; per the comment above, so this sets it to 8 -- presumably the row count
    ; used by .dconly2; confirm against the 4x4 dconly code.
729*c0909341SAndroid Build Coastguard Worker    or                  r3d, 8
730*c0909341SAndroid Build Coastguard Worker    add                 r6d, 128
731*c0909341SAndroid Build Coastguard Worker    sar                 r6d, 8
732*c0909341SAndroid Build Coastguard Worker    imul                r6d, 181
733*c0909341SAndroid Build Coastguard Worker    jmp m(inv_txfm_add_dct_dct_4x4_10bpc).dconly2
734*c0909341SAndroid Build Coastguard Worker%else
735*c0909341SAndroid Build Coastguard Worker    jmp m(inv_txfm_add_dct_dct_4x8_10bpc).dconly
736*c0909341SAndroid Build Coastguard Worker%endif
737*c0909341SAndroid Build Coastguard Worker%endif
738*c0909341SAndroid Build Coastguard Worker%endmacro
739*c0909341SAndroid Build Coastguard Worker
; IDCT4_1D src1, src2, src3, src4, tmp1, tmp2, tmp3, rnd
;   4-point inverse-DCT butterfly on 32-bit lanes. All arguments are register
;   numbers (used as m%N).
;   in:       m%1-m%4 = the four inputs, m%8 = rounding constant
;   out:      m%1 = t0 + t3, m%2 = t1 + t2, m%3 = t1 - t2, m%4 = t0 - t3
;   clobbers: m%5, plus m%6 and m%7, which are passed to ITX_MULSUB_2D as
;             temporaries
740*c0909341SAndroid Build Coastguard Worker%macro IDCT4_1D 8 ; src[1-4], tmp[1-3], rnd
    ; Rotation of the src2/src4 pair; ITX_MULSUB_2D is defined elsewhere.
741*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D        %2, %4, %5, %6, %7, %8, 1567, 3784 ; t2, t3
742*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m%5, [pd_2896]
743*c0909341SAndroid Build Coastguard Worker    pmulld              m%1, m%5
744*c0909341SAndroid Build Coastguard Worker    pmulld              m%3, m%5
    ; Adding the rounding term to one product before forming the sum and the
    ; difference puts it into both results.
745*c0909341SAndroid Build Coastguard Worker    paddd               m%1, m%8
746*c0909341SAndroid Build Coastguard Worker    paddd               m%5, m%1, m%3
747*c0909341SAndroid Build Coastguard Worker    psubd               m%1, m%3
748*c0909341SAndroid Build Coastguard Worker    psrad               m%5, 12 ; t0
749*c0909341SAndroid Build Coastguard Worker    psrad               m%1, 12 ; t1
750*c0909341SAndroid Build Coastguard Worker    psubd               m%3, m%1, m%2   ; t1 - t2
751*c0909341SAndroid Build Coastguard Worker    paddd               m%2, m%1        ; t1 + t2
752*c0909341SAndroid Build Coastguard Worker    paddd               m%1, m%5, m%4   ; t0 + t3
753*c0909341SAndroid Build Coastguard Worker    psubd               m%4, m%5, m%4   ; t0 - t3
754*c0909341SAndroid Build Coastguard Worker%endmacro
755*c0909341SAndroid Build Coastguard Worker
; Instantiate the 4x8 entry points whose first transform type is dct
; (bitdepth argument omitted, so INV_TXFM_4X8_FN uses its default of 10).
756*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN dct, dct
757*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN dct, identity
758*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN dct, adst
759*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN dct, flipadst
760*c0909341SAndroid Build Coastguard Worker
; idct_4x8_internal_10bpc: 4x8 inverse DCT, 10-bit pixels.
;   Pass 1: pre-scale all coefficients by 2896/4096 (about 0.707) with
;   rounding, then run the 4-point DCT butterfly on 32-bit lanes.
;   Pass 2: pack to 16 bits, transpose, run the 8bpc 4x8 DCT kernel, add to
;   the eight destination rows and clamp.
761*c0909341SAndroid Build Coastguard Workercglobal idct_4x8_internal_10bpc, 0, 7, 8, dst, stride, c, eob, tx2
762*c0909341SAndroid Build Coastguard Worker.pass1:
763*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m3, [pd_2896]
764*c0909341SAndroid Build Coastguard Worker    pmulld               m0, m3, [cq+32*0]
765*c0909341SAndroid Build Coastguard Worker    pmulld               m1, m3, [cq+32*1]
766*c0909341SAndroid Build Coastguard Worker    pmulld               m2, m3, [cq+32*2]
    ; m3 is multiplied last because it also holds the constant.
767*c0909341SAndroid Build Coastguard Worker    pmulld               m3, m3, [cq+32*3]
768*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m7, [pd_2048]
769*c0909341SAndroid Build Coastguard Worker    REPX      {paddd x, m7}, m0, m1, m2, m3
770*c0909341SAndroid Build Coastguard Worker    REPX      {psrad x, 12}, m0, m1, m2, m3
    ; m0-m3 = inputs, m4-m6 = temporaries, m7 = pd_2048 as rounding term.
771*c0909341SAndroid Build Coastguard Worker    IDCT4_1D              0, 1, 2, 3, 4, 5, 6, 7
772*c0909341SAndroid Build Coastguard Worker    jmp                tx2q
773*c0909341SAndroid Build Coastguard Worker.pass2:
    ; in: m0-m3 = 32-bit first-pass results.
    ; Narrow to 16 bits with signed saturation and transpose with unpacks so
    ; that each xmm register holds two rows (row numbers in the comments).
    ; r6 is set up for the 8bpc kernel -- presumably its constant base;
    ; confirm against the 8bpc source.
774*c0909341SAndroid Build Coastguard Worker    packssdw             m0, m2
775*c0909341SAndroid Build Coastguard Worker    packssdw             m1, m3
776*c0909341SAndroid Build Coastguard Worker    lea                  r6, [deint_shuf+128]
777*c0909341SAndroid Build Coastguard Worker    punpckhwd            m2, m0, m1
778*c0909341SAndroid Build Coastguard Worker    punpcklwd            m0, m1
779*c0909341SAndroid Build Coastguard Worker    punpckhdq            m1, m0, m2 ; 2 3
780*c0909341SAndroid Build Coastguard Worker    punpckldq            m0, m2     ; 0 1
781*c0909341SAndroid Build Coastguard Worker    vextracti128        xm2, m0, 1  ; 4 5
782*c0909341SAndroid Build Coastguard Worker    vextracti128        xm3, m1, 1  ; 6 7
783*c0909341SAndroid Build Coastguard Worker    call m(idct_4x8_internal_8bpc).main
    ; Final scale: pmulhrsw with pw_2048 (a rounded >>4 if the constant holds
    ; 2048 as named).
784*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        xm4, [pw_2048]
785*c0909341SAndroid Build Coastguard Worker    REPX  {pmulhrsw x, xm4}, xm0, xm1, xm2, xm3
786*c0909341SAndroid Build Coastguard Worker    lea                  r3, [strideq*3]
787*c0909341SAndroid Build Coastguard Worker    lea                  r6, [dstq+strideq*4]
    ; Load the destination rows in the order the kernel leaves its output:
    ; xm1 and xm3 carry their row pair swapped (3|2 and 7|6), so rows 3 and 7
    ; go into the low halves of xm5 and xm7.
788*c0909341SAndroid Build Coastguard Worker    movq                xm4, [dstq+strideq*0]
789*c0909341SAndroid Build Coastguard Worker    movhps              xm4, [dstq+strideq*1]
790*c0909341SAndroid Build Coastguard Worker    movq                xm5, [dstq+r3       ]
791*c0909341SAndroid Build Coastguard Worker    movhps              xm5, [dstq+strideq*2]
792*c0909341SAndroid Build Coastguard Worker    movq                xm6, [r6  +strideq*0]
793*c0909341SAndroid Build Coastguard Worker    movhps              xm6, [r6  +strideq*1]
794*c0909341SAndroid Build Coastguard Worker    movq                xm7, [r6  +r3       ]
795*c0909341SAndroid Build Coastguard Worker    movhps              xm7, [r6  +strideq*2]
796*c0909341SAndroid Build Coastguard Worker    paddw               xm0, xm4 ; 0 1
797*c0909341SAndroid Build Coastguard Worker    paddw               xm1, xm5 ; 3 2
798*c0909341SAndroid Build Coastguard Worker    paddw               xm2, xm6 ; 4 5
799*c0909341SAndroid Build Coastguard Worker    paddw               xm3, xm7 ; 7 6
800*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        xm5, [pixel_10bpc_max]
    ; m4 = 0: clears the 128-byte coefficient buffer and is the lower clamp.
801*c0909341SAndroid Build Coastguard Worker    pxor                 m4, m4
802*c0909341SAndroid Build Coastguard Worker    REPX {mova [cq+32*x], m4}, 0, 1, 2, 3
803*c0909341SAndroid Build Coastguard Worker    REPX    {pmaxsw x, xm4}, xm0, xm1, xm2, xm3
804*c0909341SAndroid Build Coastguard Worker    REPX    {pminsw x, xm5}, xm0, xm1, xm2, xm3
805*c0909341SAndroid Build Coastguard Worker    movq   [dstq+strideq*0], xm0
806*c0909341SAndroid Build Coastguard Worker    movhps [dstq+strideq*1], xm0
807*c0909341SAndroid Build Coastguard Worker    movhps [dstq+strideq*2], xm1
808*c0909341SAndroid Build Coastguard Worker    movq   [dstq+r3       ], xm1
809*c0909341SAndroid Build Coastguard Worker    movq   [r6  +strideq*0], xm2
810*c0909341SAndroid Build Coastguard Worker    movhps [r6  +strideq*1], xm2
811*c0909341SAndroid Build Coastguard Worker    movhps [r6  +strideq*2], xm3
812*c0909341SAndroid Build Coastguard Worker    movq   [r6  +r3       ], xm3
813*c0909341SAndroid Build Coastguard Worker    RET
814*c0909341SAndroid Build Coastguard Worker
; Instantiate the 4x8 entry points whose first transform type is adst
; (bitdepth argument omitted, so INV_TXFM_4X8_FN uses its default of 10).
815*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN adst, dct
816*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN adst, adst
817*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN adst, flipadst
818*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN adst, identity
819*c0909341SAndroid Build Coastguard Worker
820*c0909341SAndroid Build Coastguard Workercglobal iadst_4x8_internal_10bpc, 0, 7, 8, dst, stride, c, eob, tx2
821*c0909341SAndroid Build Coastguard Worker    call m(iadst_8x4_internal_10bpc).main
822*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m5, [pd_2048]
823*c0909341SAndroid Build Coastguard Worker    paddd                m0, m5, m4
824*c0909341SAndroid Build Coastguard Worker    paddd                m1, m5, m6
825*c0909341SAndroid Build Coastguard Worker    paddd                m2, m5
826*c0909341SAndroid Build Coastguard Worker    paddd                m3, m5
827*c0909341SAndroid Build Coastguard Worker.pass1_end:
828*c0909341SAndroid Build Coastguard Worker    REPX      {psrad x, 12}, m0, m1, m2, m3
829*c0909341SAndroid Build Coastguard Worker    jmp                tx2q
830*c0909341SAndroid Build Coastguard Worker.pass2:
831*c0909341SAndroid Build Coastguard Worker    call .pass2_main
832*c0909341SAndroid Build Coastguard Worker    mova                xm4, [pw_2048_m2048]
833*c0909341SAndroid Build Coastguard Worker    REPX  {pmulhrsw x, xm4}, xm0, xm1, xm2, xm3
834*c0909341SAndroid Build Coastguard Worker.end:
835*c0909341SAndroid Build Coastguard Worker    lea                  r3, [strideq*3]
836*c0909341SAndroid Build Coastguard Worker    lea                  r6, [dstq+strideq*4]
837*c0909341SAndroid Build Coastguard Worker    movq                xm4, [dstq+strideq*0]
838*c0909341SAndroid Build Coastguard Worker    movhps              xm4, [dstq+strideq*1]
839*c0909341SAndroid Build Coastguard Worker    movq                xm5, [dstq+strideq*2]
840*c0909341SAndroid Build Coastguard Worker    movhps              xm5, [dstq+r3       ]
841*c0909341SAndroid Build Coastguard Worker    movq                xm6, [r6  +strideq*0]
842*c0909341SAndroid Build Coastguard Worker    movhps              xm6, [r6  +strideq*1]
843*c0909341SAndroid Build Coastguard Worker    movq                xm7, [r6  +strideq*2]
844*c0909341SAndroid Build Coastguard Worker    movhps              xm7, [r6  +r3       ]
845*c0909341SAndroid Build Coastguard Worker    paddw               xm0, xm4 ; 0 1
846*c0909341SAndroid Build Coastguard Worker    paddw               xm1, xm5 ; 2 3
847*c0909341SAndroid Build Coastguard Worker    paddw               xm2, xm6 ; 4 5
848*c0909341SAndroid Build Coastguard Worker    paddw               xm3, xm7 ; 6 7
849*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        xm5, [pixel_10bpc_max]
850*c0909341SAndroid Build Coastguard Worker    pxor                 m4, m4
851*c0909341SAndroid Build Coastguard Worker    REPX {mova [cq+32*x], m4}, 0, 1, 2, 3
852*c0909341SAndroid Build Coastguard Worker    REPX    {pmaxsw x, xm4}, xm0, xm1, xm2, xm3
853*c0909341SAndroid Build Coastguard Worker    REPX    {pminsw x, xm5}, xm0, xm1, xm2, xm3
854*c0909341SAndroid Build Coastguard Worker    movq   [dstq+strideq*0], xm0
855*c0909341SAndroid Build Coastguard Worker    movhps [dstq+strideq*1], xm0
856*c0909341SAndroid Build Coastguard Worker    movq   [dstq+strideq*2], xm1
857*c0909341SAndroid Build Coastguard Worker    movhps [dstq+r3       ], xm1
858*c0909341SAndroid Build Coastguard Worker    movq   [r6  +strideq*0], xm2
859*c0909341SAndroid Build Coastguard Worker    movhps [r6  +strideq*1], xm2
860*c0909341SAndroid Build Coastguard Worker    movq   [r6  +strideq*2], xm3
861*c0909341SAndroid Build Coastguard Worker    movhps [r6  +r3       ], xm3
862*c0909341SAndroid Build Coastguard Worker    RET
863*c0909341SAndroid Build Coastguard WorkerALIGN function_align
864*c0909341SAndroid Build Coastguard Worker.pass2_main:
865*c0909341SAndroid Build Coastguard Worker    packssdw             m0, m2
866*c0909341SAndroid Build Coastguard Worker    packssdw             m1, m3
867*c0909341SAndroid Build Coastguard Worker    lea                  r6, [deint_shuf+128]
868*c0909341SAndroid Build Coastguard Worker    punpcklwd            m4, m0, m1
869*c0909341SAndroid Build Coastguard Worker    punpckhwd            m0, m1
870*c0909341SAndroid Build Coastguard Worker    punpckhdq            m5, m4, m0
871*c0909341SAndroid Build Coastguard Worker    punpckldq            m4, m0
872*c0909341SAndroid Build Coastguard Worker    vextracti128        xm2, m4, 1      ; 4 5
873*c0909341SAndroid Build Coastguard Worker    vextracti128        xm3, m5, 1      ; 6 7
874*c0909341SAndroid Build Coastguard Worker    pshufd              xm4, xm4, q1032 ; 1 0
875*c0909341SAndroid Build Coastguard Worker    pshufd              xm5, xm5, q1032 ; 3 2
876*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_4x8_internal_8bpc).main_pass2
877*c0909341SAndroid Build Coastguard WorkerALIGN function_align
878*c0909341SAndroid Build Coastguard Worker.main:
879*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m8, [clip_18b_min]
880*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m9, [clip_18b_max]
881*c0909341SAndroid Build Coastguard Worker.main2:
882*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m0, [cq+16*0]
883*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m2, [cq+16*2]
884*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m3, [cq+16*5]
885*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m1, [cq+16*7]
886*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m6, [pd_2896]
887*c0909341SAndroid Build Coastguard Worker    shufpd               m0, m2, 0x0c ; 0 2
888*c0909341SAndroid Build Coastguard Worker    shufpd               m1, m3, 0x0c ; 7 5
889*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m2, [cq+16*4]
890*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m4, [cq+16*6]
891*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m5, [cq+16*1]
892*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m3, [cq+16*3]
893*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m7, [pd_2048]
894*c0909341SAndroid Build Coastguard Worker    shufpd               m2, m4, 0x0c ; 4 6
895*c0909341SAndroid Build Coastguard Worker    shufpd               m3, m5, 0x0c ; 3 1
896*c0909341SAndroid Build Coastguard Worker    REPX {pmulld x, m6}, m0, m1, m2, m3
897*c0909341SAndroid Build Coastguard Worker    REPX {paddd  x, m7}, m0, m1, m2, m3
898*c0909341SAndroid Build Coastguard Worker    REPX {psrad  x, 12}, m0, m1, m2, m3
899*c0909341SAndroid Build Coastguard Worker.main3:
900*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         1, 0, 4, 5, 6, 7,  401_1931, 4076_3612, 1
901*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         3, 2, 4, 5, 6, 7, 3166_3920, 2598_1189, 1
902*c0909341SAndroid Build Coastguard Worker    psubd                m4, m0, m2   ; t4  t6
903*c0909341SAndroid Build Coastguard Worker    paddd                m0, m2       ; t0  t2
904*c0909341SAndroid Build Coastguard Worker    psubd                m2, m1, m3   ; t5  t7
905*c0909341SAndroid Build Coastguard Worker    paddd                m1, m3       ; t1  t3
906*c0909341SAndroid Build Coastguard Worker    REPX     {pmaxsd x, m8}, m4, m2, m0, m1
907*c0909341SAndroid Build Coastguard Worker    REPX     {pminsd x, m9}, m4, m2, m0, m1
908*c0909341SAndroid Build Coastguard Worker    pxor                 m5, m5
909*c0909341SAndroid Build Coastguard Worker    psubd                m5, m4
910*c0909341SAndroid Build Coastguard Worker    vpblendd             m4, m2, 0xcc ; t4  t7
911*c0909341SAndroid Build Coastguard Worker    vpblendd             m2, m5, 0xcc ; t5 -t6
912*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         4, 2, 3, 5, 6, 7, 1567, 3784
913*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m5, [pd_2896]
914*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m6, [pw_2048_m2048] ; + + - -
915*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m3, m0, m1
916*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m0, m1
917*c0909341SAndroid Build Coastguard Worker    psubd                m1, m0, m3   ; t2  t3
918*c0909341SAndroid Build Coastguard Worker    paddd                m0, m3       ;  out0 -out7
919*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m3, m4, m2   ; t7a t6a
920*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m4, m2       ; t5a t4a
921*c0909341SAndroid Build Coastguard Worker    psubd                m2, m4, m3   ; t7  t6
922*c0909341SAndroid Build Coastguard Worker    paddd                m4, m3       ;  out6 -out1
923*c0909341SAndroid Build Coastguard Worker    REPX     {pmaxsd x, m8}, m1, m2
924*c0909341SAndroid Build Coastguard Worker    REPX     {pminsd x, m9}, m1, m2
925*c0909341SAndroid Build Coastguard Worker    vpblendd             m3, m1, m2, 0xcc
926*c0909341SAndroid Build Coastguard Worker    shufpd               m1, m2, 0x05
927*c0909341SAndroid Build Coastguard Worker    pmulld               m3, m5
928*c0909341SAndroid Build Coastguard Worker    pmulld               m5, m1
929*c0909341SAndroid Build Coastguard Worker    psignd               m0, m6       ;  out0  out7
930*c0909341SAndroid Build Coastguard Worker    psignd               m4, m6       ;  out6  out1
931*c0909341SAndroid Build Coastguard Worker    paddd                m3, m7
932*c0909341SAndroid Build Coastguard Worker    psubd                m2, m3, m5
933*c0909341SAndroid Build Coastguard Worker    paddd                m5, m3
934*c0909341SAndroid Build Coastguard Worker    psrad                m2, 12       ;  out4 -out5
935*c0909341SAndroid Build Coastguard Worker    psrad                m5, 12       ; -out3  out2
936*c0909341SAndroid Build Coastguard Worker    ret
937*c0909341SAndroid Build Coastguard Worker
938*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN flipadst, dct
939*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN flipadst, adst
940*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN flipadst, flipadst
941*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN flipadst, identity
942*c0909341SAndroid Build Coastguard Worker
; ---------------------------------------------------------------------------
; iflipadst_4x8_internal_10bpc: 4x8 inverse flipped ADST, 10 bits per
; component. It reuses the ADST code and differs only in ordering:
;   - pass 1 takes the helper results in the order m3, m2, m6, m4 (the plain
;     ADST uses m4, m6, m2, m3) and then joins the ADST's .pass1_end;
;   - pass 2 calls the ADST's .pass2_main and adds/stores the registers in
;     the order xm3, xm2, xm1, xm0 with the two rows of each register
;     swapped, so the 8 output rows are written bottom-to-top relative to
;     the ADST.
; NOTE(review): INV_TXFM_4X8_FN is defined outside this chunk; the four
; instantiations above presumably create the public entry points.
; ---------------------------------------------------------------------------
943*c0909341SAndroid Build Coastguard Workercglobal iflipadst_4x8_internal_10bpc, 0, 7, 8, dst, stride, c, eob, tx2
944*c0909341SAndroid Build Coastguard Worker    call m(iadst_8x4_internal_10bpc).main
945*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m5, [pd_2048]
; Add the pd_2048 bias while reversing the register order of the results.
946*c0909341SAndroid Build Coastguard Worker    paddd                m0, m5, m3
947*c0909341SAndroid Build Coastguard Worker    paddd                m1, m5, m2
948*c0909341SAndroid Build Coastguard Worker    paddd                m2, m5, m6
949*c0909341SAndroid Build Coastguard Worker    paddd                m3, m5, m4
; Shared ">> 12, jmp tx2q" tail.
950*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_4x8_internal_10bpc).pass1_end
951*c0909341SAndroid Build Coastguard Worker.pass2:
952*c0909341SAndroid Build Coastguard Worker    call m(iadst_4x8_internal_10bpc).pass2_main
953*c0909341SAndroid Build Coastguard Worker    mova                xm4, [pw_2048_m2048]
954*c0909341SAndroid Build Coastguard Worker    REPX  {pmulhrsw x, xm4}, xm3, xm2, xm1, xm0
; r3 = 3*stride and r6 = dst + 4*stride, so rows 0-3 are addressed from dstq
; and rows 4-7 from r6.
955*c0909341SAndroid Build Coastguard Worker    lea                  r3, [strideq*3]
956*c0909341SAndroid Build Coastguard Worker    lea                  r6, [dstq+strideq*4]
; Destination rows are loaded odd-row-first (movq = rows 1/3/5/7, movhps =
; rows 0/2/4/6) to match the swapped row order inside xm3..xm0.
957*c0909341SAndroid Build Coastguard Worker    movq                xm4, [dstq+strideq*1]
958*c0909341SAndroid Build Coastguard Worker    movhps              xm4, [dstq+strideq*0]
959*c0909341SAndroid Build Coastguard Worker    movq                xm5, [dstq+r3       ]
960*c0909341SAndroid Build Coastguard Worker    movhps              xm5, [dstq+strideq*2]
961*c0909341SAndroid Build Coastguard Worker    movq                xm6, [r6  +strideq*1]
962*c0909341SAndroid Build Coastguard Worker    movhps              xm6, [r6  +strideq*0]
963*c0909341SAndroid Build Coastguard Worker    movq                xm7, [r6  +r3       ]
964*c0909341SAndroid Build Coastguard Worker    movhps              xm7, [r6  +strideq*2]
965*c0909341SAndroid Build Coastguard Worker    paddw               xm3, xm4 ; 1 0
966*c0909341SAndroid Build Coastguard Worker    paddw               xm2, xm5 ; 3 2
967*c0909341SAndroid Build Coastguard Worker    paddw               xm1, xm6 ; 5 4
968*c0909341SAndroid Build Coastguard Worker    paddw               xm0, xm7 ; 7 6
969*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        xm5, [pixel_10bpc_max]
970*c0909341SAndroid Build Coastguard Worker    pxor                 m4, m4
; Clear the coefficient buffer (four stores of m4 = 0 at a 32-byte pitch).
971*c0909341SAndroid Build Coastguard Worker    REPX {mova [cq+32*x], m4}, 0, 1, 2, 3
; Clamp every sum to [0, pixel_10bpc_max].
972*c0909341SAndroid Build Coastguard Worker    REPX    {pmaxsw x, xm4}, xm3, xm2, xm1, xm0
973*c0909341SAndroid Build Coastguard Worker    REPX    {pminsw x, xm5}, xm3, xm2, xm1, xm0
; Stores mirror the loads: high half -> even row, low half -> odd row.
974*c0909341SAndroid Build Coastguard Worker    movhps [dstq+strideq*0], xm3
975*c0909341SAndroid Build Coastguard Worker    movq   [dstq+strideq*1], xm3
976*c0909341SAndroid Build Coastguard Worker    movhps [dstq+strideq*2], xm2
977*c0909341SAndroid Build Coastguard Worker    movq   [dstq+r3       ], xm2
978*c0909341SAndroid Build Coastguard Worker    movhps [r6  +strideq*0], xm1
979*c0909341SAndroid Build Coastguard Worker    movq   [r6  +strideq*1], xm1
980*c0909341SAndroid Build Coastguard Worker    movhps [r6  +strideq*2], xm0
981*c0909341SAndroid Build Coastguard Worker    movq   [r6  +r3       ], xm0
982*c0909341SAndroid Build Coastguard Worker    RET
983*c0909341SAndroid Build Coastguard Worker
984*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN identity, dct
985*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN identity, adst
986*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN identity, flipadst
987*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN identity, identity
988*c0909341SAndroid Build Coastguard Worker
; ---------------------------------------------------------------------------
; iidentity_4x8_internal_10bpc: 4x8 inverse identity transform, 10 bits per
; component.
;   .pass1      two rounded fixed-point multiplies per coefficient, then
;               jmp tx2q; iidentity_4x8_internal_12bpc jumps here as well
;   .pass2      loads the 10-bit pixel maximum into m6 and calls .pass2_end
;   .pass2_end  pack, transpose, scale, add to destination, clamp to [0, m6],
;               clear coefficients, store. The caller supplies the pixel
;               maximum in m6; the 12bpc variant calls this with
;               pixel_12bpc_max.
; NOTE(review): INV_TXFM_4X8_FN is defined outside this chunk; the four
; instantiations above presumably create the public entry points.
; ---------------------------------------------------------------------------
989*c0909341SAndroid Build Coastguard Workercglobal iidentity_4x8_internal_10bpc, 0, 7, 8, dst, stride, c, eob, tx2
990*c0909341SAndroid Build Coastguard Worker.pass1:
; m0-m3 = coefficients * pd_2896 (m3 doubles as the multiplier and the last
; destination, so it must be written last).
991*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m3, [pd_2896]
992*c0909341SAndroid Build Coastguard Worker    pmulld               m0, m3, [cq+32*0]
993*c0909341SAndroid Build Coastguard Worker    pmulld               m1, m3, [cq+32*1]
994*c0909341SAndroid Build Coastguard Worker    pmulld               m2, m3, [cq+32*2]
995*c0909341SAndroid Build Coastguard Worker    pmulld               m3,     [cq+32*3]
996*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m5, [pd_2048]
997*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m4, [pd_5793]
; x = (x + pd_2048) >> 12, then x = (x * pd_5793 + pd_2048) >> 12.
998*c0909341SAndroid Build Coastguard Worker    REPX     {paddd  x, m5}, m0, m1, m2, m3
999*c0909341SAndroid Build Coastguard Worker    REPX     {psrad  x, 12}, m0, m1, m2, m3
1000*c0909341SAndroid Build Coastguard Worker    REPX     {pmulld x, m4}, m0, m1, m2, m3
1001*c0909341SAndroid Build Coastguard Worker    REPX     {paddd  x, m5}, m0, m1, m2, m3
1002*c0909341SAndroid Build Coastguard Worker    REPX     {psrad  x, 12}, m0, m1, m2, m3
1003*c0909341SAndroid Build Coastguard Worker    jmp                tx2q ; continue with the second-pass handler
1004*c0909341SAndroid Build Coastguard Worker.pass2:
1005*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m6, [pixel_10bpc_max] ; upper clamp bound for .pass2_end
1006*c0909341SAndroid Build Coastguard Worker    call .pass2_end
1007*c0909341SAndroid Build Coastguard Worker    RET
1008*c0909341SAndroid Build Coastguard WorkerALIGN function_align
1009*c0909341SAndroid Build Coastguard Worker.pass2_end:
; In: m0-m3 = 32-bit first-pass results, m6 = maximum pixel value (words).
1010*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m4, [pw_4096]
; Narrow to 16 bits with signed saturation, then interleave.
1011*c0909341SAndroid Build Coastguard Worker    packssdw             m0, m2
1012*c0909341SAndroid Build Coastguard Worker    packssdw             m1, m3
1013*c0909341SAndroid Build Coastguard Worker    punpckhwd            m2, m0, m1
1014*c0909341SAndroid Build Coastguard Worker    punpcklwd            m0, m1
; NOTE(review): assuming pw_4096 holds words of 4096, pmulhrsw here is a
; rounded divide by 8 - confirm against the constant table.
1015*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m2, m4
1016*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m4
1017*c0909341SAndroid Build Coastguard Worker    punpckhdq            m1, m0, m2 ; 2 3 6 7
1018*c0909341SAndroid Build Coastguard Worker    punpckldq            m0, m2     ; 0 1 4 5
; r3 = 3*stride and r6 = dst + 4*stride, so rows 0-3 are addressed from dstq
; and rows 4-7 from r6.
1019*c0909341SAndroid Build Coastguard Worker    lea                  r3, [strideq*3]
1020*c0909341SAndroid Build Coastguard Worker    lea                  r6, [dstq+strideq*4]
; Build m2 = dst rows 0 1 4 5 and m3 = dst rows 2 3 6 7: the low lane is
; filled with movq/movhps, the high lane by broadcasting one row and blending
; it into dwords 4-5 (mask 0x30) or dwords 6-7 (mask 0xc0).
1021*c0909341SAndroid Build Coastguard Worker    movq                xm2, [dstq+strideq*0]
1022*c0909341SAndroid Build Coastguard Worker    movhps              xm2, [dstq+strideq*1]
1023*c0909341SAndroid Build Coastguard Worker    vpbroadcastq         m4, [r6  +strideq*0]
1024*c0909341SAndroid Build Coastguard Worker    vpbroadcastq         m5, [r6  +strideq*1]
1025*c0909341SAndroid Build Coastguard Worker    movq                xm3, [dstq+strideq*2]
1026*c0909341SAndroid Build Coastguard Worker    movhps              xm3, [dstq+r3       ]
1027*c0909341SAndroid Build Coastguard Worker    vpblendd             m2, m4, 0x30
1028*c0909341SAndroid Build Coastguard Worker    vpblendd             m2, m5, 0xc0
1029*c0909341SAndroid Build Coastguard Worker    vpbroadcastq         m4, [r6  +strideq*2]
1030*c0909341SAndroid Build Coastguard Worker    vpbroadcastq         m5, [r6  +r3       ]
1031*c0909341SAndroid Build Coastguard Worker    vpblendd             m3, m4, 0x30
1032*c0909341SAndroid Build Coastguard Worker    vpblendd             m3, m5, 0xc0
1033*c0909341SAndroid Build Coastguard Worker    pxor                 m4, m4
; Clear the coefficient buffer (four stores of m4 = 0 at a 32-byte pitch).
1034*c0909341SAndroid Build Coastguard Worker    REPX {mova [cq+32*x], m4}, 0, 1, 2, 3
1035*c0909341SAndroid Build Coastguard Worker    paddw                m0, m2 ; out0 out1 out4 out5
1036*c0909341SAndroid Build Coastguard Worker    paddw                m1, m3 ; out2 out3 out6 out7
; Clamp to [0, m6].
1037*c0909341SAndroid Build Coastguard Worker    pmaxsw               m0, m4
1038*c0909341SAndroid Build Coastguard Worker    pmaxsw               m1, m4
1039*c0909341SAndroid Build Coastguard Worker    pminsw               m0, m6
1040*c0909341SAndroid Build Coastguard Worker    pminsw               m1, m6
1041*c0909341SAndroid Build Coastguard Worker    vextracti128        xm2, m0, 1  ; out4 out5
1042*c0909341SAndroid Build Coastguard Worker    vextracti128        xm3, m1, 1  ; out6 out7
1043*c0909341SAndroid Build Coastguard Worker    movq   [dstq+strideq*0], xm0
1044*c0909341SAndroid Build Coastguard Worker    movhps [dstq+strideq*1], xm0
1045*c0909341SAndroid Build Coastguard Worker    movq   [dstq+strideq*2], xm1
1046*c0909341SAndroid Build Coastguard Worker    movhps [dstq+r3       ], xm1
1047*c0909341SAndroid Build Coastguard Worker    movq   [r6  +strideq*0], xm2
1048*c0909341SAndroid Build Coastguard Worker    movhps [r6  +strideq*1], xm2
1049*c0909341SAndroid Build Coastguard Worker    movq   [r6  +strideq*2], xm3
1050*c0909341SAndroid Build Coastguard Worker    movhps [r6  +r3       ], xm3
; Plain ret: this is a subroutine; the callers execute RET themselves.
1051*c0909341SAndroid Build Coastguard Worker    ret
1052*c0909341SAndroid Build Coastguard Worker
1053*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN dct, dct,      12
1054*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN dct, identity, 12
1055*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN dct, adst,     12
1056*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN dct, flipadst, 12
1057*c0909341SAndroid Build Coastguard Worker
; ---------------------------------------------------------------------------
; idct_4x8_internal_12bpc: 4x8 inverse DCT, 12 bits per component.
; The first pass is the 10bpc one (direct jump to its .pass1). The second
; pass stays in 32-bit precision: clamp to the clip_18b range, transpose,
; run the 8x4 DCT helper, finish the last butterfly stage here, and join the
; shared 12bpc output code at iadst_4x8_internal_12bpc.end.
; NOTE(review): INV_TXFM_4X8_FN is defined outside this chunk; the third
; argument (12) is presumably the bit depth, mirroring the "bitdepth"
; parameter documented on INV_TXFM_4X16_FN.
; ---------------------------------------------------------------------------
1058*c0909341SAndroid Build Coastguard Workercglobal idct_4x8_internal_12bpc, 0, 7, 10, dst, stride, c, eob, tx2
1059*c0909341SAndroid Build Coastguard Worker    jmp m(idct_4x8_internal_10bpc).pass1
1060*c0909341SAndroid Build Coastguard Worker.pass2:
; Clamp the first-pass results to [clip_18b_min, clip_18b_max]; m8/m9 stay
; loaded for the helper called below.
1061*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m8, [clip_18b_min]
1062*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m9, [clip_18b_max]
1063*c0909341SAndroid Build Coastguard Worker    REPX     {pmaxsd x, m8}, m0, m1, m2, m3
1064*c0909341SAndroid Build Coastguard Worker    REPX     {pminsd x, m9}, m0, m1, m2, m3
1065*c0909341SAndroid Build Coastguard Worker    ; transpose & interleave
1066*c0909341SAndroid Build Coastguard Worker    pshufd               m0, m0, q1320
1067*c0909341SAndroid Build Coastguard Worker    pshufd               m1, m1, q1320
1068*c0909341SAndroid Build Coastguard Worker    pshufd               m2, m2, q1320
1069*c0909341SAndroid Build Coastguard Worker    pshufd               m3, m3, q1320
1070*c0909341SAndroid Build Coastguard Worker    punpckldq            m4, m0, m1
1071*c0909341SAndroid Build Coastguard Worker    punpckhdq            m0, m1
1072*c0909341SAndroid Build Coastguard Worker    punpckldq            m5, m2, m3
1073*c0909341SAndroid Build Coastguard Worker    punpckhdq            m2, m3
1074*c0909341SAndroid Build Coastguard Worker    vpermq               m0, m0, q3102
1075*c0909341SAndroid Build Coastguard Worker    vpermq               m2, m2, q3102
1076*c0909341SAndroid Build Coastguard Worker    vperm2i128           m1, m0, m2, 0x31   ; 1 5 (interleaved)
1077*c0909341SAndroid Build Coastguard Worker    vperm2i128           m3, m0, m2, 0x20   ; 7 3 (interleaved)
1078*c0909341SAndroid Build Coastguard Worker    vperm2i128           m0, m4, m5, 0x20   ; 0 2 (interleaved)
1079*c0909341SAndroid Build Coastguard Worker    vperm2i128           m2, m4, m5, 0x31   ; 4 6 (interleaved)
; NOTE(review): m7 = pd_2048 is presumably the rounding constant the helper
; expects; the helper is defined outside this chunk - confirm there.
1080*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m7, [pd_2048]
1081*c0909341SAndroid Build Coastguard Worker    call m(idct_8x4_internal_10bpc).main
; Final butterflies on the helper results in m0, m2, m4 and m5.
1082*c0909341SAndroid Build Coastguard Worker    psubd                m3, m0, m4  ; out7 out6
1083*c0909341SAndroid Build Coastguard Worker    paddd                m0, m4      ; out0 out1
1084*c0909341SAndroid Build Coastguard Worker    paddd                m1, m2, m5  ; out3 out2
1085*c0909341SAndroid Build Coastguard Worker    psubd                m2, m5      ; out4 out5
; Swap the 64-bit halves of each lane so m1 = out2 out3 and m3 = out6 out7,
; the order consumed by the shared .end code.
1086*c0909341SAndroid Build Coastguard Worker    pshufd               m1, m1, q1032
1087*c0909341SAndroid Build Coastguard Worker    pshufd               m3, m3, q1032
1088*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_4x8_internal_12bpc).end
1089*c0909341SAndroid Build Coastguard Worker
1090*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN adst, dct,      12
1091*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN adst, adst,     12
1092*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN adst, flipadst, 12
1093*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN adst, identity, 12
1094*c0909341SAndroid Build Coastguard Worker
; ---------------------------------------------------------------------------
; iadst_4x8_internal_12bpc: 4x8 inverse ADST, 12 bits per component.
;   (entry)      first pass: 8x4 ADST helper, >> 1, then .pass1_end
;   .pass1_end   adds pd_1024, >> 11, jmp tx2q; iflipadst_4x8_internal_12bpc
;                jumps here as well
;   .pass2       clamp, 32-bit second pass via .pass2_main, reorder/sign fix
;   .end         shared 12bpc output code: >> 3, pack to words, scale, add to
;                destination, clamp to [0, pixel_12bpc_max], clear
;                coefficients, store. idct_4x8_internal_12bpc and
;                iflipadst_4x8_internal_12bpc jump here with m0-m3 prepared.
;   .pass2_main  transpose, then tail-jump to the 10bpc .main3
; NOTE(review): INV_TXFM_4X8_FN is defined outside this chunk; the four
; instantiations above presumably create the public entry points.
; ---------------------------------------------------------------------------
1095*c0909341SAndroid Build Coastguard Workercglobal iadst_4x8_internal_12bpc, 0, 7, 10, dst, stride, c, eob, tx2
; The helper is defined outside this chunk; the code below consumes its
; results from m4, m6, m2 and m3.
1096*c0909341SAndroid Build Coastguard Worker    call m(iadst_8x4_internal_10bpc).main
; Halve first; the remaining rounding and shift happen in .pass1_end.
1097*c0909341SAndroid Build Coastguard Worker    psrad                m0, m4, 1
1098*c0909341SAndroid Build Coastguard Worker    psrad                m1, m6, 1
1099*c0909341SAndroid Build Coastguard Worker    psrad                m2, 1
1100*c0909341SAndroid Build Coastguard Worker    psrad                m3, 1
1101*c0909341SAndroid Build Coastguard Worker.pass1_end:
; x = (x + pd_1024) >> 11
1102*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m5, [pd_1024]
1103*c0909341SAndroid Build Coastguard Worker    REPX      {paddd x, m5}, m0, m1, m2, m3
1104*c0909341SAndroid Build Coastguard Worker    REPX      {psrad x, 11}, m0, m1, m2, m3
1105*c0909341SAndroid Build Coastguard Worker    jmp                tx2q ; continue with the second-pass handler
1106*c0909341SAndroid Build Coastguard Worker.pass2:
; Clamp the first-pass results to [clip_18b_min, clip_18b_max]; m8/m9 are
; also the clamp bounds used inside .main3.
1107*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m8, [clip_18b_min]
1108*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m9, [clip_18b_max]
1109*c0909341SAndroid Build Coastguard Worker    REPX     {pmaxsd x, m8}, m0, m1, m2, m3
1110*c0909341SAndroid Build Coastguard Worker    REPX     {pminsd x, m9}, m0, m1, m2, m3
1111*c0909341SAndroid Build Coastguard Worker    call .pass2_main
; .main3 returns its results in m0, m4, m2 and m5 and leaves the sign pattern
; loaded from pw_2048_m2048 in m6; regroup and apply the signs.
1112*c0909341SAndroid Build Coastguard Worker    vpblendd             m3, m0, m4, 0x33 ; out6 out7
1113*c0909341SAndroid Build Coastguard Worker    vpblendd             m0, m4, 0xcc     ; out0 out1
1114*c0909341SAndroid Build Coastguard Worker    pshufd               m1, m5, q1032
1115*c0909341SAndroid Build Coastguard Worker    psignd               m2, m6           ; out4 out5
1116*c0909341SAndroid Build Coastguard Worker    psignd               m1, m6           ; out2 out3
1117*c0909341SAndroid Build Coastguard Worker.end:
; In: m0 = out0 out1, m1 = out2 out3, m2 = out4 out5, m3 = out6 out7 (dwords).
1118*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m4, [pw_16384]
1119*c0909341SAndroid Build Coastguard Worker    REPX       {psrad x, 3}, m0, m1, m2, m3
1120*c0909341SAndroid Build Coastguard Worker    packssdw             m0, m2     ; 0 1 4 5 (interleaved)
1121*c0909341SAndroid Build Coastguard Worker    packssdw             m1, m3     ; 2 3 6 7 (interleaved)
; Undo the interleaving with a dword permutation table.
1122*c0909341SAndroid Build Coastguard Worker    mova                 m2, [iadst8_12_shuf]
1123*c0909341SAndroid Build Coastguard Worker    vpermd               m0, m2, m0 ; 0 1 4 5
1124*c0909341SAndroid Build Coastguard Worker    vpermd               m1, m2, m1 ; 2 3 6 7
; NOTE(review): assuming pw_16384 holds words of 16384, pmulhrsw here is a
; rounded divide by 2 - confirm against the constant table.
1125*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m4
1126*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m4
; r3 = 3*stride and r6 = dst + 4*stride, so rows 0-3 are addressed from dstq
; and rows 4-7 from r6.
1127*c0909341SAndroid Build Coastguard Worker    lea                  r3, [strideq*3]
1128*c0909341SAndroid Build Coastguard Worker    lea                  r6, [dstq+strideq*4]
; Gather destination rows: m4 = rows 0 1 | 4 5, m5 = rows 2 3 | 6 7
; (low lane | high lane).
1129*c0909341SAndroid Build Coastguard Worker    movq                xm4, [dstq+strideq*0]
1130*c0909341SAndroid Build Coastguard Worker    movhps              xm4, [dstq+strideq*1]
1131*c0909341SAndroid Build Coastguard Worker    movq                xm5, [dstq+strideq*2]
1132*c0909341SAndroid Build Coastguard Worker    movhps              xm5, [dstq+r3       ]
1133*c0909341SAndroid Build Coastguard Worker    movq                xm6, [r6  +strideq*0]
1134*c0909341SAndroid Build Coastguard Worker    movhps              xm6, [r6  +strideq*1]
1135*c0909341SAndroid Build Coastguard Worker    vinserti128          m4, xm6, 1
1136*c0909341SAndroid Build Coastguard Worker    movq                xm7, [r6  +strideq*2]
1137*c0909341SAndroid Build Coastguard Worker    movhps              xm7, [r6  +r3       ]
1138*c0909341SAndroid Build Coastguard Worker    vinserti128          m5, xm7, 1
1139*c0909341SAndroid Build Coastguard Worker    paddw                m0, m4 ; 0 1 4 5
1140*c0909341SAndroid Build Coastguard Worker    paddw                m1, m5 ; 2 3 6 7
1141*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m5, [pixel_12bpc_max]
1142*c0909341SAndroid Build Coastguard Worker    pxor                 m4, m4
; Clear the coefficient buffer (four stores of m4 = 0 at a 32-byte pitch).
1143*c0909341SAndroid Build Coastguard Worker    REPX {mova [cq+32*x], m4}, 0, 1, 2, 3
; Clamp every sum to [0, pixel_12bpc_max].
1144*c0909341SAndroid Build Coastguard Worker    REPX    {pmaxsw x,  m4}, m0, m1
1145*c0909341SAndroid Build Coastguard Worker    REPX    {pminsw x,  m5}, m0, m1
1146*c0909341SAndroid Build Coastguard Worker    vextracti128        xm2, m0, 1  ; out4 out5
1147*c0909341SAndroid Build Coastguard Worker    vextracti128        xm3, m1, 1  ; out6 out7
1148*c0909341SAndroid Build Coastguard Worker    movq   [dstq+strideq*0], xm0
1149*c0909341SAndroid Build Coastguard Worker    movhps [dstq+strideq*1], xm0
1150*c0909341SAndroid Build Coastguard Worker    movq   [dstq+strideq*2], xm1
1151*c0909341SAndroid Build Coastguard Worker    movhps [dstq+r3       ], xm1
1152*c0909341SAndroid Build Coastguard Worker    movq   [r6  +strideq*0], xm2
1153*c0909341SAndroid Build Coastguard Worker    movhps [r6  +strideq*1], xm2
1154*c0909341SAndroid Build Coastguard Worker    movq   [r6  +strideq*2], xm3
1155*c0909341SAndroid Build Coastguard Worker    movhps [r6  +r3       ], xm3
1156*c0909341SAndroid Build Coastguard Worker    RET
1157*c0909341SAndroid Build Coastguard WorkerALIGN function_align
1158*c0909341SAndroid Build Coastguard Worker.pass2_main:
; In: m0-m3 = clamped first-pass results; m8/m9 must hold the clamp bounds.
; iflipadst_4x8_internal_12bpc calls this label as well.
1159*c0909341SAndroid Build Coastguard Worker    ; transpose & interleave
1160*c0909341SAndroid Build Coastguard Worker    pshufd               m0, m0, q1320
1161*c0909341SAndroid Build Coastguard Worker    pshufd               m1, m1, q1320
1162*c0909341SAndroid Build Coastguard Worker    pshufd               m2, m2, q1320
1163*c0909341SAndroid Build Coastguard Worker    pshufd               m3, m3, q1320
1164*c0909341SAndroid Build Coastguard Worker    punpckldq            m4, m0, m1
1165*c0909341SAndroid Build Coastguard Worker    punpckhdq            m0, m1
1166*c0909341SAndroid Build Coastguard Worker    punpckldq            m5, m2, m3
1167*c0909341SAndroid Build Coastguard Worker    punpckhdq            m2, m3
1168*c0909341SAndroid Build Coastguard Worker    vperm2i128           m1, m0, m2, 0x31   ; 7 5 (interleaved)
1169*c0909341SAndroid Build Coastguard Worker    vperm2i128           m3, m0, m2, 0x20   ; 3 1 (interleaved)
1170*c0909341SAndroid Build Coastguard Worker    vperm2i128           m0, m4, m5, 0x20   ; 0 2 (interleaved)
1171*c0909341SAndroid Build Coastguard Worker    vperm2i128           m2, m4, m5, 0x31   ; 4 6 (interleaved)
; m7 = rounding bias used inside .main3.
1172*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m7, [pd_2048]
; Tail jump: the ret at the end of .main3 returns to our caller.
1173*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_4x8_internal_10bpc).main3
1174*c0909341SAndroid Build Coastguard Worker
1175*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN flipadst, dct,      12
1176*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN flipadst, adst,     12
1177*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN flipadst, flipadst, 12
1178*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN flipadst, identity, 12
1179*c0909341SAndroid Build Coastguard Worker
; ---------------------------------------------------------------------------
; iflipadst_4x8_internal_12bpc: 4x8 inverse flipped ADST, 12 bits per
; component. It shares all heavy lifting with iadst_4x8_internal_12bpc:
;   - pass 1 takes the helper results in the order m3, m2, m6, m4 (the plain
;     ADST uses m4, m6, m2, m3), then joins the ADST's .pass1_end;
;   - pass 2 clamps, calls the ADST's .pass2_main, rearranges the results
;     into reversed order and joins the ADST's .end.
; NOTE(review): INV_TXFM_4X8_FN is defined outside this chunk; the four
; instantiations above presumably create the public entry points.
; ---------------------------------------------------------------------------
1180*c0909341SAndroid Build Coastguard Workercglobal iflipadst_4x8_internal_12bpc, 0, 7, 10, dst, stride, c, eob, tx2
1181*c0909341SAndroid Build Coastguard Worker    call m(iadst_8x4_internal_10bpc).main
; Halve while reversing the register order of the helper results.
1182*c0909341SAndroid Build Coastguard Worker    psrad                m0, m3, 1
1183*c0909341SAndroid Build Coastguard Worker    psrad                m1, m2, 1
1184*c0909341SAndroid Build Coastguard Worker    psrad                m2, m6, 1
1185*c0909341SAndroid Build Coastguard Worker    psrad                m3, m4, 1
; Shared "+ pd_1024, >> 11, jmp tx2q" tail.
1186*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_4x8_internal_12bpc).pass1_end
1187*c0909341SAndroid Build Coastguard Worker.pass2:
; Clamp the first-pass results to [clip_18b_min, clip_18b_max]; m8/m9 are
; also the clamp bounds used by the routine .pass2_main jumps into.
1188*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m8, [clip_18b_min]
1189*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m9, [clip_18b_max]
1190*c0909341SAndroid Build Coastguard Worker    REPX     {pmaxsd x, m8}, m0, m1, m2, m3
1191*c0909341SAndroid Build Coastguard Worker    REPX     {pminsd x, m9}, m0, m1, m2, m3
1192*c0909341SAndroid Build Coastguard Worker    call m(iadst_4x8_internal_12bpc).pass2_main
; Results arrive in m0, m4, m2 and m5 with the sign pattern in m6. Build the
; reversed output order; m6 has its 64-bit halves swapped before the last
; psignd so the signs line up with the order of m5.
1193*c0909341SAndroid Build Coastguard Worker    shufpd               m3, m4, m0, 0x05 ; out1 out0
1194*c0909341SAndroid Build Coastguard Worker    shufpd               m0, m4, 0x05     ; out7 out6
1195*c0909341SAndroid Build Coastguard Worker    psignd               m2, m6
1196*c0909341SAndroid Build Coastguard Worker    pshufd               m6, m6, q1032
1197*c0909341SAndroid Build Coastguard Worker    pshufd               m1, m2, q1032    ; out5 out4
1198*c0909341SAndroid Build Coastguard Worker    psignd               m2, m5, m6       ; out3 out2
; Shared 12bpc output code (scale, add to destination, clamp, store).
1199*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_4x8_internal_12bpc).end
1200*c0909341SAndroid Build Coastguard Worker
1201*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN identity, dct,      12
1202*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN identity, adst,     12
1203*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN identity, flipadst, 12
1204*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN identity, identity, 12
1205*c0909341SAndroid Build Coastguard Worker
; ---------------------------------------------------------------------------
; iidentity_4x8_internal_12bpc: 4x8 inverse identity transform, 12 bits per
; component. Both passes are delegated to the 10bpc implementation; the only
; difference is the upper clamp bound passed to the output code in m6
; (pixel_12bpc_max here, pixel_10bpc_max in the 10bpc variant).
; NOTE(review): INV_TXFM_4X8_FN is defined outside this chunk; the four
; instantiations above presumably create the public entry points.
; ---------------------------------------------------------------------------
1206*c0909341SAndroid Build Coastguard Workercglobal iidentity_4x8_internal_12bpc, 0, 7, 10, dst, stride, c, eob, tx2
; The first pass is identical to the 10bpc one.
1207*c0909341SAndroid Build Coastguard Worker    jmp m(iidentity_4x8_internal_10bpc).pass1
1208*c0909341SAndroid Build Coastguard Worker.pass2:
1209*c0909341SAndroid Build Coastguard Worker    ; m0 = in0 in1
1210*c0909341SAndroid Build Coastguard Worker    ; m1 = in2 in3
1211*c0909341SAndroid Build Coastguard Worker    ; m2 = in4 in5
1212*c0909341SAndroid Build Coastguard Worker    ; m3 = in6 in7
1213*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m6, [pixel_12bpc_max] ; upper clamp bound for .pass2_end
1214*c0909341SAndroid Build Coastguard Worker    call m(iidentity_4x8_internal_10bpc).pass2_end
1215*c0909341SAndroid Build Coastguard Worker    RET
1216*c0909341SAndroid Build Coastguard Worker
; INV_TXFM_4X16_FN type1, type2[, bitdepth]   (bitdepth defaults to 10)
; Expands INV_TXFM_FN for the 4x16 block size. For the dct_dct pair only, it
; also emits a DC-only path that computes (c[0]*181 + 384) >> 9 in r6d,
; overwrites c[0] with eobd, and tail-jumps to .dconly3 of the 10bpc 4x4
; dct_dct function.
; NOTE(review): INV_TXFM_FN and .dconly3 are outside this excerpt; how this
; path is reached and what .dconly3 expects in r3d/r6d/xm2 must be confirmed
; there.
1217*c0909341SAndroid Build Coastguard Worker%macro INV_TXFM_4X16_FN 2-3 10 ; type1, type2, bitdepth
1218*c0909341SAndroid Build Coastguard Worker    INV_TXFM_FN          %1, %2, 0, 4x16, %3
1219*c0909341SAndroid Build Coastguard Worker%ifidn %1_%2, dct_dct
1220*c0909341SAndroid Build Coastguard Worker    imul                r6d, [cq], 181 ; r6d = first coefficient * 181
1221*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        xm2, [dconly_%3bpc]
1222*c0909341SAndroid Build Coastguard Worker    mov                [cq], eobd ; 0
1223*c0909341SAndroid Build Coastguard Worker    or                  r3d, 16 ; presumably the row count used by .dconly3 - confirm
1224*c0909341SAndroid Build Coastguard Worker    add                 r6d, 384 ; rounding bias for the shift below
1225*c0909341SAndroid Build Coastguard Worker    sar                 r6d, 9
1226*c0909341SAndroid Build Coastguard Worker    jmp m(inv_txfm_add_dct_dct_4x4_10bpc).dconly3
1227*c0909341SAndroid Build Coastguard Worker%endif
1228*c0909341SAndroid Build Coastguard Worker%endmacro
1229*c0909341SAndroid Build Coastguard Worker
; 4x16 instantiations whose first transform type is dct (bitdepth argument
; omitted, so the macro default of 10 applies).
1230*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN dct, dct
1231*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN dct, identity
1232*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN dct, adst
1233*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN dct, flipadst
1234*c0909341SAndroid Build Coastguard Worker
; idct_4x16_internal_10bpc
;   .pass1       32-bit first pass over the eight 32-byte rows at cq; leaves
;                its results in m0-m7 and continues at the address in tx2q.
;   .pass2       packs m0-m7 to signed words, reorders them (see the row-index
;                comments), calls the 8bpc idct_4x16 .main, then writes out
;                through .pass2_end with m9 = pw_2048 and m8 = pixel_10bpc_max.
;   .pass1_main  rotates the odd inputs (m1/m3 and m5/m7) and returns
;                m6 = pd_1448 for the caller's multiplies.
;   .pass1_main2 even-input butterfly plus the final add/sub stage.
;   .pass2_end   in: m0-m3 packed rows, m8 clamp maximum, m9 pmulhrsw factor.
;                Zeroes m7 and issues four .write_4x4 calls.
;   .write_4x4   in: m0 = residual for four rows, m7 = 0, m8 = clamp maximum.
;                Adds m0 to four dst rows, clamps to [0, m8], zeroes 64 bytes
;                at cq, then advances cq by 64 and dstq by four rows.
1235*c0909341SAndroid Build Coastguard Workercglobal idct_4x16_internal_10bpc, 0, 7, 11, dst, stride, c, eob, tx2
1236*c0909341SAndroid Build Coastguard Worker.pass1:
1237*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m10, [pd_3072] ; bias added in .pass1_main2 before the >>11
1238*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+32*2]
1239*c0909341SAndroid Build Coastguard Worker    mova                 m3, [cq+32*6]
1240*c0909341SAndroid Build Coastguard Worker    mova                 m5, [cq+32*3]
1241*c0909341SAndroid Build Coastguard Worker    mova                 m7, [cq+32*7]
1242*c0909341SAndroid Build Coastguard Worker    call .pass1_main
    ; m6 = pd_1448 on return from .pass1_main; the last multiply consumes it.
1243*c0909341SAndroid Build Coastguard Worker    pmulld               m0, m6, [cq+32*0]
1244*c0909341SAndroid Build Coastguard Worker    pmulld               m2, m6, [cq+32*4]
1245*c0909341SAndroid Build Coastguard Worker    pmulld               m4, m6, [cq+32*1]
1246*c0909341SAndroid Build Coastguard Worker    pmulld               m6,     [cq+32*5]
1247*c0909341SAndroid Build Coastguard Worker    call .pass1_main2
1248*c0909341SAndroid Build Coastguard Worker    REPX       {psrad x, 1}, m0, m1, m2, m3, m4, m5, m6, m7
1249*c0909341SAndroid Build Coastguard Worker    jmp                tx2q
1250*c0909341SAndroid Build Coastguard Worker.pass2:
    ; Narrow the 32-bit first-pass results to saturated 16-bit words.
1251*c0909341SAndroid Build Coastguard Worker    packssdw             m0, m4
1252*c0909341SAndroid Build Coastguard Worker    packssdw             m1, m5
1253*c0909341SAndroid Build Coastguard Worker    packssdw             m2, m6
1254*c0909341SAndroid Build Coastguard Worker    packssdw             m3, m7
    ; NOTE(review): r6 is presumably the constant base expected by the 8bpc
    ; .main called below - that routine is not visible here, confirm.
1255*c0909341SAndroid Build Coastguard Worker    lea                  r6, [deint_shuf+128]
1256*c0909341SAndroid Build Coastguard Worker    punpcklwd            m4, m2, m3
1257*c0909341SAndroid Build Coastguard Worker    punpckhwd            m2, m3
1258*c0909341SAndroid Build Coastguard Worker    punpckhwd            m5, m0, m1
1259*c0909341SAndroid Build Coastguard Worker    punpcklwd            m0, m1
1260*c0909341SAndroid Build Coastguard Worker    punpckhdq            m1, m0, m4     ; 2 3
1261*c0909341SAndroid Build Coastguard Worker    punpckldq            m0, m4         ; 0 1
1262*c0909341SAndroid Build Coastguard Worker    punpckldq            m4, m5, m2     ; 8 9
1263*c0909341SAndroid Build Coastguard Worker    punpckhdq            m5, m2         ; a b
1264*c0909341SAndroid Build Coastguard Worker    vextracti128        xm2, m0, 1      ; 4 5
1265*c0909341SAndroid Build Coastguard Worker    vextracti128        xm3, m1, 1      ; 6 7
1266*c0909341SAndroid Build Coastguard Worker    vextracti128        xm6, m4, 1      ; c d
1267*c0909341SAndroid Build Coastguard Worker    vextracti128        xm7, m5, 1      ; e f
1268*c0909341SAndroid Build Coastguard Worker    call m(idct_4x16_internal_8bpc).main
1269*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m9, [pw_2048]
    ; Re-join the 128-bit halves so that each ymm covers four output rows.
1270*c0909341SAndroid Build Coastguard Worker    vinserti128          m0, m0, xm1, 1 ; 0 1   3 2
1271*c0909341SAndroid Build Coastguard Worker    vinserti128          m1, m2, xm3, 1 ; 4 5   7 6
1272*c0909341SAndroid Build Coastguard Worker    vinserti128          m2, m4, xm5, 1 ; 8 9   b a
1273*c0909341SAndroid Build Coastguard Worker    vinserti128          m3, m6, xm7, 1 ; c d   f e
1274*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m8, [pixel_10bpc_max]
1275*c0909341SAndroid Build Coastguard Worker    call .pass2_end
1276*c0909341SAndroid Build Coastguard Worker    RET
1277*c0909341SAndroid Build Coastguard WorkerALIGN function_align
1278*c0909341SAndroid Build Coastguard Worker.pass1_main:
    ; in: m1, m3, m5, m7 = rows 2, 6, 3, 7.   out: m6 = pd_1448, m9 = pd_2048.
1279*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m4, [pd_3784]
1280*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m8, [pd_1567]
1281*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m9, [pd_2048]
1282*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m6, [pd_1448]
1283*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         1, 3, 0, 2, _, 9, 8, 4 ; t2l, t3l
1284*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         5, 7, 4, 2, _, 9, 8, 4 ; t2h, t3h
1285*c0909341SAndroid Build Coastguard Worker    ret
1286*c0909341SAndroid Build Coastguard WorkerALIGN function_align
1287*c0909341SAndroid Build Coastguard Worker.pass1_main2:
    ; in: m0, m2, m4, m6 = scaled even rows, m1/m3/m5/m7 = odd-row terms,
    ;     m10 = bias. m8 and m9 are overwritten as temporaries.
1288*c0909341SAndroid Build Coastguard Worker    paddd                m0, m10
1289*c0909341SAndroid Build Coastguard Worker    paddd                m4, m10
1290*c0909341SAndroid Build Coastguard Worker    paddd                m8, m0, m2
1291*c0909341SAndroid Build Coastguard Worker    psubd                m0, m2
1292*c0909341SAndroid Build Coastguard Worker    paddd                m9, m4, m6
1293*c0909341SAndroid Build Coastguard Worker    psubd                m4, m6
1294*c0909341SAndroid Build Coastguard Worker    REPX      {psrad x, 11}, m8, m0, m9, m4 ; t0l, t1l, t0h, t1h
1295*c0909341SAndroid Build Coastguard Worker    psubd                m2, m0, m1
1296*c0909341SAndroid Build Coastguard Worker    paddd                m1, m0
1297*c0909341SAndroid Build Coastguard Worker    psubd                m6, m4, m5
1298*c0909341SAndroid Build Coastguard Worker    paddd                m5, m4
1299*c0909341SAndroid Build Coastguard Worker    paddd                m0, m8, m3
1300*c0909341SAndroid Build Coastguard Worker    psubd                m3, m8, m3
1301*c0909341SAndroid Build Coastguard Worker    paddd                m4, m9, m7
1302*c0909341SAndroid Build Coastguard Worker    psubd                m7, m9, m7
1303*c0909341SAndroid Build Coastguard Worker    ret
1304*c0909341SAndroid Build Coastguard WorkerALIGN function_align
1305*c0909341SAndroid Build Coastguard Worker.pass2_end:
1306*c0909341SAndroid Build Coastguard Worker    lea                  r6, [strideq*3] ; r6 = offset of the fourth row
1307*c0909341SAndroid Build Coastguard Worker    pxor                 m7, m7 ; zero: lower clamp bound and coefficient clear value
1308*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m9
1309*c0909341SAndroid Build Coastguard Worker    call .write_4x4
1310*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m1, m9
1311*c0909341SAndroid Build Coastguard Worker    call .write_4x4
1312*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m2, m9
1313*c0909341SAndroid Build Coastguard Worker    call .write_4x4
1314*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m3, m9
1315*c0909341SAndroid Build Coastguard Worker    call .write_4x4
1316*c0909341SAndroid Build Coastguard Worker    ret
1317*c0909341SAndroid Build Coastguard WorkerALIGN function_align
1318*c0909341SAndroid Build Coastguard Worker.write_4x4:
    ; Gather four 8-byte dst rows into m4 in qword order row0 row1 | row3 row2,
    ; which matches the residual layout produced in .pass2.
1319*c0909341SAndroid Build Coastguard Worker    movq                xm4, [dstq+strideq*0]
1320*c0909341SAndroid Build Coastguard Worker    movhps              xm4, [dstq+strideq*1]
1321*c0909341SAndroid Build Coastguard Worker    vpbroadcastq         m5, [dstq+strideq*2]
1322*c0909341SAndroid Build Coastguard Worker    vpbroadcastq         m6, [dstq+r6       ]
1323*c0909341SAndroid Build Coastguard Worker    mova          [cq+32*0], m7
1324*c0909341SAndroid Build Coastguard Worker    mova          [cq+32*1], m7
1325*c0909341SAndroid Build Coastguard Worker    add                  cq, 32*2
1326*c0909341SAndroid Build Coastguard Worker    vpblendd             m4, m5, 0xc0 ; row 2 -> upper qword of the high lane
1327*c0909341SAndroid Build Coastguard Worker    vpblendd             m4, m6, 0x30 ; row 3 -> lower qword of the high lane
1328*c0909341SAndroid Build Coastguard Worker    paddw                m4, m0
1329*c0909341SAndroid Build Coastguard Worker    pmaxsw               m4, m7
1330*c0909341SAndroid Build Coastguard Worker    pminsw               m4, m8
1331*c0909341SAndroid Build Coastguard Worker    vextracti128        xm5, m4, 1
1332*c0909341SAndroid Build Coastguard Worker    movq   [dstq+strideq*0], xm4
1333*c0909341SAndroid Build Coastguard Worker    movhps [dstq+strideq*1], xm4
1334*c0909341SAndroid Build Coastguard Worker    movhps [dstq+strideq*2], xm5
1335*c0909341SAndroid Build Coastguard Worker    movq   [dstq+r6       ], xm5
1336*c0909341SAndroid Build Coastguard Worker    lea                dstq, [dstq+strideq*4]
1337*c0909341SAndroid Build Coastguard Worker    ret
1338*c0909341SAndroid Build Coastguard Worker
; 4x16 instantiations whose first transform type is adst (bitdepth argument
; omitted, so the macro default of 10 applies).
1339*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN adst, dct
1340*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN adst, adst
1341*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN adst, flipadst
1342*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN adst, identity
1343*c0909341SAndroid Build Coastguard Worker
; iadst_4x16_internal_10bpc
;   Entry       first pass: delegates to .main/.main_end of the 10bpc 16x4
;               adst function (outside this excerpt), shifts every result
;               right by 13 into m0-m7 and continues at tx2q.
;   .pass2      runs .pass2_main, then scales by +/-2048 with pmulhrsw (the
;               sign pattern in m9 follows the negated outputs noted in the
;               comments) and writes four groups of four rows.
;   .write_4x4  in: m0 = residual, m7 = 0, m8 = clamp maximum. Row order in
;               the register is row3 row0 | row2 row1.
;   .pass2_main packs to words, reorders, calls the 8bpc iadst_4x16 .main2
;               and finishes the middle outputs with pw_2896x8.
;   .main       loads sixteen 16-byte coefficient rows from cq, pairs them
;               as annotated and falls through into .main2.
;   .main2      32-bit 16-point adst core. It reads m12/m13 as clip bounds
;               and uses m8-m11 as scratch/constants.
;               NOTE(review): this cglobal declares only 11 vector registers
;               while .main2 touches m11-m13; callers entering via .main or
;               .main2 presumably declare enough registers and preload
;               m12/m13 - confirm at the call sites.
1344*c0909341SAndroid Build Coastguard Workercglobal iadst_4x16_internal_10bpc, 0, 7, 11, dst, stride, c, eob, tx2
1345*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x4_internal_10bpc).main
1346*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m6, [pd_6144] ; handed to .main_end in m6
1347*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x4_internal_10bpc).main_end
1348*c0909341SAndroid Build Coastguard Worker    psrad                m0, m4, 13
1349*c0909341SAndroid Build Coastguard Worker    psrad                m1, m5, 13
1350*c0909341SAndroid Build Coastguard Worker    psrad                m2, 13
1351*c0909341SAndroid Build Coastguard Worker    psrad                m3, 13
1352*c0909341SAndroid Build Coastguard Worker    psrad                m4, m8, 13
1353*c0909341SAndroid Build Coastguard Worker    psrad                m5, m9, 13
1354*c0909341SAndroid Build Coastguard Worker    psrad                m6, 13
1355*c0909341SAndroid Build Coastguard Worker    psrad                m7, 13
1356*c0909341SAndroid Build Coastguard Worker    jmp                tx2q
1357*c0909341SAndroid Build Coastguard Worker.pass2:
1358*c0909341SAndroid Build Coastguard Worker    call .pass2_main
1359*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m5, [pw_2048]
1360*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m8, [pixel_10bpc_max]
1361*c0909341SAndroid Build Coastguard Worker    lea                  r6, [strideq*3]
1362*c0909341SAndroid Build Coastguard Worker    vpblendd             m4, m3, m0, 0xcc ; -out3   out0   out2  -out1
1363*c0909341SAndroid Build Coastguard Worker    pshufd               m2, m2, q1032    ; -out11  out8   out10 -out9
1364*c0909341SAndroid Build Coastguard Worker    vpblendd             m3, m0, 0x33     ; -out15  out12  out14 -out13
1365*c0909341SAndroid Build Coastguard Worker    pxor                 m7, m7
1366*c0909341SAndroid Build Coastguard Worker    psubw                m9, m7, m5
1367*c0909341SAndroid Build Coastguard Worker    vpblendd             m9, m5, 0x3c     ; -2048   2048   2048  -2048
1368*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m4, m9
1369*c0909341SAndroid Build Coastguard Worker    call .write_4x4
1370*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m1, m9
1371*c0909341SAndroid Build Coastguard Worker    call .write_4x4
1372*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m2, m9
1373*c0909341SAndroid Build Coastguard Worker    call .write_4x4
1374*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m3, m9
1375*c0909341SAndroid Build Coastguard Worker    call .write_4x4
1376*c0909341SAndroid Build Coastguard Worker    RET
1377*c0909341SAndroid Build Coastguard WorkerALIGN function_align
1378*c0909341SAndroid Build Coastguard Worker.write_4x4:
    ; Gather four 8-byte dst rows into m4 in qword order row3 row0 | row2 row1
    ; so they line up with the residual layout built in .pass2.
1379*c0909341SAndroid Build Coastguard Worker    movq                xm4, [dstq+r6       ]
1380*c0909341SAndroid Build Coastguard Worker    movhps              xm4, [dstq+strideq*0]
1381*c0909341SAndroid Build Coastguard Worker    vpbroadcastq         m5, [dstq+strideq*1]
1382*c0909341SAndroid Build Coastguard Worker    vpbroadcastq         m6, [dstq+strideq*2]
    ; Clear the 64 coefficient bytes covered by this call and step cq past them.
1383*c0909341SAndroid Build Coastguard Worker    mova          [cq+32*0], m7
1384*c0909341SAndroid Build Coastguard Worker    mova          [cq+32*1], m7
1385*c0909341SAndroid Build Coastguard Worker    add                  cq, 32*2
1386*c0909341SAndroid Build Coastguard Worker    vpblendd             m4, m5, 0xc0 ; row 1 -> upper qword of the high lane
1387*c0909341SAndroid Build Coastguard Worker    vpblendd             m4, m6, 0x30 ; row 2 -> lower qword of the high lane
1388*c0909341SAndroid Build Coastguard Worker    paddw                m4, m0
1389*c0909341SAndroid Build Coastguard Worker    pmaxsw               m4, m7
1390*c0909341SAndroid Build Coastguard Worker    pminsw               m4, m8
1391*c0909341SAndroid Build Coastguard Worker    vextracti128        xm5, m4, 1
1392*c0909341SAndroid Build Coastguard Worker    movhps [dstq+strideq*0], xm4
1393*c0909341SAndroid Build Coastguard Worker    movhps [dstq+strideq*1], xm5
1394*c0909341SAndroid Build Coastguard Worker    movq   [dstq+strideq*2], xm5
1395*c0909341SAndroid Build Coastguard Worker    movq   [dstq+r6       ], xm4
1396*c0909341SAndroid Build Coastguard Worker    lea                dstq, [dstq+strideq*4]
1397*c0909341SAndroid Build Coastguard Worker    ret
1398*c0909341SAndroid Build Coastguard WorkerALIGN function_align
1399*c0909341SAndroid Build Coastguard Worker.pass2_main:
    ; in: m0-m7 = 32-bit first-pass results. Narrow them to saturated words.
1400*c0909341SAndroid Build Coastguard Worker    packssdw             m0, m4
1401*c0909341SAndroid Build Coastguard Worker    packssdw             m1, m5
1402*c0909341SAndroid Build Coastguard Worker    packssdw             m2, m6
1403*c0909341SAndroid Build Coastguard Worker    packssdw             m3, m7
    ; NOTE(review): r6 is presumably the constant base expected by the 8bpc
    ; .main2 called below - that routine is not visible here, confirm.
1404*c0909341SAndroid Build Coastguard Worker    lea                  r6, [deint_shuf+128]
1405*c0909341SAndroid Build Coastguard Worker    punpcklwd            m4, m2, m3
1406*c0909341SAndroid Build Coastguard Worker    punpckhwd            m2, m3
1407*c0909341SAndroid Build Coastguard Worker    punpckhwd            m5, m0, m1
1408*c0909341SAndroid Build Coastguard Worker    punpcklwd            m0, m1
1409*c0909341SAndroid Build Coastguard Worker    punpckhdq            m1, m0, m4
1410*c0909341SAndroid Build Coastguard Worker    punpckldq            m0, m4
1411*c0909341SAndroid Build Coastguard Worker    punpckldq            m4, m5, m2
1412*c0909341SAndroid Build Coastguard Worker    punpckhdq            m5, m2
1413*c0909341SAndroid Build Coastguard Worker    vpblendd             m3, m0, m1, 0x33
1414*c0909341SAndroid Build Coastguard Worker    vpblendd             m0, m1, 0xcc
1415*c0909341SAndroid Build Coastguard Worker    shufpd               m2, m5, m4, 0x05
1416*c0909341SAndroid Build Coastguard Worker    shufpd               m4, m5, 0x05
1417*c0909341SAndroid Build Coastguard Worker    vperm2i128           m1, m0, m3, 0x31 ; 4 7   6 5
1418*c0909341SAndroid Build Coastguard Worker    vinserti128          m0, xm3, 1       ; 0 3   2 1
1419*c0909341SAndroid Build Coastguard Worker    vperm2i128           m3, m2, m4, 0x31 ; c f   e d ; ????
1420*c0909341SAndroid Build Coastguard Worker    vinserti128          m2, xm4, 1       ; b 8   9 a
1421*c0909341SAndroid Build Coastguard Worker    call m(iadst_4x16_internal_8bpc).main2
1422*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m5, [pw_2896x8]
1423*c0909341SAndroid Build Coastguard Worker    paddsw               m1, m2, m4
1424*c0909341SAndroid Build Coastguard Worker    psubsw               m2, m4
1425*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m5           ; -out7   out4   out6  -out5
1426*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m2, m5           ;  out8  -out11 -out9   out10
1427*c0909341SAndroid Build Coastguard Worker    ret
1428*c0909341SAndroid Build Coastguard WorkerALIGN function_align
1429*c0909341SAndroid Build Coastguard Worker.main:
    ; Each 16-byte row is broadcast to both lanes; shufpd 0x0c then keeps the
    ; low qwords of both sources in the low lane and the high qwords in the
    ; high lane, giving the row pairs named in the comments. m8 is scratch.
1430*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m0, [cq+16* 0]
1431*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m4, [cq+16* 2]
1432*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m1, [cq+16*15]
1433*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m5, [cq+16*13]
1434*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m2, [cq+16* 4]
1435*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m6, [cq+16* 6]
1436*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m3, [cq+16*11]
1437*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m7, [cq+16* 9]
1438*c0909341SAndroid Build Coastguard Worker    shufpd               m0, m4, 0x0c ;  0  2
1439*c0909341SAndroid Build Coastguard Worker    shufpd               m1, m5, 0x0c ; 15 13
1440*c0909341SAndroid Build Coastguard Worker    shufpd               m2, m6, 0x0c ;  4  6
1441*c0909341SAndroid Build Coastguard Worker    shufpd               m3, m7, 0x0c ; 11  9
1442*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m4, [cq+16* 8]
1443*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m6, [cq+16*10]
1444*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m5, [cq+16* 7]
1445*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m7, [cq+16* 5]
1446*c0909341SAndroid Build Coastguard Worker    shufpd               m4, m6, 0x0c ;  8 10
1447*c0909341SAndroid Build Coastguard Worker    shufpd               m5, m7, 0x0c ;  7  5
1448*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m6, [cq+16*12]
1449*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m7, [cq+16*14]
1450*c0909341SAndroid Build Coastguard Worker    shufpd               m6, m7, 0x0c ; 12 14
1451*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m7, [cq+16* 3]
1452*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m8, [cq+16* 1]
1453*c0909341SAndroid Build Coastguard Worker    shufpd               m7, m8, 0x0c ;  3  1
1454*c0909341SAndroid Build Coastguard Worker.main2:
1455*c0909341SAndroid Build Coastguard Worker    ; expects: m12 = clip_min   m13 = clip_max
    ; m11 = pd_2048 is the rounding term passed to every ITX_MULSUB_2D below
    ; and is also added before the final >>12.
1456*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m11, [pd_2048]
1457*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         1, 0, 8, 9, 10, 11,  201_995,  4091_3973, 1
1458*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         3, 2, 8, 9, 10, 11, 1751_2440, 3703_3290, 1
1459*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         5, 4, 8, 9, 10, 11, 3035_3513, 2751_2106, 1
1460*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         7, 6, 8, 9, 10, 11, 3857_4052, 1380_601,  1
1461*c0909341SAndroid Build Coastguard Worker    psubd                m8, m0, m4 ; t8a  t10a
1462*c0909341SAndroid Build Coastguard Worker    paddd                m0, m4     ; t0a  t2a
1463*c0909341SAndroid Build Coastguard Worker    psubd                m4, m1, m5 ; t9a  t11a
1464*c0909341SAndroid Build Coastguard Worker    paddd                m1, m5     ; t1a  t3a
1465*c0909341SAndroid Build Coastguard Worker    psubd                m5, m2, m6 ; t12a t14a
1466*c0909341SAndroid Build Coastguard Worker    paddd                m2, m6     ; t4a  t6a
1467*c0909341SAndroid Build Coastguard Worker    psubd                m6, m3, m7 ; t13a t15a
1468*c0909341SAndroid Build Coastguard Worker    paddd                m3, m7     ; t5a  t7a
    ; Clamp all eight intermediates to [m12, m13] before the next multiplies.
1469*c0909341SAndroid Build Coastguard Worker    REPX    {pmaxsd x, m12}, m0, m1, m2, m3, m4, m5, m6, m8
1470*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m0, m1, m2, m3, m4, m5, m6, m8
1471*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         8, 4, 7, 9, 10, 11,  799_3406, 4017_2276, 1
1472*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         6, 5, 7, 9, 10, 11, 4017_2276, 10,        1
1473*c0909341SAndroid Build Coastguard Worker    psubd                m7, m0, m2 ; t4   t6
1474*c0909341SAndroid Build Coastguard Worker    paddd                m0, m2     ; t0   t2
1475*c0909341SAndroid Build Coastguard Worker    psubd                m2, m1, m3 ; t5   t7
1476*c0909341SAndroid Build Coastguard Worker    paddd                m1, m3     ; t1   t3
1477*c0909341SAndroid Build Coastguard Worker    psubd                m3, m4, m6 ; t12a t14a
1478*c0909341SAndroid Build Coastguard Worker    paddd                m4, m6     ; t8a  t10a
1479*c0909341SAndroid Build Coastguard Worker    psubd                m6, m8, m5 ; t13a t15a
1480*c0909341SAndroid Build Coastguard Worker    paddd                m8, m5     ; t9a  t11a
1481*c0909341SAndroid Build Coastguard Worker    REPX    {pmaxsd x, m12}, m0, m1, m2, m3, m4, m6, m7, m8
1482*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m0, m1, m2, m3, m4, m6, m7, m8
1483*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m5, m3, m7 ; t12a t4
1484*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m3, m7     ; t14a t6
1485*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m7, m6, m2 ; t15a t7
1486*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m6, m2     ; t13a t5
1487*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         7, 3, 2, 9, 10, 11, 3784, 1567
1488*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         5, 6, 2, 9, 10, 11, 1567, 10
1489*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m10, [pd_2896]
1490*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m9, [pw_2048_m2048] ; + + - -
1491*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m2, m4, m0 ; t10a t2
1492*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m4, m0     ; t8a  t0
1493*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m0, m8, m1 ; t11a t3
1494*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m8, m1     ; t9a  t1
1495*c0909341SAndroid Build Coastguard Worker    paddd                m1, m6, m7 ; out2   -out3
1496*c0909341SAndroid Build Coastguard Worker    psubd                m6, m7     ; t14a t6
1497*c0909341SAndroid Build Coastguard Worker    paddd                m7, m5, m3 ; -out13  out12
1498*c0909341SAndroid Build Coastguard Worker    psubd                m5, m3     ; t15a t7
1499*c0909341SAndroid Build Coastguard Worker    psubd                m3, m8, m0 ; t11  t3a
1500*c0909341SAndroid Build Coastguard Worker    paddd                m8, m0     ; out14  -out15
1501*c0909341SAndroid Build Coastguard Worker    paddd                m0, m4, m2 ; -out1   out0
1502*c0909341SAndroid Build Coastguard Worker    psubd                m4, m2     ; t10  t2a
1503*c0909341SAndroid Build Coastguard Worker    REPX    {pmaxsd x, m12}, m6, m5, m3, m4
1504*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m6, m5, m3, m4
    ; Middle outputs: multiply by 2896, add the rounding term once per
    ; sum/difference pair (through m6 and m4), then shift right by 12.
1505*c0909341SAndroid Build Coastguard Worker    REPX    {pmulld x, m10}, m6, m5, m3, m4
1506*c0909341SAndroid Build Coastguard Worker    paddd                m6, m11
1507*c0909341SAndroid Build Coastguard Worker    paddd                m4, m11
1508*c0909341SAndroid Build Coastguard Worker    paddd                m2, m6, m5 ; -out5   out4
1509*c0909341SAndroid Build Coastguard Worker    psubd                m6, m5     ;  out10 -out11
1510*c0909341SAndroid Build Coastguard Worker    psubd                m5, m4, m3 ; -out9   out8
1511*c0909341SAndroid Build Coastguard Worker    paddd                m3, m4     ;  out6  -out7
1512*c0909341SAndroid Build Coastguard Worker    REPX     {psrad  x, 12}, m2, m3, m5, m6
    ; Apply the sign pattern held in m9, then its qword-swapped form, to the
    ; two groups of registers.
1513*c0909341SAndroid Build Coastguard Worker    REPX     {psignd x, m9}, m1, m8, m3, m6
1514*c0909341SAndroid Build Coastguard Worker    pshufd               m9, m9, q1032
1515*c0909341SAndroid Build Coastguard Worker    REPX     {psignd x, m9}, m0, m7, m2, m5
1516*c0909341SAndroid Build Coastguard Worker    ret
1517*c0909341SAndroid Build Coastguard Worker
; 4x16 instantiations whose first transform type is flipadst (bitdepth
; argument omitted, so the macro default of 10 applies).
1518*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN flipadst, dct
1519*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN flipadst, adst
1520*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN flipadst, flipadst
1521*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN flipadst, identity
1522*c0909341SAndroid Build Coastguard Worker
; iflipadst_4x16_internal_10bpc
;   .pass1      same two helper calls as the iadst 4x16 first pass, but the
;               >>13 results are placed into m0-m7 from a different
;               permutation of the source registers.
;   .pass2      reuses the iadst 4x16 .pass2_main, blends with the masks
;               0x33/0xcc, and writes the four row groups in the register
;               order m4, m2, m1, m3.
;   .write_4x4  in: m0 = residual, m7 = 0, m8 = clamp maximum. Row order in
;               the register is row0 row3 | row1 row2.
1523*c0909341SAndroid Build Coastguard Workercglobal iflipadst_4x16_internal_10bpc, 0, 7, 11, dst, stride, c, eob, tx2
1524*c0909341SAndroid Build Coastguard Worker.pass1:
1525*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x4_internal_10bpc).main
1526*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m6, [pd_6144] ; handed to .main_end in m6
1527*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x4_internal_10bpc).main_end
    ; Order matters: every source register is read before it is overwritten.
1528*c0909341SAndroid Build Coastguard Worker    psrad                m0, m3, 13
1529*c0909341SAndroid Build Coastguard Worker    psrad                m1, m2, 13
1530*c0909341SAndroid Build Coastguard Worker    psrad                m2, m5, 13
1531*c0909341SAndroid Build Coastguard Worker    psrad                m3, m4, 13
1532*c0909341SAndroid Build Coastguard Worker    psrad                m4, m7, 13
1533*c0909341SAndroid Build Coastguard Worker    psrad                m5, m6, 13
1534*c0909341SAndroid Build Coastguard Worker    psrad                m6, m9, 13
1535*c0909341SAndroid Build Coastguard Worker    psrad                m7, m8, 13
1536*c0909341SAndroid Build Coastguard Worker    jmp                tx2q
1537*c0909341SAndroid Build Coastguard Worker.pass2:
1538*c0909341SAndroid Build Coastguard Worker    call m(iadst_4x16_internal_10bpc).pass2_main
1539*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m5, [pw_2048]
1540*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m8, [pixel_10bpc_max]
1541*c0909341SAndroid Build Coastguard Worker    lea                  r6, [strideq*3]
1542*c0909341SAndroid Build Coastguard Worker    vpblendd             m4, m3, m0, 0x33 ; -out0   out3   out1  -out2
1543*c0909341SAndroid Build Coastguard Worker    pshufd               m2, m2, q1032    ; -out11  out8   out10 -out9
1544*c0909341SAndroid Build Coastguard Worker    vpblendd             m3, m0, 0xcc     ; -out12  out15  out13 -out14
1545*c0909341SAndroid Build Coastguard Worker    pxor                 m7, m7
1546*c0909341SAndroid Build Coastguard Worker    psubw                m9, m7, m5
1547*c0909341SAndroid Build Coastguard Worker    vpblendd             m9, m5, 0x3c     ; -2048   2048   2048  -2048
1548*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m4, m9
1549*c0909341SAndroid Build Coastguard Worker    call .write_4x4
1550*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m2, m9
1551*c0909341SAndroid Build Coastguard Worker    call .write_4x4
1552*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m1, m9
1553*c0909341SAndroid Build Coastguard Worker    call .write_4x4
1554*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m3, m9
1555*c0909341SAndroid Build Coastguard Worker    call .write_4x4
1556*c0909341SAndroid Build Coastguard Worker    RET
1557*c0909341SAndroid Build Coastguard WorkerALIGN function_align
1558*c0909341SAndroid Build Coastguard Worker.write_4x4:
    ; Gather four 8-byte dst rows into m4 in qword order row0 row3 | row1 row2.
1559*c0909341SAndroid Build Coastguard Worker    movq                xm4, [dstq+strideq*0]
1560*c0909341SAndroid Build Coastguard Worker    movhps              xm4, [dstq+r6       ]
1561*c0909341SAndroid Build Coastguard Worker    vpbroadcastq         m5, [dstq+strideq*1]
1562*c0909341SAndroid Build Coastguard Worker    vpbroadcastq         m6, [dstq+strideq*2]
    ; Clear the 64 coefficient bytes covered by this call and step cq past them.
1563*c0909341SAndroid Build Coastguard Worker    mova          [cq+32*0], m7
1564*c0909341SAndroid Build Coastguard Worker    mova          [cq+32*1], m7
1565*c0909341SAndroid Build Coastguard Worker    add                  cq, 32*2
1566*c0909341SAndroid Build Coastguard Worker    vpblendd             m4, m5, 0x30 ; row 1 -> lower qword of the high lane
1567*c0909341SAndroid Build Coastguard Worker    vpblendd             m4, m6, 0xc0 ; row 2 -> upper qword of the high lane
1568*c0909341SAndroid Build Coastguard Worker    paddw                m4, m0
1569*c0909341SAndroid Build Coastguard Worker    pmaxsw               m4, m7
1570*c0909341SAndroid Build Coastguard Worker    pminsw               m4, m8
1571*c0909341SAndroid Build Coastguard Worker    vextracti128        xm5, m4, 1
1572*c0909341SAndroid Build Coastguard Worker    movq   [dstq+strideq*0], xm4
1573*c0909341SAndroid Build Coastguard Worker    movq   [dstq+strideq*1], xm5
1574*c0909341SAndroid Build Coastguard Worker    movhps [dstq+strideq*2], xm5
1575*c0909341SAndroid Build Coastguard Worker    movhps [dstq+r6       ], xm4
1576*c0909341SAndroid Build Coastguard Worker    lea                dstq, [dstq+strideq*4]
1577*c0909341SAndroid Build Coastguard Worker    ret
1578*c0909341SAndroid Build Coastguard Worker
; 4x16 instantiations whose first transform type is identity, one per second
; transform type. The bitdepth argument is omitted, so INV_TXFM_4X16_FN's
; default of 10 applies.
1579*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN identity, dct
1580*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN identity, adst
1581*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN identity, flipadst
1582*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN identity, identity
1583*c0909341SAndroid Build Coastguard Worker
; iidentity_4x16_internal_10bpc -- worker for the 4x16 identity transform at
; 10 bpc (per its name). Named register arguments: dst, stride, c, eob, tx2.
;
; Pass 1 (entry): loads eight 32-byte rows of dword coefficients from cq,
;   multiplies each by the broadcast pd_5793 constant, adds the broadcast
;   pd_6144 constant and arithmetic-shifts right by 13. Rows 0..7 end up in
;   m0, m4, m1, m5, m2, m6, m3, m7 respectively. Control continues at tx2q.
; Pass 2 (.pass2): packs m0-m7 into four registers of saturated words,
;   computes 2*x + pmulhrsw(x, pw_1697x16) with saturating adds, then lets
;   .pass2_end scale, add to the destination, clamp and store.
1584*c0909341SAndroid Build Coastguard Workercglobal iidentity_4x16_internal_10bpc, 0, 7, 11, dst, stride, c, eob, tx2
1585*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m7, [pd_5793]
1586*c0909341SAndroid Build Coastguard Worker    pmulld               m0, m7, [cq+32*0]
1587*c0909341SAndroid Build Coastguard Worker    pmulld               m4, m7, [cq+32*1]
1588*c0909341SAndroid Build Coastguard Worker    pmulld               m1, m7, [cq+32*2]
1589*c0909341SAndroid Build Coastguard Worker    pmulld               m5, m7, [cq+32*3]
1590*c0909341SAndroid Build Coastguard Worker    pmulld               m2, m7, [cq+32*4]
1591*c0909341SAndroid Build Coastguard Worker    pmulld               m6, m7, [cq+32*5]
1592*c0909341SAndroid Build Coastguard Worker    pmulld               m3, m7, [cq+32*6]
    ; The multiplier register itself receives the last row, so it is used up here.
1593*c0909341SAndroid Build Coastguard Worker    pmulld               m7,     [cq+32*7]
1594*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m8, [pd_6144]
1595*c0909341SAndroid Build Coastguard Worker    REPX      {paddd x, m8}, m0, m4, m1, m5, m2, m6, m3, m7
1596*c0909341SAndroid Build Coastguard Worker    REPX      {psrad x, 13}, m0, m4, m1, m5, m2, m6, m3, m7
1597*c0909341SAndroid Build Coastguard Worker    jmp                tx2q
1598*c0909341SAndroid Build Coastguard Worker.pass2:
    ; dword -> word with signed saturation; pairs (m0,m4) (m1,m5) (m2,m6) (m3,m7)
1599*c0909341SAndroid Build Coastguard Worker    packssdw             m0, m4
1600*c0909341SAndroid Build Coastguard Worker    packssdw             m1, m5
1601*c0909341SAndroid Build Coastguard Worker    packssdw             m2, m6
1602*c0909341SAndroid Build Coastguard Worker    packssdw             m3, m7
1603*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m7, [pw_1697x16]
    ; m8 = pmulhrsw multiplier applied to the results inside .pass2_end
1604*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m8, [pw_2048]
    ; m4..m7 = pmulhrsw(x, pw_1697x16) for x = m0..m3 (m7 is consumed last)
1605*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m4, m7, m0
1606*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m5, m7, m1
1607*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m6, m7, m2
1608*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m7, m3
    ; x = 2*x (saturating), then x += product computed above
1609*c0909341SAndroid Build Coastguard Worker    REPX      {paddsw x, x}, m0, m1, m2, m3
1610*c0909341SAndroid Build Coastguard Worker    paddsw               m0, m4
1611*c0909341SAndroid Build Coastguard Worker    paddsw               m1, m5
1612*c0909341SAndroid Build Coastguard Worker    paddsw               m2, m6
1613*c0909341SAndroid Build Coastguard Worker    paddsw               m3, m7
    ; m4 = upper clamp bound used by .write_2x4x2
1614*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m4, [pixel_10bpc_max]
1615*c0909341SAndroid Build Coastguard Worker    call .pass2_end
1616*c0909341SAndroid Build Coastguard Worker    RET
1617*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; .pass2_end -- shared output tail; iidentity_4x16_internal_12bpc calls it too.
; In:  m0-m3 = packed word residuals, m4 = pixel maximum,
;      m8 = word multiplier for pmulhrsw (pw_2048 here, pw_16384 for 12 bpc).
; Interleaves words and dwords of m0-m3 into the row groupings noted below,
; sets r6 = stride*5 and m3 = 0, then emits four groups of four rows through
; .write_2x4x2. dst is advanced by 4 rows between the second and third group,
; on top of the 2 rows that every .write_2x4x2 call advances.
1618*c0909341SAndroid Build Coastguard Worker.pass2_end:
1619*c0909341SAndroid Build Coastguard Worker    punpckhwd            m7, m0, m1
1620*c0909341SAndroid Build Coastguard Worker    punpcklwd            m0, m1
1621*c0909341SAndroid Build Coastguard Worker    punpckhwd            m1, m2, m3
1622*c0909341SAndroid Build Coastguard Worker    punpcklwd            m2, m3
1623*c0909341SAndroid Build Coastguard Worker    lea                  r6, [strideq*5]
    ; m3 is free after the unpacks above; from here on it is the zero register
1624*c0909341SAndroid Build Coastguard Worker    pxor                 m3, m3
1625*c0909341SAndroid Build Coastguard Worker    punpckhdq            m5, m0, m2 ; 2 3   6 7
1626*c0909341SAndroid Build Coastguard Worker    punpckldq            m0, m2     ; 0 1   4 5
1627*c0909341SAndroid Build Coastguard Worker    punpckldq            m6, m7, m1 ; 8 9   c d
1628*c0909341SAndroid Build Coastguard Worker    punpckhdq            m7, m1     ; a b   e f
1629*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m8
1630*c0909341SAndroid Build Coastguard Worker    call .write_2x4x2
1631*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m5, m8
1632*c0909341SAndroid Build Coastguard Worker    call .write_2x4x2
1633*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m6, m8
    ; two calls moved dst by 4 rows; skip 4 more so the next group starts at row 8
1634*c0909341SAndroid Build Coastguard Worker    lea                dstq, [dstq+strideq*4]
1635*c0909341SAndroid Build Coastguard Worker    call .write_2x4x2
1636*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m7, m8
1637*c0909341SAndroid Build Coastguard Worker    call .write_2x4x2
1638*c0909341SAndroid Build Coastguard Worker    ret
1639*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; .write_2x4x2 -- add the word residuals in m0 to four 8-byte destination rows
; at dst + stride*{0, 1, 4, 5}, clamp to [m3, m4] and store them back.
; In:  m0 = residuals, m3 = zero (set by .pass2_end), m4 = pixel maximum,
;      r6 = stride*5.
; Side effects: zeroes 64 bytes at cq and advances cq by 64; advances dst by
; two rows; clobbers m1 and m2.
1640*c0909341SAndroid Build Coastguard Worker.write_2x4x2:
    ; low lane: rows +0 / +1; high lane: rows +4 / +5 merged in via blends
1641*c0909341SAndroid Build Coastguard Worker    movq                xm1, [dstq+strideq*0]
1642*c0909341SAndroid Build Coastguard Worker    movhps              xm1, [dstq+strideq*1]
1643*c0909341SAndroid Build Coastguard Worker    vpbroadcastq         m2, [dstq+strideq*4]
1644*c0909341SAndroid Build Coastguard Worker    vpblendd             m1, m2, 0x30
1645*c0909341SAndroid Build Coastguard Worker    vpbroadcastq         m2, [dstq+r6       ]
1646*c0909341SAndroid Build Coastguard Worker    vpblendd             m1, m2, 0xc0
    ; clear the coefficient rows that have been consumed
1647*c0909341SAndroid Build Coastguard Worker    mova          [cq+32*0], m3
1648*c0909341SAndroid Build Coastguard Worker    mova          [cq+32*1], m3
1649*c0909341SAndroid Build Coastguard Worker    add                  cq, 32*2
1650*c0909341SAndroid Build Coastguard Worker    paddw                m1, m0
1651*c0909341SAndroid Build Coastguard Worker    pmaxsw               m1, m3
1652*c0909341SAndroid Build Coastguard Worker    pminsw               m1, m4
1653*c0909341SAndroid Build Coastguard Worker    vextracti128        xm2, m1, 1
1654*c0909341SAndroid Build Coastguard Worker    movq   [dstq+strideq*0], xm1
1655*c0909341SAndroid Build Coastguard Worker    movhps [dstq+strideq*1], xm1
1656*c0909341SAndroid Build Coastguard Worker    movq   [dstq+strideq*4], xm2
1657*c0909341SAndroid Build Coastguard Worker    movhps [dstq+r6       ], xm2
1658*c0909341SAndroid Build Coastguard Worker    lea                dstq, [dstq+strideq*2]
1659*c0909341SAndroid Build Coastguard Worker    ret
1660*c0909341SAndroid Build Coastguard Worker
; Entry-point instantiations whose first transform type is dct.
; NOTE(review): INV_TXFM_4X16_FN is defined outside this excerpt; its arguments
; look like (type1, type2, bitdepth) -- confirm against the macro definition.
1661*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN dct, dct,      12
1662*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN dct, identity, 12
1663*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN dct, adst,     12
1664*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN dct, flipadst, 12
1665*c0909341SAndroid Build Coastguard Worker
; idct_4x16_internal_12bpc -- 4x16 DCT worker for 12 bpc (per its name).
; Pass 1 is delegated entirely to the 10 bpc implementation's .pass1 label.
; Pass 2 (.pass2): transposes the dwords in m0-m7, clamps the rearranged rows
; to [clip_18b_min, clip_18b_max], runs the pass1_main / pass1_main2 /
; pass1_main3 helpers of idct_16x4_internal_10bpc (defined outside this
; excerpt), shifts right by 3, packs to words, permutes with idct16_12_shuf
; and finishes in idct_4x16_internal_10bpc.pass2_end.
1666*c0909341SAndroid Build Coastguard Workercglobal idct_4x16_internal_12bpc, 0, 7, 14, dst, stride, c, eob, tx2
1667*c0909341SAndroid Build Coastguard Worker    jmp m(idct_4x16_internal_10bpc).pass1
1668*c0909341SAndroid Build Coastguard Worker.pass2:
    ; dword / qword interleave of m0-m7; row indices are annotated per line
1669*c0909341SAndroid Build Coastguard Worker    punpckldq            m8, m0, m1
1670*c0909341SAndroid Build Coastguard Worker    punpckhdq            m0, m1
1671*c0909341SAndroid Build Coastguard Worker    punpckldq            m9, m2, m3
1672*c0909341SAndroid Build Coastguard Worker    punpckhdq            m2, m3
1673*c0909341SAndroid Build Coastguard Worker    punpckldq            m1, m4, m5
1674*c0909341SAndroid Build Coastguard Worker    punpckhdq            m4, m5
1675*c0909341SAndroid Build Coastguard Worker    punpckldq            m3, m6, m7
1676*c0909341SAndroid Build Coastguard Worker    punpckhdq            m6, m7
1677*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m5, m0, m2         ;  2  6
1678*c0909341SAndroid Build Coastguard Worker    punpckhqdq          m12, m0, m2         ;  3  7
1679*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m0, m8, m9         ;  0  4
1680*c0909341SAndroid Build Coastguard Worker    punpckhqdq          m10, m8, m9         ;  1  5
1681*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m2, m1, m3         ;  8 12
1682*c0909341SAndroid Build Coastguard Worker    punpckhqdq          m13, m1, m3         ;  9 13
1683*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m9, m4, m6         ; 10 14
1684*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m4, m6             ; 11 15
1685*c0909341SAndroid Build Coastguard Worker    vperm2i128           m1,  m5,  m9, 0x20 ;  2 10
1686*c0909341SAndroid Build Coastguard Worker    vperm2i128           m3,  m9,  m5, 0x31 ; 14  6
1687*c0909341SAndroid Build Coastguard Worker    vpermq              m11,  m4, q1302     ; 15 11
1688*c0909341SAndroid Build Coastguard Worker    ; interleave
1689*c0909341SAndroid Build Coastguard Worker    REPX {vpermq x, x, q3120}, m0, m1, m2, m3, m10
    ; m8/m9 are reused as the clamp bounds now that the transpose temporaries are dead
1690*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m8, [clip_18b_min]
1691*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m9, [clip_18b_max]
1692*c0909341SAndroid Build Coastguard Worker    REPX     {pmaxsd x, m8}, m0, m1, m2, m3, m10, m11, m12, m13
1693*c0909341SAndroid Build Coastguard Worker    REPX     {pminsd x, m9}, m0, m1, m2, m3, m10, m11, m12, m13
1694*c0909341SAndroid Build Coastguard Worker    call m(idct_16x4_internal_10bpc).pass1_main
    ; NOTE(review): m5/m6 appear to be inputs of pass1_main2 -- confirm in that helper
1695*c0909341SAndroid Build Coastguard Worker    vpermq               m6, m12, q1302 ;  7  3
1696*c0909341SAndroid Build Coastguard Worker    vpermq               m5, m13, q3120 ;  9 13
1697*c0909341SAndroid Build Coastguard Worker    call m(idct_16x4_internal_10bpc).pass1_main2
1698*c0909341SAndroid Build Coastguard Worker    call m(idct_16x4_internal_10bpc).pass1_main3
1699*c0909341SAndroid Build Coastguard Worker    REPX       {psrad x, 3}, m0, m1, m2, m3, m4, m5, m6, m7
    ; dword -> word with signed saturation, pairing consecutive registers
1700*c0909341SAndroid Build Coastguard Worker    packssdw             m0, m1
1701*c0909341SAndroid Build Coastguard Worker    packssdw             m1, m2, m3
1702*c0909341SAndroid Build Coastguard Worker    packssdw             m2, m4, m5
1703*c0909341SAndroid Build Coastguard Worker    packssdw             m3, m6, m7
    ; reorder dwords with the idct16_12_shuf permutation table
1704*c0909341SAndroid Build Coastguard Worker    mova                 m4, [idct16_12_shuf]
1705*c0909341SAndroid Build Coastguard Worker    REPX  {vpermd x, m4, x}, m0, m1, m2, m3
    ; NOTE(review): m9 / m8 are presumably the scale and clamp inputs expected by
    ; idct_4x16_internal_10bpc.pass2_end -- that label is outside this excerpt.
1706*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m9, [pw_16384]
1707*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m8, [pixel_12bpc_max]
1708*c0909341SAndroid Build Coastguard Worker    call m(idct_4x16_internal_10bpc).pass2_end
1709*c0909341SAndroid Build Coastguard Worker    RET
1710*c0909341SAndroid Build Coastguard Worker
; Entry-point instantiations whose first transform type is adst.
; NOTE(review): INV_TXFM_4X16_FN is defined outside this excerpt; its arguments
; look like (type1, type2, bitdepth) -- confirm against the macro definition.
1711*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN adst, dct,      12
1712*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN adst, adst,     12
1713*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN adst, flipadst, 12
1714*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN adst, identity, 12
1715*c0909341SAndroid Build Coastguard Worker
; iadst_4x16_internal_12bpc -- 4x16 ADST worker for 12 bpc (per its name).
; Pass 1 (entry): .main_pass1 leaves eight biased values in
;   m4, m5, m2, m3, m8, m9, m6, m7; they are shifted right by 12 into m0-m7
;   in that order before continuing at tx2q.
; Pass 2 (.pass2): clamps m0-m7 to [clip_18b_min, clip_18b_max], transposes
;   via .transpose_16x4, runs iadst_4x16_internal_10bpc.main2 (outside this
;   excerpt), shifts every output right by 3 (some after a qword swap) and
;   falls through into .pass2_end.
; .pass2_end is also the jump target of iflipadst_4x16_internal_12bpc.
1716*c0909341SAndroid Build Coastguard Workercglobal iadst_4x16_internal_12bpc, 0, 7, 14, dst, stride, c, eob, tx2
1717*c0909341SAndroid Build Coastguard Worker    call .main_pass1
1718*c0909341SAndroid Build Coastguard Worker    psrad                m0, m4, 12
1719*c0909341SAndroid Build Coastguard Worker    psrad                m1, m5, 12
1720*c0909341SAndroid Build Coastguard Worker    psrad                m2, 12
1721*c0909341SAndroid Build Coastguard Worker    psrad                m3, 12
1722*c0909341SAndroid Build Coastguard Worker    psrad                m4, m8, 12
1723*c0909341SAndroid Build Coastguard Worker    psrad                m5, m9, 12
1724*c0909341SAndroid Build Coastguard Worker    psrad                m6, 12
1725*c0909341SAndroid Build Coastguard Worker    psrad                m7, 12
1726*c0909341SAndroid Build Coastguard Worker    jmp                tx2q
1727*c0909341SAndroid Build Coastguard Worker.pass2:
1728*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [clip_18b_min]
1729*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_18b_max]
1730*c0909341SAndroid Build Coastguard Worker    REPX    {pmaxsd x, m12}, m0, m1, m2, m3, m4, m5, m6, m7
1731*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m0, m1, m2, m3, m4, m5, m6, m7
1732*c0909341SAndroid Build Coastguard Worker    call .transpose_16x4
1733*c0909341SAndroid Build Coastguard Worker    call m(iadst_4x16_internal_10bpc).main2
    ; Rearrange main2's results into m0-m7; pshufd q1032 swaps the two qwords
    ; of each 128-bit lane. The copies made with psrad are shifted right away;
    ; the remaining registers get their >>3 in the REPX below.
1734*c0909341SAndroid Build Coastguard Worker    pshufd               m4, m5, q1032
1735*c0909341SAndroid Build Coastguard Worker    psrad                m5, m6, 3
1736*c0909341SAndroid Build Coastguard Worker    pshufd               m6, m7, q1032
1737*c0909341SAndroid Build Coastguard Worker    psrad                m7, m8, 3
1738*c0909341SAndroid Build Coastguard Worker    REPX {pshufd x, x, q1032}, m0, m2
1739*c0909341SAndroid Build Coastguard Worker    REPX       {psrad x, 3}, m0, m1, m2, m3, m4, m6
; .pass2_end -- In: m0-m7 = dword results. Packs them to saturated words,
; permutes with iadst16_12_shuf, scales each register with pmulhrsw by
; pw_16384 and passes it in m0 to iadst_4x16_internal_10bpc.write_4x4
; (outside this excerpt), four times. Sets r6 = stride*3, m7 = 0 and
; m8 = pixel_12bpc_max beforehand -- presumably inputs of write_4x4.
1740*c0909341SAndroid Build Coastguard Worker.pass2_end:
1741*c0909341SAndroid Build Coastguard Worker    packssdw             m0, m1
1742*c0909341SAndroid Build Coastguard Worker    packssdw             m1, m2, m3
1743*c0909341SAndroid Build Coastguard Worker    packssdw             m2, m4, m5
1744*c0909341SAndroid Build Coastguard Worker    packssdw             m3, m6, m7
1745*c0909341SAndroid Build Coastguard Worker    mova                 m4, [iadst16_12_shuf]
1746*c0909341SAndroid Build Coastguard Worker    REPX  {vpermd x, m4, x}, m0, m1, m2, m3
1747*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m9, [pw_16384]
1748*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m8, [pixel_12bpc_max]
1749*c0909341SAndroid Build Coastguard Worker    lea                  r6, [strideq*3]
1750*c0909341SAndroid Build Coastguard Worker    pxor                 m7, m7
1751*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m9
1752*c0909341SAndroid Build Coastguard Worker    call m(iadst_4x16_internal_10bpc).write_4x4
1753*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m9, m1
1754*c0909341SAndroid Build Coastguard Worker    call m(iadst_4x16_internal_10bpc).write_4x4
1755*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m9, m2
1756*c0909341SAndroid Build Coastguard Worker    call m(iadst_4x16_internal_10bpc).write_4x4
1757*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m9, m3
1758*c0909341SAndroid Build Coastguard Worker    call m(iadst_4x16_internal_10bpc).write_4x4
1759*c0909341SAndroid Build Coastguard Worker    RET
1760*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; .transpose_16x4 -- In/out: m0-m7 (dwords). Uses m8-m11 as temporaries.
; The output row pairing per register is annotated on the vperm2i128 lines.
; Also called from iflipadst_4x16_internal_12bpc.
1761*c0909341SAndroid Build Coastguard Worker.transpose_16x4:
1762*c0909341SAndroid Build Coastguard Worker    ; transpose & interleave
1763*c0909341SAndroid Build Coastguard Worker    punpckldq            m8, m0, m1
1764*c0909341SAndroid Build Coastguard Worker    punpckhdq            m0, m1
1765*c0909341SAndroid Build Coastguard Worker    punpckldq            m9, m2, m3
1766*c0909341SAndroid Build Coastguard Worker    punpckhdq            m2, m3
1767*c0909341SAndroid Build Coastguard Worker    punpckldq            m1, m4, m5
1768*c0909341SAndroid Build Coastguard Worker    punpckhdq            m4, m5
1769*c0909341SAndroid Build Coastguard Worker    punpckldq            m3, m6, m7
1770*c0909341SAndroid Build Coastguard Worker    punpckhdq            m6, m7
1771*c0909341SAndroid Build Coastguard Worker    punpcklqdq          m10, m8, m0
1772*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m0, m8
1773*c0909341SAndroid Build Coastguard Worker    punpcklqdq          m11, m9, m2
1774*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m2, m9
1775*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m8, m1, m4
1776*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m4, m1
1777*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m9, m3, m6
1778*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m6, m3
1779*c0909341SAndroid Build Coastguard Worker    vperm2i128           m5,  m0,  m2, 0x31   ;  7  5
1780*c0909341SAndroid Build Coastguard Worker    vperm2i128           m7,  m0,  m2, 0x20   ;  3  1
1781*c0909341SAndroid Build Coastguard Worker    vperm2i128           m0, m10, m11, 0x20   ;  0  2
1782*c0909341SAndroid Build Coastguard Worker    vperm2i128           m2, m10, m11, 0x31   ;  4  6
1783*c0909341SAndroid Build Coastguard Worker    vperm2i128           m1,  m4,  m6, 0x31   ; 15 13
1784*c0909341SAndroid Build Coastguard Worker    vperm2i128           m3,  m4,  m6, 0x20   ; 11  9
1785*c0909341SAndroid Build Coastguard Worker    vperm2i128           m4,  m8,  m9, 0x20   ;  8 10
1786*c0909341SAndroid Build Coastguard Worker    vperm2i128           m6,  m8,  m9, 0x31   ; 12 14
1787*c0909341SAndroid Build Coastguard Worker    ret
1788*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; .main_pass1 -- first-pass core shared with iflipadst_4x16_internal_12bpc.
; Calls iadst_16x4_internal_10bpc.main (outside this excerpt), combines its
; results with the adds/subs below, halves eight values (psrad 1) and adds
; the broadcast pd_3072 constant to each of them.
; Out: m4, m5, m2, m3, m8, m9, m6, m7. Callers finish with psrad 12.
1789*c0909341SAndroid Build Coastguard Worker.main_pass1:
1790*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x4_internal_10bpc).main
1791*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m6, [pd_3072]
1792*c0909341SAndroid Build Coastguard Worker    paddd               m10, m4, m5
1793*c0909341SAndroid Build Coastguard Worker    psubd                m4, m3
1794*c0909341SAndroid Build Coastguard Worker    psubd                m5, m3
1795*c0909341SAndroid Build Coastguard Worker    paddd                m3, m10
1796*c0909341SAndroid Build Coastguard Worker    psubd                m8, m7, m1
1797*c0909341SAndroid Build Coastguard Worker    paddd                m7, m9
1798*c0909341SAndroid Build Coastguard Worker    psubd                m9, m1
1799*c0909341SAndroid Build Coastguard Worker    paddd                m7, m1
1800*c0909341SAndroid Build Coastguard Worker    REPX      {psrad x, 1 }, m4, m5, m2, m3, m8, m9, m0, m7
1801*c0909341SAndroid Build Coastguard Worker    REPX      {paddd x, m6}, m4, m5, m2, m3, m8, m9, m7
    ; m0's biased value is produced in m6, overwriting the constant (m0 + pd_3072)
1802*c0909341SAndroid Build Coastguard Worker    paddd                m6, m0
1803*c0909341SAndroid Build Coastguard Worker    ret
1804*c0909341SAndroid Build Coastguard Worker
; Entry-point instantiations whose first transform type is flipadst.
; NOTE(review): INV_TXFM_4X16_FN is defined outside this excerpt; its arguments
; look like (type1, type2, bitdepth) -- confirm against the macro definition.
1805*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN flipadst, dct,      12
1806*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN flipadst, adst,     12
1807*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN flipadst, flipadst, 12
1808*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN flipadst, identity, 12
1809*c0909341SAndroid Build Coastguard Worker
; iflipadst_4x16_internal_12bpc -- 4x16 flipped-ADST worker for 12 bpc (per
; its name). It shares the arithmetic of iadst_4x16_internal_12bpc and differs
; only in the order in which results are assigned to m0-m7.
; Pass 1 (entry): takes the .main_pass1 outputs in the order
;   m3, m2, m5, m4, m7, m6, m9, m8 and shifts each right by 12 into m0-m7.
; Pass 2 (.pass2): clamp, transpose and main2 exactly as in the adst version,
;   followed by a different output permutation, then a jump into the adst
;   version's .pass2_end, which ends with RET.
1810*c0909341SAndroid Build Coastguard Workercglobal iflipadst_4x16_internal_12bpc, 0, 7, 14, dst, stride, c, eob, tx2
1811*c0909341SAndroid Build Coastguard Worker    call m(iadst_4x16_internal_12bpc).main_pass1
    ; Each source register is read before a later line overwrites it.
1812*c0909341SAndroid Build Coastguard Worker    psrad                m0, m3, 12
1813*c0909341SAndroid Build Coastguard Worker    psrad                m1, m2, 12
1814*c0909341SAndroid Build Coastguard Worker    psrad                m2, m5, 12
1815*c0909341SAndroid Build Coastguard Worker    psrad                m3, m4, 12
1816*c0909341SAndroid Build Coastguard Worker    psrad                m4, m7, 12
1817*c0909341SAndroid Build Coastguard Worker    psrad                m5, m6, 12
1818*c0909341SAndroid Build Coastguard Worker    psrad                m6, m9, 12
1819*c0909341SAndroid Build Coastguard Worker    psrad                m7, m8, 12
1820*c0909341SAndroid Build Coastguard Worker    jmp                tx2q
1821*c0909341SAndroid Build Coastguard Worker.pass2:
1822*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [clip_18b_min]
1823*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_18b_max]
1824*c0909341SAndroid Build Coastguard Worker    REPX    {pmaxsd x, m12}, m0, m1, m2, m3, m4, m5, m6, m7
1825*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m0, m1, m2, m3, m4, m5, m6, m7
1826*c0909341SAndroid Build Coastguard Worker    call m(iadst_4x16_internal_12bpc).transpose_16x4
1827*c0909341SAndroid Build Coastguard Worker    call m(iadst_4x16_internal_10bpc).main2
    ; Permute main2's results into m0-m7. Copies made with psrad are shifted
    ; immediately; those made with pshufd (qword swap per lane) get their >>3
    ; in the REPX below. The statement order matters: every source is read
    ; before it is overwritten.
1828*c0909341SAndroid Build Coastguard Worker    pshufd               m4, m3, q1032
1829*c0909341SAndroid Build Coastguard Worker    psrad                m3, m5, 3
1830*c0909341SAndroid Build Coastguard Worker    psrad                m5, m2, 3
1831*c0909341SAndroid Build Coastguard Worker    pshufd               m2, m6, q1032
1832*c0909341SAndroid Build Coastguard Worker    pshufd               m6, m1, q1032
1833*c0909341SAndroid Build Coastguard Worker    psrad                m1, m7, 3
1834*c0909341SAndroid Build Coastguard Worker    psrad                m7, m0, 3
1835*c0909341SAndroid Build Coastguard Worker    pshufd               m0, m8, q1032
1836*c0909341SAndroid Build Coastguard Worker    REPX       {psrad x, 3}, m0, m2, m4, m6
1837*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_4x16_internal_12bpc).pass2_end
1838*c0909341SAndroid Build Coastguard Worker
; Entry-point instantiations whose first transform type is identity.
; NOTE(review): INV_TXFM_4X16_FN is defined outside this excerpt; its arguments
; look like (type1, type2, bitdepth) -- confirm against the macro definition.
1839*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN identity, dct,      12
1840*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN identity, adst,     12
1841*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN identity, flipadst, 12
1842*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN identity, identity, 12
1843*c0909341SAndroid Build Coastguard Worker
; iidentity_4x16_internal_12bpc -- 4x16 identity-transform worker for 12 bpc
; (per its name).
; Pass 1 (entry): for each of the eight 32-byte dword rows x at cq computes
;   t = (x * pd_1697 + pd_6144) >> 12;  x = (x + t) >> 1
;   Rows 0..7 end up in m0, m4, m1, m5, m2, m6, m3, m7. The two halves are
;   interleaved: rows 4-7 are loaded into m10-m13 and multiplied while rows
;   0-3 are being finished.
; Pass 2 (.pass2): clamps to [clip_18b_min, clip_18b_max], computes
;   (x * pd_5793 + pd_1024) >> 14, packs to saturated words and finishes in
;   iidentity_4x16_internal_10bpc.pass2_end with m8 = pw_16384 (pmulhrsw
;   multiplier) and m4 = pixel_12bpc_max (upper clamp).
1844*c0909341SAndroid Build Coastguard Workercglobal iidentity_4x16_internal_12bpc, 0, 7, 14, dst, stride, c, eob, tx2
1845*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m8, [pd_1697]
1846*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+32*0]
1847*c0909341SAndroid Build Coastguard Worker    mova                 m4, [cq+32*1]
1848*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+32*2]
1849*c0909341SAndroid Build Coastguard Worker    mova                 m5, [cq+32*3]
1850*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m9, [pd_6144]
    ; products for rows 0-3 go to m2, m6, m3, m7 (temporaries at this point)
1851*c0909341SAndroid Build Coastguard Worker    pmulld               m2, m8, m0
1852*c0909341SAndroid Build Coastguard Worker    pmulld               m6, m8, m4
1853*c0909341SAndroid Build Coastguard Worker    pmulld               m3, m8, m1
1854*c0909341SAndroid Build Coastguard Worker    pmulld               m7, m8, m5
1855*c0909341SAndroid Build Coastguard Worker    mova                m10, [cq+32*4]
1856*c0909341SAndroid Build Coastguard Worker    mova                m11, [cq+32*5]
1857*c0909341SAndroid Build Coastguard Worker    mova                m12, [cq+32*6]
1858*c0909341SAndroid Build Coastguard Worker    mova                m13, [cq+32*7]
1859*c0909341SAndroid Build Coastguard Worker    REPX     {paddd  x, m9}, m2, m6, m3, m7
1860*c0909341SAndroid Build Coastguard Worker    REPX     {psrad  x, 12}, m2, m6, m3, m7
    ; finish rows 0-3 (x += t) while starting the products for rows 4-7 in the
    ; temporaries that were just consumed
1861*c0909341SAndroid Build Coastguard Worker    paddd                m0, m2
1862*c0909341SAndroid Build Coastguard Worker    pmulld               m2, m8, m10
1863*c0909341SAndroid Build Coastguard Worker    paddd                m4, m6
1864*c0909341SAndroid Build Coastguard Worker    pmulld               m6, m8, m11
1865*c0909341SAndroid Build Coastguard Worker    paddd                m1, m3
1866*c0909341SAndroid Build Coastguard Worker    pmulld               m3, m8, m12
1867*c0909341SAndroid Build Coastguard Worker    paddd                m5, m7
1868*c0909341SAndroid Build Coastguard Worker    pmulld               m7, m8, m13
1869*c0909341SAndroid Build Coastguard Worker    REPX     {psrad  x, 1 }, m0, m4, m1, m5
1870*c0909341SAndroid Build Coastguard Worker    REPX     {paddd  x, m9}, m2, m6, m3, m7
1871*c0909341SAndroid Build Coastguard Worker    REPX     {psrad  x, 12}, m2, m6, m3, m7
    ; rows 4-7: add the original values kept in m10-m13, then halve
1872*c0909341SAndroid Build Coastguard Worker    paddd                m2, m10
1873*c0909341SAndroid Build Coastguard Worker    paddd                m6, m11
1874*c0909341SAndroid Build Coastguard Worker    paddd                m3, m12
1875*c0909341SAndroid Build Coastguard Worker    paddd                m7, m13
1876*c0909341SAndroid Build Coastguard Worker    REPX     {psrad  x, 1 }, m2, m6, m3, m7
1877*c0909341SAndroid Build Coastguard Worker    jmp                tx2q
1878*c0909341SAndroid Build Coastguard Worker.pass2:
1879*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [clip_18b_min]
1880*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_18b_max]
1881*c0909341SAndroid Build Coastguard Worker    REPX    {pmaxsd x, m12}, m0, m1, m2, m3, m4, m5, m6, m7
1882*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m0, m1, m2, m3, m4, m5, m6, m7
1883*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m8, [pd_5793]
1884*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m9, [pd_1024]
1885*c0909341SAndroid Build Coastguard Worker    REPX     {pmulld x, m8}, m0, m1, m2, m3, m4, m5, m6, m7
1886*c0909341SAndroid Build Coastguard Worker    REPX     {paddd  x, m9}, m0, m1, m2, m3, m4, m5, m6, m7
1887*c0909341SAndroid Build Coastguard Worker    REPX     {psrad  x, 14}, m0, m1, m2, m3, m4, m5, m6, m7
    ; dword -> word with signed saturation; same pairing as the 10 bpc .pass2
1888*c0909341SAndroid Build Coastguard Worker    packssdw             m0, m4
1889*c0909341SAndroid Build Coastguard Worker    packssdw             m1, m5
1890*c0909341SAndroid Build Coastguard Worker    packssdw             m2, m6
1891*c0909341SAndroid Build Coastguard Worker    packssdw             m3, m7
1892*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m8, [pw_16384]
1893*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m4, [pixel_12bpc_max]
1894*c0909341SAndroid Build Coastguard Worker    call m(iidentity_4x16_internal_10bpc).pass2_end
1895*c0909341SAndroid Build Coastguard Worker    RET
1896*c0909341SAndroid Build Coastguard Worker
; INV_TXFM_8X4_FN type1, type2[, bitdepth]  (bitdepth defaults to 10)
; Expands INV_TXFM_FN (defined outside this excerpt) for the 8x4 size. For the
; dct_dct combination it additionally emits the DC-only path:
;   - m2 is loaded with the broadcast dconly_<bitdepth>bpc constant;
;   - for 10 bpc, the .dconly label computes
;       r6d = ((([cq] * 181 + 128) >> 8) * 181 + 128) >> 8
;     (181/256 ~= 1/sqrt(2), applied twice), stores eobd into [cq], ORs 4
;     into r3d and jumps to the 8x8 dct_dct .dconly3 label;
;   - for other bit depths it jumps to the 10 bpc 8x4 .dconly label above.
; NOTE(review): r3d is presumably the row count consumed by .dconly3 -- that
; label is outside this excerpt, confirm there.
1897*c0909341SAndroid Build Coastguard Worker%macro INV_TXFM_8X4_FN 2-3 10 ; type1, type2, bitdepth
1898*c0909341SAndroid Build Coastguard Worker    INV_TXFM_FN          %1, %2, 0, 8x4, %3
1899*c0909341SAndroid Build Coastguard Worker%ifidn %1_%2, dct_dct
1900*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m2, [dconly_%3bpc]
1901*c0909341SAndroid Build Coastguard Worker%if %3 = 10
1902*c0909341SAndroid Build Coastguard Worker.dconly:
1903*c0909341SAndroid Build Coastguard Worker    imul                r6d, [cq], 181
    ; per the original "; 0" note eobd is zero here, so this clears the DC coefficient
1904*c0909341SAndroid Build Coastguard Worker    mov                [cq], eobd ; 0
1905*c0909341SAndroid Build Coastguard Worker    or                  r3d, 4
1906*c0909341SAndroid Build Coastguard Worker    add                 r6d, 128
1907*c0909341SAndroid Build Coastguard Worker    sar                 r6d, 8
1908*c0909341SAndroid Build Coastguard Worker    imul                r6d, 181
1909*c0909341SAndroid Build Coastguard Worker    add                 r6d, 128
1910*c0909341SAndroid Build Coastguard Worker    sar                 r6d, 8
1911*c0909341SAndroid Build Coastguard Worker    jmp m(inv_txfm_add_dct_dct_8x8_10bpc).dconly3
1912*c0909341SAndroid Build Coastguard Worker%else
1913*c0909341SAndroid Build Coastguard Worker    jmp m(inv_txfm_add_dct_dct_8x4_10bpc).dconly
1914*c0909341SAndroid Build Coastguard Worker%endif
1915*c0909341SAndroid Build Coastguard Worker%endif
1916*c0909341SAndroid Build Coastguard Worker%endmacro
1917*c0909341SAndroid Build Coastguard Worker
; 8x4 entry points whose first transform type is dct. The bit depth argument
; is omitted, so INV_TXFM_8X4_FN's default of 10 applies.
1918*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN dct, dct
1919*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN dct, identity
1920*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN dct, adst
1921*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN dct, flipadst
1922*c0909341SAndroid Build Coastguard Worker
; idct_8x4_internal_10bpc -- 8x4 DCT worker for 10 bpc (per its name).
; Entry: loads the clamp bounds clip_18b_min / clip_18b_max into m8 / m9 and
;   falls into .pass1.
; .pass1: loads eight 16-byte coefficient rows from cq in pairs (pairing noted
;   on the shufpd lines), computes (x * pd_2896 + pd_2048) >> 12 on each,
;   calls .main, forms the final sums/differences and continues at tx2q.
;   NOTE(review): 2896/4096 ~= 1/sqrt(2), presumably the rectangular-block
;   scaling -- confirm against the constant definitions.
; .pass2: packs m0-m3 to saturated words, regroups the lanes, applies the
;   deint_shuf byte shuffle and IDCT4_1D_PACKED_WORD (defined outside this
;   excerpt), then jumps to iadst_8x4_internal_10bpc.end to write the output.
1923*c0909341SAndroid Build Coastguard Workercglobal idct_8x4_internal_10bpc, 0, 7, 10, dst, stride, c, eob, tx2
1924*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m8, [clip_18b_min]
1925*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m9, [clip_18b_max]
1926*c0909341SAndroid Build Coastguard Worker.pass1:
1927*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m1, [cq+16*1]
1928*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m0, [cq+16*5]
1929*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m2, [cq+16*3]
1930*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m3, [cq+16*7]
    ; m6 = multiplier, m7 (loaded below) = rounding term; both are also passed to .main
1931*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m6, [pd_2896]
1932*c0909341SAndroid Build Coastguard Worker    shufpd               m1, m0, 0x0c ; 1 5
1933*c0909341SAndroid Build Coastguard Worker    shufpd               m3, m2, 0x0c ; 7 3
1934*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m0, [cq+16*0]
1935*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m4, [cq+16*2]
1936*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m2, [cq+16*4]
1937*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m5, [cq+16*6]
1938*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m7, [pd_2048]
1939*c0909341SAndroid Build Coastguard Worker    shufpd               m0, m4, 0x0c ; 0 2
1940*c0909341SAndroid Build Coastguard Worker    shufpd               m2, m5, 0x0c ; 4 6
1941*c0909341SAndroid Build Coastguard Worker    REPX {pmulld x, m6}, m1, m3, m0, m2
1942*c0909341SAndroid Build Coastguard Worker    REPX {paddd  x, m7}, m1, m3, m0, m2
1943*c0909341SAndroid Build Coastguard Worker    REPX {psrad  x, 12}, m1, m3, m0, m2
1944*c0909341SAndroid Build Coastguard Worker    call .main
1945*c0909341SAndroid Build Coastguard Worker    psubd                m3, m0, m4  ; out7 out6 (interleaved)
1946*c0909341SAndroid Build Coastguard Worker    paddd                m0, m4      ; out0 out1 (interleaved)
1947*c0909341SAndroid Build Coastguard Worker    paddd                m1, m2, m5  ; out3 out2 (interleaved)
1948*c0909341SAndroid Build Coastguard Worker    psubd                m2, m5      ; out4 out5 (interleaved)
    ; swap the qwords within each lane of m1 and m3
1949*c0909341SAndroid Build Coastguard Worker    pshufd               m1, m1, q1032
1950*c0909341SAndroid Build Coastguard Worker    pshufd               m3, m3, q1032
1951*c0909341SAndroid Build Coastguard Worker    jmp                tx2q
1952*c0909341SAndroid Build Coastguard Worker.pass2:
1953*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m4, [deint_shuf]
1954*c0909341SAndroid Build Coastguard Worker    packssdw             m0, m1
1955*c0909341SAndroid Build Coastguard Worker    packssdw             m2, m3
    ; m1 = high lanes of (m0, m2); m0 = low lanes of (m0, m2)
1956*c0909341SAndroid Build Coastguard Worker    vperm2i128           m1, m0, m2, 0x31
1957*c0909341SAndroid Build Coastguard Worker    vinserti128          m0, xm2, 1
1958*c0909341SAndroid Build Coastguard Worker    pshufb               m0, m4
1959*c0909341SAndroid Build Coastguard Worker    pshufb               m1, m4
1960*c0909341SAndroid Build Coastguard Worker    IDCT4_1D_PACKED_WORD  0, 1, 2, 3, 4, 7
1961*c0909341SAndroid Build Coastguard Worker    vpermq               m0, m0, q3120 ; out0 out1
1962*c0909341SAndroid Build Coastguard Worker    vpermq               m2, m1, q2031 ; out2 out3
1963*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_8x4_internal_10bpc).end
1964*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; .main -- shared 1-D core.
; In:  m0-m3 = scaled inputs paired as loaded in .pass1, m6 = pd_2896,
;      m7 = pd_2048, m8 / m9 = clamp bounds.
; Out: m0, m2 = clamped results of IDCT4_1D_PACKED; m5 = t4 t5; m4 = t7 t6
;      (per the annotations below). The caller forms the final
;      sums/differences from these.
; ITX_MULSUB_2D and IDCT4_1D_PACKED are macros defined outside this excerpt.
1965*c0909341SAndroid Build Coastguard Worker.main:
1966*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         1, 3, 4, 5, 6, 7, 799_3406, 4017_2276, 1
1967*c0909341SAndroid Build Coastguard Worker    IDCT4_1D_PACKED       0, 2, 4, 5, 6, 7
    ; NOTE(review): m6 is reloaded here, presumably because a macro above clobbers it
1968*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m6, [pd_2896]
1969*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m4, m1, m3   ; t4a  t7a
1970*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m1, m3       ; t5a  t6a
1971*c0909341SAndroid Build Coastguard Worker    psubd                m3, m4, m1   ; t5a  t6a
1972*c0909341SAndroid Build Coastguard Worker    paddd                m4, m1       ; t4   t7
1973*c0909341SAndroid Build Coastguard Worker    REPX     {pmaxsd x, m8}, m3, m4, m0, m2
1974*c0909341SAndroid Build Coastguard Worker    REPX     {pminsd x, m9}, m3, m4, m0, m2
    ; m3 *= pd_2896; with m1 = qword-swapped copy, (m3 + rnd -/+ m1) >> 12
    ; yields the difference (m5) and sum (m1) of the two scaled halves
1975*c0909341SAndroid Build Coastguard Worker    pmulld               m3, m6
1976*c0909341SAndroid Build Coastguard Worker    pshufd               m1, m3, q1032
1977*c0909341SAndroid Build Coastguard Worker    paddd                m3, m7
1978*c0909341SAndroid Build Coastguard Worker    psubd                m5, m3, m1
1979*c0909341SAndroid Build Coastguard Worker    paddd                m1, m3
1980*c0909341SAndroid Build Coastguard Worker    psrad                m5, 12
1981*c0909341SAndroid Build Coastguard Worker    psrad                m1, 12
1982*c0909341SAndroid Build Coastguard Worker    vpblendd             m5, m4, 0x33 ; t4   t5
1983*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m4, m1       ; t7   t6
1984*c0909341SAndroid Build Coastguard Worker    ret
1985*c0909341SAndroid Build Coastguard Worker
; 8x4 entry points whose first transform type is adst. The bit depth argument
; is omitted, so INV_TXFM_8X4_FN's default of 10 applies.
1986*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN adst, dct
1987*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN adst, adst
1988*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN adst, flipadst
1989*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN adst, identity
1990*c0909341SAndroid Build Coastguard Worker
1991*c0909341SAndroid Build Coastguard Workercglobal iadst_8x4_internal_10bpc, 0, 7, 10, dst, stride, c, eob, tx2
1992*c0909341SAndroid Build Coastguard Worker    call m(iadst_4x8_internal_10bpc).main
1993*c0909341SAndroid Build Coastguard Worker    vpblendd             m3, m0, m4, 0x33 ; out6 out7
1994*c0909341SAndroid Build Coastguard Worker    vpblendd             m0, m4, 0xcc     ; out0 out1
1995*c0909341SAndroid Build Coastguard Worker    pshufd               m1, m5, q1032
1996*c0909341SAndroid Build Coastguard Worker    psignd               m2, m6           ; out4 out5
1997*c0909341SAndroid Build Coastguard Worker    psignd               m1, m6           ; out2 out3
1998*c0909341SAndroid Build Coastguard Worker    jmp                tx2q
1999*c0909341SAndroid Build Coastguard Worker.pass2:
2000*c0909341SAndroid Build Coastguard Worker    call .pass2_main
2001*c0909341SAndroid Build Coastguard Worker    vpermq               m0, m0, q3120 ; out0 out1
2002*c0909341SAndroid Build Coastguard Worker    vpermq               m2, m1, q3120 ; out2 out3
2003*c0909341SAndroid Build Coastguard Worker.end:
2004*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m1, [pw_2048]
2005*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m1
2006*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m2
2007*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m5, [pixel_10bpc_max]
2008*c0909341SAndroid Build Coastguard Worker.end2:
2009*c0909341SAndroid Build Coastguard Worker    mova                xm2, [dstq+strideq*0]
2010*c0909341SAndroid Build Coastguard Worker    vinserti128          m2, [dstq+strideq*1], 1
2011*c0909341SAndroid Build Coastguard Worker    lea                  r6, [dstq+strideq*2]
2012*c0909341SAndroid Build Coastguard Worker    mova                xm3, [r6  +strideq*0]
2013*c0909341SAndroid Build Coastguard Worker    vinserti128          m3, [r6  +strideq*1], 1
2014*c0909341SAndroid Build Coastguard Worker    pxor                 m4, m4
2015*c0909341SAndroid Build Coastguard Worker    REPX {mova [cq+32*x], m4}, 0, 1, 2, 3
2016*c0909341SAndroid Build Coastguard Worker    paddw                m0, m2
2017*c0909341SAndroid Build Coastguard Worker    paddw                m1, m3
2018*c0909341SAndroid Build Coastguard Worker    pmaxsw               m0, m4
2019*c0909341SAndroid Build Coastguard Worker    pmaxsw               m1, m4
2020*c0909341SAndroid Build Coastguard Worker    pminsw               m0, m5
2021*c0909341SAndroid Build Coastguard Worker    pminsw               m1, m5
2022*c0909341SAndroid Build Coastguard Worker    mova         [dstq+strideq*0], xm0
2023*c0909341SAndroid Build Coastguard Worker    vextracti128 [dstq+strideq*1], m0, 1
2024*c0909341SAndroid Build Coastguard Worker    mova         [r6  +strideq*0], xm1
2025*c0909341SAndroid Build Coastguard Worker    vextracti128 [r6  +strideq*1], m1, 1
2026*c0909341SAndroid Build Coastguard Worker    RET
2027*c0909341SAndroid Build Coastguard WorkerALIGN function_align
2028*c0909341SAndroid Build Coastguard Worker.pass2_main:
2029*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m4, [deint_shuf]
2030*c0909341SAndroid Build Coastguard Worker    packssdw             m0, m1
2031*c0909341SAndroid Build Coastguard Worker    packssdw             m2, m3
2032*c0909341SAndroid Build Coastguard Worker    lea                  r6, [deint_shuf+128]
2033*c0909341SAndroid Build Coastguard Worker    vperm2i128           m1, m0, m2, 0x31
2034*c0909341SAndroid Build Coastguard Worker    vinserti128          m0, xm2, 1
2035*c0909341SAndroid Build Coastguard Worker    pshufb               m0, m4
2036*c0909341SAndroid Build Coastguard Worker    pshufb               m1, m4
2037*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_8x4_internal_8bpc).main
2038*c0909341SAndroid Build Coastguard WorkerALIGN function_align
2039*c0909341SAndroid Build Coastguard Worker.main:
2040*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m1, [pd_2896]
2041*c0909341SAndroid Build Coastguard Worker    pmulld               m0, m1, [cq+32*0]
2042*c0909341SAndroid Build Coastguard Worker    pmulld               m3, m1, [cq+32*3]
2043*c0909341SAndroid Build Coastguard Worker    pmulld               m2, m1, [cq+32*2]
2044*c0909341SAndroid Build Coastguard Worker    pmulld               m1,     [cq+32*1]
2045*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m4, [pd_2048]
2046*c0909341SAndroid Build Coastguard Worker    REPX      {paddd x, m4}, m0, m3, m2, m1
2047*c0909341SAndroid Build Coastguard Worker    REPX      {psrad x, 12}, m0, m3, m2, m1
2048*c0909341SAndroid Build Coastguard Worker.main2:
2049*c0909341SAndroid Build Coastguard Worker    IADST4_1D
2050*c0909341SAndroid Build Coastguard Worker    ret
2051*c0909341SAndroid Build Coastguard Worker
; 8x4 inverse transforms whose first pass is a flipped ADST, 10 bpc.
; INV_TXFM_8X4_FN is defined earlier in the file and is not described here.
2052*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN flipadst, dct
2053*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN flipadst, adst
2054*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN flipadst, flipadst
2055*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN flipadst, identity
2056*c0909341SAndroid Build Coastguard Worker
; iflipadst_8x4_internal_10bpc
;   Pass 1 calls the same m(iadst_4x8_internal_10bpc).main as the plain ADST
;   variant, but arranges the results into m0-m3 with shufpd/pshufd instead
;   of blends and applies psignd with m6 and with a qword-swapped copy of m6.
;   .pass2 reuses m(iadst_8x4_internal_10bpc).pass2_main and the shared
;   .end tail, handing the two result registers over in swapped order.
;   NOTE(review): this routine declares 5 GPRs while the shared tail writes
;   r6; presumably fine because r6 needs no saving - confirm against x86inc.
2057*c0909341SAndroid Build Coastguard Workercglobal iflipadst_8x4_internal_10bpc, 0, 5, 10, dst, stride, c, eob, tx2
2058*c0909341SAndroid Build Coastguard Worker    call m(iadst_4x8_internal_10bpc).main
2059*c0909341SAndroid Build Coastguard Worker    shufpd               m3, m4, m0, 0x05
2060*c0909341SAndroid Build Coastguard Worker    shufpd               m0, m4, 0x05
2061*c0909341SAndroid Build Coastguard Worker    psignd               m2, m6
2062*c0909341SAndroid Build Coastguard Worker    pshufd               m6, m6, q1032 ; swap the dword pairs of the sign register
2063*c0909341SAndroid Build Coastguard Worker    pshufd               m1, m2, q1032
2064*c0909341SAndroid Build Coastguard Worker    psignd               m2, m5, m6
2065*c0909341SAndroid Build Coastguard Worker    jmp                tx2q ; indirect jump to the second-pass handler
2066*c0909341SAndroid Build Coastguard Worker.pass2:
2067*c0909341SAndroid Build Coastguard Worker    call m(iadst_8x4_internal_10bpc).pass2_main
2068*c0909341SAndroid Build Coastguard Worker    vpermq               m2, m0, q2031 ; the plain ADST .pass2 keeps m0 here and uses q3120
2069*c0909341SAndroid Build Coastguard Worker    vpermq               m0, m1, q2031
2070*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_8x4_internal_10bpc).end
2071*c0909341SAndroid Build Coastguard Worker
; 8x4 inverse transforms whose first pass is the identity transform, 10 bpc.
; INV_TXFM_8X4_FN is defined earlier in the file and is not described here.
2072*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN identity, dct
2073*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN identity, adst
2074*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN identity, flipadst
2075*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN identity, identity
2076*c0909341SAndroid Build Coastguard Worker
; iidentity_8x4_internal_10bpc
;   .pass1:     load four coefficient rows with their qwords reordered,
;               compute ((x * pd_2896 + pd_2048) >> 12) * 2, jump via tx2q.
;               Also entered from m(iidentity_8x4_internal_12bpc).
;   .pass2:     pack to words, add pmulhrsw(x, pw_1697x8) to x with
;               saturation, and derive the word scale factor from m7.
;   .pass2_end: word transpose, scale by m7, add to four destination rows,
;               clamp to [0, m5], store, and zero 4*32 bytes at cq. Also
;               entered from m(iidentity_8x4_internal_12bpc).pass2 with its
;               own m5 and m7.
2077*c0909341SAndroid Build Coastguard Workercglobal iidentity_8x4_internal_10bpc, 0, 7, 10, dst, stride, c, eob, tx2
2078*c0909341SAndroid Build Coastguard Worker.pass1:
2079*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m4, [pd_2896]
2080*c0909341SAndroid Build Coastguard Worker    vpermq               m0, [cq+32*0], q3120
2081*c0909341SAndroid Build Coastguard Worker    vpermq               m1, [cq+32*1], q3120
2082*c0909341SAndroid Build Coastguard Worker    vpermq               m2, [cq+32*2], q3120
2083*c0909341SAndroid Build Coastguard Worker    vpermq               m3, [cq+32*3], q3120
2084*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m7, [pd_2048]
2085*c0909341SAndroid Build Coastguard Worker    REPX     {pmulld x, m4}, m0, m1, m2, m3
2086*c0909341SAndroid Build Coastguard Worker    REPX     {paddd  x, m7}, m0, m1, m2, m3
2087*c0909341SAndroid Build Coastguard Worker    REPX     {psrad  x, 12}, m0, m1, m2, m3
2088*c0909341SAndroid Build Coastguard Worker    REPX     {paddd  x, x }, m0, m1, m2, m3 ; double each value
2089*c0909341SAndroid Build Coastguard Worker    jmp                tx2q ; indirect jump to the second-pass handler
2090*c0909341SAndroid Build Coastguard Worker.pass2:
    ; m7 is not reloaded here: it is assumed to still hold dword 2048 from
    ; the first pass (whichever transform that was) - TODO confirm for the
    ; first-pass routines outside this view.
2091*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m5, [pixel_10bpc_max]
2092*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m4, [pw_1697x8]
2093*c0909341SAndroid Build Coastguard Worker    packssdw             m0, m1
2094*c0909341SAndroid Build Coastguard Worker    packssdw             m2, m3
2095*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m4, m0
2096*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m4, m2
2097*c0909341SAndroid Build Coastguard Worker    paddsw               m0, m1
2098*c0909341SAndroid Build Coastguard Worker    paddsw               m2, m4
2099*c0909341SAndroid Build Coastguard Worker    packssdw             m7, m7 ; pw_2048
2100*c0909341SAndroid Build Coastguard Worker.pass2_end:
    ; Inputs: m0, m2 = packed words; m7 = word scale; m5 = pixel maximum.
2101*c0909341SAndroid Build Coastguard Worker    punpckhwd            m1, m0, m2
2102*c0909341SAndroid Build Coastguard Worker    punpcklwd            m0, m2
2103*c0909341SAndroid Build Coastguard Worker    lea                  r6, [dstq+strideq*2]
2104*c0909341SAndroid Build Coastguard Worker    punpckhwd            m2, m0, m1
2105*c0909341SAndroid Build Coastguard Worker    punpcklwd            m0, m1
2106*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m2, m7
2107*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m7
2108*c0909341SAndroid Build Coastguard Worker    punpckhwd            m1, m0, m2
2109*c0909341SAndroid Build Coastguard Worker    punpcklwd            m0, m2
    ; m2 = dst rows 0 (low lane) and 2 (high lane); m3 = rows 1 and 3.
2110*c0909341SAndroid Build Coastguard Worker    mova                xm2, [dstq+strideq*0]
2111*c0909341SAndroid Build Coastguard Worker    vinserti128          m2, [r6  +strideq*0], 1
2112*c0909341SAndroid Build Coastguard Worker    mova                xm3, [dstq+strideq*1]
2113*c0909341SAndroid Build Coastguard Worker    vinserti128          m3, [r6  +strideq*1], 1
2114*c0909341SAndroid Build Coastguard Worker    pxor                 m4, m4
2115*c0909341SAndroid Build Coastguard Worker    REPX {mova [cq+32*x], m4}, 0, 1, 2, 3 ; clear the coefficient buffer
2116*c0909341SAndroid Build Coastguard Worker    paddw                m0, m2
2117*c0909341SAndroid Build Coastguard Worker    paddw                m1, m3
2118*c0909341SAndroid Build Coastguard Worker    pmaxsw               m0, m4 ; clamp below at zero
2119*c0909341SAndroid Build Coastguard Worker    pmaxsw               m1, m4
2120*c0909341SAndroid Build Coastguard Worker    pminsw               m0, m5 ; clamp above at the pixel maximum in m5
2121*c0909341SAndroid Build Coastguard Worker    pminsw               m1, m5
2122*c0909341SAndroid Build Coastguard Worker    mova         [dstq+strideq*0], xm0
2123*c0909341SAndroid Build Coastguard Worker    mova         [dstq+strideq*1], xm1
2124*c0909341SAndroid Build Coastguard Worker    vextracti128 [r6  +strideq*0], m0, 1
2125*c0909341SAndroid Build Coastguard Worker    vextracti128 [r6  +strideq*1], m1, 1
2126*c0909341SAndroid Build Coastguard Worker    RET
2127*c0909341SAndroid Build Coastguard Worker
; 8x4 inverse transforms whose first pass is a DCT, 12 bpc.
; INV_TXFM_8X4_FN is defined earlier in the file; its third argument is the
; bit depth.
2128*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN dct, dct,      12
2129*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN dct, identity, 12
2130*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN dct, adst,     12
2131*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN dct, flipadst, 12
2132*c0909341SAndroid Build Coastguard Worker
; idct_8x4_internal_12bpc
;   Pass 1 loads the 20-bit clip bounds into m8/m9 and continues in the
;   10 bpc pass-1 code (outside this view).
;   .pass2 clamps m0-m3 to the 18-bit bounds, transposes, runs IDCT4_1D on
;   dwords and finishes in the shared 12 bpc tail of the ADST routine.
2133*c0909341SAndroid Build Coastguard Workercglobal idct_8x4_internal_12bpc, 0, 7, 10, dst, stride, c, eob, tx2
2134*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m8, [clip_20b_min]
2135*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m9, [clip_20b_max]
2136*c0909341SAndroid Build Coastguard Worker    jmp m(idct_8x4_internal_10bpc).pass1
2137*c0909341SAndroid Build Coastguard Worker.pass2:
2138*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m8, [clip_18b_min]
2139*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m9, [clip_18b_max]
2140*c0909341SAndroid Build Coastguard Worker    REPX     {pmaxsd x, m8}, m0, m1, m2, m3
2141*c0909341SAndroid Build Coastguard Worker    REPX     {pminsd x, m9}, m0, m1, m2, m3
2142*c0909341SAndroid Build Coastguard Worker    call m(iadst_8x4_internal_12bpc).transpose_4x8
2143*c0909341SAndroid Build Coastguard Worker    IDCT4_1D              0, 1, 2, 3, 4, 5, 6, 7
2144*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_8x4_internal_12bpc).end ; enters at .end, so the >>12 at .pass2_end is skipped
2145*c0909341SAndroid Build Coastguard Worker
; 8x4 inverse transforms whose first pass is an ADST, 12 bpc.
; INV_TXFM_8X4_FN is defined earlier in the file; its third argument is the
; bit depth.
2146*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN adst, dct,      12
2147*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN adst, adst,     12
2148*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN adst, flipadst, 12
2149*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN adst, identity, 12
2150*c0909341SAndroid Build Coastguard Worker
; iadst_8x4_internal_12bpc
;   Pass 1: load the 20-bit clip bounds into m8/m9, call
;     m(iadst_4x8_internal_10bpc).main2 and regroup the results exactly as
;     the 10 bpc routine does before jumping via tx2q.
;   .pass2:         clamp to the 18-bit bounds, transpose and run the 4-point
;                   ADST on dwords, then add the pd_2048 rounding term.
;   .pass2_end:     arithmetic shift right by 12. Also entered from
;                   m(iflipadst_8x4_internal_12bpc).pass2.
;   .end:           shift right by 3, pack to words, pmulhrsw by pw_16384,
;                   reorder qwords, select the 12 bpc pixel maximum and
;                   continue at m(iadst_8x4_internal_10bpc).end2. Also
;                   entered from m(idct_8x4_internal_12bpc).pass2.
;   .pass2_main:    .transpose_4x8 followed by the 10 bpc .main2.
;   .transpose_4x8: rearranges m0-m3 in place; clobbers m4 and m5.
2151*c0909341SAndroid Build Coastguard Workercglobal iadst_8x4_internal_12bpc, 0, 7, 10, dst, stride, c, eob, tx2
2152*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m8, [clip_20b_min]
2153*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m9, [clip_20b_max]
2154*c0909341SAndroid Build Coastguard Worker    call m(iadst_4x8_internal_10bpc).main2
2155*c0909341SAndroid Build Coastguard Worker    vpblendd             m3, m0, m4, 0x33 ; out6 out7
2156*c0909341SAndroid Build Coastguard Worker    vpblendd             m0, m4, 0xcc     ; out0 out1
2157*c0909341SAndroid Build Coastguard Worker    pshufd               m1, m5, q1032
2158*c0909341SAndroid Build Coastguard Worker    psignd               m2, m6           ; out4 out5
2159*c0909341SAndroid Build Coastguard Worker    psignd               m1, m6           ; out2 out3
2160*c0909341SAndroid Build Coastguard Worker    jmp                tx2q ; indirect jump to the second-pass handler
2161*c0909341SAndroid Build Coastguard Worker.pass2:
2162*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m8, [clip_18b_min]
2163*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m9, [clip_18b_max]
2164*c0909341SAndroid Build Coastguard Worker    REPX     {pmaxsd x, m8}, m0, m1, m2, m3
2165*c0909341SAndroid Build Coastguard Worker    REPX     {pminsd x, m9}, m0, m1, m2, m3
2166*c0909341SAndroid Build Coastguard Worker    call .pass2_main
    ; The ADST results are read from m4, m6, m2, m3 (in that order) and moved
    ; into m0-m3 while the rounding constant is added.
2167*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m5, [pd_2048]
2168*c0909341SAndroid Build Coastguard Worker    paddd                m0, m5, m4
2169*c0909341SAndroid Build Coastguard Worker    paddd                m1, m5, m6
2170*c0909341SAndroid Build Coastguard Worker    paddd                m2, m5
2171*c0909341SAndroid Build Coastguard Worker    paddd                m3, m5
2172*c0909341SAndroid Build Coastguard Worker.pass2_end:
2173*c0909341SAndroid Build Coastguard Worker    REPX      {psrad x, 12}, m0, m1, m2, m3
2174*c0909341SAndroid Build Coastguard Worker.end:
2175*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m4, [pw_16384]
2176*c0909341SAndroid Build Coastguard Worker    REPX       {psrad x, 3}, m0, m1, m2, m3
2177*c0909341SAndroid Build Coastguard Worker    packssdw             m0, m1
2178*c0909341SAndroid Build Coastguard Worker    packssdw             m2, m3
2179*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m4 ; with a multiplier of 16384 this is (x + 1) >> 1
2180*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m2, m4
2181*c0909341SAndroid Build Coastguard Worker    vpermq               m0, m0, q3120 ; out0 out1
2182*c0909341SAndroid Build Coastguard Worker    vpermq               m1, m1, q3120 ; out2 out3
2183*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m5, [pixel_12bpc_max]
2184*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_8x4_internal_10bpc).end2
2185*c0909341SAndroid Build Coastguard WorkerALIGN function_align
2186*c0909341SAndroid Build Coastguard Worker.pass2_main:
2187*c0909341SAndroid Build Coastguard Worker    call .transpose_4x8
2188*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_8x4_internal_10bpc).main2 ; tail call: its ret returns to our caller
2189*c0909341SAndroid Build Coastguard WorkerALIGN function_align
2190*c0909341SAndroid Build Coastguard Worker.transpose_4x8:
2191*c0909341SAndroid Build Coastguard Worker    ; deinterleave
2192*c0909341SAndroid Build Coastguard Worker    pshufd               m0, m0, q3120
2193*c0909341SAndroid Build Coastguard Worker    pshufd               m1, m1, q3120
2194*c0909341SAndroid Build Coastguard Worker    pshufd               m2, m2, q3120
2195*c0909341SAndroid Build Coastguard Worker    pshufd               m3, m3, q3120
2196*c0909341SAndroid Build Coastguard Worker    ; transpose
2197*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m4, m0, m1
2198*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m0, m1
2199*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m5, m2, m3
2200*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m2, m3
2201*c0909341SAndroid Build Coastguard Worker    vperm2i128           m1, m0, m2, 0x20   ; out1
2202*c0909341SAndroid Build Coastguard Worker    vperm2i128           m3, m0, m2, 0x31   ; out3
2203*c0909341SAndroid Build Coastguard Worker    vperm2i128           m2, m4, m5, 0x31   ; out2
2204*c0909341SAndroid Build Coastguard Worker    vperm2i128           m0, m4, m5, 0x20   ; out0
2205*c0909341SAndroid Build Coastguard Worker    ret
2206*c0909341SAndroid Build Coastguard Worker
; 8x4 inverse transforms whose first pass is a flipped ADST, 12 bpc.
; INV_TXFM_8X4_FN is defined earlier in the file; its third argument is the
; bit depth.
2207*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN flipadst, dct,      12
2208*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN flipadst, adst,     12
2209*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN flipadst, flipadst, 12
2210*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN flipadst, identity, 12
2211*c0909341SAndroid Build Coastguard Worker
; iflipadst_8x4_internal_12bpc
;   Pass 1 matches the 10 bpc flipped-ADST pass 1 except that it first loads
;   the 20-bit clip bounds into m8/m9 and calls .main2 instead of .main.
;   .pass2 shares m(iadst_8x4_internal_12bpc).pass2_main and .pass2_end; the
;   only difference from the plain ADST .pass2 is that the four result
;   registers are picked up in the opposite order (m3, m2, m6, m4).
2212*c0909341SAndroid Build Coastguard Workercglobal iflipadst_8x4_internal_12bpc, 0, 5, 10, dst, stride, c, eob, tx2
2213*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m8, [clip_20b_min]
2214*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m9, [clip_20b_max]
2215*c0909341SAndroid Build Coastguard Worker    call m(iadst_4x8_internal_10bpc).main2
2216*c0909341SAndroid Build Coastguard Worker    shufpd               m3, m4, m0, 0x05
2217*c0909341SAndroid Build Coastguard Worker    shufpd               m0, m4, 0x05
2218*c0909341SAndroid Build Coastguard Worker    psignd               m2, m6
2219*c0909341SAndroid Build Coastguard Worker    pshufd               m6, m6, q1032 ; swap the dword pairs of the sign register
2220*c0909341SAndroid Build Coastguard Worker    pshufd               m1, m2, q1032
2221*c0909341SAndroid Build Coastguard Worker    psignd               m2, m5, m6
2222*c0909341SAndroid Build Coastguard Worker    jmp                tx2q ; indirect jump to the second-pass handler
2223*c0909341SAndroid Build Coastguard Worker.pass2:
2224*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m8, [clip_18b_min]
2225*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m9, [clip_18b_max]
2226*c0909341SAndroid Build Coastguard Worker    REPX     {pmaxsd x, m8}, m0, m1, m2, m3
2227*c0909341SAndroid Build Coastguard Worker    REPX     {pminsd x, m9}, m0, m1, m2, m3
2228*c0909341SAndroid Build Coastguard Worker    call m(iadst_8x4_internal_12bpc).pass2_main
2229*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m5, [pd_2048]
    ; m0 and m1 are written before m3 and m2 are overwritten below, so the
    ; order of these four additions matters.
2230*c0909341SAndroid Build Coastguard Worker    paddd                m0, m5, m3
2231*c0909341SAndroid Build Coastguard Worker    paddd                m1, m5, m2
2232*c0909341SAndroid Build Coastguard Worker    paddd                m3, m5, m4
2233*c0909341SAndroid Build Coastguard Worker    paddd                m2, m5, m6
2234*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_8x4_internal_12bpc).pass2_end
2235*c0909341SAndroid Build Coastguard Worker
; 8x4 inverse transforms whose first pass is the identity transform, 12 bpc.
; INV_TXFM_8X4_FN is defined earlier in the file; its third argument is the
; bit depth.
2236*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN identity, dct,      12
2237*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN identity, adst,     12
2238*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN identity, flipadst, 12
2239*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN identity, identity, 12
2240*c0909341SAndroid Build Coastguard Worker
; iidentity_8x4_internal_12bpc
;   Pass 1 is shared with the 10 bpc routine (no clip bounds are loaded).
;   .pass2 clamps to the 18-bit bounds, computes (x * pd_5793 + m7) >> 15 on
;   dwords, packs to words and continues in the 10 bpc .pass2_end with
;   m5 = pixel_12bpc_max and m7 = pw_16384.
2241*c0909341SAndroid Build Coastguard Workercglobal iidentity_8x4_internal_12bpc, 0, 7, 10, dst, stride, c, eob, tx2
2242*c0909341SAndroid Build Coastguard Worker    jmp m(iidentity_8x4_internal_10bpc).pass1
2243*c0909341SAndroid Build Coastguard Worker.pass2:
2244*c0909341SAndroid Build Coastguard Worker    ; m0 = in0 in1 (interleaved)
2245*c0909341SAndroid Build Coastguard Worker    ; m1 = in2 in3 (interleaved)
2246*c0909341SAndroid Build Coastguard Worker    ; m2 = in4 in5 (interleaved)
2247*c0909341SAndroid Build Coastguard Worker    ; m3 = in6 in7 (interleaved)
2248*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m8, [clip_18b_min]
2249*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m9, [clip_18b_max]
2250*c0909341SAndroid Build Coastguard Worker    REPX     {pmaxsd x, m8}, m0, m1, m2, m3
2251*c0909341SAndroid Build Coastguard Worker    REPX     {pminsd x, m9}, m0, m1, m2, m3
2252*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m4, [pd_5793]
2253*c0909341SAndroid Build Coastguard Worker    REPX     {pmulld x, m4}, m0, m1, m2, m3
2254*c0909341SAndroid Build Coastguard Worker    REPX     {paddd  x, m7}, m0, m1, m2, m3 ; m7 is not loaded here; presumably still the dword 2048 from pass 1 - TODO confirm
2255*c0909341SAndroid Build Coastguard Worker    REPX     {psrad  x, 15}, m0, m1, m2, m3
2256*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m5, [pixel_12bpc_max]
2257*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m7, [pw_16384] ; m7 is reused as the word scale for .pass2_end
2258*c0909341SAndroid Build Coastguard Worker    packssdw             m0, m1
2259*c0909341SAndroid Build Coastguard Worker    packssdw             m2, m3
2260*c0909341SAndroid Build Coastguard Worker    jmp m(iidentity_8x4_internal_10bpc).pass2_end
2261*c0909341SAndroid Build Coastguard Worker
; INV_TXFM_8X8_FN type1, type2 [, bitdepth = 10]
;   Forwards to INV_TXFM_FN for the 8x8 size. For the dct_dct pair it also
;   emits the DC-only path: the 10 bpc instance contains the code, any other
;   bit depth loads its own dconly constant into m2 and jumps to the 10 bpc
;   .dconly label.
;   DC-only path: a single value is derived from the first coefficient and
;   added to two rows of 16 bytes per loop iteration, with r3d as the row
;   counter. .dconly2 and .dconly3 are additional entry labels, presumably
;   for other block sizes (not referenced in this view).
2262*c0909341SAndroid Build Coastguard Worker%macro INV_TXFM_8X8_FN 2-3 10 ; type1, type2, bitdepth
2263*c0909341SAndroid Build Coastguard Worker    INV_TXFM_FN          %1, %2, 0, 8x8, %3
2264*c0909341SAndroid Build Coastguard Worker%ifidn %1_%2, dct_dct
2265*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m2, [dconly_%3bpc]
2266*c0909341SAndroid Build Coastguard Worker%if %3 = 10
2267*c0909341SAndroid Build Coastguard Worker.dconly:
2268*c0909341SAndroid Build Coastguard Worker    imul                r6d, [cq], 181
2269*c0909341SAndroid Build Coastguard Worker    mov                [cq], eobd ; 0
2270*c0909341SAndroid Build Coastguard Worker    or                  r3d, 8 ; row count; presumably r3d is eob and zero here, giving 8 - TODO confirm
2271*c0909341SAndroid Build Coastguard Worker.dconly2:
2272*c0909341SAndroid Build Coastguard Worker    add                 r6d, 384
2273*c0909341SAndroid Build Coastguard Worker    sar                 r6d, 9
2274*c0909341SAndroid Build Coastguard Worker.dconly3:
2275*c0909341SAndroid Build Coastguard Worker    imul                r6d, 181
2276*c0909341SAndroid Build Coastguard Worker    add                 r6d, 2176
2277*c0909341SAndroid Build Coastguard Worker    sar                 r6d, 12
2278*c0909341SAndroid Build Coastguard Worker    movd                xm0, r6d
2279*c0909341SAndroid Build Coastguard Worker    paddsw              xm0, xm2 ; pre-add the dconly bias with signed saturation
2280*c0909341SAndroid Build Coastguard Worker    vpbroadcastw         m0, xm0
2281*c0909341SAndroid Build Coastguard Worker.dconly_loop:
2282*c0909341SAndroid Build Coastguard Worker    mova                xm1, [dstq+strideq*0]
2283*c0909341SAndroid Build Coastguard Worker    vinserti128          m1, [dstq+strideq*1], 1
2284*c0909341SAndroid Build Coastguard Worker    paddsw               m1, m0
2285*c0909341SAndroid Build Coastguard Worker    psubusw              m1, m2 ; remove the bias with unsigned saturation, so results cannot go below zero
2286*c0909341SAndroid Build Coastguard Worker    mova         [dstq+strideq*0], xm1
2287*c0909341SAndroid Build Coastguard Worker    vextracti128 [dstq+strideq*1], m1, 1
2288*c0909341SAndroid Build Coastguard Worker    lea                dstq, [dstq+strideq*2]
2289*c0909341SAndroid Build Coastguard Worker    sub                 r3d, 2 ; two rows handled per iteration
2290*c0909341SAndroid Build Coastguard Worker    jg .dconly_loop
2291*c0909341SAndroid Build Coastguard Worker    RET
2292*c0909341SAndroid Build Coastguard Worker%else
2293*c0909341SAndroid Build Coastguard Worker    jmp m(inv_txfm_add_dct_dct_8x8_10bpc).dconly
2294*c0909341SAndroid Build Coastguard Worker%endif
2295*c0909341SAndroid Build Coastguard Worker%endif
2296*c0909341SAndroid Build Coastguard Worker%endmacro
2297*c0909341SAndroid Build Coastguard Worker
; IADST8_1D: one-dimensional 8-point inverse ADST on dword vectors.
;   All 14 arguments are register numbers:
;     1-8   source registers, transformed in place
;     9-11  temporaries
;     12    register passed to ITX_MULSUB_2D as its rounding operand
;           (pd_2048 per the parameter list)
;     13,14 lower / upper clip bounds applied with pmaxsd / pminsd
;   On exit, per the inline comments: arg1 = out0, arg2 = -out1,
;   arg7 = out6, arg8 = -out7, while arg5, arg4, arg6 and arg3 hold
;   (t2 - t3), (t2 + t3), (t6 - t7) and (t6 + t7), each multiplied by 1448
;   and not yet rounded or shifted. arg9 is left holding pd_1448 and arg10
;   is clobbered.
;   ITX_MULSUB_2D is defined elsewhere in the file; its exact contract is
;   not visible here.
2298*c0909341SAndroid Build Coastguard Worker%macro IADST8_1D 14 ; src[1-8], tmp[1-3], pd_2048, clip[1-2]
2299*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D        %8, %1, %9, %10, %11, %12,  401, 4076 ; t1a, t0a
2300*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D        %2, %7, %9, %10, %11, %12, 3920, 1189 ; t7a, t6a
2301*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D        %6, %3, %9, %10, %11, %12, 1931, 3612 ; t3a, t2a
2302*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D        %4, %5, %9, %10, %11, %12, 3166, 2598 ; t5a, t4a
2303*c0909341SAndroid Build Coastguard Worker    psubd               m%9, m%3, m%7 ; t6
2304*c0909341SAndroid Build Coastguard Worker    paddd               m%3, m%7      ; t2
2305*c0909341SAndroid Build Coastguard Worker    psubd               m%7, m%1, m%5 ; t4
2306*c0909341SAndroid Build Coastguard Worker    paddd               m%1, m%5      ; t0
2307*c0909341SAndroid Build Coastguard Worker    psubd               m%5, m%6, m%2 ; t7
2308*c0909341SAndroid Build Coastguard Worker    paddd               m%6, m%2      ; t3
2309*c0909341SAndroid Build Coastguard Worker    psubd               m%2, m%8, m%4 ; t5
2310*c0909341SAndroid Build Coastguard Worker    paddd               m%8, m%4      ; t1
2311*c0909341SAndroid Build Coastguard Worker    REPX   {pmaxsd x, m%13}, m%7, m%2, m%9, m%5, m%3, m%1, m%6, m%8
2312*c0909341SAndroid Build Coastguard Worker    REPX   {pminsd x, m%14}, m%7, m%2, m%9, m%5, m%3, m%1, m%6, m%8
2313*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D        %7, %2, %4, %10, %11, %12, 1567, 3784 ; t5a, t4a
    ; The last argument below is a register number rather than a constant;
    ; presumably it reuses a coefficient left in that temporary by the
    ; previous invocation - TODO confirm against ITX_MULSUB_2D.
2314*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D        %5, %9, %4, %10, %11, %12, 3784, %11  ; t6a, t7a
2315*c0909341SAndroid Build Coastguard Worker    psubd              m%10, m%7, m%9 ;  t7
2316*c0909341SAndroid Build Coastguard Worker    paddd               m%7, m%9      ;  out6
2317*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m%9, [pd_1448]
2318*c0909341SAndroid Build Coastguard Worker    psubd               m%4, m%8, m%6 ;  t3
2319*c0909341SAndroid Build Coastguard Worker    paddd               m%8, m%6      ; -out7
2320*c0909341SAndroid Build Coastguard Worker    psubd               m%6, m%1, m%3 ;  t2
2321*c0909341SAndroid Build Coastguard Worker    paddd               m%1, m%3      ;  out0
2322*c0909341SAndroid Build Coastguard Worker    psubd               m%3, m%2, m%5 ;  t6
2323*c0909341SAndroid Build Coastguard Worker    paddd               m%2, m%5      ; -out1
2324*c0909341SAndroid Build Coastguard Worker    REPX   {pmaxsd x, m%13}, m%6, m%4, m%3, m%10
2325*c0909341SAndroid Build Coastguard Worker    REPX   {pminsd x, m%14}, m%6, m%4, m%3, m%10
2326*c0909341SAndroid Build Coastguard Worker    REPX   {pmulld x, m%9 }, m%6, m%4, m%3, m%10
2327*c0909341SAndroid Build Coastguard Worker    psubd               m%5, m%6, m%4  ; (t2 - t3) * 1448
2328*c0909341SAndroid Build Coastguard Worker    paddd               m%4, m%6       ; (t2 + t3) * 1448
2329*c0909341SAndroid Build Coastguard Worker    psubd               m%6, m%3, m%10 ; (t6 - t7) * 1448
2330*c0909341SAndroid Build Coastguard Worker    paddd               m%3, m%10      ; (t6 + t7) * 1448
2331*c0909341SAndroid Build Coastguard Worker%endmacro
2332*c0909341SAndroid Build Coastguard Worker
; 8x8 inverse transforms whose first pass is a DCT, at the macro's default
; bit depth of 10. The dct_dct instance also carries the shared DC-only code
; emitted by INV_TXFM_8X8_FN.
2333*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN dct, dct
2334*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN dct, identity
2335*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN dct, adst
2336*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN dct, flipadst
2337*c0909341SAndroid Build Coastguard Worker
2338*c0909341SAndroid Build Coastguard Workercglobal idct_8x8_internal_10bpc, 0, 7, 14, dst, stride, c, eob, tx2
2339*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [clip_18b_min]
2340*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_18b_max]
2341*c0909341SAndroid Build Coastguard Worker.pass1:
2342*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+32*0]
2343*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+32*1]
2344*c0909341SAndroid Build Coastguard Worker    mova                 m2, [cq+32*2]
2345*c0909341SAndroid Build Coastguard Worker    mova                 m3, [cq+32*3]
2346*c0909341SAndroid Build Coastguard Worker    mova                 m4, [cq+32*4]
2347*c0909341SAndroid Build Coastguard Worker    mova                 m5, [cq+32*5]
2348*c0909341SAndroid Build Coastguard Worker    mova                 m6, [cq+32*6]
2349*c0909341SAndroid Build Coastguard Worker    mova                 m7, [cq+32*7]
2350*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m11, [pd_2048]
2351*c0909341SAndroid Build Coastguard Worker    call .main
2352*c0909341SAndroid Build Coastguard Worker    call .round_shift1
2353*c0909341SAndroid Build Coastguard Worker    jmp                tx2q
2354*c0909341SAndroid Build Coastguard Worker.pass2:
2355*c0909341SAndroid Build Coastguard Worker    call .transpose_8x8_packed
2356*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_8bpc).main
2357*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [pw_2048]
2358*c0909341SAndroid Build Coastguard Worker    vpermq               m0, m0, q3120
2359*c0909341SAndroid Build Coastguard Worker    vpermq               m1, m1, q2031
2360*c0909341SAndroid Build Coastguard Worker    vpermq               m2, m2, q3120
2361*c0909341SAndroid Build Coastguard Worker    vpermq               m3, m3, q2031
2362*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m12
2363*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m12
2364*c0909341SAndroid Build Coastguard Worker    call .write_8x4_start
2365*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m2, m12
2366*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m3, m12
2367*c0909341SAndroid Build Coastguard Worker    call .write_8x4
2368*c0909341SAndroid Build Coastguard Worker    RET
2369*c0909341SAndroid Build Coastguard WorkerALIGN function_align
2370*c0909341SAndroid Build Coastguard Worker.write_8x4_start:
2371*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m11, [pixel_10bpc_max]
2372*c0909341SAndroid Build Coastguard Worker    lea                  r6, [strideq*3]
2373*c0909341SAndroid Build Coastguard Worker    pxor                m10, m10
2374*c0909341SAndroid Build Coastguard Worker.write_8x4:
2375*c0909341SAndroid Build Coastguard Worker    mova                xm8, [dstq+strideq*0]
2376*c0909341SAndroid Build Coastguard Worker    vinserti128          m8, [dstq+strideq*1], 1
2377*c0909341SAndroid Build Coastguard Worker    mova                xm9, [dstq+strideq*2]
2378*c0909341SAndroid Build Coastguard Worker    vinserti128          m9, [dstq+r6       ], 1
2379*c0909341SAndroid Build Coastguard Worker    mova          [cq+32*0], m10
2380*c0909341SAndroid Build Coastguard Worker    mova          [cq+32*1], m10
2381*c0909341SAndroid Build Coastguard Worker    mova          [cq+32*2], m10
2382*c0909341SAndroid Build Coastguard Worker    mova          [cq+32*3], m10
2383*c0909341SAndroid Build Coastguard Worker    add                  cq, 32*4
2384*c0909341SAndroid Build Coastguard Worker    paddw                m0, m8
2385*c0909341SAndroid Build Coastguard Worker    paddw                m1, m9
2386*c0909341SAndroid Build Coastguard Worker    pmaxsw               m0, m10
2387*c0909341SAndroid Build Coastguard Worker    pmaxsw               m1, m10
2388*c0909341SAndroid Build Coastguard Worker    pminsw               m0, m11
2389*c0909341SAndroid Build Coastguard Worker    pminsw               m1, m11
2390*c0909341SAndroid Build Coastguard Worker    mova         [dstq+strideq*0], xm0
2391*c0909341SAndroid Build Coastguard Worker    vextracti128 [dstq+strideq*1], m0, 1
2392*c0909341SAndroid Build Coastguard Worker    mova         [dstq+strideq*2], xm1
2393*c0909341SAndroid Build Coastguard Worker    vextracti128 [dstq+r6       ], m1, 1
2394*c0909341SAndroid Build Coastguard Worker    lea                dstq, [dstq+strideq*4]
2395*c0909341SAndroid Build Coastguard Worker    ret
2396*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; .transpose_8x8_packed: narrow the eight dword registers m0-m7 to words
; (signed saturation) and run a word/dword interleave plus 128-bit lane
; permute sequence over them. The result is left in m0-m3.
;   in:  m0-m7 (dwords)
;   out: m0-m3 (words); r6 = deint_shuf+128
;   clobbers: m4
2397*c0909341SAndroid Build Coastguard Worker.transpose_8x8_packed:
; pair register n with register n+4, so only m0-m3 carry data from here on
2398*c0909341SAndroid Build Coastguard Worker    packssdw             m0, m4
2399*c0909341SAndroid Build Coastguard Worker    packssdw             m1, m5
2400*c0909341SAndroid Build Coastguard Worker    packssdw             m2, m6
2401*c0909341SAndroid Build Coastguard Worker    packssdw             m3, m7
; r6 is not used in this routine; presumably it is a constant base pointer
; for the 8bpc code the visible callers invoke right afterwards -- TODO confirm
2402*c0909341SAndroid Build Coastguard Worker    lea                  r6, [deint_shuf+128]
2403*c0909341SAndroid Build Coastguard Worker    punpckhwd            m4, m0, m1
2404*c0909341SAndroid Build Coastguard Worker    punpcklwd            m0, m1
2405*c0909341SAndroid Build Coastguard Worker    punpckhwd            m1, m2, m3
2406*c0909341SAndroid Build Coastguard Worker    punpcklwd            m2, m3
2407*c0909341SAndroid Build Coastguard Worker    punpckhdq            m3, m0, m2
2408*c0909341SAndroid Build Coastguard Worker    punpckldq            m0, m2
2409*c0909341SAndroid Build Coastguard Worker    punpckhdq            m2, m4, m1
2410*c0909341SAndroid Build Coastguard Worker    punpckldq            m4, m1
; recombine 128-bit lanes into the final m0-m3
2411*c0909341SAndroid Build Coastguard Worker    vinserti128          m1, m3, xm2, 1
2412*c0909341SAndroid Build Coastguard Worker    vperm2i128           m3, m2, 0x31
2413*c0909341SAndroid Build Coastguard Worker    vperm2i128           m2, m0, m4, 0x31
2414*c0909341SAndroid Build Coastguard Worker    vinserti128          m0, xm4, 1
2415*c0909341SAndroid Build Coastguard Worker    ret
2416*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; .main_rect2: add m11 to m0-m7, arithmetic-shift right by 12, then fall
; through into .main. Presumably m11 is the rounding constant pd_2048 and the
; inputs were pre-multiplied by the caller -- neither is visible here.
2417*c0909341SAndroid Build Coastguard Worker.main_rect2:
2418*c0909341SAndroid Build Coastguard Worker    REPX     {paddd x, m11}, m0, m1, m2, m3, m4, m5, m6, m7
2419*c0909341SAndroid Build Coastguard Worker    REPX     {psrad x, 12 }, m0, m1, m2, m3, m4, m5, m6, m7
; .main: 8-point transform core on dwords, stopping before the last butterfly.
;   in:  m0-m7 = inputs, m11 = rounding constant added before each >>12
;        (callers visible in this file load pd_2048), m12/m13 = clamp min/max
;   out: m0, m6, m5, m3 = dct4 out0..out3 (clamped);
;        m9 = t7, m8 = t4, m7 and m4 = the two rotated odd terms
;        (.round_shift1 / .round_shift4 combine these into out0..out7)
;   clobbers: m1, m2, m10 (ITX_MULSUB_2D is defined outside this chunk; m8-m10
;        are passed to it, presumably as temporaries)
2420*c0909341SAndroid Build Coastguard Worker.main:
2421*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         5, 3, 8, 9, 10, 11, 3406, 2276 ; t5a t6a
2422*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         1, 7, 8, 9, 10, 11,  799, 4017 ; t4a t7a
2423*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         2, 6, 8, 9, 10, 11, 1567, 3784 ; t2  t3
2424*c0909341SAndroid Build Coastguard Worker    paddd                m8, m1, m5 ; t4
2425*c0909341SAndroid Build Coastguard Worker    psubd                m1, m5     ; t5a
2426*c0909341SAndroid Build Coastguard Worker    paddd                m9, m7, m3 ; t7
2427*c0909341SAndroid Build Coastguard Worker    psubd                m7, m3     ; t6a
; m3 has been consumed above, so reuse it for the 2896 multiplier
2428*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m3, [pd_2896]
2429*c0909341SAndroid Build Coastguard Worker    REPX    {pmaxsd x, m12}, m1, m8, m7, m9
2430*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m1, m8, m7, m9
2431*c0909341SAndroid Build Coastguard Worker    REPX    {pmulld x, m3 }, m0, m4, m7, m1
; the rounding constant is added once to the minuend/addend of each pair, so
; both the sum and the difference below are rounded before the shift
2432*c0909341SAndroid Build Coastguard Worker    paddd                m0, m11
2433*c0909341SAndroid Build Coastguard Worker    paddd                m7, m11
2434*c0909341SAndroid Build Coastguard Worker    psubd                m5, m0, m4
2435*c0909341SAndroid Build Coastguard Worker    paddd                m0, m4
2436*c0909341SAndroid Build Coastguard Worker    psubd                m4, m7, m1
2437*c0909341SAndroid Build Coastguard Worker    paddd                m7, m1
2438*c0909341SAndroid Build Coastguard Worker    REPX    {psrad  x, 12 }, m5, m0, m4, m7
2439*c0909341SAndroid Build Coastguard Worker    psubd                m3, m0, m6 ; dct4 out3
2440*c0909341SAndroid Build Coastguard Worker    paddd                m0, m6     ; dct4 out0
2441*c0909341SAndroid Build Coastguard Worker    paddd                m6, m5, m2 ; dct4 out1
2442*c0909341SAndroid Build Coastguard Worker    psubd                m5, m2     ; dct4 out2
2443*c0909341SAndroid Build Coastguard Worker    REPX    {pmaxsd x, m12}, m0, m6, m5, m3
2444*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m0, m6, m5, m3
2445*c0909341SAndroid Build Coastguard Worker    ret
2446*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; .round_shift1: final butterfly of the 8-point transform followed by a
; rounding right shift by 1.
;   in:  m0, m6, m5, m3 = dct4 out0..out3; m9, m7, m4, m8 = odd-half terms
;        (the register state .main leaves behind)
;   out: m0-m7 = out0..out7, each (x + 1) >> 1
2447*c0909341SAndroid Build Coastguard Worker.round_shift1:
; m1 = all ones (-1 per dword); subtracting it adds the +1 rounding bias.
; Biasing only the four even terms is enough: every output below is one even
; term plus or minus one odd term.
2448*c0909341SAndroid Build Coastguard Worker    pcmpeqd              m1, m1
2449*c0909341SAndroid Build Coastguard Worker    REPX      {psubd x, m1}, m0, m6, m5, m3
2450*c0909341SAndroid Build Coastguard Worker    paddd                m1, m6, m7 ; out1
2451*c0909341SAndroid Build Coastguard Worker    psubd                m6, m7     ; out6
2452*c0909341SAndroid Build Coastguard Worker    psubd                m7, m0, m9 ; out7
2453*c0909341SAndroid Build Coastguard Worker    paddd                m0, m9     ; out0
2454*c0909341SAndroid Build Coastguard Worker    paddd                m2, m5, m4 ; out2
2455*c0909341SAndroid Build Coastguard Worker    psubd                m5, m4     ; out5
2456*c0909341SAndroid Build Coastguard Worker    psubd                m4, m3, m8 ; out4
2457*c0909341SAndroid Build Coastguard Worker    paddd                m3, m8     ; out3
2458*c0909341SAndroid Build Coastguard Worker    REPX      {psrad x, 1 }, m0, m1, m2, m3, m4, m5, m6, m7
2459*c0909341SAndroid Build Coastguard Worker    ret
2460*c0909341SAndroid Build Coastguard Worker
; Instantiate the 8x8 entry points whose first transform type is adst.
; INV_TXFM_8X8_FN is defined outside this chunk; presumably each expansion
; dispatches to the matching *_internal .pass1/.pass2 labels -- TODO confirm.
2461*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN adst, dct
2462*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN adst, adst
2463*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN adst, flipadst
2464*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN adst, identity
2465*c0909341SAndroid Build Coastguard Worker
; iadst_8x8_internal_10bpc: 8x8 inverse ADST, 10 bits per component.
; Named registers: dst, stride, c (coefficients), eob, tx2 (address of the
; second-pass routine, reached via "jmp tx2q").
;   .pass1     first pass on dword coefficients read from cq; tail-jumps to tx2q
;   .pass2     second pass on packed words; adds the result to dst
;   .main      load the 8 coefficient rows + m11 = pd_2048, falls into .main2
;   .main2     IADST8_1D core; also entered directly by the 12bpc code
;   .main_end  rounding/negation of the first-pass outputs
2466*c0909341SAndroid Build Coastguard Workercglobal iadst_8x8_internal_10bpc, 0, 7, 14, dst, stride, c, eob, tx2
; m12/m13 = clamp bounds used by the transform core (the 12bpc variant jumps
; straight to .pass1 with its own bounds already loaded)
2467*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [clip_18b_min]
2468*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_18b_max]
2469*c0909341SAndroid Build Coastguard Worker.pass1:
2470*c0909341SAndroid Build Coastguard Worker    call .main
2471*c0909341SAndroid Build Coastguard Worker    call .main_end
2472*c0909341SAndroid Build Coastguard Worker    jmp                tx2q
2473*c0909341SAndroid Build Coastguard Worker.pass2:
2474*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).transpose_8x8_packed
2475*c0909341SAndroid Build Coastguard Worker    pshufd               m4, m0, q1032
2476*c0909341SAndroid Build Coastguard Worker    pshufd               m5, m1, q1032
; the second pass works on 16-bit data, so the 8bpc implementation is reused
2477*c0909341SAndroid Build Coastguard Worker    call m(iadst_8x8_internal_8bpc).main_pass2
; Build the scale in m12: the xmm-width broadcast zeroes the upper lane, so
; after the subtraction the low lane is 4096-2048 = +2048 and the high lane is
; 0-2048 = -2048. pmulhrsw by +/-2048 is a rounding shift right by 4, negated
; in the high lane. NOTE(review): presumably main_pass2 leaves the high-lane
; rows sign-flipped -- confirm against the 8bpc source.
2478*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m5, [pw_2048]
2479*c0909341SAndroid Build Coastguard Worker    vpbroadcastd       xm12, [pw_4096]
2480*c0909341SAndroid Build Coastguard Worker    psubw               m12, m5
2481*c0909341SAndroid Build Coastguard Worker    REPX {vpermq x, x, q3120}, m0, m1, m2, m3
2482*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m12
2483*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m12
; first four rows, then the remaining four
2484*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).write_8x4_start
2485*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m2, m12
2486*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m3, m12
2487*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).write_8x4
2488*c0909341SAndroid Build Coastguard Worker    RET
2489*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; .main: load eight 32-byte coefficient rows from cq into m0-m7 and the
; rounding constant pd_2048 into m11, then fall through into .main2.
2490*c0909341SAndroid Build Coastguard Worker.main:
2491*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+32*0]
2492*c0909341SAndroid Build Coastguard Worker    mova                 m7, [cq+32*7]
2493*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+32*1]
2494*c0909341SAndroid Build Coastguard Worker    mova                 m6, [cq+32*6]
2495*c0909341SAndroid Build Coastguard Worker    mova                 m2, [cq+32*2]
2496*c0909341SAndroid Build Coastguard Worker    mova                 m5, [cq+32*5]
2497*c0909341SAndroid Build Coastguard Worker    mova                 m3, [cq+32*3]
2498*c0909341SAndroid Build Coastguard Worker    mova                 m4, [cq+32*4]
2499*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m11, [pd_2048]
; .main2: transform core (IADST8_1D is defined outside this chunk).
;   out: m0-m7 = un-rounded outputs, m8 = pd_1, m9 = pd_3072
;        (both constants are consumed by the .main_end routines)
2500*c0909341SAndroid Build Coastguard Worker.main2:
2501*c0909341SAndroid Build Coastguard Worker    IADST8_1D             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13
2502*c0909341SAndroid Build Coastguard Worker    psrld                m8, 10 ; pd_1
2503*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m9, [pd_3072]
2504*c0909341SAndroid Build Coastguard Worker    ret
2505*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; .main_end: first-pass rounding.
;   m0, m6: (x + 1) >> 1          m1, m7: (1 - x) >> 1
;   m2, m4: (x + 3072) >> 12      m3, m5: (3071 - x) >> 12
;   in:  m8 = pd_1, m9 = pd_3072 (from .main2); clobbers m8
2506*c0909341SAndroid Build Coastguard Worker.main_end:
2507*c0909341SAndroid Build Coastguard Worker    paddd                m0, m8
2508*c0909341SAndroid Build Coastguard Worker    psubd                m1, m8, m1
2509*c0909341SAndroid Build Coastguard Worker    paddd                m6, m8
2510*c0909341SAndroid Build Coastguard Worker    psubd                m7, m8, m7
2511*c0909341SAndroid Build Coastguard Worker    REPX      {psrad x, 1 }, m0, m1, m6, m7
2512*c0909341SAndroid Build Coastguard Worker    ; (1 + ((x + 1024) >> 11)) >> 1 = (3072 + x) >> 12
2513*c0909341SAndroid Build Coastguard Worker    ; (1 - ((x + 1024) >> 11)) >> 1 = (3071 - x) >> 12
2514*c0909341SAndroid Build Coastguard Worker    psubd                m8, m9, m8 ; pd_3071
2515*c0909341SAndroid Build Coastguard Worker    paddd                m2, m9
2516*c0909341SAndroid Build Coastguard Worker    psubd                m3, m8, m3
2517*c0909341SAndroid Build Coastguard Worker    paddd                m4, m9
2518*c0909341SAndroid Build Coastguard Worker    psubd                m5, m8, m5
2519*c0909341SAndroid Build Coastguard Worker    REPX      {psrad x, 12}, m2, m3, m4, m5
2520*c0909341SAndroid Build Coastguard Worker    ret
2521*c0909341SAndroid Build Coastguard Worker
; Instantiate the 8x8 entry points whose first transform type is flipadst.
; INV_TXFM_8X8_FN is defined outside this chunk; presumably each expansion
; dispatches to the matching *_internal .pass1/.pass2 labels -- TODO confirm.
2522*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN flipadst, dct
2523*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN flipadst, adst
2524*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN flipadst, flipadst
2525*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN flipadst, identity
2526*c0909341SAndroid Build Coastguard Worker
; iflipadst_8x8_internal_10bpc: 8x8 inverse flipped ADST, 10 bits per
; component. Reuses the ADST core and differs from iadst_8x8_internal_10bpc
; only in the order (and sign lane) in which the outputs are emitted.
2527*c0909341SAndroid Build Coastguard Workercglobal iflipadst_8x8_internal_10bpc, 0, 7, 14, dst, stride, c, eob, tx2
2528*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [clip_18b_min]
2529*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_18b_max]
2530*c0909341SAndroid Build Coastguard Worker.pass1:
2531*c0909341SAndroid Build Coastguard Worker    call m(iadst_8x8_internal_10bpc).main
2532*c0909341SAndroid Build Coastguard Worker    call .main_end
2533*c0909341SAndroid Build Coastguard Worker    jmp                tx2q
2534*c0909341SAndroid Build Coastguard Worker.pass2:
2535*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).transpose_8x8_packed
2536*c0909341SAndroid Build Coastguard Worker    pshufd               m4, m0, q1032
2537*c0909341SAndroid Build Coastguard Worker    pshufd               m5, m1, q1032
2538*c0909341SAndroid Build Coastguard Worker    call m(iadst_8x8_internal_8bpc).main_pass2
; Scale in m12: the xmm-width broadcast zeroes the upper lane of m5, so the
; low lane becomes 2048-4096 = -2048 and the high lane stays +2048, i.e. the
; opposite sign layout to the non-flipped ADST pass2.
2539*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [pw_2048]
2540*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        xm5, [pw_4096]
2541*c0909341SAndroid Build Coastguard Worker    psubw               m12, m5
; q2031 is q3120 with the two 128-bit lanes swapped, and the registers are
; consumed in reverse order (m3, m2, m1, m0)
2542*c0909341SAndroid Build Coastguard Worker    vpermq               m8, m3, q2031
2543*c0909341SAndroid Build Coastguard Worker    vpermq               m9, m2, q2031
2544*c0909341SAndroid Build Coastguard Worker    vpermq               m2, m1, q2031
2545*c0909341SAndroid Build Coastguard Worker    vpermq               m3, m0, q2031
2546*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m8, m12
2547*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m9, m12
2548*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).write_8x4_start
2549*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m2, m12
2550*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m3, m12
2551*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).write_8x4
2552*c0909341SAndroid Build Coastguard Worker    RET
2553*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; .main_end: same rounding as the ADST .main_end, with the results written to
; the mirrored register (new m7 comes from old m0, new m0 from old m7, ...).
;   new m7 = (1 + m0) >> 1         new m0 = (1 - m7) >> 1
;   new m6 = (1 - m1) >> 1         new m1 = (1 + m6) >> 1
;   new m5 = (3072 + m2) >> 12     new m2 = (3071 - m5) >> 12
;   new m4 = (3071 - m3) >> 12     new m3 = (3072 + m4) >> 12
;   in:  m8 = pd_1, m9 = pd_3072 (as set by the ADST .main2 called from .pass1)
;   clobbers: m8, m10
2554*c0909341SAndroid Build Coastguard Worker.main_end:
2555*c0909341SAndroid Build Coastguard Worker    paddd               m10, m8, m0
2556*c0909341SAndroid Build Coastguard Worker    psubd                m0, m8, m7
2557*c0909341SAndroid Build Coastguard Worker    psubd                m7, m8, m1
2558*c0909341SAndroid Build Coastguard Worker    paddd                m1, m8, m6
2559*c0909341SAndroid Build Coastguard Worker    psrad                m0, 1
2560*c0909341SAndroid Build Coastguard Worker    psrad                m1, 1
2561*c0909341SAndroid Build Coastguard Worker    psrad                m6, m7, 1
2562*c0909341SAndroid Build Coastguard Worker    psrad                m7, m10, 1
2563*c0909341SAndroid Build Coastguard Worker    psubd                m8, m9, m8 ; pd_3071 (= 3072 - 1) with m9/m8 as set by the ADST .main2
2564*c0909341SAndroid Build Coastguard Worker    psubd               m10, m8, m5
2565*c0909341SAndroid Build Coastguard Worker    paddd                m5, m9, m2
2566*c0909341SAndroid Build Coastguard Worker    psubd                m2, m8, m3
2567*c0909341SAndroid Build Coastguard Worker    paddd                m3, m9, m4
2568*c0909341SAndroid Build Coastguard Worker    psrad                m4, m2, 12
2569*c0909341SAndroid Build Coastguard Worker    psrad                m2, m10, 12
2570*c0909341SAndroid Build Coastguard Worker    psrad                m3, 12
2571*c0909341SAndroid Build Coastguard Worker    psrad                m5, 12
2572*c0909341SAndroid Build Coastguard Worker    ret
2573*c0909341SAndroid Build Coastguard Worker
; Instantiate the 8x8 entry points whose first transform type is identity.
; INV_TXFM_8X8_FN is defined outside this chunk; presumably each expansion
; dispatches to the matching *_internal .pass1/.pass2 labels -- TODO confirm.
2574*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN identity, dct
2575*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN identity, adst
2576*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN identity, flipadst
2577*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN identity, identity
2578*c0909341SAndroid Build Coastguard Worker
; iidentity_8x8_internal_10bpc: 8x8 identity transform, 10 bits per component.
;   .pass1        only loads the eight coefficient rows, then jumps to tx2q
;   .pass2        packs to words, transposes, scales and adds to dst
;   .pass2_main   shared with the 12bpc variant, which jumps here after
;                 loading its own pixel maximum into m7
;   .write_2x8x2* store helpers (see below)
2579*c0909341SAndroid Build Coastguard Workercglobal iidentity_8x8_internal_10bpc, 0, 7, 14, dst, stride, c, eob, tx2
2580*c0909341SAndroid Build Coastguard Worker.pass1:
2581*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+32*0]
2582*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+32*1]
2583*c0909341SAndroid Build Coastguard Worker    mova                 m2, [cq+32*2]
2584*c0909341SAndroid Build Coastguard Worker    mova                 m3, [cq+32*3]
2585*c0909341SAndroid Build Coastguard Worker    mova                 m4, [cq+32*4]
2586*c0909341SAndroid Build Coastguard Worker    mova                 m5, [cq+32*5]
2587*c0909341SAndroid Build Coastguard Worker    mova                 m6, [cq+32*6]
2588*c0909341SAndroid Build Coastguard Worker    mova                 m7, [cq+32*7]
2589*c0909341SAndroid Build Coastguard Worker    jmp                tx2q
2590*c0909341SAndroid Build Coastguard Worker.pass2:
; m7 must be packed into m3 first: it is reused right away as the pixel maximum
2591*c0909341SAndroid Build Coastguard Worker    packssdw             m3, m7
2592*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m7, [pixel_10bpc_max]
2593*c0909341SAndroid Build Coastguard Worker.pass2_main:
2594*c0909341SAndroid Build Coastguard Worker    packssdw             m0, m4
2595*c0909341SAndroid Build Coastguard Worker    packssdw             m1, m5
2596*c0909341SAndroid Build Coastguard Worker    packssdw             m2, m6
; pmulhrsw by 4096 is a rounding right shift by 3: (x + 4) >> 3
2597*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [pw_4096]
2598*c0909341SAndroid Build Coastguard Worker    punpckhwd            m4, m0, m1
2599*c0909341SAndroid Build Coastguard Worker    punpcklwd            m0, m1
2600*c0909341SAndroid Build Coastguard Worker    punpckhwd            m1, m2, m3
2601*c0909341SAndroid Build Coastguard Worker    punpcklwd            m2, m3
2602*c0909341SAndroid Build Coastguard Worker    punpckhdq            m3, m0, m2
2603*c0909341SAndroid Build Coastguard Worker    punpckldq            m0, m2
2604*c0909341SAndroid Build Coastguard Worker    punpckldq            m2, m4, m1
2605*c0909341SAndroid Build Coastguard Worker    punpckhdq            m4, m1
2606*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m1, m0, m2 ; 1 5
2607*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m0, m2     ; 0 4
2608*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m2, m3, m4 ; 2 6
2609*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m3, m4     ; 3 7
2610*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m12
2611*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m12
; first call writes rows 0/4 and 1/5, second call rows 2/6 and 3/7
2612*c0909341SAndroid Build Coastguard Worker    call .write_2x8x2_start
2613*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m2, m12
2614*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m3, m12
2615*c0909341SAndroid Build Coastguard Worker    call .write_2x8x2_zero
2616*c0909341SAndroid Build Coastguard Worker    RET
; .write_2x8x2_start: set up r6 = strideq*5 and m6 = 0, then fall through.
2617*c0909341SAndroid Build Coastguard Worker.write_2x8x2_start:
2618*c0909341SAndroid Build Coastguard Worker    lea                  r6, [strideq*5]
2619*c0909341SAndroid Build Coastguard Worker    pxor                 m6, m6
; .write_2x8x2_zero: clear 32*4 bytes of coefficients at cq, advance cq, then
; fall through. Requires m6 = 0.
2620*c0909341SAndroid Build Coastguard Worker.write_2x8x2_zero:
2621*c0909341SAndroid Build Coastguard Worker    mova          [cq+32*0], m6
2622*c0909341SAndroid Build Coastguard Worker    mova          [cq+32*1], m6
2623*c0909341SAndroid Build Coastguard Worker    mova          [cq+32*2], m6
2624*c0909341SAndroid Build Coastguard Worker    mova          [cq+32*3], m6
2625*c0909341SAndroid Build Coastguard Worker    add                  cq, 32*4
; .write_2x8x2: add residuals to four dst rows, clamp to [0, m7] and store.
;   in:  m0 = rows +0 (low lane) and +4 (high lane), m1 = rows +1 and +5,
;        m6 = zero, m7 = pixel maximum, r6 = strideq*5
;   out: dstq advanced by 2 rows; clobbers m0, m1, m4, m5
2626*c0909341SAndroid Build Coastguard Worker.write_2x8x2:
2627*c0909341SAndroid Build Coastguard Worker    mova                xm4, [dstq+strideq*0]
2628*c0909341SAndroid Build Coastguard Worker    vinserti128          m4, [dstq+strideq*4], 1
2629*c0909341SAndroid Build Coastguard Worker    mova                xm5, [dstq+strideq*1]
2630*c0909341SAndroid Build Coastguard Worker    vinserti128          m5, [dstq+r6       ], 1
2631*c0909341SAndroid Build Coastguard Worker    paddw                m0, m4
2632*c0909341SAndroid Build Coastguard Worker    paddw                m1, m5
2633*c0909341SAndroid Build Coastguard Worker    pmaxsw               m0, m6
2634*c0909341SAndroid Build Coastguard Worker    pmaxsw               m1, m6
2635*c0909341SAndroid Build Coastguard Worker    pminsw               m0, m7
2636*c0909341SAndroid Build Coastguard Worker    pminsw               m1, m7
2637*c0909341SAndroid Build Coastguard Worker    mova         [dstq+strideq*0], xm0
2638*c0909341SAndroid Build Coastguard Worker    mova         [dstq+strideq*1], xm1
2639*c0909341SAndroid Build Coastguard Worker    vextracti128 [dstq+strideq*4], m0, 1
2640*c0909341SAndroid Build Coastguard Worker    vextracti128 [dstq+r6       ], m1, 1
2641*c0909341SAndroid Build Coastguard Worker    lea                dstq, [dstq+strideq*2]
2642*c0909341SAndroid Build Coastguard Worker    ret
2643*c0909341SAndroid Build Coastguard Worker
; TRANSPOSE_8X8_DWORD: transpose an 8x8 matrix of dwords held in eight ymm
; registers (one row per register), in place.
;   %1-%8   register numbers of the rows; they receive out0-out7 in that order
;   %9-%12  register numbers of four temporaries (clobbered)
; The trailing diagrams show "low-lane high-lane" contents, one letter per
; dword. The alphabet runs out after row 6, so the diagrams for rows 7 and 8
; reuse the lowercase letters a..l for different elements.
2644*c0909341SAndroid Build Coastguard Worker%macro TRANSPOSE_8X8_DWORD 12 ; src/dst[1-8], tmp[1-4]
; step 1: interleave dwords of adjacent row pairs
2645*c0909341SAndroid Build Coastguard Worker    punpckldq            m%9,  m%1,  m%2 ; aibj emfn
2646*c0909341SAndroid Build Coastguard Worker    punpckhdq            m%1,  m%2       ; ckdl gohp
2647*c0909341SAndroid Build Coastguard Worker    punpckldq           m%10,  m%3,  m%4 ; qyrz uCvD
2648*c0909341SAndroid Build Coastguard Worker    punpckhdq            m%3,  m%4       ; sAtB wExF
2649*c0909341SAndroid Build Coastguard Worker    punpckldq           m%11,  m%5,  m%6 ; GOHP KSLT
2650*c0909341SAndroid Build Coastguard Worker    punpckhdq            m%5,  m%6       ; IQJR MUNV
2651*c0909341SAndroid Build Coastguard Worker    punpckldq           m%12,  m%7,  m%8 ; WeXf aibj
2652*c0909341SAndroid Build Coastguard Worker    punpckhdq            m%7,  m%8       ; YgZh ckdl
; step 2: interleave qwords, giving one column of four rows per 128-bit lane
2653*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m%2,  m%9, m%10 ; aiqy emuC
2654*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m%9, m%10       ; bjrz fnvD
2655*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m%4,  m%1,  m%3 ; cksA gowE
2656*c0909341SAndroid Build Coastguard Worker    punpckhqdq          m%10,  m%1,  m%3 ; dltB hpxF
2657*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m%6, m%11, m%12 ; GOWe KSai
2658*c0909341SAndroid Build Coastguard Worker    punpckhqdq          m%11, m%12       ; HPXf LTbj
2659*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m%8,  m%5,  m%7 ; IQYg MUck
2660*c0909341SAndroid Build Coastguard Worker    punpckhqdq          m%12,  m%5,  m%7 ; JRZh NVdl
; step 3: combine matching 128-bit lanes (0x20 = both low lanes, 0x31 = both
; high lanes) to form complete output rows
2661*c0909341SAndroid Build Coastguard Worker    vperm2i128           m%1,  m%2,  m%6, 0x20   ; out0
2662*c0909341SAndroid Build Coastguard Worker    vperm2i128           m%5,  m%2,  m%6, 0x31   ; out4
2663*c0909341SAndroid Build Coastguard Worker    vperm2i128           m%2,  m%9, m%11, 0x20   ; out1
2664*c0909341SAndroid Build Coastguard Worker    vperm2i128           m%6,  m%9, m%11, 0x31   ; out5
2665*c0909341SAndroid Build Coastguard Worker    vperm2i128           m%3,  m%4,  m%8, 0x20   ; out2
2666*c0909341SAndroid Build Coastguard Worker    vperm2i128           m%7,  m%4,  m%8, 0x31   ; out6
2667*c0909341SAndroid Build Coastguard Worker    vperm2i128           m%4, m%10, m%12, 0x20   ; out3
2668*c0909341SAndroid Build Coastguard Worker    vperm2i128           m%8, m%10, m%12, 0x31   ; out7
2669*c0909341SAndroid Build Coastguard Worker%endmacro
2670*c0909341SAndroid Build Coastguard Worker
; Instantiate the 12bpc 8x8 entry points whose first transform type is dct.
; INV_TXFM_8X8_FN is defined outside this chunk; the third argument is
; presumably the bit depth -- TODO confirm against the macro definition.
2671*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN dct, dct,      12
2672*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN dct, identity, 12
2673*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN dct, adst,     12
2674*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN dct, flipadst, 12
2675*c0909341SAndroid Build Coastguard Worker
; idct_8x8_internal_12bpc: 8x8 inverse DCT, 12 bits per component.
; The first pass is the 10bpc code run with the clip_20b_* clamp bounds.
; The second pass stays in dwords instead of using the packed 16-bit path.
2676*c0909341SAndroid Build Coastguard Workercglobal idct_8x8_internal_12bpc, 0, 7, 14, dst, stride, c, eob, tx2
2677*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [clip_20b_min]
2678*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_20b_max]
2679*c0909341SAndroid Build Coastguard Worker    jmp m(idct_8x8_internal_10bpc).pass1
2680*c0909341SAndroid Build Coastguard Worker.pass2:
; clamp the first-pass output with the clip_18b_* bounds before transforming
2681*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [clip_18b_min]
2682*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_18b_max]
2683*c0909341SAndroid Build Coastguard Worker    REPX    {pmaxsd x, m12}, m0, m1, m2, m3, m4, m5, m6, m7
2684*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m0, m1, m2, m3, m4, m5, m6, m7
2685*c0909341SAndroid Build Coastguard Worker    call .transpose_8x8
; m11 = rounding constant expected by the 10bpc .main
2686*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m11, [pd_2048]
2687*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).main
2688*c0909341SAndroid Build Coastguard Worker    call .round_shift4
; the pack-and-store tail is shared with the 12bpc ADST
2689*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_8x8_internal_12bpc).pass2_end
2690*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; .write_8x4_start: set up the constants the 10bpc .write_8x4 relies on:
; m11 = pixel_12bpc_max, r6 = strideq*3, m10 = 0. This variant returns
; instead of falling through, so callers follow it with an explicit call to
; the 10bpc .write_8x4.
2691*c0909341SAndroid Build Coastguard Worker.write_8x4_start:
2692*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m11, [pixel_12bpc_max]
2693*c0909341SAndroid Build Coastguard Worker    lea                  r6, [strideq*3]
2694*c0909341SAndroid Build Coastguard Worker    pxor                m10, m10
2695*c0909341SAndroid Build Coastguard Worker    ret
2696*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; .transpose_8x8: in-place dword transpose of m0-m7; clobbers m8-m11.
2697*c0909341SAndroid Build Coastguard Worker.transpose_8x8:
2698*c0909341SAndroid Build Coastguard Worker    TRANSPOSE_8X8_DWORD 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
2699*c0909341SAndroid Build Coastguard Worker    ret
2700*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; .round_shift4: final butterfly of the 8-point transform followed by a
; rounding right shift by 4 (bias 8). Same structure as the 10bpc
; .round_shift1: the bias is added to the four even terms only, which
; propagates it to every output.
;   in:  register state left by the 10bpc .main
;   out: m0-m7 = out0..out7
2701*c0909341SAndroid Build Coastguard Worker.round_shift4:
2702*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m1, [pd_8]
2703*c0909341SAndroid Build Coastguard Worker    REPX      {paddd x, m1}, m0, m6, m5, m3
2704*c0909341SAndroid Build Coastguard Worker    paddd                m1, m6, m7 ; out1
2705*c0909341SAndroid Build Coastguard Worker    psubd                m6, m7     ; out6
2706*c0909341SAndroid Build Coastguard Worker    psubd                m7, m0, m9 ; out7
2707*c0909341SAndroid Build Coastguard Worker    paddd                m0, m9     ; out0
2708*c0909341SAndroid Build Coastguard Worker    paddd                m2, m5, m4 ; out2
2709*c0909341SAndroid Build Coastguard Worker    psubd                m5, m4     ; out5
2710*c0909341SAndroid Build Coastguard Worker    psubd                m4, m3, m8 ; out4
2711*c0909341SAndroid Build Coastguard Worker    paddd                m3, m8     ; out3
2712*c0909341SAndroid Build Coastguard Worker    REPX       {psrad x, 4}, m0, m1, m2, m3, m4, m5, m6, m7
2713*c0909341SAndroid Build Coastguard Worker    ret
2714*c0909341SAndroid Build Coastguard Worker
; Instantiate the 12bpc 8x8 entry points whose first transform type is adst.
; INV_TXFM_8X8_FN is defined outside this chunk; the third argument is
; presumably the bit depth -- TODO confirm against the macro definition.
2715*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN adst, dct,      12
2716*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN adst, adst,     12
2717*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN adst, flipadst, 12
2718*c0909341SAndroid Build Coastguard Worker
2719*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN adst, identity, 12
2720*c0909341SAndroid Build Coastguard Worker
; iadst_8x8_internal_12bpc: 8x8 inverse ADST, 12 bits per component.
; The first pass is the 10bpc code run with the clip_20b_* clamp bounds.
;   .pass2        dword second pass, then pack + store
;   .pass2_end    pack m0-m7 to words and add to dst (also the tail of the
;                 12bpc DCT pass2)
;   .pass2_main   clamp, transpose, ADST core, rounding (also called by the
;                 12bpc flipped ADST)
;   .pass2_main2  entry that skips the clamp/transpose/m11 setup
2721*c0909341SAndroid Build Coastguard Workercglobal iadst_8x8_internal_12bpc, 0, 7, 14, dst, stride, c, eob, tx2
2722*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [clip_20b_min]
2723*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_20b_max]
2724*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_8x8_internal_10bpc).pass1
2725*c0909341SAndroid Build Coastguard Worker.pass2:
2726*c0909341SAndroid Build Coastguard Worker    call .pass2_main
2727*c0909341SAndroid Build Coastguard Worker.pass2_end:
; packssdw works per 128-bit lane, so q3120 is needed afterwards to put one
; full 8-pixel row in each lane (row n low, row n+1 high)
2728*c0909341SAndroid Build Coastguard Worker    packssdw             m0, m1
2729*c0909341SAndroid Build Coastguard Worker    packssdw             m1, m2, m3
2730*c0909341SAndroid Build Coastguard Worker    REPX {vpermq x, x, q3120}, m0, m1
2731*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_12bpc).write_8x4_start
2732*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).write_8x4
2733*c0909341SAndroid Build Coastguard Worker    packssdw             m0, m4, m5
2734*c0909341SAndroid Build Coastguard Worker    packssdw             m1, m6, m7
2735*c0909341SAndroid Build Coastguard Worker    REPX {vpermq x, x, q3120}, m0, m1
2736*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).write_8x4
2737*c0909341SAndroid Build Coastguard Worker    RET
2738*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; .pass2_main: clamp m0-m7 with the clip_18b_* bounds, transpose, then run the
; ADST core and apply the second-pass rounding.
;   out: m0, m6: (x + 8) >> 4           m1, m7: (8 - x) >> 4
;        m2, m4: (x + 17408) >> 15      m3, m5: (17407 - x) >> 15
;   clobbers: m8-m13
2739*c0909341SAndroid Build Coastguard Worker.pass2_main:
2740*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [clip_18b_min]
2741*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_18b_max]
2742*c0909341SAndroid Build Coastguard Worker    REPX    {pmaxsd x, m12}, m0, m1, m2, m3, m4, m5, m6, m7
2743*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m0, m1, m2, m3, m4, m5, m6, m7
2744*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_12bpc).transpose_8x8
2745*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m11, [pd_2048]
2746*c0909341SAndroid Build Coastguard Worker.pass2_main2:
; the 10bpc .main2 returns with m8 = pd_1 (its pd_3072 in m9 is overwritten below)
2747*c0909341SAndroid Build Coastguard Worker    call m(iadst_8x8_internal_10bpc).main2
2748*c0909341SAndroid Build Coastguard Worker    pslld                m9, m8, 3  ; pd_8
2749*c0909341SAndroid Build Coastguard Worker    paddd                m0, m9
2750*c0909341SAndroid Build Coastguard Worker    psubd                m1, m9, m1 ; 8 - x
2751*c0909341SAndroid Build Coastguard Worker    paddd                m6, m9
2752*c0909341SAndroid Build Coastguard Worker    psubd                m7, m9, m7
2753*c0909341SAndroid Build Coastguard Worker    REPX       {psrad x, 4}, m0, m1, m6, m7
2754*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m9, [pd_17408]
2755*c0909341SAndroid Build Coastguard Worker    psubd                m8, m9, m8 ; 17407
2756*c0909341SAndroid Build Coastguard Worker    paddd                m2, m9
2757*c0909341SAndroid Build Coastguard Worker    psubd                m3, m8, m3
2758*c0909341SAndroid Build Coastguard Worker    paddd                m4, m9
2759*c0909341SAndroid Build Coastguard Worker    psubd                m5, m8, m5
2760*c0909341SAndroid Build Coastguard Worker    REPX      {psrad x, 15}, m2, m3, m4, m5
2761*c0909341SAndroid Build Coastguard Worker    ret
2761*c0909341SAndroid Build Coastguard Worker
2762*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN flipadst, dct,      12
2763*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN flipadst, adst,     12
2764*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN flipadst, flipadst, 12
2765*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN flipadst, identity, 12
2766*c0909341SAndroid Build Coastguard Worker
2767*c0909341SAndroid Build Coastguard Workercglobal iflipadst_8x8_internal_12bpc, 0, 7, 14, dst, stride, c, eob, tx2
2768*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [clip_20b_min]
2769*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_20b_max]
2770*c0909341SAndroid Build Coastguard Worker    jmp m(iflipadst_8x8_internal_10bpc).pass1
2771*c0909341SAndroid Build Coastguard Worker.pass2:
2772*c0909341SAndroid Build Coastguard Worker    call m(iadst_8x8_internal_12bpc).pass2_main
2773*c0909341SAndroid Build Coastguard Worker    packssdw             m7, m7, m6
2774*c0909341SAndroid Build Coastguard Worker    packssdw             m6, m1, m0
2775*c0909341SAndroid Build Coastguard Worker    packssdw             m1, m5, m4
2776*c0909341SAndroid Build Coastguard Worker    vpermq               m0, m7, q3120
2777*c0909341SAndroid Build Coastguard Worker    vpermq               m1, m1, q3120
2778*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_12bpc).write_8x4_start
2779*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).write_8x4
2780*c0909341SAndroid Build Coastguard Worker    packssdw             m0, m3, m2
2781*c0909341SAndroid Build Coastguard Worker    vpermq               m0, m0, q3120
2782*c0909341SAndroid Build Coastguard Worker    vpermq               m1, m6, q3120
2783*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).write_8x4
2784*c0909341SAndroid Build Coastguard Worker    RET
2785*c0909341SAndroid Build Coastguard Worker
; Instantiate the 12 bpc 8x8 entry points whose first transform is the
; identity, one per second-transform type.
; NOTE(review): INV_TXFM_8X8_FN is defined outside this view; the trailing
; 12 is presumably its bitdepth argument - confirm against the macro.
2786*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN identity, dct,      12
2787*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN identity, adst,     12
2788*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN identity, flipadst, 12
2789*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X8_FN identity, identity, 12
2790*c0909341SAndroid Build Coastguard Worker
; iidentity_8x8_internal_12bpc
; Declared through x86inc's cglobal with 0 auto-loaded arguments, 7 GPRs and
; 14 vector registers; the named arguments are dst, stride, c, eob, tx2.
; Both passes are thin wrappers around the 10 bpc implementation:
;   pass 1 - jumps straight to the 10 bpc .pass1 code.
;   pass 2 - packs m3/m7, loads the 12 bpc pixel maximum and continues in
;            the 10 bpc .pass2_main code.
2791*c0909341SAndroid Build Coastguard Workercglobal iidentity_8x8_internal_12bpc, 0, 7, 14, dst, stride, c, eob, tx2
2792*c0909341SAndroid Build Coastguard Worker    jmp m(iidentity_8x8_internal_10bpc).pass1
2793*c0909341SAndroid Build Coastguard Worker.pass2:
; m7 is consumed by the pack (32-bit -> 16-bit, signed saturation) before it
; is overwritten with the broadcast constant on the next line, so these two
; instructions must stay in this order.
2794*c0909341SAndroid Build Coastguard Worker    packssdw             m3, m7
2795*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m7, [pixel_12bpc_max]
2796*c0909341SAndroid Build Coastguard Worker    jmp m(iidentity_8x8_internal_10bpc).pass2_main
2797*c0909341SAndroid Build Coastguard Worker
; INV_TXFM_8X16_FN type1, type2 [, eob_offset = 0 [, bitdepth = 10]]
; Forwards its arguments to INV_TXFM_FN with the block size fixed to 8x16.
; For the dct_dct combination it additionally emits a DC-only sequence that
; computes
;     r6d = ((dword [cq] * 181 + 128) >> 8) * 181
; stores eobd back to [cq], broadcasts dconly_<bitdepth>bpc into m2 and then
; continues at the .dconly2 label of the 8x8 10 bpc dct_dct function.
; NOTE(review): INV_TXFM_FN and .dconly2 are outside this view; how control
; reaches the DC-only code and how r3d, r6d and m2 are consumed afterwards
; should be confirmed there.
2798*c0909341SAndroid Build Coastguard Worker%macro INV_TXFM_8X16_FN 2-4 0,10 ; type1, type2, eob_offset, bitdepth
2799*c0909341SAndroid Build Coastguard Worker    INV_TXFM_FN          %1, %2, %3, 8x16, %4
2800*c0909341SAndroid Build Coastguard Worker%ifidn %1_%2, dct_dct
; First multiplication by 181; 181/256 is approximately 0.707.
2801*c0909341SAndroid Build Coastguard Worker    imul                r6d, [cq], 181
2802*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m2, [dconly_%4bpc]
2803*c0909341SAndroid Build Coastguard Worker    mov                [cq], eobd ; 0
; NOTE(review): presumably r3d is the eob register here, in which case
; (eob being 0 per the comment above) this leaves r3d = 16 - confirm what
; the shared .dconly2 code expects in r3d.
2804*c0909341SAndroid Build Coastguard Worker    or                  r3d, 16
; Round and shift right by 8, then apply the second multiplication by 181.
2805*c0909341SAndroid Build Coastguard Worker    add                 r6d, 128
2806*c0909341SAndroid Build Coastguard Worker    sar                 r6d, 8
2807*c0909341SAndroid Build Coastguard Worker    imul                r6d, 181
2808*c0909341SAndroid Build Coastguard Worker    jmp m(inv_txfm_add_dct_dct_8x8_10bpc).dconly2
2809*c0909341SAndroid Build Coastguard Worker%endif
2810*c0909341SAndroid Build Coastguard Worker%endmacro
2811*c0909341SAndroid Build Coastguard Worker
; Instantiate the 10 bpc 8x16 entry points whose first transform is the DCT.
; The dct/identity pair passes an eob_offset of 35; the others use the
; macro defaults (eob_offset 0, bitdepth 10).
2812*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X16_FN dct, dct
2813*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X16_FN dct, identity, 35
2814*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X16_FN dct, adst
2815*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X16_FN dct, flipadst
2816*c0909341SAndroid Build Coastguard Worker
; idct_8x16_internal_10bpc
; Declared through x86inc's cglobal with 0 auto-loaded arguments, 7 GPRs and
; 16 vector registers; the named arguments are dst, stride, c, eob, tx2.
;
; Pass 1 addresses the coefficient buffer at cq as 16 rows of 32 bytes.
; .pass1_main consumes every other row starting at cq, so the full path runs
; it twice: once with cq advanced by 32 (rows 1, 3, ..., 15) and once at the
; original cq (rows 0, 2, ..., 14). When eob < 43 only the second of those
; runs happens and m8-m15 are zeroed instead.
; Pass 1 leaves the even-row results in m0-m7 and the odd-row results (or
; zeros) in m8-m15, then jumps indirectly through tx2q.
;
; Pass 2 transposes/packs the registers, runs the 8 bpc idct_8x16 .main
; routine and writes the result through the 8x8 10 bpc write helpers.
2817*c0909341SAndroid Build Coastguard Workercglobal idct_8x16_internal_10bpc, 0, 7, 16, dst, stride, c, eob, tx2
; NOTE(review): removes a `cmp` macro definition that is presumably set up
; elsewhere in the build, so the plain cmp instruction is assembled below -
; confirm where the override comes from.
2818*c0909341SAndroid Build Coastguard Worker%undef cmp
; m12/m13 = clamp bounds loaded from clip_18b_min / clip_18b_max.
2819*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [clip_18b_min]
2820*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_18b_max]
2821*c0909341SAndroid Build Coastguard Worker.pass1:
; m14 = multiplier applied to every loaded coefficient in .pass1_main,
; m11 = addend used in front of the ">> 12" shifts in the helpers below.
2822*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m14, [pd_2896]
2823*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m11, [pd_2048]
2824*c0909341SAndroid Build Coastguard Worker    cmp                eobd, 43
2825*c0909341SAndroid Build Coastguard Worker    jl .fast
; Full path, first run: odd rows.
2826*c0909341SAndroid Build Coastguard Worker    add                  cq, 32
2827*c0909341SAndroid Build Coastguard Worker    call .pass1_main
2828*c0909341SAndroid Build Coastguard Worker    sub                  cq, 32
; Park seven of the eight results in the odd rows that were just consumed;
; the eighth stays in a register (m15).
2829*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 1], m0
2830*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 3], m1
2831*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 5], m2
2832*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 7], m3
2833*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 9], m4
2834*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*11], m5
2835*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*13], m6
2836*c0909341SAndroid Build Coastguard Worker    mova                m15, m7
; Second run: even rows.
2837*c0909341SAndroid Build Coastguard Worker    call .pass1_main
; Reload the parked results into m8-m14. This overwrites the constants held
; in m11-m14, which are not used again before the jump to pass 2.
2838*c0909341SAndroid Build Coastguard Worker    mova                 m8, [cq+32* 1]
2839*c0909341SAndroid Build Coastguard Worker    mova                 m9, [cq+32* 3]
2840*c0909341SAndroid Build Coastguard Worker    mova                m10, [cq+32* 5]
2841*c0909341SAndroid Build Coastguard Worker    mova                m11, [cq+32* 7]
2842*c0909341SAndroid Build Coastguard Worker    mova                m12, [cq+32* 9]
2843*c0909341SAndroid Build Coastguard Worker    mova                m13, [cq+32*11]
2844*c0909341SAndroid Build Coastguard Worker    mova                m14, [cq+32*13]
2845*c0909341SAndroid Build Coastguard Worker    jmp                tx2q
2846*c0909341SAndroid Build Coastguard Worker.fast:
; eob < 43: process the even rows only and zero the other half.
2847*c0909341SAndroid Build Coastguard Worker    call .pass1_main
2848*c0909341SAndroid Build Coastguard Worker    pxor                 m8, m8
2849*c0909341SAndroid Build Coastguard Worker    REPX       {mova x, m8}, m9, m10, m11, m12, m13, m14, m15
2850*c0909341SAndroid Build Coastguard Worker    jmp                tx2q
2851*c0909341SAndroid Build Coastguard Worker.pass2:
2852*c0909341SAndroid Build Coastguard Worker    call .transpose
2853*c0909341SAndroid Build Coastguard Worker    call m(idct_8x16_internal_8bpc).main
; m12 = per-word multiplier for the pmulhrsw instructions in .end.
2854*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [pw_2048]
2855*c0909341SAndroid Build Coastguard Worker    REPX {vpermq x, x, q3120}, m0, m2, m4, m6
2856*c0909341SAndroid Build Coastguard Worker    REPX {vpermq x, x, q2031}, m1, m3, m5, m7
; Shared output tail: the adst and flipadst 8x16 functions in this file jump
; here as well, each with its own value in m12. Every register pair is
; scaled with pmulhrsw (rounded (x * m12) >> 15 per 16-bit word) into m0/m1
; and handed to the 8x8 10 bpc write helpers, four pairs in total.
2857*c0909341SAndroid Build Coastguard Worker.end:
2858*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m12
2859*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m12
2860*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).write_8x4_start
2861*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m2, m12
2862*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m3, m12
2863*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).write_8x4
2864*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m4, m12
2865*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m5, m12
2866*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).write_8x4
2867*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m6, m12
2868*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m7, m12
2869*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).write_8x4
2870*c0909341SAndroid Build Coastguard Worker    RET
2871*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; .transpose
; In:  m0-m7 and m8-m15 holding 32-bit values (the two halves produced by
;      pass 1).
; Out: m0-m7 holding 16-bit values after packing and reordering; r6 points
;      at deint_shuf+128.
; Clobbers m8; m9-m15 are only read.
; Also called by the adst and flipadst 8x16 functions in this file.
2872*c0909341SAndroid Build Coastguard Worker.transpose:
; Narrow 32-bit -> 16-bit with signed saturation, merging m(N+8) into mN.
2873*c0909341SAndroid Build Coastguard Worker    packssdw             m0, m8
2874*c0909341SAndroid Build Coastguard Worker    packssdw             m1, m9
2875*c0909341SAndroid Build Coastguard Worker    packssdw             m2, m10
2876*c0909341SAndroid Build Coastguard Worker    packssdw             m3, m11
2877*c0909341SAndroid Build Coastguard Worker    packssdw             m4, m12
2878*c0909341SAndroid Build Coastguard Worker    packssdw             m5, m13
2879*c0909341SAndroid Build Coastguard Worker    packssdw             m6, m14
2880*c0909341SAndroid Build Coastguard Worker    packssdw             m7, m15
; NOTE(review): r6 is not used inside this routine; presumably it is the
; constant-table base expected by the 8 bpc routine the callers invoke
; next - confirm against that routine.
2881*c0909341SAndroid Build Coastguard Worker    lea                  r6, [deint_shuf+128]
; Interleave 16-bit words of neighbouring register pairs.
2882*c0909341SAndroid Build Coastguard Worker    punpckhwd            m8, m0, m1
2883*c0909341SAndroid Build Coastguard Worker    punpcklwd            m0, m1
2884*c0909341SAndroid Build Coastguard Worker    punpckhwd            m1, m2, m3
2885*c0909341SAndroid Build Coastguard Worker    punpcklwd            m2, m3
2886*c0909341SAndroid Build Coastguard Worker    punpcklwd            m3, m4, m5
2887*c0909341SAndroid Build Coastguard Worker    punpckhwd            m4, m5
2888*c0909341SAndroid Build Coastguard Worker    punpckhwd            m5, m6, m7
2889*c0909341SAndroid Build Coastguard Worker    punpcklwd            m6, m7
; Interleave 32-bit dwords.
2890*c0909341SAndroid Build Coastguard Worker    punpckhdq            m7, m3, m6
2891*c0909341SAndroid Build Coastguard Worker    punpckldq            m3, m6
2892*c0909341SAndroid Build Coastguard Worker    punpckhdq            m6, m4, m5
2893*c0909341SAndroid Build Coastguard Worker    punpckldq            m4, m5
2894*c0909341SAndroid Build Coastguard Worker    punpckhdq            m5, m8, m1
2895*c0909341SAndroid Build Coastguard Worker    punpckldq            m8, m1
2896*c0909341SAndroid Build Coastguard Worker    punpckhdq            m1, m0, m2
2897*c0909341SAndroid Build Coastguard Worker    punpckldq            m0, m2
; Recombine 128-bit lanes: each vperm2i128 (0x31) gathers the two high lanes
; of a register pair, the matching vinserti128 gathers the two low lanes.
2898*c0909341SAndroid Build Coastguard Worker    vperm2i128           m2, m0, m3, 0x31
2899*c0909341SAndroid Build Coastguard Worker    vinserti128          m0, xm3, 1
2900*c0909341SAndroid Build Coastguard Worker    vperm2i128           m3, m1, m7, 0x31
2901*c0909341SAndroid Build Coastguard Worker    vinserti128          m1, xm7, 1
2902*c0909341SAndroid Build Coastguard Worker    vperm2i128           m7, m5, m6, 0x31
2903*c0909341SAndroid Build Coastguard Worker    vinserti128          m5, xm6, 1
2904*c0909341SAndroid Build Coastguard Worker    vperm2i128           m6, m8, m4, 0x31
2905*c0909341SAndroid Build Coastguard Worker    vinserti128          m4, m8, xm4, 1
2906*c0909341SAndroid Build Coastguard Worker    ret
2907*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; .pass1_main
; Loads eight 32-byte rows from cq at a stride of 64 bytes (rows 0, 2, ...,
; 14 relative to cq), multiplying each 32-bit element by m14 on the way in
; (the entry point loads m14 from pd_2896). It then runs the 8x8 10 bpc
; .main_rect2 routine and tail-jumps to its .round_shift1 code, so the ret
; of that code returns directly to this routine's caller.
; NOTE(review): the rounding/shift applied to the products happens inside
; the helpers outside this view - confirm there.
2908*c0909341SAndroid Build Coastguard Worker.pass1_main:
2909*c0909341SAndroid Build Coastguard Worker    pmulld               m0, m14, [cq+32* 0]
2910*c0909341SAndroid Build Coastguard Worker    pmulld               m1, m14, [cq+32* 2]
2911*c0909341SAndroid Build Coastguard Worker    pmulld               m2, m14, [cq+32* 4]
2912*c0909341SAndroid Build Coastguard Worker    pmulld               m3, m14, [cq+32* 6]
2913*c0909341SAndroid Build Coastguard Worker    pmulld               m4, m14, [cq+32* 8]
2914*c0909341SAndroid Build Coastguard Worker    pmulld               m5, m14, [cq+32*10]
2915*c0909341SAndroid Build Coastguard Worker    pmulld               m6, m14, [cq+32*12]
2916*c0909341SAndroid Build Coastguard Worker    pmulld               m7, m14, [cq+32*14]
2917*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).main_rect2
2918*c0909341SAndroid Build Coastguard Worker    jmp  m(idct_8x8_internal_10bpc).round_shift1
2919*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; .main_evenhalf
; Final add/subtract stage producing the eight "idct8 out" values in m0-m7
; from the values held in m0 and m3-m9, followed by a clamp of every output
; to [m12, m13]. m8 and m9 are read but not modified.
2920*c0909341SAndroid Build Coastguard Worker.main_evenhalf:
2921*c0909341SAndroid Build Coastguard Worker    paddd                m1, m6, m7  ; idct8 out1
2922*c0909341SAndroid Build Coastguard Worker    psubd                m6, m7      ; idct8 out6
2923*c0909341SAndroid Build Coastguard Worker    psubd                m7, m0, m9  ; idct8 out7
2924*c0909341SAndroid Build Coastguard Worker    paddd                m0, m9      ; idct8 out0
2925*c0909341SAndroid Build Coastguard Worker    paddd                m2, m5, m4  ; idct8 out2
2926*c0909341SAndroid Build Coastguard Worker    psubd                m5, m4      ; idct8 out5
2927*c0909341SAndroid Build Coastguard Worker    psubd                m4, m3, m8  ; idct8 out4
2928*c0909341SAndroid Build Coastguard Worker    paddd                m3, m8      ; idct8 out3
2929*c0909341SAndroid Build Coastguard Worker    REPX    {pmaxsd x, m12}, m0, m1, m2, m3, m4, m5, m6, m7
2930*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m0, m1, m2, m3, m4, m5, m6, m7
2931*c0909341SAndroid Build Coastguard Worker    ret
; The .main_oddhalf* labels below are alternative entry points into one
; routine; they all converge on .main_oddhalf_fast2 and share its ret.
; Register contract visible in the code: m11 is the addend placed in front
; of each ">> 12", m12/m13 are the clamp minimum/maximum, m14 is a
; multiplier, and r6 is the base for the eight 32-byte result stores at
; [r6-32*4] .. [r6+32*3]. Callers must set these up; the entry point of this
; function loads m11-m14 from pd_2048, clip_18b_min, clip_18b_max and
; pd_2896.
;
; .main_oddhalf_fast_rect2: pre-scales m0-m3 with (x + m11) >> 12, then
; falls through to .main_oddhalf_fast.
2932*c0909341SAndroid Build Coastguard Worker.main_oddhalf_fast_rect2:
2933*c0909341SAndroid Build Coastguard Worker    REPX     {paddd x, m11}, m0, m1, m2, m3
2934*c0909341SAndroid Build Coastguard Worker    REPX     {psrad x, 12 }, m0, m1, m2, m3
; Only m0-m3 are taken as inputs here; each one is multiplied by two
; constants, yielding the eight products in m0-m7 that .main_oddhalf_fast2
; expects.
2935*c0909341SAndroid Build Coastguard Worker.main_oddhalf_fast: ; lower half zero
2936*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m7, [pd_4076]
2937*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m8, [pd_401]
2938*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m6, [pd_m1189]
2939*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m9, [pd_3920]
2940*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m5, [pd_3612]
2941*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m10, [pd_1931]
2942*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m4, [pd_m2598]
2943*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m15, [pd_3166]
2944*c0909341SAndroid Build Coastguard Worker    pmulld               m7, m0
2945*c0909341SAndroid Build Coastguard Worker    pmulld               m0, m8
2946*c0909341SAndroid Build Coastguard Worker    pmulld               m6, m1
2947*c0909341SAndroid Build Coastguard Worker    pmulld               m1, m9
2948*c0909341SAndroid Build Coastguard Worker    pmulld               m5, m2
2949*c0909341SAndroid Build Coastguard Worker    pmulld               m2, m10
2950*c0909341SAndroid Build Coastguard Worker    pmulld               m4, m3
2951*c0909341SAndroid Build Coastguard Worker    pmulld               m3, m15
2952*c0909341SAndroid Build Coastguard Worker    jmp .main_oddhalf_fast2
; .main_oddhalf_rect2: pre-scales all eight inputs m0-m7 with
; (x + m11) >> 12, then falls through to .main_oddhalf.
2953*c0909341SAndroid Build Coastguard Worker.main_oddhalf_rect2:
2954*c0909341SAndroid Build Coastguard Worker    REPX     {paddd x, m11}, m0, m1, m2, m3, m4, m5, m6, m7
2955*c0909341SAndroid Build Coastguard Worker    REPX     {psrad x, 12 }, m0, m1, m2, m3, m4, m5, m6, m7
; Full variant taking all eight inputs m0-m7.
; NOTE(review): ITX_MULSUB_2D is defined outside this view; going by the
; trailing comments each invocation produces the named pair of t-values
; from two inputs and two coefficients, with m8-m10 presumably serving as
; temporaries - confirm against the macro.
2956*c0909341SAndroid Build Coastguard Worker.main_oddhalf:
2957*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         0, 7, 8, 9, 10, _,  401, 4076 ; t8a,  t15a
2958*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         6, 1, 8, 9, 10, _, 3920, 1189 ; t11a, t12a
2959*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         2, 5, 8, 9, 10, _, 1931, 3612 ; t10a, t13a
2960*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         4, 3, 8, 9, 10, _, 3166, 2598 ; t9a,  t14a
; Common tail. The two REPX lists below name the same eight registers
; (m0-m7), just in different orders: every value gets (x + m11) >> 12.
2961*c0909341SAndroid Build Coastguard Worker.main_oddhalf_fast2:
2962*c0909341SAndroid Build Coastguard Worker    REPX     {paddd x, m11}, m0, m7, m6, m1, m2, m5, m4, m3
2963*c0909341SAndroid Build Coastguard Worker    REPX     {psrad x, 12 }, m0, m4, m6, m2, m1, m5, m7, m3
2964*c0909341SAndroid Build Coastguard Worker    psubd                m8, m0, m4 ; t9
2965*c0909341SAndroid Build Coastguard Worker    paddd                m0, m4     ; t8
2966*c0909341SAndroid Build Coastguard Worker    psubd                m4, m6, m2 ; t10
2967*c0909341SAndroid Build Coastguard Worker    paddd                m2, m6     ; t11
2968*c0909341SAndroid Build Coastguard Worker    psubd                m6, m1, m5 ; t13
2969*c0909341SAndroid Build Coastguard Worker    paddd                m5, m1     ; t12
2970*c0909341SAndroid Build Coastguard Worker    psubd                m1, m7, m3 ; t14
2971*c0909341SAndroid Build Coastguard Worker    paddd                m7, m3     ; t15
; Clamp all eight intermediates to [m12, m13].
2972*c0909341SAndroid Build Coastguard Worker    REPX    {pmaxsd x, m12}, m8, m1, m4, m6, m0, m2, m5, m7
2973*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m8, m1, m4, m6, m0, m2, m5, m7
; NOTE(review): m15/m10 are loaded for the two macro invocations that
; follow, which reference registers 10 and 15 as operands - confirm the
; argument roles against ITX_MULSUB_2D.
2974*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m15, [pd_3784]
2975*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m10, [pd_1567]
2976*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         1, 8, 3, 9, _, 11, 10, 15
2977*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         6, 4, 3, 9, _, 11, 10, 15, 2
2978*c0909341SAndroid Build Coastguard Worker    psubd                m3, m1, m4 ; t10
2979*c0909341SAndroid Build Coastguard Worker    paddd                m1, m4     ; t9
2980*c0909341SAndroid Build Coastguard Worker    psubd                m4, m0, m2 ; t11a
2981*c0909341SAndroid Build Coastguard Worker    paddd                m0, m2     ; t8a
2982*c0909341SAndroid Build Coastguard Worker    psubd                m2, m8, m6 ; t13
2983*c0909341SAndroid Build Coastguard Worker    paddd                m6, m8     ; t14
2984*c0909341SAndroid Build Coastguard Worker    psubd                m8, m7, m5 ; t12a
2985*c0909341SAndroid Build Coastguard Worker    paddd                m7, m5     ; t15a
2986*c0909341SAndroid Build Coastguard Worker    REPX    {pmaxsd x, m12}, m2, m8, m3, m4, m0, m1, m6, m7
2987*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m2, m8, m3, m4, m0, m1, m6, m7
; Scale the four middle terms by m14. The addend m11 is added to only one
; operand of each pair (m2 and m8), so every sum and difference formed below
; contains it exactly once before the final >> 12.
2988*c0909341SAndroid Build Coastguard Worker    REPX    {pmulld x, m14}, m2, m8, m3, m4
2989*c0909341SAndroid Build Coastguard Worker    paddd                m2, m11
2990*c0909341SAndroid Build Coastguard Worker    paddd                m8, m11
2991*c0909341SAndroid Build Coastguard Worker    paddd                m5, m2, m3 ; t13a
2992*c0909341SAndroid Build Coastguard Worker    psubd                m2, m3     ; t10a
2993*c0909341SAndroid Build Coastguard Worker    psubd                m3, m8, m4 ; t11
2994*c0909341SAndroid Build Coastguard Worker    paddd                m4, m8     ; t12
2995*c0909341SAndroid Build Coastguard Worker    REPX      {psrad x, 12}, m5, m2, m3, m4
; Spill the eight results to the caller-provided buffer around r6, m7 at the
; lowest address and m0 at the highest. The registers keep their values.
2996*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*4], m7
2997*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*3], m6
2998*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*2], m5
2999*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*1], m4
3000*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*0], m3
3001*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*1], m2
3002*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*2], m1
3003*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*3], m0
3004*c0909341SAndroid Build Coastguard Worker    ret
3005*c0909341SAndroid Build Coastguard Worker
; Instantiate the 10 bpc 8x16 entry points whose first transform is the
; ADST. The adst/identity pair passes an eob_offset of 35; the others use
; the macro defaults.
3006*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X16_FN adst, dct
3007*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X16_FN adst, adst
3008*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X16_FN adst, flipadst
3009*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X16_FN adst, identity, 35
3010*c0909341SAndroid Build Coastguard Worker
; iadst_8x16_internal_10bpc
; Same overall structure as idct_8x16_internal_10bpc: pass 1 runs
; .pass1_main on the odd 32-byte rows (cq + 32) and then on the even rows,
; or on the even rows only when eob < 43, in which case m8-m15 are zeroed.
; Each run is followed by the 8x8 10 bpc iadst .main_end routine.
; Pass 1 leaves its results in m0-m7 (last run) and m8-m15 (first run or
; zeros) and jumps indirectly through tx2q.
; Pass 2 reuses the idct 8x16 .transpose routine, runs the 8 bpc iadst 8x16
; .main and .main_pass2_end routines and finishes in the shared .end tail of
; idct_8x16_internal_10bpc.
3011*c0909341SAndroid Build Coastguard Workercglobal iadst_8x16_internal_10bpc, 0, 7, 16, dst, stride, c, eob, tx2
; NOTE(review): removes a `cmp` macro definition that is presumably set up
; elsewhere in the build - confirm where the override comes from.
3012*c0909341SAndroid Build Coastguard Worker%undef cmp
3013*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [clip_18b_min]
3014*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_18b_max]
3015*c0909341SAndroid Build Coastguard Worker.pass1:
; m14 = coefficient multiplier, m11 = rounding addend; both are used by
; .pass1_main below.
3016*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m14, [pd_2896]
3017*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m11, [pd_2048]
3018*c0909341SAndroid Build Coastguard Worker    cmp                eobd, 43
3019*c0909341SAndroid Build Coastguard Worker    jl .fast
; Full path, first run: odd rows.
3020*c0909341SAndroid Build Coastguard Worker    add                  cq, 32
3021*c0909341SAndroid Build Coastguard Worker    call .pass1_main
3022*c0909341SAndroid Build Coastguard Worker    call m(iadst_8x8_internal_10bpc).main_end
3023*c0909341SAndroid Build Coastguard Worker    sub                  cq, 32
; Park seven results in the odd rows just consumed; the eighth stays in m15.
3024*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 1], m0
3025*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 3], m1
3026*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 5], m2
3027*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 7], m3
3028*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 9], m4
3029*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*11], m5
3030*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*13], m6
3031*c0909341SAndroid Build Coastguard Worker    mova                m15, m7
; Second run: even rows.
3032*c0909341SAndroid Build Coastguard Worker    call .pass1_main
3033*c0909341SAndroid Build Coastguard Worker    call m(iadst_8x8_internal_10bpc).main_end
; Reload the parked results; this overwrites the constants in m11-m14, which
; are not used again before the jump to pass 2.
3034*c0909341SAndroid Build Coastguard Worker    mova                 m8, [cq+32* 1]
3035*c0909341SAndroid Build Coastguard Worker    mova                 m9, [cq+32* 3]
3036*c0909341SAndroid Build Coastguard Worker    mova                m10, [cq+32* 5]
3037*c0909341SAndroid Build Coastguard Worker    mova                m11, [cq+32* 7]
3038*c0909341SAndroid Build Coastguard Worker    mova                m12, [cq+32* 9]
3039*c0909341SAndroid Build Coastguard Worker    mova                m13, [cq+32*11]
3040*c0909341SAndroid Build Coastguard Worker    mova                m14, [cq+32*13]
3041*c0909341SAndroid Build Coastguard Worker    jmp                tx2q
3042*c0909341SAndroid Build Coastguard Worker.fast:
; eob < 43: process the even rows only and zero the other half.
3043*c0909341SAndroid Build Coastguard Worker    call .pass1_main
3044*c0909341SAndroid Build Coastguard Worker    call m(iadst_8x8_internal_10bpc).main_end
3045*c0909341SAndroid Build Coastguard Worker    pxor                 m8, m8
3046*c0909341SAndroid Build Coastguard Worker    REPX       {mova x, m8}, m9, m10, m11, m12, m13, m14, m15
3047*c0909341SAndroid Build Coastguard Worker    jmp                tx2q
3048*c0909341SAndroid Build Coastguard Worker.pass2:
3049*c0909341SAndroid Build Coastguard Worker    call m(idct_8x16_internal_10bpc).transpose
3050*c0909341SAndroid Build Coastguard Worker    call m(iadst_8x16_internal_8bpc).main
3051*c0909341SAndroid Build Coastguard Worker    call m(iadst_8x16_internal_8bpc).main_pass2_end
; Build the multiplier for the shared .end tail in m12. The broadcast to
; xm12 fills only the low 128-bit lane (a VEX-encoded xmm write zeroes the
; upper lane), so after the psubw below m12 holds pw_4096 - pw_2048 in the
; low lane and 0 - pw_2048 in the high lane.
; NOTE(review): assuming the constants match their names this is +2048 /
; -2048, i.e. the two lanes are scaled with opposite signs - confirm.
3052*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m8, [pw_2048]
3053*c0909341SAndroid Build Coastguard Worker    vpbroadcastd       xm12, [pw_4096]
3054*c0909341SAndroid Build Coastguard Worker    REPX {vpermq x, x, q2031}, m0, m1, m2, m3
3055*c0909341SAndroid Build Coastguard Worker    REPX {vpermq x, x, q3120}, m4, m5, m6, m7
3056*c0909341SAndroid Build Coastguard Worker    psubw               m12, m8
3057*c0909341SAndroid Build Coastguard Worker    jmp m(idct_8x16_internal_10bpc).end
3058*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; .pass1_main
; Loads eight 32-byte rows from cq at a stride of 64 bytes, multiplies each
; 32-bit element by m14, applies (x + m11) >> 12 and tail-jumps to the 8x8
; 10 bpc iadst .main2 code. Also called by iflipadst_8x16_internal_10bpc.
3059*c0909341SAndroid Build Coastguard Worker.pass1_main:
3060*c0909341SAndroid Build Coastguard Worker    pmulld               m0, m14, [cq+32* 0]
3061*c0909341SAndroid Build Coastguard Worker    pmulld               m7, m14, [cq+32*14]
3062*c0909341SAndroid Build Coastguard Worker    pmulld               m1, m14, [cq+32* 2]
3063*c0909341SAndroid Build Coastguard Worker    pmulld               m6, m14, [cq+32*12]
3064*c0909341SAndroid Build Coastguard Worker    pmulld               m2, m14, [cq+32* 4]
3065*c0909341SAndroid Build Coastguard Worker    pmulld               m5, m14, [cq+32*10]
3066*c0909341SAndroid Build Coastguard Worker    pmulld               m3, m14, [cq+32* 6]
3067*c0909341SAndroid Build Coastguard Worker    pmulld               m4, m14, [cq+32* 8]
3068*c0909341SAndroid Build Coastguard Worker    REPX     {paddd x, m11}, m0, m1, m2, m3, m4, m5, m6, m7
3069*c0909341SAndroid Build Coastguard Worker    REPX     {psrad x, 12 }, m0, m1, m2, m3, m4, m5, m6, m7
3070*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_8x8_internal_10bpc).main2
3071*c0909341SAndroid Build Coastguard Worker
; Instantiate the 10 bpc 8x16 entry points whose first transform is the
; flipped ADST. The flipadst/identity pair passes an eob_offset of 35; the
; others use the macro defaults.
3072*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X16_FN flipadst, dct
3073*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X16_FN flipadst, adst
3074*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X16_FN flipadst, flipadst
3075*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X16_FN flipadst, identity, 35
3076*c0909341SAndroid Build Coastguard Worker
; iflipadst_8x16_internal_10bpc
; Pass 1 mirrors iadst_8x16_internal_10bpc: it calls that function's
; .pass1_main on the odd rows (cq + 32) and then on the even rows, or on the
; even rows only when eob < 43. The difference is that each run is followed
; by the 8x8 10 bpc iflipadst .main_end routine instead of the iadst one.
; Pass 2 runs the same transpose and 8 bpc iadst routines as the adst
; variant, then reverses the register order (m0 <-> m7, m1 <-> m6, ...)
; while permuting, and finishes in the shared .end tail of
; idct_8x16_internal_10bpc.
3077*c0909341SAndroid Build Coastguard Workercglobal iflipadst_8x16_internal_10bpc, 0, 7, 16, dst, stride, c, eob, tx2
; NOTE(review): removes a `cmp` macro definition that is presumably set up
; elsewhere in the build - confirm where the override comes from.
3078*c0909341SAndroid Build Coastguard Worker%undef cmp
3079*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [clip_18b_min]
3080*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_18b_max]
3081*c0909341SAndroid Build Coastguard Worker.pass1:
; m14 = coefficient multiplier, m11 = rounding addend; both are read by the
; shared .pass1_main routine.
3082*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m14, [pd_2896]
3083*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m11, [pd_2048]
3084*c0909341SAndroid Build Coastguard Worker    cmp                eobd, 43
3085*c0909341SAndroid Build Coastguard Worker    jl .fast
; Full path, first run: odd rows.
3086*c0909341SAndroid Build Coastguard Worker    add                  cq, 32
3087*c0909341SAndroid Build Coastguard Worker    call m(iadst_8x16_internal_10bpc).pass1_main
3088*c0909341SAndroid Build Coastguard Worker    call m(iflipadst_8x8_internal_10bpc).main_end
3089*c0909341SAndroid Build Coastguard Worker    sub                  cq, 32
; Park seven results in the odd rows just consumed; the eighth stays in m15.
3090*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 1], m0
3091*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 3], m1
3092*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 5], m2
3093*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 7], m3
3094*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 9], m4
3095*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*11], m5
3096*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*13], m6
3097*c0909341SAndroid Build Coastguard Worker    mova                m15, m7
; Second run: even rows.
3098*c0909341SAndroid Build Coastguard Worker    call m(iadst_8x16_internal_10bpc).pass1_main
3099*c0909341SAndroid Build Coastguard Worker    call m(iflipadst_8x8_internal_10bpc).main_end
; Reload the parked results; this overwrites the constants in m11-m14, which
; are not used again before the jump to pass 2.
3100*c0909341SAndroid Build Coastguard Worker    mova                 m8, [cq+32* 1]
3101*c0909341SAndroid Build Coastguard Worker    mova                 m9, [cq+32* 3]
3102*c0909341SAndroid Build Coastguard Worker    mova                m10, [cq+32* 5]
3103*c0909341SAndroid Build Coastguard Worker    mova                m11, [cq+32* 7]
3104*c0909341SAndroid Build Coastguard Worker    mova                m12, [cq+32* 9]
3105*c0909341SAndroid Build Coastguard Worker    mova                m13, [cq+32*11]
3106*c0909341SAndroid Build Coastguard Worker    mova                m14, [cq+32*13]
3107*c0909341SAndroid Build Coastguard Worker    jmp                tx2q
3108*c0909341SAndroid Build Coastguard Worker.fast:
; eob < 43: process the even rows only and zero the other half.
3109*c0909341SAndroid Build Coastguard Worker    call m(iadst_8x16_internal_10bpc).pass1_main
3110*c0909341SAndroid Build Coastguard Worker    call m(iflipadst_8x8_internal_10bpc).main_end
3111*c0909341SAndroid Build Coastguard Worker    pxor                 m8, m8
3112*c0909341SAndroid Build Coastguard Worker    REPX       {mova x, m8}, m9, m10, m11, m12, m13, m14, m15
3113*c0909341SAndroid Build Coastguard Worker    jmp                tx2q
3114*c0909341SAndroid Build Coastguard Worker.pass2:
3115*c0909341SAndroid Build Coastguard Worker    call m(idct_8x16_internal_10bpc).transpose
3116*c0909341SAndroid Build Coastguard Worker    call m(iadst_8x16_internal_8bpc).main
3117*c0909341SAndroid Build Coastguard Worker    call m(iadst_8x16_internal_8bpc).main_pass2_end
; Build the multiplier for the shared .end tail in m12. xm13 receives
; pw_4096 in its low 128-bit lane only (a VEX-encoded xmm write zeroes the
; upper lane), so the psubw at the end leaves pw_2048 - pw_4096 in the low
; lane of m12 and pw_2048 - 0 in the high lane. The lane signs are therefore
; the opposite of those used by iadst_8x16_internal_10bpc.pass2.
3118*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [pw_2048]
3119*c0909341SAndroid Build Coastguard Worker    vpbroadcastd       xm13, [pw_4096]
; Reverse the register order while permuting. The original m0-m3 are saved
; in m11, m10, m9 and m8 first because m0-m3 are overwritten before their
; old contents are needed for the new m7-m4.
3120*c0909341SAndroid Build Coastguard Worker    mova                m11, m0
3121*c0909341SAndroid Build Coastguard Worker    vpermq               m0, m7, q2031
3122*c0909341SAndroid Build Coastguard Worker    mova                m10, m1
3123*c0909341SAndroid Build Coastguard Worker    vpermq               m1, m6, q2031
3124*c0909341SAndroid Build Coastguard Worker    mova                 m9, m2
3125*c0909341SAndroid Build Coastguard Worker    vpermq               m2, m5, q2031
3126*c0909341SAndroid Build Coastguard Worker    mova                 m8, m3
3127*c0909341SAndroid Build Coastguard Worker    vpermq               m3, m4, q2031
3128*c0909341SAndroid Build Coastguard Worker    vpermq               m4, m8, q3120
3129*c0909341SAndroid Build Coastguard Worker    vpermq               m5, m9, q3120
3130*c0909341SAndroid Build Coastguard Worker    vpermq               m6, m10, q3120
3131*c0909341SAndroid Build Coastguard Worker    vpermq               m7, m11, q3120
3132*c0909341SAndroid Build Coastguard Worker    psubw               m12, m13
3133*c0909341SAndroid Build Coastguard Worker    jmp m(idct_8x16_internal_10bpc).end
3134*c0909341SAndroid Build Coastguard Worker
; Instantiate the 10 bpc 8x16 entry points whose first transform is the
; identity, one per second-transform type. All four rely on the macro
; defaults (eob_offset 0, bitdepth 10).
3135*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X16_FN identity, dct
3136*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X16_FN identity, adst
3137*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X16_FN identity, flipadst
3138*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X16_FN identity, identity
3139*c0909341SAndroid Build Coastguard Worker
; IDTX16 src/dst, tmp, coef [, round]
; All arguments are vector register numbers (used as m%N) holding 16-bit
; words. With x = m<src/dst> and c = m<coef>:
;   tmp = pmulhrsw(c, x)            ; rounded (c * x) >> 15
;   3 arguments:
;       x = sat(sat(x + x) + tmp)
;   4 arguments (result downshifted by 1):
;       tmp = pmulhrsw(tmp, m<round>)   if the 4th argument is a number
;       tmp = tmp >> 1 (arithmetic)     otherwise (no rounding)
;       x   = sat(x + tmp)
; Additions use signed saturation (paddsw). Clobbers m<tmp>.
; NOTE(review): assuming the coefficient register holds 1697*16 as the
; parameter name suggests, the 3-argument form scales x by roughly
; 2 + 27152/32768 = 2.83 - confirm at the call sites.
3140*c0909341SAndroid Build Coastguard Worker%macro IDTX16 3-4 ; src/dst, tmp, pw_1697x16, [pw_16384]
3141*c0909341SAndroid Build Coastguard Worker    pmulhrsw            m%2, m%3, m%1
3142*c0909341SAndroid Build Coastguard Worker%if %0 == 4 ; if downshifting by 1
3143*c0909341SAndroid Build Coastguard Worker%ifnum %4
3144*c0909341SAndroid Build Coastguard Worker    pmulhrsw            m%2, m%4
3145*c0909341SAndroid Build Coastguard Worker%else ; without rounding
3146*c0909341SAndroid Build Coastguard Worker    psraw               m%2, 1
3147*c0909341SAndroid Build Coastguard Worker%endif
3148*c0909341SAndroid Build Coastguard Worker%else
3149*c0909341SAndroid Build Coastguard Worker    paddsw              m%1, m%1
3150*c0909341SAndroid Build Coastguard Worker%endif
3151*c0909341SAndroid Build Coastguard Worker    paddsw              m%1, m%2
3152*c0909341SAndroid Build Coastguard Worker%endmacro
3153*c0909341SAndroid Build Coastguard Worker
3154*c0909341SAndroid Build Coastguard Workercglobal iidentity_8x16_internal_10bpc, 0, 7, 16, dst, stride, c, eob, tx2
3155*c0909341SAndroid Build Coastguard Worker.pass1:
3156*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m15, [pd_2896]
3157*c0909341SAndroid Build Coastguard Worker    pmulld               m0, m15, [cq+32* 0]
3158*c0909341SAndroid Build Coastguard Worker    pmulld               m8, m15, [cq+32* 1]
3159*c0909341SAndroid Build Coastguard Worker    pmulld               m1, m15, [cq+32* 2]
3160*c0909341SAndroid Build Coastguard Worker    pmulld               m9, m15, [cq+32* 3]
3161*c0909341SAndroid Build Coastguard Worker    pmulld               m2, m15, [cq+32* 4]
3162*c0909341SAndroid Build Coastguard Worker    pmulld              m10, m15, [cq+32* 5]
3163*c0909341SAndroid Build Coastguard Worker    pmulld               m3, m15, [cq+32* 6]
3164*c0909341SAndroid Build Coastguard Worker    pmulld              m11, m15, [cq+32* 7]
3165*c0909341SAndroid Build Coastguard Worker    pmulld               m4, m15, [cq+32* 8]
3166*c0909341SAndroid Build Coastguard Worker    pmulld              m12, m15, [cq+32* 9]
3167*c0909341SAndroid Build Coastguard Worker    pmulld               m5, m15, [cq+32*10]
3168*c0909341SAndroid Build Coastguard Worker    pmulld              m13, m15, [cq+32*11]
3169*c0909341SAndroid Build Coastguard Worker    pmulld               m6, m15, [cq+32*12]
3170*c0909341SAndroid Build Coastguard Worker    pmulld              m14, m15, [cq+32*13]
3171*c0909341SAndroid Build Coastguard Worker    pmulld               m7, m15, [cq+32*14]
3172*c0909341SAndroid Build Coastguard Worker    pmulld              m15,      [cq+32*15]
3173*c0909341SAndroid Build Coastguard Worker    mova               [cq], m7
3174*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m7, [pd_2048]
3175*c0909341SAndroid Build Coastguard Worker    REPX     {paddd  x, m7}, m0,  m1,  m2,  m3,  m4,  m5,  m6, \
3176*c0909341SAndroid Build Coastguard Worker                             m8,  m9,  m10, m11, m12, m13, m14, m15
3177*c0909341SAndroid Build Coastguard Worker    paddd                m7, [cq]
3178*c0909341SAndroid Build Coastguard Worker    REPX     {psrad  x, 12}, m0,  m1,  m2,  m3,  m4,  m5,  m6,  m7, \
3179*c0909341SAndroid Build Coastguard Worker                             m8,  m9,  m10, m11, m12, m13, m14, m15
3180*c0909341SAndroid Build Coastguard Worker    jmp                tx2q
; iidentity_8x16_internal_10bpc, second pass (the enclosing cglobal starts
; above this excerpt).
; In:  m0-m15 hold 32-bit values; they are narrowed to 16 bit here.
; Out: nothing is returned in registers; the result is handed to .pass2_end,
;      after which the function returns with RET.
3181*c0909341SAndroid Build Coastguard Worker.pass2:
    ; Narrow dwords to words with signed saturation, pairing mN with m(N+8).
    ; The m7/m15 pair is written to m13 so that m7 stays free for the
    ; constant loaded further down.
3182*c0909341SAndroid Build Coastguard Worker    packssdw             m0, m8
3183*c0909341SAndroid Build Coastguard Worker    packssdw             m1, m9
3184*c0909341SAndroid Build Coastguard Worker    packssdw             m2, m10
3185*c0909341SAndroid Build Coastguard Worker    packssdw             m3, m11
3186*c0909341SAndroid Build Coastguard Worker    packssdw             m4, m12
3187*c0909341SAndroid Build Coastguard Worker    packssdw             m5, m13
3188*c0909341SAndroid Build Coastguard Worker    packssdw             m6, m14
3189*c0909341SAndroid Build Coastguard Worker    packssdw            m13, m7, m15
    ; Apply IDTX16 to the eight packed registers. IDTX16 is defined outside
    ; this excerpt; presumably register 9 is its scratch and register 8
    ; (pw_1697x16) its coefficient -- TODO confirm against the macro.
3190*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m8, [pw_1697x16]
3191*c0909341SAndroid Build Coastguard Worker    REPX {IDTX16   x, 9, 8}, 0, 1, 2, 3, 4, 5, 6, 13
    ; Constants for .pass2_end: m12 is the pmulhrsw multiplier used there.
    ; m7 (pixel maximum) is not read by .pass2_end itself; presumably the
    ; write helpers it calls use it for clamping.
3192*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m7, [pixel_10bpc_max]
3193*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [pw_2048]
3194*c0909341SAndroid Build Coastguard Worker    call .pass2_end
3195*c0909341SAndroid Build Coastguard Worker    RET
; Shared tail of the 8x16 identity second pass. In this excerpt it is called
; from the 10 bpc .pass2 (m12 = pw_2048) and from
; iidentity_8x16_internal_12bpc.pass2 (m12 = pw_16384).
; In:  m0-m6 and m13 = packed 16-bit data, m12 = pmulhrsw multiplier.
;      m7 is set by both callers to the pixel maximum; it is not touched
;      here and is presumably consumed by the write helpers.
; Work: interleaves the eight inputs at word, dword and qword granularity
;      (a transpose-style reshuffle), then scales each resulting register
;      pair with pmulhrsw and hands it to a write helper in m0/m1.
; Clobbers: m0-m6, m8-m11, m13, dstq (plus whatever the helpers clobber).
3196*c0909341SAndroid Build Coastguard WorkerALIGN function_align
3197*c0909341SAndroid Build Coastguard Worker.pass2_end:
    ; Stage 1: interleave 16-bit words of neighbouring registers.
3198*c0909341SAndroid Build Coastguard Worker    punpckhwd            m9, m0, m1
3199*c0909341SAndroid Build Coastguard Worker    punpcklwd            m0, m1
3200*c0909341SAndroid Build Coastguard Worker    punpckhwd            m1, m6, m13
3201*c0909341SAndroid Build Coastguard Worker    punpcklwd            m6, m13
3202*c0909341SAndroid Build Coastguard Worker    punpckhwd           m13, m4, m5
3203*c0909341SAndroid Build Coastguard Worker    punpcklwd            m4, m5
3204*c0909341SAndroid Build Coastguard Worker    punpcklwd            m5, m2, m3
3205*c0909341SAndroid Build Coastguard Worker    punpckhwd            m2, m3
    ; Stage 2: interleave 32-bit dwords.
3206*c0909341SAndroid Build Coastguard Worker    punpckhdq            m3, m0, m5
3207*c0909341SAndroid Build Coastguard Worker    punpckldq            m0, m5
3208*c0909341SAndroid Build Coastguard Worker    punpckhdq           m11, m9, m2
3209*c0909341SAndroid Build Coastguard Worker    punpckldq            m9, m2
3210*c0909341SAndroid Build Coastguard Worker    punpckldq            m2, m4, m6
3211*c0909341SAndroid Build Coastguard Worker    punpckhdq            m4, m6
3212*c0909341SAndroid Build Coastguard Worker    punpckldq            m6, m13, m1
3213*c0909341SAndroid Build Coastguard Worker    punpckhdq           m13, m1
    ; Stage 3: interleave 64-bit qwords; results land in m0-m3 and m8-m11.
3214*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m1, m0, m2
3215*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m0, m2
3216*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m2, m3, m4
3217*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m3, m4
3218*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m8, m9, m6
3219*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m9, m6
3220*c0909341SAndroid Build Coastguard Worker    punpcklqdq          m10, m11, m13
3221*c0909341SAndroid Build Coastguard Worker    punpckhqdq          m11, m13
    ; Scale and write the four register pairs. The first pair uses the
    ; _start entry of the helper, the remaining three the _zero entry
    ; (helpers are defined outside this excerpt).
3222*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m12
3223*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m12
3224*c0909341SAndroid Build Coastguard Worker    call m(iidentity_8x8_internal_10bpc).write_2x8x2_start
3225*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m12, m2
3226*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m12, m3
3227*c0909341SAndroid Build Coastguard Worker    call m(iidentity_8x8_internal_10bpc).write_2x8x2_zero
3228*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m12, m8
3229*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m12, m9
    ; Advance the destination pointer by four rows before the third write.
3230*c0909341SAndroid Build Coastguard Worker    lea                dstq, [dstq+strideq*4]
3231*c0909341SAndroid Build Coastguard Worker    call m(iidentity_8x8_internal_10bpc).write_2x8x2_zero
3232*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m12, m10
3233*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m12, m11
3234*c0909341SAndroid Build Coastguard Worker    call m(iidentity_8x8_internal_10bpc).write_2x8x2_zero
3235*c0909341SAndroid Build Coastguard Worker    ret
3236*c0909341SAndroid Build Coastguard Worker
; Instantiate the 12 bpc 8x16 entry points whose first transform is DCT.
; INV_TXFM_8X16_FN is defined outside this excerpt; the trailing 12 is
; presumably the bit depth and the third argument an eob-related value
; -- TODO confirm against the macro definition.
3237*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X16_FN dct, dct,       0, 12
3238*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X16_FN dct, identity, 35, 12
3239*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X16_FN dct, adst,      0, 12
3240*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X16_FN dct, flipadst,  0, 12
3241*c0909341SAndroid Build Coastguard Worker
; idct_8x16_internal_12bpc
; Named arguments (from the cglobal line): dst, stride, c, eob, tx2.
; Entry:  loads the 20-bit clip bounds into m12/m13 and continues in the
;         10 bpc pass-1 code.
; .pass2: transposes the sixteen input registers (see .transpose), clamps
;         to the 18-bit range, runs the DCT helpers and shifts right by 4.
; .end:   packs the sixteen dword registers to words and writes them out
;         in four 8x4 steps. Also the jump target of the 12 bpc ADST and
;         flipADST second passes in this excerpt.
3242*c0909341SAndroid Build Coastguard Workercglobal idct_8x16_internal_12bpc, 0, 7, 16, 32*8, dst, stride, c, eob, tx2
3243*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [clip_20b_min]
3244*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_20b_max]
3245*c0909341SAndroid Build Coastguard Worker    jmp m(idct_8x16_internal_10bpc).pass1
3246*c0909341SAndroid Build Coastguard Worker.pass2:
    ; r6 points into the stack area; presumably scratch for the helpers
    ; called below -- TODO confirm.
3247*c0909341SAndroid Build Coastguard Worker    lea                  r6, [rsp+32*4]
    ; After .transpose: cq slots 0-7 hold the first transposed half,
    ; m0-m7 the second.
3248*c0909341SAndroid Build Coastguard Worker    call .transpose
3249*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [clip_18b_min]
3250*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_18b_max]
    ; Park the even registers of the second half in cq slots 8/10/12/14;
    ; they are reloaded after main_oddhalf.
3251*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 8], m0
3252*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*10], m2
3253*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*12], m4
3254*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*14], m6
    ; Gather the odd inputs (cq slots 1/3/5/7 and registers m1/m3/m5/m7)
    ; into m0-m7, clamping to [clip_18b_min, clip_18b_max] on the way.
3255*c0909341SAndroid Build Coastguard Worker    pmaxsd               m0, m12, [cq+32* 1]
3256*c0909341SAndroid Build Coastguard Worker    pmaxsd               m4, m12, m1
3257*c0909341SAndroid Build Coastguard Worker    pmaxsd               m1, m12, [cq+32* 3]
3258*c0909341SAndroid Build Coastguard Worker    pmaxsd               m2, m12, [cq+32* 5]
3259*c0909341SAndroid Build Coastguard Worker    pmaxsd               m6, m12, m5
3260*c0909341SAndroid Build Coastguard Worker    pmaxsd               m5, m12, m3
3261*c0909341SAndroid Build Coastguard Worker    pmaxsd               m3, m12, [cq+32* 7]
3262*c0909341SAndroid Build Coastguard Worker    pmaxsd               m7, m12
3263*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m0, m1, m2, m3, m4, m5, m6, m7
3264*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m11, [pd_2048]
3265*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m14, [pd_2896]
3266*c0909341SAndroid Build Coastguard Worker    call m(idct_8x16_internal_10bpc).main_oddhalf
    ; Load and clamp the even inputs (cq slots 0,2,...,14).
3267*c0909341SAndroid Build Coastguard Worker    pmaxsd               m0, m12, [cq+32* 0]
3268*c0909341SAndroid Build Coastguard Worker    pmaxsd               m1, m12, [cq+32* 2]
3269*c0909341SAndroid Build Coastguard Worker    pmaxsd               m2, m12, [cq+32* 4]
3270*c0909341SAndroid Build Coastguard Worker    pmaxsd               m3, m12, [cq+32* 6]
3271*c0909341SAndroid Build Coastguard Worker    pmaxsd               m4, m12, [cq+32* 8]
3272*c0909341SAndroid Build Coastguard Worker    pmaxsd               m5, m12, [cq+32*10]
3273*c0909341SAndroid Build Coastguard Worker    pmaxsd               m6, m12, [cq+32*12]
3274*c0909341SAndroid Build Coastguard Worker    pmaxsd               m7, m12, [cq+32*14]
3275*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m0, m1, m2, m3, m4, m5, m6, m7
3276*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).main
3277*c0909341SAndroid Build Coastguard Worker    call m(idct_8x16_internal_10bpc).main_evenhalf
    ; Add 8 to m0-m7 ahead of the final shift by 4 (rounding bias), then
    ; let pass1_rotations produce all sixteen outputs in m0-m15.
3278*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m11, [pd_8]
3279*c0909341SAndroid Build Coastguard Worker    REPX    {paddd  x, m11}, m0, m1, m2, m3, m4, m5, m6, m7
3280*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_10bpc).pass1_rotations
3281*c0909341SAndroid Build Coastguard Worker    REPX       {psrad x, 4}, m0,  m1,  m2,  m3,  m4,  m5,  m6,  m7, \
3282*c0909341SAndroid Build Coastguard Worker                             m8,  m9,  m10, m11, m12, m13, m14, m15
3283*c0909341SAndroid Build Coastguard Worker.end:
    ; Pack adjacent dword registers into words with signed saturation:
    ; (m0,m1)->m0, (m2,m3)->m1, ... (m14,m15)->m7.
3284*c0909341SAndroid Build Coastguard Worker    packssdw             m0, m1
3285*c0909341SAndroid Build Coastguard Worker    packssdw             m1, m2, m3
3286*c0909341SAndroid Build Coastguard Worker    packssdw             m2, m4, m5
3287*c0909341SAndroid Build Coastguard Worker    packssdw             m3, m6, m7
3288*c0909341SAndroid Build Coastguard Worker    packssdw             m4, m8, m9
3289*c0909341SAndroid Build Coastguard Worker    packssdw             m5, m10, m11
3290*c0909341SAndroid Build Coastguard Worker    packssdw             m6, m12, m13
3291*c0909341SAndroid Build Coastguard Worker    packssdw             m7, m14, m15
    ; Each write step takes its data in m0/m1 after a q3120 qword
    ; permute, which swaps the two middle qwords of the packed register.
3292*c0909341SAndroid Build Coastguard Worker    vpermq               m0, m0, q3120
3293*c0909341SAndroid Build Coastguard Worker    vpermq               m1, m1, q3120
    ; write_8x4_start (12 bpc) is called once before the first write_8x4;
    ; presumably it sets up the constants the write helper needs.
3294*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_12bpc).write_8x4_start
3295*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).write_8x4
3296*c0909341SAndroid Build Coastguard Worker    vpermq               m0, m2, q3120
3297*c0909341SAndroid Build Coastguard Worker    vpermq               m1, m3, q3120
3298*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).write_8x4
3299*c0909341SAndroid Build Coastguard Worker    vpermq               m0, m4, q3120
3300*c0909341SAndroid Build Coastguard Worker    vpermq               m1, m5, q3120
3301*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).write_8x4
3302*c0909341SAndroid Build Coastguard Worker    vpermq               m0, m6, q3120
3303*c0909341SAndroid Build Coastguard Worker    vpermq               m1, m7, q3120
3304*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).write_8x4
3305*c0909341SAndroid Build Coastguard Worker    RET
; Transposes the sixteen registers m0-m15 as two groups of eight, using
; idct_8x8_internal_12bpc.transpose_8x8 (defined outside this excerpt,
; presumably operating on m0-m7).
; In:  m0-m15.
; Out: cq slots 0-7 (32 bytes each) = transposed former m0-m7;
;      m0-m7 = transposed former m8-m15.
; Clobbers: cq slots 8-11, which serve as temporary storage for m8-m11.
3306*c0909341SAndroid Build Coastguard WorkerALIGN function_align
3307*c0909341SAndroid Build Coastguard Worker.transpose:
    ; Save m8-m11 across the first transpose call; m12-m15 are not saved,
    ; so the helper presumably leaves them intact -- TODO confirm.
3308*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 8], m8
3309*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 9], m9
3310*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*10], m10
3311*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*11], m11
3312*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_12bpc).transpose_8x8
    ; Store the first transposed group to the coefficient buffer.
3313*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 0], m0
3314*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 1], m1
3315*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 2], m2
3316*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 3], m3
3317*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 4], m4
3318*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 5], m5
3319*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 6], m6
3320*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 7], m7
    ; Bring the second group (saved m8-m11 plus live m12-m15) into m0-m7.
3321*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+32* 8]
3322*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+32* 9]
3323*c0909341SAndroid Build Coastguard Worker    mova                 m2, [cq+32*10]
3324*c0909341SAndroid Build Coastguard Worker    mova                 m3, [cq+32*11]
3325*c0909341SAndroid Build Coastguard Worker    mova                 m4, m12
3326*c0909341SAndroid Build Coastguard Worker    mova                 m5, m13
3327*c0909341SAndroid Build Coastguard Worker    mova                 m6, m14
3328*c0909341SAndroid Build Coastguard Worker    mova                 m7, m15
    ; Tail call: the helper's ret returns directly to our caller.
3329*c0909341SAndroid Build Coastguard Worker    jmp m(idct_8x8_internal_12bpc).transpose_8x8
3330*c0909341SAndroid Build Coastguard Worker
; Instantiate the 12 bpc 8x16 entry points whose first transform is ADST.
; INV_TXFM_8X16_FN is defined outside this excerpt; the trailing 12 is
; presumably the bit depth -- TODO confirm against the macro definition.
3331*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X16_FN adst, dct,       0, 12
3332*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X16_FN adst, adst,      0, 12
3333*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X16_FN adst, flipadst,  0, 12
3334*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X16_FN adst, identity, 35, 12
3335*c0909341SAndroid Build Coastguard Worker
; iadst_8x16_internal_12bpc
; Named arguments (from the cglobal line): dst, stride, c, eob, tx2.
; Entry:      loads the 20-bit clip bounds into m12/m13 and continues in
;             the 10 bpc pass-1 code.
; .pass2:     runs .pass2_main, then the 10 bpc 16x8 ADST rotations.
; .pass2_end: final right shifts, then the shared pack-and-write tail in
;             idct_8x16_internal_12bpc.end. Also the jump target of the
;             12 bpc flipADST second pass.
3336*c0909341SAndroid Build Coastguard Workercglobal iadst_8x16_internal_12bpc, 0, 7, 16, 32*8, dst, stride, c, eob, tx2
3337*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [clip_20b_min]
3338*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_20b_max]
3339*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_8x16_internal_10bpc).pass1
3340*c0909341SAndroid Build Coastguard Worker.pass2:
    ; r6 points into the stack area; presumably scratch for the helpers
    ; called below -- TODO confirm.
3341*c0909341SAndroid Build Coastguard Worker    lea                  r6, [rsp+32*4]
3342*c0909341SAndroid Build Coastguard Worker    call .pass2_main
3343*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x8_internal_10bpc).pass1_rotations
3344*c0909341SAndroid Build Coastguard Worker.pass2_end:
    ; Two register groups need different shift counts: 4 for m0-m3 and
    ; m12-m15, 15 for m4-m11. Presumably the second group still carries
    ; the fractional bits of an unshifted multiply -- TODO confirm in
    ; main_part2 / pass1_rotations.
3345*c0909341SAndroid Build Coastguard Worker    REPX      {psrad x, 4 }, m0,  m1,  m2,  m3,  m12, m13, m14, m15
3346*c0909341SAndroid Build Coastguard Worker    REPX      {psrad x, 15}, m4,  m5,  m6,  m7,  m8,  m9,  m10, m11
3347*c0909341SAndroid Build Coastguard Worker    jmp m(idct_8x16_internal_12bpc).end
; Core of the 12 bpc 8x16 ADST second pass, shared with the flipADST
; variant (both call it in this excerpt).
; Steps: transpose the inputs, clamp to the 18-bit range, run the two
; halves of the 10 bpc 16x8 ADST main routine, then set up the constants
; left in m13-m15 for the rotation step that the callers invoke next.
; Out: m13 = pd_17407, m14 = pd_17408, m15 = pd_8; the data registers are
;      whatever main_part2 leaves behind (defined outside this excerpt).
3348*c0909341SAndroid Build Coastguard WorkerALIGN function_align
3349*c0909341SAndroid Build Coastguard Worker.pass2_main:
    ; After the transpose: cq slots 0-7 hold the first half, m0-m7 the
    ; second half.
3350*c0909341SAndroid Build Coastguard Worker    call m(idct_8x16_internal_12bpc).transpose
3351*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_18b_min]
3352*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m14, [clip_18b_max]
    ; Park the four second-half registers that main_part2 needs later.
3353*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 8], m0
3354*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*11], m3
3355*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*12], m4
3356*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*15], m7
    ; Inputs for main_part1, clamped to [clip_18b_min, clip_18b_max].
    ; The trailing numbers are the input indices.
3357*c0909341SAndroid Build Coastguard Worker    pmaxsd               m0, m13, [cq+32* 2] ;  2
3358*c0909341SAndroid Build Coastguard Worker    pmaxsd               m3, m13, m1         ;  9
3359*c0909341SAndroid Build Coastguard Worker    pmaxsd               m1, m13, m5         ; 13
3360*c0909341SAndroid Build Coastguard Worker    pmaxsd               m4, m13, m2         ; 10
3361*c0909341SAndroid Build Coastguard Worker    pmaxsd               m2, m13, [cq+32* 6] ;  6
3362*c0909341SAndroid Build Coastguard Worker    pmaxsd               m5, m13, [cq+32* 5] ;  5
3363*c0909341SAndroid Build Coastguard Worker    pmaxsd               m6, m13, m6         ; 14
3364*c0909341SAndroid Build Coastguard Worker    pmaxsd               m7, m13, [cq+32* 1] ;  1
3365*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m14}, m0, m1, m2, m3, m4, m5, m6, m7
3366*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [pd_2048]
3367*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m15, [pd_2896]
3368*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x8_internal_10bpc).main_part1
    ; Inputs for main_part2, again clamped to the 18-bit range.
3369*c0909341SAndroid Build Coastguard Worker    pmaxsd               m0, m13, [cq+32* 0] ;  0
3370*c0909341SAndroid Build Coastguard Worker    pmaxsd               m1, m13, [cq+32*15] ; 15
3371*c0909341SAndroid Build Coastguard Worker    pmaxsd               m2, m13, [cq+32* 4] ;  4
3372*c0909341SAndroid Build Coastguard Worker    pmaxsd               m3, m13, [cq+32*11] ; 11
3373*c0909341SAndroid Build Coastguard Worker    pmaxsd               m4, m13, [cq+32* 8] ;  8
3374*c0909341SAndroid Build Coastguard Worker    pmaxsd               m5, m13, [cq+32* 7] ;  7
3375*c0909341SAndroid Build Coastguard Worker    pmaxsd               m6, m13, [cq+32*12] ; 12
3376*c0909341SAndroid Build Coastguard Worker    pmaxsd               m7, m13, [cq+32* 3] ;  3
3377*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m14}, m0, m1, m2, m3, m4, m5, m6, m7
3378*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x8_internal_10bpc).main_part2
    ; Derive the remaining constants from registers instead of loading
    ; them. This relies on m15 still holding pd_2896 after main_part2
    ; (2896 >> 11 == 1), as the pd_1 comment below implies.
3379*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m14, [pd_17408]
3380*c0909341SAndroid Build Coastguard Worker    psrld               m15, 11              ; pd_1
3381*c0909341SAndroid Build Coastguard Worker    psubd               m13, m14, m15        ; pd_17407
3382*c0909341SAndroid Build Coastguard Worker    pslld               m15, 3               ; pd_8
3383*c0909341SAndroid Build Coastguard Worker    ret
3384*c0909341SAndroid Build Coastguard Worker
; Instantiate the 12 bpc 8x16 entry points whose first transform is
; flipADST. INV_TXFM_8X16_FN is defined outside this excerpt; the
; trailing 12 is presumably the bit depth -- TODO confirm.
3385*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X16_FN flipadst, dct,       0, 12
3386*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X16_FN flipadst, adst,      0, 12
3387*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X16_FN flipadst, flipadst,  0, 12
3388*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X16_FN flipadst, identity, 35, 12
3389*c0909341SAndroid Build Coastguard Worker
; iflipadst_8x16_internal_12bpc
; Named arguments (from the cglobal line): dst, stride, c, eob, tx2.
; Entry:  loads the 20-bit clip bounds into m12/m13 and continues in the
;         10 bpc pass-1 code.
; .pass2: identical to the ADST second pass except that the flipADST
;         rotation helper is called; the core and the tail are shared
;         with iadst_8x16_internal_12bpc.
3390*c0909341SAndroid Build Coastguard Workercglobal iflipadst_8x16_internal_12bpc, 0, 7, 16, 32*8, dst, stride, c, eob, tx2
3391*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [clip_20b_min]
3392*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_20b_max]
3393*c0909341SAndroid Build Coastguard Worker    jmp m(iflipadst_8x16_internal_10bpc).pass1
3394*c0909341SAndroid Build Coastguard Worker.pass2:
    ; r6 points into the stack area; presumably scratch for the helpers
    ; called below -- TODO confirm.
3395*c0909341SAndroid Build Coastguard Worker    lea                  r6, [rsp+32*4]
3396*c0909341SAndroid Build Coastguard Worker    call m(iadst_8x16_internal_12bpc).pass2_main
3397*c0909341SAndroid Build Coastguard Worker    call m(iflipadst_16x8_internal_10bpc).pass1_rotations
3398*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_8x16_internal_12bpc).pass2_end
3399*c0909341SAndroid Build Coastguard Worker
; Instantiate the 12 bpc 8x16 entry points whose first transform is the
; identity. INV_TXFM_8X16_FN is defined outside this excerpt; the
; trailing 12 is presumably the bit depth -- TODO confirm.
3400*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X16_FN identity, dct,      0, 12
3401*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X16_FN identity, adst,     0, 12
3402*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X16_FN identity, flipadst, 0, 12
3403*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X16_FN identity, identity, 0, 12
3404*c0909341SAndroid Build Coastguard Worker
; iidentity_8x16_internal_12bpc
; Named arguments (from the cglobal line): dst, stride, c, eob, tx2.
; Entry:  jumps straight into the 10 bpc pass-1 code (no clip constants
;         are loaded here, unlike the DCT/ADST variants).
; .pass2: clamps and scales all sixteen dword registers in .pass2_main,
;         packs them to words and reuses the 10 bpc .pass2_end tail with
;         the 12 bpc pixel maximum and pw_16384 as multiplier.
3405*c0909341SAndroid Build Coastguard Workercglobal iidentity_8x16_internal_12bpc, 0, 7, 16, 32*8, dst, stride, c, eob, tx2
3406*c0909341SAndroid Build Coastguard Worker    jmp m(iidentity_8x16_internal_10bpc).pass1
3407*c0909341SAndroid Build Coastguard Worker.pass2:
3408*c0909341SAndroid Build Coastguard Worker    call .pass2_main
    ; Narrow dwords to words with signed saturation, pairing mN with
    ; m(N+8). The m7/m15 pair goes to m13 so that m7 can carry the pixel
    ; maximum.
3409*c0909341SAndroid Build Coastguard Worker    packssdw             m0, m8
3410*c0909341SAndroid Build Coastguard Worker    packssdw             m1, m9
3411*c0909341SAndroid Build Coastguard Worker    packssdw             m2, m10
3412*c0909341SAndroid Build Coastguard Worker    packssdw             m3, m11
3413*c0909341SAndroid Build Coastguard Worker    packssdw             m4, m12
3414*c0909341SAndroid Build Coastguard Worker    packssdw             m5, m13
3415*c0909341SAndroid Build Coastguard Worker    packssdw             m6, m14
3416*c0909341SAndroid Build Coastguard Worker    packssdw            m13, m7, m15
    ; Constants expected by the shared tail: m12 = pmulhrsw multiplier,
    ; m7 = pixel maximum.
3417*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m7, [pixel_12bpc_max]
3418*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [pw_16384]
3419*c0909341SAndroid Build Coastguard Worker    call m(iidentity_8x16_internal_10bpc).pass2_end
3420*c0909341SAndroid Build Coastguard Worker    RET
; Applies, to every 32-bit lane of m0-m15:
;     x = clamp(x, clip_18b_min, clip_18b_max)
;     x = (x * 5793 + 1024) >> 14        (arithmetic shift)
; All sixteen vector registers hold data, so each constant is loaded into
; a data register (m7 or m15) whose contents are parked in [cq] for the
; duration of that step and processed afterwards from memory.
; In/Out: m0-m15. Clobbers the first 32 bytes at cq, which end up holding
; the clamped and multiplied (not yet biased or shifted) value of m15.
3421*c0909341SAndroid Build Coastguard WorkerALIGN function_align
3422*c0909341SAndroid Build Coastguard Worker.pass2_main:
    ; Lower clamp: m7 is parked, m7 = clip_18b_min.
3423*c0909341SAndroid Build Coastguard Worker    mova               [cq], m7
3424*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m7, [clip_18b_min]
3425*c0909341SAndroid Build Coastguard Worker    REPX     {pmaxsd x, m7}, m0,  m1,  m2,  m3,  m4,  m5,  m6, \
3426*c0909341SAndroid Build Coastguard Worker                             m8,  m9,  m10, m11, m12, m13, m14, m15
    ; m7 = max(clip_18b_min, parked m7): the data value is restored here.
3427*c0909341SAndroid Build Coastguard Worker    pmaxsd               m7, [cq]
    ; Upper clamp: m15 is parked, m15 = clip_18b_max.
3428*c0909341SAndroid Build Coastguard Worker    mova               [cq], m15
3429*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m15, [clip_18b_max]
3430*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m15}, m0,  m1,  m2,  m3,  m4,  m5,  m6, m7, \
3431*c0909341SAndroid Build Coastguard Worker                             m8,  m9,  m10, m11, m12, m13, m14
3432*c0909341SAndroid Build Coastguard Worker    pminsd              m15, [cq]
    ; Multiply by 5793: m7 is parked, m7 = pd_5793.
3433*c0909341SAndroid Build Coastguard Worker    mova               [cq], m7
3434*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m7, [pd_5793]
3435*c0909341SAndroid Build Coastguard Worker    REPX     {pmulld x, m7}, m0,  m1,  m2,  m3,  m4,  m5,  m6, \
3436*c0909341SAndroid Build Coastguard Worker                             m8,  m9,  m10, m11, m12, m13, m14, m15
3437*c0909341SAndroid Build Coastguard Worker    pmulld               m7, [cq]
    ; Add the bias 1024: m15 is parked, m15 = pd_1024.
3438*c0909341SAndroid Build Coastguard Worker    mova               [cq], m15
3439*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m15, [pd_1024]
3440*c0909341SAndroid Build Coastguard Worker    REPX    {paddd  x, m15}, m0,  m1,  m2,  m3,  m4,  m5,  m6, m7, \
3441*c0909341SAndroid Build Coastguard Worker                             m8,  m9,  m10, m11, m12, m13, m14
3442*c0909341SAndroid Build Coastguard Worker    paddd               m15, [cq]
    ; Final arithmetic right shift by 14 on all sixteen registers.
3443*c0909341SAndroid Build Coastguard Worker    REPX     {psrad  x, 14}, m0,  m1,  m2,  m3,  m4,  m5,  m6,  m7, \
3444*c0909341SAndroid Build Coastguard Worker                             m8,  m9,  m10, m11, m12, m13, m14, m15
3445*c0909341SAndroid Build Coastguard Worker    ret
3446*c0909341SAndroid Build Coastguard Worker
; INV_TXFM_16X4_FN type1, type2 [, bitdepth = 10]
; Emits the 16x4 entry point for one transform-type pair by expanding
; INV_TXFM_FN (defined outside this excerpt). For the dct_dct pair it
; additionally emits the DC-only path:
;   - bitdepth 10: the code is emitted inline under the labels .dconly,
;     .dconly2, .dconly3 and .dconly_loop;
;   - any other bitdepth: only the bias constant is loaded into m3 and
;     control jumps to the 10 bpc .dconly code.
; DC-only arithmetic, as written below:
;     dc = [cq] * 181;  dc = (dc + 384) >> 9
;     dc = dc * 181;    dc = (dc + 2176) >> 12
; The value is then broadcast as 16-bit words and added to two
; destination rows per loop iteration.
3447*c0909341SAndroid Build Coastguard Worker%macro INV_TXFM_16X4_FN 2-3 10 ; type1, type2, bitdepth
3448*c0909341SAndroid Build Coastguard Worker    INV_TXFM_FN          %1, %2, 0, 16x4, %3
3449*c0909341SAndroid Build Coastguard Worker%ifidn %1_%2, dct_dct
    ; m3 = per-bitdepth bias constant used by the saturating add/subtract
    ; pair in the loop.
3450*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m3, [dconly_%3bpc]
3451*c0909341SAndroid Build Coastguard Worker%if %3 = 10
3452*c0909341SAndroid Build Coastguard Worker.dconly:
3453*c0909341SAndroid Build Coastguard Worker    imul                r6d, [cq], 181
    ; Clear the DC coefficient by storing eobd (noted as 0 in the original
    ; comment), then set bit 2 of r3d, which the loop below uses as its
    ; row counter; with eob == 0 that makes it 4. r3 is presumably the
    ; register behind eob -- TODO confirm.
3454*c0909341SAndroid Build Coastguard Worker    mov                [cq], eobd ; 0
3455*c0909341SAndroid Build Coastguard Worker    or                  r3d, 4
3456*c0909341SAndroid Build Coastguard Worker.dconly2:
3457*c0909341SAndroid Build Coastguard Worker    add                 r6d, 384
3458*c0909341SAndroid Build Coastguard Worker    sar                 r6d, 9
3459*c0909341SAndroid Build Coastguard Worker.dconly3:
3460*c0909341SAndroid Build Coastguard Worker    imul                r6d, 181
3461*c0909341SAndroid Build Coastguard Worker    add                 r6d, 2176
3462*c0909341SAndroid Build Coastguard Worker    sar                 r6d, 12
    ; Fold the bias into the DC value once, then broadcast the low word.
3463*c0909341SAndroid Build Coastguard Worker    movd                xm0, r6d
3464*c0909341SAndroid Build Coastguard Worker    paddsw              xm0, xm3
3465*c0909341SAndroid Build Coastguard Worker    vpbroadcastw         m0, xm0
3466*c0909341SAndroid Build Coastguard Worker.dconly_loop:
    ; Signed-saturating add of (dc + bias), then unsigned-saturating
    ; subtract of the bias; presumably this pair clamps the result to the
    ; valid pixel range without a separate min/max -- TODO confirm against
    ; the dconly constant definitions.
3467*c0909341SAndroid Build Coastguard Worker    paddsw               m1, m0, [dstq+strideq*0]
3468*c0909341SAndroid Build Coastguard Worker    paddsw               m2, m0, [dstq+strideq*1]
3469*c0909341SAndroid Build Coastguard Worker    psubusw              m1, m3
3470*c0909341SAndroid Build Coastguard Worker    psubusw              m2, m3
3471*c0909341SAndroid Build Coastguard Worker    mova   [dstq+strideq*0], m1
3472*c0909341SAndroid Build Coastguard Worker    mova   [dstq+strideq*1], m2
    ; Two rows per iteration; loop while the remaining row count is > 0.
3473*c0909341SAndroid Build Coastguard Worker    lea                dstq, [dstq+strideq*2]
3474*c0909341SAndroid Build Coastguard Worker    sub                 r3d, 2
3475*c0909341SAndroid Build Coastguard Worker    jg .dconly_loop
3476*c0909341SAndroid Build Coastguard Worker    RET
3477*c0909341SAndroid Build Coastguard Worker%else
3478*c0909341SAndroid Build Coastguard Worker    jmp m(inv_txfm_add_dct_dct_16x4_10bpc).dconly
3479*c0909341SAndroid Build Coastguard Worker%endif
3480*c0909341SAndroid Build Coastguard Worker%endif
3481*c0909341SAndroid Build Coastguard Worker%endmacro
3482*c0909341SAndroid Build Coastguard Worker
; Instantiate the 10 bpc 16x4 entry points whose first transform is DCT
; (the bitdepth argument is omitted, so the macro default of 10 applies).
3483*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X4_FN dct, dct
3484*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X4_FN dct, identity
3485*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X4_FN dct, adst
3486*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X4_FN dct, flipadst
3487*c0909341SAndroid Build Coastguard Worker
; idct_16x4_internal_10bpc
; Named arguments (from the cglobal line): dst, stride, c, eob, tx2.
; Entry:  m8/m9 = 18-bit clip bounds, then falls through into .pass1.
; .pass1: loads the sixteen 128-bit coefficient groups from cq, two per
;         register, runs the three pass-1 helpers and jumps to the second
;         pass through tx2q.
; .pass2: transposes the packed data and runs the 8 bpc 16x4 DCT core.
; .end / .end2 / .end3: scale, add to four destination rows, clear the
;         coefficient buffer, clamp to [0, pixel max] and store. They are
;         separate labels, presumably so other functions can enter part
;         way through with their own constants -- TODO confirm.
3488*c0909341SAndroid Build Coastguard Workercglobal idct_16x4_internal_10bpc, 0, 7, 14, dst, stride, c, eob, tx2
3489*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m8, [clip_18b_min]
3490*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m9, [clip_18b_max]
3491*c0909341SAndroid Build Coastguard Worker.pass1:
    ; Even-indexed groups. Each is broadcast to both 128-bit lanes, then
    ; shufpd merges two registers; the trailing comments name the two
    ; groups that end up in each register.
3492*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m0, [cq+16* 0]
3493*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m4, [cq+16* 4]
3494*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m1, [cq+16* 2]
3495*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m7, [cq+16* 6]
3496*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m5, [cq+16*10]
3497*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m2, [cq+16* 8]
3498*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m6, [cq+16*12]
3499*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m3, [cq+16*14]
3500*c0909341SAndroid Build Coastguard Worker    shufpd               m0, m4, 0x0c ;  0  4
3501*c0909341SAndroid Build Coastguard Worker    shufpd               m1, m5, 0x0c ;  2 10
3502*c0909341SAndroid Build Coastguard Worker    shufpd               m2, m6, 0x0c ;  8 12
3503*c0909341SAndroid Build Coastguard Worker    shufpd               m3, m7, 0x0c ; 14  6
3504*c0909341SAndroid Build Coastguard Worker    call .pass1_main
    ; Odd-indexed groups, gathered the same way into m10, m11, m5, m6.
3505*c0909341SAndroid Build Coastguard Worker    vbroadcasti128      m10, [cq+16* 1]
3506*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m4, [cq+16* 5]
3507*c0909341SAndroid Build Coastguard Worker    vbroadcasti128      m11, [cq+16*15]
3508*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m5, [cq+16*11]
3509*c0909341SAndroid Build Coastguard Worker    shufpd              m10, m4, 0x0c ;  1  5
3510*c0909341SAndroid Build Coastguard Worker    shufpd              m11, m5, 0x0c ; 15 11
3511*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m5, [cq+16* 9]
3512*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m4, [cq+16*13]
3513*c0909341SAndroid Build Coastguard Worker    shufpd               m5, m4, 0x0c ;  9 13
3514*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m6, [cq+16* 7]
3515*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m4, [cq+16* 3]
3516*c0909341SAndroid Build Coastguard Worker    shufpd               m6, m4, 0x0c ;  7  3
3517*c0909341SAndroid Build Coastguard Worker    call .pass1_main2
    ; m4 = all ones (-1 per lane); subtracting it adds 1 to m0-m3, which
    ; serves as the rounding bias for the shift by 1 below.
3518*c0909341SAndroid Build Coastguard Worker    pcmpeqd              m4, m4
3519*c0909341SAndroid Build Coastguard Worker    REPX      {psubd x, m4}, m0, m1, m2, m3
3520*c0909341SAndroid Build Coastguard Worker    call .pass1_main3
3521*c0909341SAndroid Build Coastguard Worker    REPX      {psrad x, 1 }, m0, m1, m2, m3, m4, m5, m6, m7
3522*c0909341SAndroid Build Coastguard Worker    jmp                tx2q
3523*c0909341SAndroid Build Coastguard Worker.pass2:
    ; .transpose_4x16_packed lies outside this excerpt. r6 is pointed at
    ; deint_shuf+128 for the 8 bpc routine; presumably that routine
    ; addresses its constants relative to r6 -- TODO confirm.
3524*c0909341SAndroid Build Coastguard Worker    call .transpose_4x16_packed
3525*c0909341SAndroid Build Coastguard Worker    lea                  r6, [deint_shuf+128]
3526*c0909341SAndroid Build Coastguard Worker    call m(idct_16x4_internal_8bpc).main
3527*c0909341SAndroid Build Coastguard Worker.end:
    ; Scale the four 16-bit result rows with pmulhrsw by pw_2048 and load
    ; the 10 bpc pixel maximum into m5 for the clamp.
3528*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m4, [pw_2048]
3529*c0909341SAndroid Build Coastguard Worker    REPX   {pmulhrsw x, m4}, m0, m1, m2, m3
3530*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m5, [pixel_10bpc_max]
3531*c0909341SAndroid Build Coastguard Worker.end2:
    ; Add destination rows 0 and 1.
3532*c0909341SAndroid Build Coastguard Worker    paddw                m0, [dstq+strideq*0]
3533*c0909341SAndroid Build Coastguard Worker    paddw                m1, [dstq+strideq*1]
3534*c0909341SAndroid Build Coastguard Worker.end3:
    ; r6 = address of destination row 2; add rows 2 and 3.
3535*c0909341SAndroid Build Coastguard Worker    lea                  r6, [dstq+strideq*2]
3536*c0909341SAndroid Build Coastguard Worker    paddw                m2, [r6  +strideq*0]
3537*c0909341SAndroid Build Coastguard Worker    paddw                m3, [r6  +strideq*1]
    ; Zero 8*32 = 256 bytes of the coefficient buffer, then clamp every
    ; word to [0, m5] and store the four rows.
3538*c0909341SAndroid Build Coastguard Worker    pxor                 m4, m4
3539*c0909341SAndroid Build Coastguard Worker    REPX {mova [cq+32*x], m4}, 0, 1, 2, 3, 4, 5, 6, 7
3540*c0909341SAndroid Build Coastguard Worker    REPX     {pmaxsw x, m4}, m0, m1, m2, m3
3541*c0909341SAndroid Build Coastguard Worker    REPX     {pminsw x, m5}, m0, m1, m2, m3
3542*c0909341SAndroid Build Coastguard Worker    mova   [dstq+strideq*0], m0
3543*c0909341SAndroid Build Coastguard Worker    mova   [dstq+strideq*1], m1
3544*c0909341SAndroid Build Coastguard Worker    mova   [r6  +strideq*0], m2
3545*c0909341SAndroid Build Coastguard Worker    mova   [r6  +strideq*1], m3
3546*c0909341SAndroid Build Coastguard Worker    RET
; First pass-1 helper of the 16x4 DCT: runs the 8x4 DCT core on the
; even-indexed inputs and forms the eight idct8 outputs.
; In:  m0-m3 as loaded by .pass1 (two coefficient groups per register);
;      further inputs as required by idct_8x4_internal_10bpc.main, which
;      is defined outside this excerpt.
; Out: m0 = out0/out1, m1 = out3/out2, m2 = out4/out5, m3 = out7/out6
;      (per the comments below); m7 = pd_2048, which the later pass-1
;      helpers keep using as their rounding constant.
3547*c0909341SAndroid Build Coastguard WorkerALIGN function_align
3548*c0909341SAndroid Build Coastguard Worker.pass1_main:
3549*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m7, [pd_2048]
3550*c0909341SAndroid Build Coastguard Worker    call m(idct_8x4_internal_10bpc).main
    ; Final butterfly: sums and differences of the values left in m0, m2,
    ; m4 and m5 by the call above.
3551*c0909341SAndroid Build Coastguard Worker    psubd                m3, m0, m4   ; idct8 out7 out6
3552*c0909341SAndroid Build Coastguard Worker    paddd                m0, m4       ; idct8 out0 out1
3553*c0909341SAndroid Build Coastguard Worker    paddd                m1, m2, m5   ; idct8 out3 out2
3554*c0909341SAndroid Build Coastguard Worker    psubd                m2, m5       ; idct8 out4 out5
3555*c0909341SAndroid Build Coastguard Worker    ret
; Second pass-1 helper of the 16x4 DCT: processes the odd-indexed inputs
; (the t8..t15 terms named in the comments below).
; In:  m10 = inputs 1/5, m11 = 15/11, m5 = 9/13, m6 = 7/3 (as loaded by
;      .pass1); m7 = pd_2048; m8/m9 = clip min/max;
;      m0-m3 = idct8 outputs from .pass1_main.
; Out: m10 = t8a/t9, m11 = t15a/t14 (clamped);
;      m5, m6 = (t12a/t13), (t11a/t10) multiplied by 2896 and not yet
;      rounded or shifted (presumably finished in .pass1_main3);
;      m0-m3 clamped to [m8, m9].
; Clobbers: m4, m12, m13.
3556*c0909341SAndroid Build Coastguard WorkerALIGN function_align
3557*c0909341SAndroid Build Coastguard Worker.pass1_main2:
    ; Initial rotations; ITX_MULSUB_2D is defined outside this excerpt.
3558*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D        10, 11, 4, 12, 13, 7,  401_1931, 4076_3612, 1
3559*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         5,  6, 4, 12, 13, 7, 3166_3920, 2598_1189, 1
    ; Butterflies; psignd with the +3784/-3784 vector is used only for its
    ; sign pattern here, negating the lanes where the constant is negative.
3560*c0909341SAndroid Build Coastguard Worker    vbroadcasti128      m12, [pd_3784_m3784]
3561*c0909341SAndroid Build Coastguard Worker    psubd                m4, m10, m5
3562*c0909341SAndroid Build Coastguard Worker    paddd               m10, m5       ;  t8  t11
3563*c0909341SAndroid Build Coastguard Worker    psignd               m4, m12      ;  t9  t10
3564*c0909341SAndroid Build Coastguard Worker    psubd                m5, m11, m6
3565*c0909341SAndroid Build Coastguard Worker    paddd               m11, m6       ; t15  t12
3566*c0909341SAndroid Build Coastguard Worker    psignd               m5, m12      ; t14  t13
3567*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m6, [pd_1567]
3568*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [pd_3784]
    ; Clamp before multiplying, then rotate the m4/m5 pairs by the
    ; 1567/3784 coefficients with pd_2048 rounding and a shift by 12.
3569*c0909341SAndroid Build Coastguard Worker    REPX     {pmaxsd x, m8}, m5, m4
3570*c0909341SAndroid Build Coastguard Worker    REPX     {pminsd x, m9}, m5, m4
3571*c0909341SAndroid Build Coastguard Worker    pmulld              m12, m5
3572*c0909341SAndroid Build Coastguard Worker    pmulld               m5, m6
3573*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m6, [pd_1567_m1567]
3574*c0909341SAndroid Build Coastguard Worker    pmulld              m13, m4
3575*c0909341SAndroid Build Coastguard Worker    pmulld               m4, m6
    ; The clamps of m10, m11, m0, m1 are independent of the multiplies
    ; above and are interleaved with them.
3576*c0909341SAndroid Build Coastguard Worker    REPX     {pmaxsd x, m8}, m10, m11, m0, m1
3577*c0909341SAndroid Build Coastguard Worker    REPX     {pminsd x, m9}, m10, m11, m0, m1
3578*c0909341SAndroid Build Coastguard Worker    paddd               m12, m7
3579*c0909341SAndroid Build Coastguard Worker    paddd                m5, m7
3580*c0909341SAndroid Build Coastguard Worker    paddd                m4, m12
3581*c0909341SAndroid Build Coastguard Worker    psubd                m5, m13
3582*c0909341SAndroid Build Coastguard Worker    psrad                m4, 12       ; t14a t10a
3583*c0909341SAndroid Build Coastguard Worker    psrad                m5, 12       ; t9a  t13a
3584*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [pd_2896]
    ; Regroup the qword halves so the next butterfly operates on matching
    ; terms.
3585*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m6, m11, m5
3586*c0909341SAndroid Build Coastguard Worker    punpcklqdq          m11, m4
3587*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m4, m10, m4
3588*c0909341SAndroid Build Coastguard Worker    punpcklqdq          m10, m5
3589*c0909341SAndroid Build Coastguard Worker    psubd                m5, m11, m6  ; t12a t13
3590*c0909341SAndroid Build Coastguard Worker    paddd               m11, m6       ; t15a t14
3591*c0909341SAndroid Build Coastguard Worker    psubd                m6, m10, m4  ; t11a t10
3592*c0909341SAndroid Build Coastguard Worker    paddd               m10, m4       ; t8a  t9
    ; Clamp, then pre-multiply m5/m6 by 2896; no rounding or shift is
    ; applied to these products inside this helper.
3593*c0909341SAndroid Build Coastguard Worker    REPX     {pmaxsd x, m8}, m5, m6
3594*c0909341SAndroid Build Coastguard Worker    REPX     {pminsd x, m9}, m5, m6
3595*c0909341SAndroid Build Coastguard Worker    pmulld               m5, m12
3596*c0909341SAndroid Build Coastguard Worker    pmulld               m6, m12
3597*c0909341SAndroid Build Coastguard Worker    REPX     {pmaxsd x, m8}, m2, m3, m11, m10
3598*c0909341SAndroid Build Coastguard Worker    REPX     {pminsd x, m9}, m2, m3, m11, m10
3599*c0909341SAndroid Build Coastguard Worker    ret
3600*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; .pass1_main3 -- final butterfly stage: finishes the two products held in
; m5 / m6 and combines all terms into the sixteen outputs.
;   In:  m0-m3, m10, m11 = terms to be combined
;        m5, m6          = products that still need the "+ m7, >> 12" step
;        m7              = value added before the shift (presumably the
;                          rounding constant -- confirm against the caller)
;   Out: m0-m7 hold the outputs in the pairs named by the per-line comments,
;        with m1, m3, m5, m7 additionally having their 64-bit halves swapped
;        by the final pshufd.
;   Note: m7 is overwritten with out15/out14, so the value passed in m7 is
;         no longer available after this routine returns.
3601*c0909341SAndroid Build Coastguard Worker.pass1_main3:
3602*c0909341SAndroid Build Coastguard Worker    paddd                m5, m7
3603*c0909341SAndroid Build Coastguard Worker    psubd                m4, m5, m6
3604*c0909341SAndroid Build Coastguard Worker    paddd                m5, m6
3605*c0909341SAndroid Build Coastguard Worker    psrad                m4, 12      ; t11 t10a
3606*c0909341SAndroid Build Coastguard Worker    psrad                m5, 12      ; t12 t13a
3607*c0909341SAndroid Build Coastguard Worker    psubd                m7, m0, m11 ; out15 out14
3608*c0909341SAndroid Build Coastguard Worker    paddd                m0, m11     ; out0  out1
3609*c0909341SAndroid Build Coastguard Worker    psubd                m6, m1, m5  ; out12 out13
3610*c0909341SAndroid Build Coastguard Worker    paddd                m1, m5      ; out3  out2
3611*c0909341SAndroid Build Coastguard Worker    psubd                m5, m2, m4  ; out11 out10
3612*c0909341SAndroid Build Coastguard Worker    paddd                m2, m4      ; out4  out5
3613*c0909341SAndroid Build Coastguard Worker    psubd                m4, m3, m10 ; out8  out9
3614*c0909341SAndroid Build Coastguard Worker    paddd                m3, m10     ; out7  out6
; q1032 swaps the two qwords inside every 128-bit lane, putting the
; odd-numbered registers into the same low/high order as the even ones.
3615*c0909341SAndroid Build Coastguard Worker    REPX {pshufd x, x, q1032}, m1, m3, m5, m7
3616*c0909341SAndroid Build Coastguard Worker    ret
3617*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; .transpose_4x16_packed -- narrows eight registers of 32-bit values to
; 16-bit and rearranges them into four registers.
;   In:  m0-m7 = 32-bit values
;   Out: m0-m3 = packed 16-bit values after the rearrangement
;   Clobbers: m4, m6, m8 (m8 is loaded with deint_shuf)
; The packing uses packssdw, so values outside the int16 range saturate.
3618*c0909341SAndroid Build Coastguard Worker.transpose_4x16_packed:
3619*c0909341SAndroid Build Coastguard Worker    vbroadcasti128       m8, [deint_shuf]
; Merge register pairs: (m0,m1) (m2,m3) (m4,m5) (m6,m7) -> m0, m2, m4, m6.
3620*c0909341SAndroid Build Coastguard Worker    packssdw             m0, m1
3621*c0909341SAndroid Build Coastguard Worker    packssdw             m2, m3
3622*c0909341SAndroid Build Coastguard Worker    packssdw             m4, m5
3623*c0909341SAndroid Build Coastguard Worker    packssdw             m6, m7
; Byte shuffle within each 128-bit lane using the deint_shuf table.
3624*c0909341SAndroid Build Coastguard Worker    REPX     {pshufb x, m8}, m0, m2, m4, m6
; Interleave at 64-bit granularity ...
3625*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m1, m0, m2
3626*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m0, m2
3627*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m2, m4, m6
3628*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m4, m6
; ... then at 128-bit granularity: 0x31 gathers the two high lanes, while
; vinserti128 places the other register's low lane into the high lane.
3629*c0909341SAndroid Build Coastguard Worker    vperm2i128           m3, m1, m2, 0x31
3630*c0909341SAndroid Build Coastguard Worker    vinserti128          m1, xm2, 1
3631*c0909341SAndroid Build Coastguard Worker    vperm2i128           m2, m0, m4, 0x31
3632*c0909341SAndroid Build Coastguard Worker    vinserti128          m0, xm4, 1
3633*c0909341SAndroid Build Coastguard Worker    ret
3634*c0909341SAndroid Build Coastguard Worker
; Invoke INV_TXFM_16X4_FN (defined elsewhere in the file) for the four type
; pairs whose first type is adst; no bitdepth argument is passed here.
3635*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X4_FN adst, dct
3636*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X4_FN adst, adst
3637*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X4_FN adst, flipadst
3638*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X4_FN adst, identity
3639*c0909341SAndroid Build Coastguard Worker
; iadst_16x4_internal_10bpc -- ADST 16x4 worker with named arguments
; dst, stride, c, eob, tx2.
;   entry      : loads the 18-bit clamp bounds into m12 / m13, then falls
;                through into .pass1.
;   .pass1     : also the jump target of iadst_16x4_internal_12bpc, which
;                loads 20-bit bounds into m12 / m13 first.
;   .pass1_end : shared tail, also the jump target of the flipadst variant.
;   .pass2     : transposes to packed 16-bit and reuses the 8bpc ADST code.
; Control leaves the first pass through "jmp tx2q"; the .pass2 labels are
; presumably the targets stored in tx2 -- confirm in INV_TXFM_FN.
3640*c0909341SAndroid Build Coastguard Workercglobal iadst_16x4_internal_10bpc, 0, 7, 14, dst, stride, c, eob, tx2
3641*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [clip_18b_min]
3642*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_18b_max]
3643*c0909341SAndroid Build Coastguard Worker.pass1:
3644*c0909341SAndroid Build Coastguard Worker    call m(iadst_4x16_internal_10bpc).main
; m11 is turned into the rounding term for the ">> 1" in .pass1_end (the
; existing comment implies it held 2048 on return from .main).
3645*c0909341SAndroid Build Coastguard Worker    psrad               m11, 11 ; pd_1
; The callee's results are read from m0-m3 and m5-m8; the upper four are
; moved down by one register while the rounding term is added.
3646*c0909341SAndroid Build Coastguard Worker    REPX     {paddd x, m11}, m0, m1, m2, m3
3647*c0909341SAndroid Build Coastguard Worker    paddd                m4, m5, m11
3648*c0909341SAndroid Build Coastguard Worker    paddd                m5, m6, m11
3649*c0909341SAndroid Build Coastguard Worker    paddd                m6, m7, m11
3650*c0909341SAndroid Build Coastguard Worker    paddd                m7, m8, m11
3651*c0909341SAndroid Build Coastguard Worker.pass1_end:
; Swap the qwords inside each 128-bit lane of the even registers, then apply
; the final first-pass shift to all eight registers.
3652*c0909341SAndroid Build Coastguard Worker    REPX {pshufd x, x, q1032}, m0, m2, m4, m6
3653*c0909341SAndroid Build Coastguard Worker    REPX       {psrad x, 1}, m0, m1, m2, m3, m4, m5, m6, m7
3654*c0909341SAndroid Build Coastguard Worker    jmp                tx2q
3655*c0909341SAndroid Build Coastguard Worker.pass2:
3656*c0909341SAndroid Build Coastguard Worker    call m(idct_16x4_internal_10bpc).transpose_4x16_packed
; r6 is set up for the 8bpc routine called next; presumably that code
; addresses its constants relative to this base -- confirm in the 8bpc source.
3657*c0909341SAndroid Build Coastguard Worker    lea                  r6, [deint_shuf+128]
3658*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x4_internal_8bpc).main
3659*c0909341SAndroid Build Coastguard Worker    jmp m(idct_16x4_internal_10bpc).end
3660*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; .main -- first half of an ADST computation on coefficients read from cq.
; Each logical input is spread over two 32-byte loads, so every term exists
; in two registers (first load / second load):
;   in0 = [cq+32*0], [cq+32*1]      in2 = [cq+32*4], [cq+32*5]
;   in3 = [cq+32*6], [cq+32*7]      [cq+32*2], [cq+32*3] feed the -t3 term
;   Out: t0  in m4 / m7
;        t1  in m5 / m9
;        t2  in m2 / m0
;        -t3 in m3 / m1
;   Clobbers: m6, m8, m10
; No rounding or shifting is done here; see .main_end for the combination.
3661*c0909341SAndroid Build Coastguard Worker.main:
3662*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m6, [pd_1321]
3663*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+32*0]
3664*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+32*1]
3665*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m7, [pd_2482]
3666*c0909341SAndroid Build Coastguard Worker    mova                 m2, [cq+32*6]
3667*c0909341SAndroid Build Coastguard Worker    mova                 m3, [cq+32*7]
3668*c0909341SAndroid Build Coastguard Worker    pmulld               m4, m0, m6
3669*c0909341SAndroid Build Coastguard Worker    pmulld               m5, m1, m6    ; 1321*in0
3670*c0909341SAndroid Build Coastguard Worker    pmulld               m9, m2, m7
3671*c0909341SAndroid Build Coastguard Worker    pmulld               m8, m3, m7    ; 2482*in3
3672*c0909341SAndroid Build Coastguard Worker    paddd                m4, m9
3673*c0909341SAndroid Build Coastguard Worker    paddd                m8, m5        ; 1321*in0 + 2482*in3
3674*c0909341SAndroid Build Coastguard Worker    pmulld               m5, m0, m7
3675*c0909341SAndroid Build Coastguard Worker    pmulld               m9, m1, m7    ; 2482*in0
3676*c0909341SAndroid Build Coastguard Worker    paddd                m0, m2
3677*c0909341SAndroid Build Coastguard Worker    paddd                m1, m3        ; in0 + in3
; The third coefficient is not loaded from memory: 2482 + 1321 = 3803.
3678*c0909341SAndroid Build Coastguard Worker    paddd                m7, m6        ; pd_3803
3679*c0909341SAndroid Build Coastguard Worker    pmulld               m2, m7
3680*c0909341SAndroid Build Coastguard Worker    pmulld               m3, m7        ; 3803*in3
3681*c0909341SAndroid Build Coastguard Worker    psubd                m5, m2
3682*c0909341SAndroid Build Coastguard Worker    psubd                m9, m3        ; 2482*in0 - 3803*in3
3683*c0909341SAndroid Build Coastguard Worker    mova                 m2, [cq+32*4]
3684*c0909341SAndroid Build Coastguard Worker    pmulld              m10, m7, m2
3685*c0909341SAndroid Build Coastguard Worker    pmulld               m3, m6, m2
3686*c0909341SAndroid Build Coastguard Worker    psubd                m2, m0
3687*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+32*5]
3688*c0909341SAndroid Build Coastguard Worker    pmulld               m7, m0        ; 3803*in2
3689*c0909341SAndroid Build Coastguard Worker    pmulld               m6, m0        ; 1321*in2
3690*c0909341SAndroid Build Coastguard Worker    psubd                m0, m1        ; in2 - in0 - in3
; The multiplier is negative (pd_m3344), which turns (in2 - in0 - in3) into
; the t2 term and makes the last two products the negated t3 term.
3691*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m1, [pd_m3344]
3692*c0909341SAndroid Build Coastguard Worker    paddd                m4, m10
3693*c0909341SAndroid Build Coastguard Worker    paddd                m7, m8        ; t0
3694*c0909341SAndroid Build Coastguard Worker    psubd                m5, m3
3695*c0909341SAndroid Build Coastguard Worker    psubd                m9, m6        ; t1
3696*c0909341SAndroid Build Coastguard Worker    pmulld               m2, m1
3697*c0909341SAndroid Build Coastguard Worker    pmulld               m0, m1        ; t2
3698*c0909341SAndroid Build Coastguard Worker    pmulld               m3, m1, [cq+32*2]
3699*c0909341SAndroid Build Coastguard Worker    pmulld               m1, [cq+32*3] ; -t3
3700*c0909341SAndroid Build Coastguard Worker    ret
3701*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; .main_end -- combines the t0..t3 terms left by .main into eight outputs
; with the rounding term folded in. The caller applies the right shift.
;   In:  t0 in m4 / m7, t1 in m5 / m9, t2 in m2 / m0, -t3 in m3 / m1
;        m6 = rounding term; the caller must load it after .main, because
;             .main overwrites m6
;   Out: out0..out7 (unshifted) in m4, m5, m2, m3, m8, m9, m6, m7
;   Clobbers: m10; m0 and m1 are read but left unchanged
; Per half: out_a = t0 + t3, out_b = t1 + t3, out_c = t2,
;           out_d = t0 + t1 - t3, each plus the rounding term.
3702*c0909341SAndroid Build Coastguard Worker.main_end:
3703*c0909341SAndroid Build Coastguard Worker    ; expects: m6 = rnd
3704*c0909341SAndroid Build Coastguard Worker    paddd                m5, m6
3705*c0909341SAndroid Build Coastguard Worker    paddd                m9, m6
3706*c0909341SAndroid Build Coastguard Worker    paddd               m10, m4, m5
3707*c0909341SAndroid Build Coastguard Worker    paddd                m4, m6
3708*c0909341SAndroid Build Coastguard Worker    paddd                m8, m7, m6
3709*c0909341SAndroid Build Coastguard Worker    paddd                m7, m9
; m3 / m1 hold -t3, so subtracting them adds t3 and adding them subtracts it.
3710*c0909341SAndroid Build Coastguard Worker    psubd                m4, m3        ; out0 (unshifted)
3711*c0909341SAndroid Build Coastguard Worker    psubd                m5, m3        ; out1 (unshifted)
3712*c0909341SAndroid Build Coastguard Worker    paddd                m2, m6        ; out2 (unshifted)
3713*c0909341SAndroid Build Coastguard Worker    paddd                m3, m10       ; out3 (unshifted)
3714*c0909341SAndroid Build Coastguard Worker    psubd                m8, m1        ; out4 (unshifted)
3715*c0909341SAndroid Build Coastguard Worker    psubd                m9, m1        ; out5 (unshifted)
3716*c0909341SAndroid Build Coastguard Worker    paddd                m6, m0        ; out6 (unshifted)
3717*c0909341SAndroid Build Coastguard Worker    paddd                m7, m1        ; out7 (unshifted)
3718*c0909341SAndroid Build Coastguard Worker    ret
3719*c0909341SAndroid Build Coastguard Worker
; Invoke INV_TXFM_16X4_FN (defined elsewhere in the file) for the four type
; pairs whose first type is flipadst; no bitdepth argument is passed here.
3720*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X4_FN flipadst, dct
3721*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X4_FN flipadst, adst
3722*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X4_FN flipadst, flipadst
3723*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X4_FN flipadst, identity
3724*c0909341SAndroid Build Coastguard Worker
; iflipadst_16x4_internal_10bpc -- flipped-ADST 16x4 worker. It runs the same
; ADST core as iadst_16x4_internal_10bpc and differs only in which register
; each result ends up in (first pass) and in the row order (second pass).
;   .pass1 is also the jump target of iflipadst_16x4_internal_12bpc, which
;   loads 20-bit clamp bounds into m12 / m13 first.
3725*c0909341SAndroid Build Coastguard Workercglobal iflipadst_16x4_internal_10bpc, 0, 7, 14, dst, stride, c, eob, tx2
3726*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [clip_18b_min]
3727*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_18b_max]
3728*c0909341SAndroid Build Coastguard Worker.pass1:
3729*c0909341SAndroid Build Coastguard Worker    call m(iadst_4x16_internal_10bpc).main
3730*c0909341SAndroid Build Coastguard Worker    psrad               m11, 11 ; pd_1
; Add the rounding term while permuting registers. Each destination is a
; register whose old value has already been consumed, so the order of these
; eight instructions matters.
3731*c0909341SAndroid Build Coastguard Worker    paddd                m4, m3, m11
3732*c0909341SAndroid Build Coastguard Worker    paddd                m3, m5, m11
3733*c0909341SAndroid Build Coastguard Worker    paddd                m5, m2, m11
3734*c0909341SAndroid Build Coastguard Worker    paddd                m2, m6, m11
3735*c0909341SAndroid Build Coastguard Worker    paddd                m6, m1, m11
3736*c0909341SAndroid Build Coastguard Worker    paddd                m1, m7, m11
3737*c0909341SAndroid Build Coastguard Worker    paddd                m7, m0, m11
3738*c0909341SAndroid Build Coastguard Worker    paddd                m0, m8, m11
3739*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_16x4_internal_10bpc).pass1_end
3740*c0909341SAndroid Build Coastguard Worker.pass2:
3741*c0909341SAndroid Build Coastguard Worker    call m(idct_16x4_internal_10bpc).transpose_4x16_packed
; r6 is set up for the 8bpc routine called next; presumably that code
; addresses its constants relative to this base -- confirm in the 8bpc source.
3742*c0909341SAndroid Build Coastguard Worker    lea                  r6, [deint_shuf+128]
3743*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x4_internal_8bpc).main
; Scale with pmulhrsw by pw_2048 while reversing the register order
; (m3 -> m5, m2 -> m6, m1 -> m2, m0 -> m3), which is the vertical flip.
; With a multiplier of 2048, pmulhrsw acts as a rounded shift right by 4.
3744*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m4, [pw_2048]
3745*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m5, m3, m4
3746*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m6, m2, m4
3747*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m2, m1, m4
3748*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m3, m0, m4
; Add the first two results to the destination rows here; the remaining
; work is done at idct_16x4_internal_10bpc.end3 (not visible in this chunk),
; which presumably expects the pixel maximum in m5 -- confirm.
3749*c0909341SAndroid Build Coastguard Worker    paddw                m0, m5, [dstq+strideq*0]
3750*c0909341SAndroid Build Coastguard Worker    paddw                m1, m6, [dstq+strideq*1]
3751*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m5, [pixel_10bpc_max]
3752*c0909341SAndroid Build Coastguard Worker    jmp m(idct_16x4_internal_10bpc).end3
3753*c0909341SAndroid Build Coastguard Worker
; Invoke INV_TXFM_16X4_FN (defined elsewhere in the file) for the four type
; pairs whose first type is identity; no bitdepth argument is passed here.
3754*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X4_FN identity, dct
3755*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X4_FN identity, adst
3756*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X4_FN identity, flipadst
3757*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X4_FN identity, identity
3758*c0909341SAndroid Build Coastguard Worker
; iidentity_16x4_internal_10bpc -- identity 16x4 worker.
;   first pass : each coefficient x becomes (x * 5793 + 3072) >> 12, with the
;                constants taken from pd_5793 / pd_3072 (values per their names).
;   .pass2     : packs to 16-bit, then computes x + pmulhrsw(x, pw_1697x8)
;                with a saturating add and jumps to the shared store code.
3759*c0909341SAndroid Build Coastguard Workercglobal iidentity_16x4_internal_10bpc, 0, 7, 14, dst, stride, c, eob, tx2
3760*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m8, [pd_5793]
; vpermq q3120 swaps the two middle qwords of each 32-byte load.
3761*c0909341SAndroid Build Coastguard Worker    vpermq               m0, [cq+32*0], q3120 ; 0 1
3762*c0909341SAndroid Build Coastguard Worker    vpermq               m1, [cq+32*1], q3120 ; 2 3
3763*c0909341SAndroid Build Coastguard Worker    vpermq               m2, [cq+32*2], q3120 ; 4 5
3764*c0909341SAndroid Build Coastguard Worker    vpermq               m3, [cq+32*3], q3120 ; 6 7
3765*c0909341SAndroid Build Coastguard Worker    vpermq               m4, [cq+32*4], q3120 ; 8 9
3766*c0909341SAndroid Build Coastguard Worker    vpermq               m5, [cq+32*5], q3120 ; a b
3767*c0909341SAndroid Build Coastguard Worker    vpermq               m6, [cq+32*6], q3120 ; c d
3768*c0909341SAndroid Build Coastguard Worker    vpermq               m7, [cq+32*7], q3120 ; e f
3769*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m9, [pd_3072]
; Multiply, add the bias, and arithmetic-shift all eight registers.
3770*c0909341SAndroid Build Coastguard Worker    REPX     {pmulld x, m8}, m0, m1, m2, m3, m4, m5, m6, m7
3771*c0909341SAndroid Build Coastguard Worker    REPX     {paddd  x, m9}, m0, m1, m2, m3, m4, m5, m6, m7
3772*c0909341SAndroid Build Coastguard Worker    REPX     {psrad  x, 12}, m0, m1, m2, m3, m4, m5, m6, m7
3773*c0909341SAndroid Build Coastguard Worker    jmp                tx2q
3774*c0909341SAndroid Build Coastguard Worker.pass2:
3775*c0909341SAndroid Build Coastguard Worker    call m(idct_16x4_internal_10bpc).transpose_4x16_packed
; m7 holds the multiplier and is itself the destination of the last product,
; so the m3 product must be computed after the other three.
3776*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m7, [pw_1697x8]
3777*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m4, m7, m0
3778*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m5, m7, m1
3779*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m6, m7, m2
3780*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m7, m3
3781*c0909341SAndroid Build Coastguard Worker    paddsw               m0, m4
3782*c0909341SAndroid Build Coastguard Worker    paddsw               m1, m5
3783*c0909341SAndroid Build Coastguard Worker    paddsw               m2, m6
3784*c0909341SAndroid Build Coastguard Worker    paddsw               m3, m7
3785*c0909341SAndroid Build Coastguard Worker    jmp m(idct_16x4_internal_10bpc).end
3786*c0909341SAndroid Build Coastguard Worker
; Invoke INV_TXFM_16X4_FN (defined elsewhere in the file) for the four type
; pairs whose first type is dct, passing 12 as the third argument.
3787*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X4_FN dct, dct,      12
3788*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X4_FN dct, identity, 12
3789*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X4_FN dct, adst,     12
3790*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X4_FN dct, flipadst, 12
3791*c0909341SAndroid Build Coastguard Worker
; idct_16x4_internal_12bpc -- DCT 16x4 worker for 12-bit content.
;   first pass : loads the 20-bit clamp bounds into m8 / m9 and reuses the
;                10bpc first pass.
;   .pass2     : stays in 32-bit arithmetic. It clamps to the 18-bit range,
;                transposes, runs the idct_4x16 10bpc helpers, and only then
;                packs to 16-bit.
3792*c0909341SAndroid Build Coastguard Workercglobal idct_16x4_internal_12bpc, 0, 7, 14, dst, stride, c, eob, tx2
3793*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m8, [clip_20b_min]
3794*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m9, [clip_20b_max]
3795*c0909341SAndroid Build Coastguard Worker    jmp m(idct_16x4_internal_10bpc).pass1
3796*c0909341SAndroid Build Coastguard Worker.pass2:
3797*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [clip_18b_min]
3798*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_18b_max]
3799*c0909341SAndroid Build Coastguard Worker    REPX    {pmaxsd x, m12}, m0, m1, m2, m3, m4, m5, m6, m7
3800*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m0, m1, m2, m3, m4, m5, m6, m7
3801*c0909341SAndroid Build Coastguard Worker    ; deinterleave
3802*c0909341SAndroid Build Coastguard Worker    REPX {pshufd x, x, q3120}, m0, m1, m2, m3, m4, m5, m6, m7
3803*c0909341SAndroid Build Coastguard Worker    ; transpose
3804*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m8, m0, m1
3805*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m0, m1
3806*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m9, m2, m3
3807*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m2, m3
3808*c0909341SAndroid Build Coastguard Worker    punpcklqdq          m10, m4, m5
3809*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m4, m5
3810*c0909341SAndroid Build Coastguard Worker    punpcklqdq          m11, m6, m7
3811*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m6, m7
; m12 / m13 are reused as data registers from here on, so the clamp bounds
; loaded above are gone. m10 and m11 are both sources and destinations, so
; the last two vperm2i128 (reading m8 / m9) must come after the two that
; read m10 / m11.
3812*c0909341SAndroid Build Coastguard Worker    vperm2i128           m3,  m0,  m2, 0x31   ; out6
3813*c0909341SAndroid Build Coastguard Worker    vperm2i128           m1,  m0,  m2, 0x20   ; out2
3814*c0909341SAndroid Build Coastguard Worker    vperm2i128           m7,  m4,  m6, 0x31   ; out7
3815*c0909341SAndroid Build Coastguard Worker    vperm2i128           m5,  m4,  m6, 0x20   ; out3
3816*c0909341SAndroid Build Coastguard Worker    vperm2i128          m13, m10, m11, 0x31   ; out5
3817*c0909341SAndroid Build Coastguard Worker    vperm2i128          m12, m10, m11, 0x20   ; out1
3818*c0909341SAndroid Build Coastguard Worker    vperm2i128          m11,  m8,  m9, 0x31   ; out4
3819*c0909341SAndroid Build Coastguard Worker    vperm2i128          m10,  m8,  m9, 0x20   ; out0
3820*c0909341SAndroid Build Coastguard Worker    call m(idct_4x16_internal_10bpc).pass1_main
; m6 is used as the multiplier for m10..m13; presumably .pass1_main returns
; a constant in it -- confirm in idct_4x16_internal_10bpc.
3821*c0909341SAndroid Build Coastguard Worker    pmulld               m0, m6, m10
3822*c0909341SAndroid Build Coastguard Worker    pmulld               m2, m6, m11
3823*c0909341SAndroid Build Coastguard Worker    pmulld               m4, m6, m12
3824*c0909341SAndroid Build Coastguard Worker    pmulld               m6, m13
; NOTE(review): m10 = pd_17408 is an input of .pass1_main2 (presumably a
; combined rounding term); verify against that routine.
3825*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m10, [pd_17408]
3826*c0909341SAndroid Build Coastguard Worker    call m(idct_4x16_internal_10bpc).pass1_main2
; Final shift, then narrow to 16-bit with signed saturation.
3827*c0909341SAndroid Build Coastguard Worker    REPX       {psrad x, 4}, m0, m1, m2, m3, m4, m5, m6, m7
3828*c0909341SAndroid Build Coastguard Worker    packssdw             m0, m4
3829*c0909341SAndroid Build Coastguard Worker    packssdw             m1, m5
3830*c0909341SAndroid Build Coastguard Worker    packssdw             m2, m6
3831*c0909341SAndroid Build Coastguard Worker    packssdw             m3, m7
; m5 is free after the pack and is loaded with the 12-bit pixel maximum
; before jumping to the shared tail at .end2 (not visible in this chunk).
3832*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m5, [pixel_12bpc_max]
3833*c0909341SAndroid Build Coastguard Worker    REPX {vpermq x, x, q3120}, m0, m1, m2, m3
3834*c0909341SAndroid Build Coastguard Worker    jmp m(idct_16x4_internal_10bpc).end2
3835*c0909341SAndroid Build Coastguard Worker
; Invoke INV_TXFM_16X4_FN (defined elsewhere in the file) for the four type
; pairs whose first type is adst, passing 12 as the third argument.
3836*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X4_FN adst, dct,      12
3837*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X4_FN adst, adst,     12
3838*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X4_FN adst, flipadst, 12
3839*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X4_FN adst, identity, 12
3840*c0909341SAndroid Build Coastguard Worker
; iadst_16x4_internal_12bpc -- ADST 16x4 worker for 12-bit content.
;   first pass : loads the 20-bit clamp bounds into m12 / m13 and reuses the
;                10bpc first pass.
;   .pass2     : calls .pass2_main, which returns packed 16-bit rows in m0-m3,
;                pw_16384 in m4 and pixel_12bpc_max in m5. It then reorders
;                qwords, applies pmulhrsw by m4 (a rounded shift right by 1),
;                and jumps to the shared tail at .end2.
3841*c0909341SAndroid Build Coastguard Workercglobal iadst_16x4_internal_12bpc, 0, 7, 14, dst, stride, c, eob, tx2
3842*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [clip_20b_min]
3843*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_20b_max]
3844*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_16x4_internal_10bpc).pass1
3845*c0909341SAndroid Build Coastguard Worker.pass2:
3846*c0909341SAndroid Build Coastguard Worker    call .pass2_main
3847*c0909341SAndroid Build Coastguard Worker    REPX {vpermq x, x, q3120}, m0, m1, m2, m3
3848*c0909341SAndroid Build Coastguard Worker    REPX   {pmulhrsw x, m4}, m0, m1, m2, m3
3849*c0909341SAndroid Build Coastguard Worker    jmp m(idct_16x4_internal_10bpc).end2
3850*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; .pass2_main -- second-pass ADST core shared by the 12bpc adst and flipadst
; workers.
;   In:  m0-m7 = first-pass results (32-bit); cq = coefficient buffer, which
;        is used as scratch and overwritten
;   Out: m0-m3 = packed 16-bit results (after ">> 15", before the caller's
;                final pmulhrsw)
;        m4    = pw_16384, m5 = pixel_12bpc_max
;   Clobbers: m6-m10, m12, m13, plus whatever transpose_4x8 uses
; Steps: clamp to the 18-bit range, transpose in two halves, spill to cq in
; the layout that iadst_16x4_internal_10bpc.main reads, run .main and
; .main_end, then shift and pack.
3851*c0909341SAndroid Build Coastguard Worker.pass2_main:
3852*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [clip_18b_min]
3853*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_18b_max]
; The lower clamp is applied to all eight inputs up front, while m4 / m5 are
; copied to m8 / m9. Presumably transpose_4x8 clobbers m4 / m5 but preserves
; m6-m9 -- confirm in iadst_8x4_internal_12bpc.
3854*c0909341SAndroid Build Coastguard Worker    REPX    {pmaxsd x, m12}, m0, m1, m2, m3, m6, m7
3855*c0909341SAndroid Build Coastguard Worker    pmaxsd               m8, m4, m12
3856*c0909341SAndroid Build Coastguard Worker    pmaxsd               m9, m5, m12
3857*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m0, m1, m2, m3
3858*c0909341SAndroid Build Coastguard Worker    call m(iadst_8x4_internal_12bpc).transpose_4x8
; First half goes to the even 32-byte slots of cq ...
3859*c0909341SAndroid Build Coastguard Worker    mova          [cq+32*0], m0
3860*c0909341SAndroid Build Coastguard Worker    mova          [cq+32*2], m1
3861*c0909341SAndroid Build Coastguard Worker    mova          [cq+32*4], m2
3862*c0909341SAndroid Build Coastguard Worker    mova          [cq+32*6], m3
; ... then the second half gets its upper clamp, is transposed, and goes to
; the odd slots.
3863*c0909341SAndroid Build Coastguard Worker    pminsd               m0, m8, m13
3864*c0909341SAndroid Build Coastguard Worker    pminsd               m1, m9, m13
3865*c0909341SAndroid Build Coastguard Worker    pminsd               m2, m6, m13
3866*c0909341SAndroid Build Coastguard Worker    pminsd               m3, m7, m13
3867*c0909341SAndroid Build Coastguard Worker    call m(iadst_8x4_internal_12bpc).transpose_4x8
3868*c0909341SAndroid Build Coastguard Worker    mova          [cq+32*1], m0
3869*c0909341SAndroid Build Coastguard Worker    mova          [cq+32*3], m1
3870*c0909341SAndroid Build Coastguard Worker    mova          [cq+32*5], m2
3871*c0909341SAndroid Build Coastguard Worker    mova          [cq+32*7], m3
3872*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x4_internal_10bpc).main
; .main_end expects the rounding term in m6; it has to be loaded here because
; .main overwrites m6.
3873*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m6, [pd_2048]
3874*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x4_internal_10bpc).main_end
; .main_end leaves out0..out7 in m4, m5, m2, m3, m8, m9, m6, m7. Shift each by
; 15 and move them into m0..m7 order. m0 / m1 must be written from m4 / m5
; before m4 / m5 are overwritten with the values from m8 / m9.
3875*c0909341SAndroid Build Coastguard Worker    psrad                m0, m4, 15
3876*c0909341SAndroid Build Coastguard Worker    psrad                m1, m5, 15
3877*c0909341SAndroid Build Coastguard Worker    psrad                m2, 15
3878*c0909341SAndroid Build Coastguard Worker    psrad                m3, 15
3879*c0909341SAndroid Build Coastguard Worker    psrad                m4, m8, 15
3880*c0909341SAndroid Build Coastguard Worker    psrad                m5, m9, 15
3881*c0909341SAndroid Build Coastguard Worker    psrad                m6, 15
3882*c0909341SAndroid Build Coastguard Worker    psrad                m7, 15
3883*c0909341SAndroid Build Coastguard Worker    packssdw             m0, m4
3884*c0909341SAndroid Build Coastguard Worker    packssdw             m1, m5
3885*c0909341SAndroid Build Coastguard Worker    packssdw             m2, m6
3886*c0909341SAndroid Build Coastguard Worker    packssdw             m3, m7
; Constants handed back to the caller: the pmulhrsw multiplier and the pixel
; maximum.
3887*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m4, [pw_16384]
3888*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m5, [pixel_12bpc_max]
3889*c0909341SAndroid Build Coastguard Worker    ret
3890*c0909341SAndroid Build Coastguard Worker
; Invoke INV_TXFM_16X4_FN (defined elsewhere in the file) for the four type
; pairs whose first type is flipadst, passing 12 as the third argument.
3891*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X4_FN flipadst, dct,      12
3892*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X4_FN flipadst, adst,     12
3893*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X4_FN flipadst, flipadst, 12
3894*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X4_FN flipadst, identity, 12
3895*c0909341SAndroid Build Coastguard Worker
; iflipadst_16x4_internal_12bpc -- flipped-ADST 16x4 worker for 12-bit content.
;   first pass : loads the 20-bit clamp bounds into m12 / m13 and reuses the
;                10bpc flipadst first pass.
;   .pass2     : same core as the 12bpc adst worker (.pass2_main), with the
;                four result rows taken in reverse order.
3896*c0909341SAndroid Build Coastguard Workercglobal iflipadst_16x4_internal_12bpc, 0, 7, 14, dst, stride, c, eob, tx2
3897*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [clip_20b_min]
3898*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_20b_max]
3899*c0909341SAndroid Build Coastguard Worker    jmp m(iflipadst_16x4_internal_10bpc).pass1
3900*c0909341SAndroid Build Coastguard Worker.pass2:
3901*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x4_internal_12bpc).pass2_main
; Reverse the row order while applying the q3120 qword permute:
; m0 -> m7, m1 -> m6, m2 -> m1, m3 -> m0. m7 / m6 serve as temporaries so no
; source is overwritten before it has been read.
3902*c0909341SAndroid Build Coastguard Worker    vpermq               m7, m0, q3120
3903*c0909341SAndroid Build Coastguard Worker    vpermq               m6, m1, q3120
3904*c0909341SAndroid Build Coastguard Worker    vpermq               m1, m2, q3120
3905*c0909341SAndroid Build Coastguard Worker    vpermq               m0, m3, q3120
; m4 = pw_16384 as left by .pass2_main; pmulhrsw with it is a rounded shift
; right by 1. The last two multiplies also complete the reversal into m2 / m3.
3906*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m4
3907*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m4
3908*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m2, m6, m4
3909*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m3, m7, m4
3910*c0909341SAndroid Build Coastguard Worker    jmp m(idct_16x4_internal_10bpc).end2
3911*c0909341SAndroid Build Coastguard Worker
; Invoke INV_TXFM_16X4_FN (defined elsewhere in the file) for the four type
; pairs whose first type is identity, passing 12 as the third argument.
3912*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X4_FN identity, dct,      12
3913*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X4_FN identity, adst,     12
3914*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X4_FN identity, flipadst, 12
3915*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X4_FN identity, identity, 12
3916*c0909341SAndroid Build Coastguard Worker
; iidentity_16x4_internal_12bpc -- identity 16x4 worker for 12-bit content.
;   first pass : each coefficient x becomes x + ((x * 1697 + 3072) >> 12),
;                with the constants taken from pd_1697 / pd_3072 (values per
;                their names). This differs from the 10bpc worker, which
;                multiplies by pd_5793 directly.
;   .pass2     : clamps to the 18-bit range, computes (x * 5793 + 2048) >> 15
;                in 32-bit, packs/transposes to 16-bit, then applies pmulhrsw
;                by pw_16384 (a rounded shift right by 1).
3917*c0909341SAndroid Build Coastguard Workercglobal iidentity_16x4_internal_12bpc, 0, 7, 14, dst, stride, c, eob, tx2
3918*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m8, [pd_1697]
3919*c0909341SAndroid Build Coastguard Worker    vpermq               m0, [cq+32*0], q3120 ; 0 1
3920*c0909341SAndroid Build Coastguard Worker    vpermq               m1, [cq+32*1], q3120 ; 2 3
3921*c0909341SAndroid Build Coastguard Worker    vpermq               m2, [cq+32*2], q3120 ; 4 5
3922*c0909341SAndroid Build Coastguard Worker    vpermq               m3, [cq+32*3], q3120 ; 6 7
3923*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m9, [pd_3072]
; First four registers: products go to m4-m7 so m0-m3 keep the original x.
3924*c0909341SAndroid Build Coastguard Worker    pmulld               m4, m8, m0
3925*c0909341SAndroid Build Coastguard Worker    pmulld               m5, m8, m1
3926*c0909341SAndroid Build Coastguard Worker    pmulld               m6, m8, m2
3927*c0909341SAndroid Build Coastguard Worker    pmulld               m7, m8, m3
; The remaining coefficients are loaded into m10-m13 because m4-m7 are busy
; holding the products above.
3928*c0909341SAndroid Build Coastguard Worker    vpermq              m10, [cq+32*4], q3120 ; 8 9
3929*c0909341SAndroid Build Coastguard Worker    vpermq              m11, [cq+32*5], q3120 ; a b
3930*c0909341SAndroid Build Coastguard Worker    vpermq              m12, [cq+32*6], q3120 ; c d
3931*c0909341SAndroid Build Coastguard Worker    vpermq              m13, [cq+32*7], q3120 ; e f
3932*c0909341SAndroid Build Coastguard Worker    REPX     {paddd  x, m9}, m4, m5, m6, m7
3933*c0909341SAndroid Build Coastguard Worker    REPX     {psrad  x, 12}, m4, m5, m6, m7
; Finish m0-m3 while starting the products for the second group. Each m4-m7
; is consumed by its paddd just before being reused as a product destination.
3934*c0909341SAndroid Build Coastguard Worker    paddd                m0, m4
3935*c0909341SAndroid Build Coastguard Worker    pmulld               m4, m8, m10
3936*c0909341SAndroid Build Coastguard Worker    paddd                m1, m5
3937*c0909341SAndroid Build Coastguard Worker    pmulld               m5, m8, m11
3938*c0909341SAndroid Build Coastguard Worker    paddd                m2, m6
3939*c0909341SAndroid Build Coastguard Worker    pmulld               m6, m8, m12
3940*c0909341SAndroid Build Coastguard Worker    paddd                m3, m7
3941*c0909341SAndroid Build Coastguard Worker    pmulld               m7, m8, m13
3942*c0909341SAndroid Build Coastguard Worker    REPX     {paddd  x, m9}, m4, m5, m6, m7
3943*c0909341SAndroid Build Coastguard Worker    REPX     {psrad  x, 12}, m4, m5, m6, m7
3944*c0909341SAndroid Build Coastguard Worker    paddd                m4, m10
3945*c0909341SAndroid Build Coastguard Worker    paddd                m5, m11
3946*c0909341SAndroid Build Coastguard Worker    paddd                m6, m12
3947*c0909341SAndroid Build Coastguard Worker    paddd                m7, m13
3948*c0909341SAndroid Build Coastguard Worker    jmp                tx2q
3949*c0909341SAndroid Build Coastguard Worker.pass2:
3950*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [clip_18b_min]
3951*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_18b_max]
3952*c0909341SAndroid Build Coastguard Worker    REPX    {pmaxsd x, m12}, m0, m1, m2, m3, m4, m5, m6, m7
3953*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m0, m1, m2, m3, m4, m5, m6, m7
3954*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m8, [pd_5793]
3955*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m9, [pd_2048]
3956*c0909341SAndroid Build Coastguard Worker    REPX     {pmulld x, m8}, m0, m1, m2, m3, m4, m5, m6, m7
3957*c0909341SAndroid Build Coastguard Worker    REPX     {paddd  x, m9}, m0, m1, m2, m3, m4, m5, m6, m7
3958*c0909341SAndroid Build Coastguard Worker    REPX     {psrad  x, 15}, m0, m1, m2, m3, m4, m5, m6, m7
; transpose_4x16_packed narrows m0-m7 to 16-bit in m0-m3 and clobbers m4,
; m6, m8, so the pmulhrsw multiplier is loaded only afterwards.
3959*c0909341SAndroid Build Coastguard Worker    call m(idct_16x4_internal_10bpc).transpose_4x16_packed
3960*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m4, [pw_16384]
3961*c0909341SAndroid Build Coastguard Worker    REPX   {pmulhrsw x, m4}, m0, m1, m2, m3
3962*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m5, [pixel_12bpc_max]
3963*c0909341SAndroid Build Coastguard Worker    jmp m(idct_16x4_internal_10bpc).end2
3964*c0909341SAndroid Build Coastguard Worker
; INV_TXFM_16X8_FN type1, type2[, bitdepth]
;
; Expands INV_TXFM_FN (defined elsewhere in this file) for the 16x8 block size
; with the given transform pair. The third parameter is optional and defaults
; to 10. For the dct_dct pair only, the macro additionally emits a DC-only
; path: the first coefficient is scaled, its slot is overwritten with eobd,
; and control is handed to the .dconly2 label of the 16x4 10bpc dct_dct
; function, which finishes the job.
3965*c0909341SAndroid Build Coastguard Worker%macro INV_TXFM_16X8_FN 2-3 10 ; type1, type2, bitdepth
3966*c0909341SAndroid Build Coastguard Worker    INV_TXFM_FN          %1, %2, 0, 16x8, %3
3967*c0909341SAndroid Build Coastguard Worker%ifidn %1_%2, dct_dct
3968*c0909341SAndroid Build Coastguard Worker    imul                r6d, [cq], 181 ; r6d = first coefficient * 181 (181/256 ~= 0.707)
3969*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m3, [dconly_%3bpc] ; bitdepth-specific constant, presumably consumed by .dconly2
3970*c0909341SAndroid Build Coastguard Worker    mov                [cq], eobd ; 0
3971*c0909341SAndroid Build Coastguard Worker    or                  r3d, 8 ; NOTE(review): presumably the row count (8) for the shared .dconly2 code - confirm
3972*c0909341SAndroid Build Coastguard Worker    add                 r6d, 128 ; rounding offset for the >> 8 below
3973*c0909341SAndroid Build Coastguard Worker    sar                 r6d, 8
3974*c0909341SAndroid Build Coastguard Worker    imul                r6d, 181 ; second multiply by 181; no shift here, the rest is left to .dconly2
3975*c0909341SAndroid Build Coastguard Worker    jmp m(inv_txfm_add_dct_dct_16x4_10bpc).dconly2
3976*c0909341SAndroid Build Coastguard Worker%endif
3977*c0909341SAndroid Build Coastguard Worker%endmacro
3978*c0909341SAndroid Build Coastguard Worker
; Instantiate INV_TXFM_16X8_FN with dct as the first type, paired with each of
; four second types. No bitdepth argument is given, so the macro default (10)
; applies.
3979*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN dct, dct
3980*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN dct, identity
3981*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN dct, adst
3982*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN dct, flipadst
3983*c0909341SAndroid Build Coastguard Worker
;-----------------------------------------------------------------------
; idct_16x8_internal_10bpc
;
; Internal 16x8 inverse-DCT routine (named arguments: dst, stride, c, eob,
; tx2). The cglobal numbers 0, 7, 16, 32*8 are, per the usual x86inc
; convention, loaded args / GPRs / vector regs / stack bytes.
;
; Pass 1 (entry and .pass1): multiplies the sixteen 32-byte rows at cq by
;   pd_2896, runs the odd and even halves through helpers belonging to the
;   8x16 / 8x8 10bpc DCT functions, combines them in .pass1_rotations,
;   shifts every result right by 1 and jumps to the handler held in tx2q
;   with the 16 dword vectors in m0-m15.
; Pass 2 (.pass2): packs and transposes m0-m15 into 16-bit data in m0-m7,
;   calls the 8bpc 16x8 DCT kernel, scales by pw_2048 and adds the result
;   to dst four rows at a time, clamped to [0, pixel_10bpc_max]. The
;   coefficient buffer is overwritten with zeros while writing.
;
; The local labels .pass1_rotations, .transpose*, .end*, and .write_16x4*
; are sub-entry points; other functions in this file call into at least
; .transpose, .write_16x4_start and .write_16x4_zero.
;-----------------------------------------------------------------------
3984*c0909341SAndroid Build Coastguard Workercglobal idct_16x8_internal_10bpc, 0, 7, 16, 32*8, dst, stride, c, eob, tx2
; Clip bounds are loaded before .pass1, presumably so that callers with a
; different clip range can enter at .pass1 with their own m12/m13 - confirm.
3985*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [clip_18b_min]
3986*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_18b_max]
3987*c0909341SAndroid Build Coastguard Worker.pass1:
3988*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m14, [pd_2896]
; Odd-numbered rows (1, 3, ..., 15), each pre-multiplied by 2896.
3989*c0909341SAndroid Build Coastguard Worker    pmulld               m0, m14, [cq+32* 1]
3990*c0909341SAndroid Build Coastguard Worker    pmulld               m1, m14, [cq+32* 3]
3991*c0909341SAndroid Build Coastguard Worker    pmulld               m2, m14, [cq+32* 5]
3992*c0909341SAndroid Build Coastguard Worker    pmulld               m3, m14, [cq+32* 7]
3993*c0909341SAndroid Build Coastguard Worker    pmulld               m4, m14, [cq+32* 9]
3994*c0909341SAndroid Build Coastguard Worker    pmulld               m5, m14, [cq+32*11]
3995*c0909341SAndroid Build Coastguard Worker    pmulld               m6, m14, [cq+32*13]
3996*c0909341SAndroid Build Coastguard Worker    pmulld               m7, m14, [cq+32*15]
3997*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m11, [pd_2048]
3998*c0909341SAndroid Build Coastguard Worker    lea                  r6, [rsp+32*4] ; r6 = middle of the 32*8-byte stack area; slots are addressed as r6-32*4 .. r6+32*3
3999*c0909341SAndroid Build Coastguard Worker    call m(idct_8x16_internal_10bpc).main_oddhalf_rect2
; Even-numbered rows (0, 2, ..., 14), same pre-multiplication.
4000*c0909341SAndroid Build Coastguard Worker    pmulld               m0, m14, [cq+32* 0]
4001*c0909341SAndroid Build Coastguard Worker    pmulld               m1, m14, [cq+32* 2]
4002*c0909341SAndroid Build Coastguard Worker    pmulld               m2, m14, [cq+32* 4]
4003*c0909341SAndroid Build Coastguard Worker    pmulld               m3, m14, [cq+32* 6]
4004*c0909341SAndroid Build Coastguard Worker    pmulld               m4, m14, [cq+32* 8]
4005*c0909341SAndroid Build Coastguard Worker    pmulld               m5, m14, [cq+32*10]
4006*c0909341SAndroid Build Coastguard Worker    pmulld               m6, m14, [cq+32*12]
4007*c0909341SAndroid Build Coastguard Worker    pmulld               m7, m14, [cq+32*14]
4008*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).main_rect2
4009*c0909341SAndroid Build Coastguard Worker    call m(idct_8x16_internal_10bpc).main_evenhalf
4010*c0909341SAndroid Build Coastguard Worker    psrld               m11, 11 ; pd_1
; Add the rounding bias (+1) to m0-m7 only. .pass1_rotations derives all 16
; outputs as m0-m7 +/- a stack value, so the bias reaches every output before
; the >> 1 below.
4011*c0909341SAndroid Build Coastguard Worker    REPX    {paddd  x, m11}, m0, m1, m2, m3, m4, m5, m6, m7
4012*c0909341SAndroid Build Coastguard Worker    call .pass1_rotations
4013*c0909341SAndroid Build Coastguard Worker    REPX       {psrad x, 1}, m0,  m1,  m2,  m3,  m4,  m5,  m6,  m7, \
4014*c0909341SAndroid Build Coastguard Worker                             m8,  m9,  m10, m11, m12, m13, m14, m15
4015*c0909341SAndroid Build Coastguard Worker    jmp                tx2q ; continue in the second-pass handler selected by the caller
4016*c0909341SAndroid Build Coastguard Worker.pass2:
4017*c0909341SAndroid Build Coastguard Worker    call .transpose
4018*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_8bpc).main
4019*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m10, [pw_2048] ; pmulhrsw by 2048 == rounded >> 4
; .end: expects m0-m7 = eight rows of 16-bit results and m10 = word scale.
4020*c0909341SAndroid Build Coastguard Worker.end:
4021*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m10
4022*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m10
4023*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m2, m10
4024*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m3, m10
4025*c0909341SAndroid Build Coastguard Worker    call .write_16x4_start ; rows 0-3; also sets up m8 (zero), m9 (pixel max) and r3
; .end2: scale rows 4-7 into m0-m3 and write them. Relies on m8/m9/r3 still
; holding the values established by .write_16x4_start.
4026*c0909341SAndroid Build Coastguard Worker.end2:
4027*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m4, m10
4028*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m5, m10
4029*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m2, m6, m10
4030*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m3, m7, m10
4031*c0909341SAndroid Build Coastguard Worker    call .write_16x4_zero
4032*c0909341SAndroid Build Coastguard Worker    RET
4033*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; .pass1_rotations
;   In:  m0-m7 = one set of eight dword vectors; eight more vectors in the
;        stack slots [r6-32*4] .. [r6+32*3].
;   Out: m0-m7 = out0..out7 (sums), m8-m15 = out8..out15 (differences),
;        pairing m(i) with the slot at [r6-32*4 + 32*i].
;   The previous contents of m8-m15 are overwritten.
4034*c0909341SAndroid Build Coastguard Worker.pass1_rotations:
4035*c0909341SAndroid Build Coastguard Worker    mova                m14, [r6-32*4]
4036*c0909341SAndroid Build Coastguard Worker    mova                m13, [r6-32*3]
4037*c0909341SAndroid Build Coastguard Worker    mova                m12, [r6-32*2]
4038*c0909341SAndroid Build Coastguard Worker    mova                m11, [r6-32*1]
4039*c0909341SAndroid Build Coastguard Worker    mova                m10, [r6+32*0]
4040*c0909341SAndroid Build Coastguard Worker    mova                 m9, [r6+32*1]
4041*c0909341SAndroid Build Coastguard Worker    mova                 m8, [r6+32*2]
4042*c0909341SAndroid Build Coastguard Worker    psubd               m15, m0, m14       ; out15
4043*c0909341SAndroid Build Coastguard Worker    paddd                m0, m14           ; out0
4044*c0909341SAndroid Build Coastguard Worker    psubd               m14, m1, m13       ; out14
4045*c0909341SAndroid Build Coastguard Worker    paddd                m1, m13           ; out1
4046*c0909341SAndroid Build Coastguard Worker    psubd               m13, m2, m12       ; out13
4047*c0909341SAndroid Build Coastguard Worker    paddd                m2, m12           ; out2
4048*c0909341SAndroid Build Coastguard Worker    psubd               m12, m3, m11       ; out12
4049*c0909341SAndroid Build Coastguard Worker    paddd                m3, m11           ; out3
4050*c0909341SAndroid Build Coastguard Worker    psubd               m11, m4, m10       ; out11
4051*c0909341SAndroid Build Coastguard Worker    paddd                m4, m10           ; out4
4052*c0909341SAndroid Build Coastguard Worker    psubd               m10, m5, m9        ; out10
4053*c0909341SAndroid Build Coastguard Worker    paddd                m5, m9            ; out5
4054*c0909341SAndroid Build Coastguard Worker    psubd                m9, m6, m8        ; out9
4055*c0909341SAndroid Build Coastguard Worker    paddd                m6, m8            ; out6
; The last slot is used straight from memory: all 16 registers are occupied.
4056*c0909341SAndroid Build Coastguard Worker    psubd                m8, m7, [r6+32*3] ; out8
4057*c0909341SAndroid Build Coastguard Worker    paddd                m7, [r6+32*3]     ; out7
4058*c0909341SAndroid Build Coastguard Worker    ret
4059*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; .transpose / .transpose2 / .transpose3
;   .transpose  : sets r6 = deint_shuf+128, then falls through. r6 is not
;                 used by the transpose itself; presumably it is a constant
;                 base expected by the 8bpc code called afterwards - confirm.
;   .transpose2 : packs the dwords of m0-m7 (low) and m8-m15 (high) into
;                 words with signed saturation, leaving m0-m7.
;   .transpose3 : transposes the packed words through word/dword/qword
;                 interleaves and a final 128-bit lane exchange.
;   Out: m0-m7. m8 is clobbered as a temporary.
4060*c0909341SAndroid Build Coastguard Worker.transpose:
4061*c0909341SAndroid Build Coastguard Worker    lea                  r6, [deint_shuf+128]
4062*c0909341SAndroid Build Coastguard Worker.transpose2:
4063*c0909341SAndroid Build Coastguard Worker    packssdw             m0, m8
4064*c0909341SAndroid Build Coastguard Worker    packssdw             m1, m9
4065*c0909341SAndroid Build Coastguard Worker    packssdw             m2, m10
4066*c0909341SAndroid Build Coastguard Worker    packssdw             m3, m11
4067*c0909341SAndroid Build Coastguard Worker    packssdw             m4, m12
4068*c0909341SAndroid Build Coastguard Worker    packssdw             m5, m13
4069*c0909341SAndroid Build Coastguard Worker    packssdw             m6, m14
4070*c0909341SAndroid Build Coastguard Worker    packssdw             m7, m15
4071*c0909341SAndroid Build Coastguard Worker.transpose3:
; Stage 1: interleave 16-bit elements of adjacent register pairs.
4072*c0909341SAndroid Build Coastguard Worker    punpckhwd            m8, m0, m1
4073*c0909341SAndroid Build Coastguard Worker    punpcklwd            m0, m1
4074*c0909341SAndroid Build Coastguard Worker    punpcklwd            m1, m2, m3
4075*c0909341SAndroid Build Coastguard Worker    punpckhwd            m2, m3
4076*c0909341SAndroid Build Coastguard Worker    punpckhwd            m3, m4, m5
4077*c0909341SAndroid Build Coastguard Worker    punpcklwd            m4, m5
4078*c0909341SAndroid Build Coastguard Worker    punpckhwd            m5, m6, m7
4079*c0909341SAndroid Build Coastguard Worker    punpcklwd            m6, m7
; Stage 2: interleave 32-bit elements.
4080*c0909341SAndroid Build Coastguard Worker    punpckhdq            m7, m4, m6
4081*c0909341SAndroid Build Coastguard Worker    punpckldq            m4, m6
4082*c0909341SAndroid Build Coastguard Worker    punpckldq            m6, m8, m2
4083*c0909341SAndroid Build Coastguard Worker    punpckhdq            m8, m2
4084*c0909341SAndroid Build Coastguard Worker    punpckhdq            m2, m0, m1
4085*c0909341SAndroid Build Coastguard Worker    punpckldq            m0, m1
4086*c0909341SAndroid Build Coastguard Worker    punpckhdq            m1, m3, m5
4087*c0909341SAndroid Build Coastguard Worker    punpckldq            m3, m5
; Stage 3: interleave 64-bit elements.
4088*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m5, m6, m3
4089*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m6, m3
4090*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m3, m2, m7
4091*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m2, m7
4092*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m7, m8, m1
4093*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m8, m1
4094*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m1, m0, m4
4095*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m0, m4
; Stage 4: exchange 128-bit lanes. Each vperm2i128 (0x31) gathers the two
; high lanes into m4-m7 before the matching vinserti128 overwrites the high
; lane of m0-m3 with the partner's low lane.
4096*c0909341SAndroid Build Coastguard Worker    vperm2i128           m4, m0, m5, 0x31
4097*c0909341SAndroid Build Coastguard Worker    vinserti128          m0, xm5, 1
4098*c0909341SAndroid Build Coastguard Worker    vperm2i128           m5, m1, m6, 0x31
4099*c0909341SAndroid Build Coastguard Worker    vinserti128          m1, xm6, 1
4100*c0909341SAndroid Build Coastguard Worker    vperm2i128           m6, m2, m7, 0x31
4101*c0909341SAndroid Build Coastguard Worker    vinserti128          m2, xm7, 1
4102*c0909341SAndroid Build Coastguard Worker    vperm2i128           m7, m3, m8, 0x31
4103*c0909341SAndroid Build Coastguard Worker    vinserti128          m3, xm8, 1
4104*c0909341SAndroid Build Coastguard Worker    ret
4105*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; .write_16x4_start / .write_16x4_zero / .write_16x4
;   Three entry points that fall through into one another:
;   .write_16x4_start : m9 = pixel_10bpc_max, r3 = stride*3, m8 = 0.
;   .write_16x4_zero  : stores m8 to the eight 32-byte slots at cq and
;                       advances cq by 32*8. This clears the coefficients
;                       only if m8 is zero, as set up by .write_16x4_start.
;   .write_16x4       : adds m0-m3 to four rows at dst, clamps each word
;                       to [m8, m9], stores the rows back and advances dst
;                       by four rows.
;   In:  m0-m3 = residual rows (words); for the later entry points also
;        m8, m9 and r3 as described above.
4106*c0909341SAndroid Build Coastguard Worker.write_16x4_start:
4107*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m9, [pixel_10bpc_max]
4108*c0909341SAndroid Build Coastguard Worker    lea                  r3, [strideq*3]
4109*c0909341SAndroid Build Coastguard Worker    pxor                 m8, m8
4110*c0909341SAndroid Build Coastguard Worker.write_16x4_zero:
4111*c0909341SAndroid Build Coastguard Worker    REPX {mova [cq+32*x], m8}, 0, 1, 2, 3, 4, 5, 6, 7
4112*c0909341SAndroid Build Coastguard Worker    add                  cq, 32*8
4113*c0909341SAndroid Build Coastguard Worker.write_16x4:
4114*c0909341SAndroid Build Coastguard Worker    paddw                m0, [dstq+strideq*0]
4115*c0909341SAndroid Build Coastguard Worker    paddw                m1, [dstq+strideq*1]
4116*c0909341SAndroid Build Coastguard Worker    paddw                m2, [dstq+strideq*2]
4117*c0909341SAndroid Build Coastguard Worker    paddw                m3, [dstq+r3       ]
4118*c0909341SAndroid Build Coastguard Worker    REPX     {pmaxsw x, m8}, m0, m1, m2, m3 ; lower clamp
4119*c0909341SAndroid Build Coastguard Worker    REPX     {pminsw x, m9}, m0, m1, m2, m3 ; upper clamp
4120*c0909341SAndroid Build Coastguard Worker    mova   [dstq+strideq*0], m0
4121*c0909341SAndroid Build Coastguard Worker    mova   [dstq+strideq*1], m1
4122*c0909341SAndroid Build Coastguard Worker    mova   [dstq+strideq*2], m2
4123*c0909341SAndroid Build Coastguard Worker    mova   [dstq+r3       ], m3
4124*c0909341SAndroid Build Coastguard Worker    lea                dstq, [dstq+strideq*4]
4125*c0909341SAndroid Build Coastguard Worker    ret
4126*c0909341SAndroid Build Coastguard Worker
; Instantiate INV_TXFM_16X8_FN with adst as the first type, paired with each
; of four second types. No bitdepth argument is given, so the macro default
; (10) applies.
4127*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN adst, dct
4128*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN adst, adst
4129*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN adst, flipadst
4130*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN adst, identity
4131*c0909341SAndroid Build Coastguard Worker
;-----------------------------------------------------------------------
; iadst_16x8_internal_10bpc
;
; Internal 16x8 inverse-ADST routine (named arguments: dst, stride, c, eob,
; tx2), with the same cglobal register/stack declaration as the 16x8 IDCT.
;
; Pass 1: .main computes the 16-point ADST on the dword rows at cq. It
;   returns some outputs negated and some "unshifted" (still carrying the
;   pd_1448 factor); see the .main header below. .pass1_rotations then adds
;   the rounding constants and undoes the negations, and .pass1_end applies
;   the final shifts (>> 1 for out0-3 / out12-15, >> 12 for out4-11) before
;   jumping to the handler in tx2q.
; Pass 2 (.pass2): transposes to 16-bit via the IDCT's .transpose, runs the
;   8bpc 16x8 ADST kernel and scales by +/-pw_2048, alternating per row,
;   then writes through the IDCT's .write_16x4_* helpers.
;
; .pass1_end and .main are also used from outside this function.
;-----------------------------------------------------------------------
4132*c0909341SAndroid Build Coastguard Workercglobal iadst_16x8_internal_10bpc, 0, 7, 16, 32*8, dst, stride, c, eob, tx2
4133*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_18b_min]
4134*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m14, [clip_18b_max]
4135*c0909341SAndroid Build Coastguard Worker.pass1:
4136*c0909341SAndroid Build Coastguard Worker    lea                  r6, [rsp+32*4] ; r6 = middle of the stack area; slots are r6-32*4 .. r6+32*3
4137*c0909341SAndroid Build Coastguard Worker    call .main
; m15 was loaded with pd_2896 by .main; the shift below relies on it still
; holding that value on return (2896 >> 11 == 1).
4138*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m14, [pd_3072]
4139*c0909341SAndroid Build Coastguard Worker    psrld               m15, 11       ; pd_1
4140*c0909341SAndroid Build Coastguard Worker    psubd               m13, m14, m15 ; pd_3071
4141*c0909341SAndroid Build Coastguard Worker    call .pass1_rotations
; .pass1_end: expects m0-m3 and m12-m15 biased for a >> 1, m4-m11 biased for
; a >> 12.
4142*c0909341SAndroid Build Coastguard Worker.pass1_end:
4143*c0909341SAndroid Build Coastguard Worker    REPX      {psrad x, 1 }, m0,  m1,  m2,  m3,  m12, m13, m14, m15
4144*c0909341SAndroid Build Coastguard Worker    REPX      {psrad x, 12}, m4,  m5,  m6,  m7,  m8,  m9,  m10, m11
4145*c0909341SAndroid Build Coastguard Worker    jmp                tx2q
4146*c0909341SAndroid Build Coastguard Worker.pass2:
4147*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_10bpc).transpose
4148*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x8_internal_8bpc).main
4149*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x8_internal_8bpc).main_pass2_end
4150*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m10, [pw_2048]
4151*c0909341SAndroid Build Coastguard Worker    pxor                m11, m11
4152*c0909341SAndroid Build Coastguard Worker    psubw               m11, m10 ; m11 = -2048 (words)
; Even rows are scaled by +2048, odd rows by -2048. Presumably the 8bpc
; kernel returns the odd rows negated - confirm against its definition.
4153*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m10
4154*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m11
4155*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m2, m10
4156*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m3, m11
4157*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_10bpc).write_16x4_start
4158*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m4, m10
4159*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m5, m11
4160*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m2, m6, m10
4161*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m3, m7, m11
4162*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_10bpc).write_16x4_zero
4163*c0909341SAndroid Build Coastguard Worker    RET
4164*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; .pass1_rotations
;   In:  register/stack layout produced by .main (see its header), plus
;        m15 = pd_1, m14 = pd_3072, m13 = pd_3071.
;   Out: m0-m15 = out0..out15 in natural order, each with its rounding
;        constant added. Values that .main left negated are restored by
;        computing (constant - value) instead of (value + constant).
4165*c0909341SAndroid Build Coastguard Worker.pass1_rotations:
4166*c0909341SAndroid Build Coastguard Worker    paddd                m0, m15 ; out0 + 1
4167*c0909341SAndroid Build Coastguard Worker    psubd                m1, m15, m1 ; 1 - (-out1)
4168*c0909341SAndroid Build Coastguard Worker    paddd                m2, m15 ; out2 + 1
4169*c0909341SAndroid Build Coastguard Worker    psubd                m3, m15, m3 ; 1 - (-out3)
4170*c0909341SAndroid Build Coastguard Worker    paddd                m4, m14 ; out4 (unshifted) + 3072
4171*c0909341SAndroid Build Coastguard Worker    psubd                m5, m13, m5 ; 3071 - (-out5 unshifted)
4172*c0909341SAndroid Build Coastguard Worker    paddd                m6, m14 ; out6 (unshifted) + 3072
4173*c0909341SAndroid Build Coastguard Worker    psubd                m7, m13, m7 ; 3071 - (-out7 unshifted)
; From here on the values also move down one register (m9 -> m8, m10 -> m9,
; ...) so the outputs end up in natural order.
4174*c0909341SAndroid Build Coastguard Worker    paddd                m8, m14, m9 ; out8 (unshifted) + 3072
4175*c0909341SAndroid Build Coastguard Worker    psubd                m9, m13, m10 ; 3071 - (-out9 unshifted)
4176*c0909341SAndroid Build Coastguard Worker    paddd               m10, m14, m11 ; out10 (unshifted) + 3072
4177*c0909341SAndroid Build Coastguard Worker    psubd               m11, m13, m12 ; 3071 - (-out11 unshifted)
4178*c0909341SAndroid Build Coastguard Worker    paddd               m12, m15, [r6-32*1] ; out12 + 1
4179*c0909341SAndroid Build Coastguard Worker    psubd               m13, m15, [r6-32*2] ; 1 - (-out13)
4180*c0909341SAndroid Build Coastguard Worker    paddd               m14, m15, [r6-32*3] ; out14 + 1
4181*c0909341SAndroid Build Coastguard Worker    psubd               m15,      [r6-32*4] ; 1 - (-out15); m15 is consumed last
4182*c0909341SAndroid Build Coastguard Worker    ret
4183*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; .main - 16-point inverse ADST on dword data, first pass.
;   In:  cq = coefficient rows (32 bytes each), r6 = base of eight 32-byte
;        scratch slots at [r6-32*4] .. [r6+32*3], m13/m14 = clip min/max.
;   Out: m0  =  out0              m1  = -out1
;        m2  =  out2              m3  = -out3
;        m4  =  out4  (unshifted) m5  = -out5  (unshifted)
;        m6  =  out6  (unshifted) m7  = -out7  (unshifted)
;        m9  =  out8  (unshifted) m10 = -out9  (unshifted)
;        m11 =  out10 (unshifted) m12 = -out11 (unshifted)
;        [r6-32*1] = out12   [r6-32*2] = -out13
;        [r6-32*3] = out14   [r6-32*4] = -out15
;        m15 = pd_2896 (loaded here and not written again in the visible
;        code; assumes ITX_MULSUB_2D only touches the registers passed to
;        it - confirm against the macro).
;   m8 is left holding a clipped, pd_1448-scaled temporary.
;   Every input row is first scaled by 2896 with a rounded >> 12. The first
;   group of eight rows is processed by .main_part1, which parks its results
;   in the scratch slots; the second group falls through into .main_part2.
4184*c0909341SAndroid Build Coastguard Worker.main:
4185*c0909341SAndroid Build Coastguard Worker    ; expects: m13 = clip_min   m14 = clip_max
4186*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m15, [pd_2896]
4187*c0909341SAndroid Build Coastguard Worker    pmulld               m0, m15, [cq+32* 2]
4188*c0909341SAndroid Build Coastguard Worker    pmulld               m1, m15, [cq+32*13]
4189*c0909341SAndroid Build Coastguard Worker    pmulld               m2, m15, [cq+32* 6]
4190*c0909341SAndroid Build Coastguard Worker    pmulld               m3, m15, [cq+32* 9]
4191*c0909341SAndroid Build Coastguard Worker    pmulld               m4, m15, [cq+32*10]
4192*c0909341SAndroid Build Coastguard Worker    pmulld               m5, m15, [cq+32* 5]
4193*c0909341SAndroid Build Coastguard Worker    pmulld               m6, m15, [cq+32*14]
4194*c0909341SAndroid Build Coastguard Worker    pmulld               m7, m15, [cq+32* 1]
4195*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [pd_2048] ; rounding constant, also handed to ITX_MULSUB_2D below
4196*c0909341SAndroid Build Coastguard Worker    REPX     {paddd x, m12}, m0, m1, m2, m3, m4, m5, m6, m7
4197*c0909341SAndroid Build Coastguard Worker    REPX     {psrad x, 12 }, m0, m1, m2, m3, m4, m5, m6, m7
4198*c0909341SAndroid Build Coastguard Worker    call .main_part1
4199*c0909341SAndroid Build Coastguard Worker    pmulld               m0, m15, [cq+32* 0]
4200*c0909341SAndroid Build Coastguard Worker    pmulld               m1, m15, [cq+32*15]
4201*c0909341SAndroid Build Coastguard Worker    pmulld               m2, m15, [cq+32* 4]
4202*c0909341SAndroid Build Coastguard Worker    pmulld               m3, m15, [cq+32*11]
4203*c0909341SAndroid Build Coastguard Worker    pmulld               m4, m15, [cq+32* 8]
4204*c0909341SAndroid Build Coastguard Worker    pmulld               m5, m15, [cq+32* 7]
4205*c0909341SAndroid Build Coastguard Worker    pmulld               m6, m15, [cq+32*12]
4206*c0909341SAndroid Build Coastguard Worker    pmulld               m7, m15, [cq+32* 3]
4207*c0909341SAndroid Build Coastguard Worker    REPX     {paddd x, m12}, m0, m1, m2, m3, m4, m5, m6, m7
4208*c0909341SAndroid Build Coastguard Worker    REPX     {psrad x, 12 }, m0, m1, m2, m3, m4, m5, m6, m7
; .main_part2: second half of the ADST. Consumes m0-m7 plus the eight values
; that .main_part1 stored in the scratch slots, and produces the output
; layout documented at .main. Scratch slots are overwritten with outputs as
; soon as their previous contents have been read, so the order of loads and
; stores below matters.
4209*c0909341SAndroid Build Coastguard Worker.main_part2:
4210*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         1, 0, 8, 9, 10, 12,  201, 4091
4211*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         3, 2, 8, 9, 10, 12, 1751, 3703
4212*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         5, 4, 8, 9, 10, 12, 3035, 2751
4213*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         7, 6, 8, 9, 10, 12, 3857, 1380
4214*c0909341SAndroid Build Coastguard Worker    psubd                m8, m0, m4 ; t8a
4215*c0909341SAndroid Build Coastguard Worker    paddd                m0, m4     ; t0a
4216*c0909341SAndroid Build Coastguard Worker    psubd                m4, m1, m5 ; t9a
4217*c0909341SAndroid Build Coastguard Worker    paddd                m1, m5     ; t1a
4218*c0909341SAndroid Build Coastguard Worker    psubd                m5, m2, m6 ; t12a
4219*c0909341SAndroid Build Coastguard Worker    paddd                m2, m6     ; t4a
4220*c0909341SAndroid Build Coastguard Worker    psubd                m6, m3, m7 ; t13a
4221*c0909341SAndroid Build Coastguard Worker    paddd                m7, m3     ; t5a
4222*c0909341SAndroid Build Coastguard Worker    REPX    {pmaxsd x, m13}, m8, m4, m5, m6, m0, m1, m2, m7
4223*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m14}, m8, m4, m5, m6, m0, m1, m2, m7
4224*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m11, [pd_4017]
4225*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m10, [pd_799]
4226*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         8, 4, 3, 9, _, 12, 10, 11
4227*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         6, 5, 3, 9, _, 12, 11, 10
4228*c0909341SAndroid Build Coastguard Worker    psubd                m3, m0, m2 ; t4
4229*c0909341SAndroid Build Coastguard Worker    paddd                m0, m2     ; t0
4230*c0909341SAndroid Build Coastguard Worker    psubd                m2, m1, m7 ; t5
4231*c0909341SAndroid Build Coastguard Worker    paddd                m1, m7     ; t1
4232*c0909341SAndroid Build Coastguard Worker    psubd                m7, m4, m6 ; t12a
4233*c0909341SAndroid Build Coastguard Worker    paddd                m4, m6     ; t8a
4234*c0909341SAndroid Build Coastguard Worker    psubd                m6, m8, m5 ; t13a
4235*c0909341SAndroid Build Coastguard Worker    paddd                m5, m8     ; t9a
4236*c0909341SAndroid Build Coastguard Worker    REPX    {pmaxsd x, m13}, m3, m2, m7, m6, m0, m1, m4, m5
4237*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m14}, m3, m2, m7, m6, m0, m1, m4, m5
4238*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m11, [pd_3784]
4239*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m10, [pd_1567]
4240*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         3, 2, 8, 9, _, 12, 10, 11
4241*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         7, 6, 8, 9, _, 12, 10, 11
; t2/t3/t10a/t11a were stored by .main_part1 with only the lower clip
; applied; the pminsd-from-memory loads here and further down apply the
; deferred upper clip while loading.
4242*c0909341SAndroid Build Coastguard Worker    pminsd              m10, m14, [r6-32*4] ;  t2
4243*c0909341SAndroid Build Coastguard Worker    pminsd               m8, m14, [r6-32*3] ;  t3
4244*c0909341SAndroid Build Coastguard Worker    psubd                m9, m0, m10        ;  t2a
4245*c0909341SAndroid Build Coastguard Worker    paddd                m0, m10            ;  out0
4246*c0909341SAndroid Build Coastguard Worker    psubd               m10, m1, m8         ;  t3a
4247*c0909341SAndroid Build Coastguard Worker    paddd                m1, m8             ; -out15
4248*c0909341SAndroid Build Coastguard Worker    pmaxsd               m9, m13
4249*c0909341SAndroid Build Coastguard Worker    pmaxsd              m10, m13
4250*c0909341SAndroid Build Coastguard Worker    pminsd               m9, m14
4251*c0909341SAndroid Build Coastguard Worker    pminsd              m10, m14
4252*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*4], m1 ; park -out15 (slot's t2 has already been read)
4253*c0909341SAndroid Build Coastguard Worker    mova                m11, [r6-32*1]      ;  t7a
4254*c0909341SAndroid Build Coastguard Worker    mova                 m1, [r6-32*2]      ;  t6a
4255*c0909341SAndroid Build Coastguard Worker    psubd                m8, m3, m11        ;  t7
4256*c0909341SAndroid Build Coastguard Worker    paddd               m11, m3             ;  out12
4257*c0909341SAndroid Build Coastguard Worker    paddd                m3, m2, m1         ; -out3
4258*c0909341SAndroid Build Coastguard Worker    psubd                m2, m1             ;  t6
4259*c0909341SAndroid Build Coastguard Worker    pmaxsd               m8, m13
4260*c0909341SAndroid Build Coastguard Worker    pmaxsd               m2, m13
4261*c0909341SAndroid Build Coastguard Worker    pminsd               m8, m14
4262*c0909341SAndroid Build Coastguard Worker    pminsd               m2, m14
4263*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*1], m11 ; park out12
4264*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*3], m2 ; spill t6 temporarily; reloaded by the pmulld below
4265*c0909341SAndroid Build Coastguard Worker    mova                 m1, [r6+32*3]      ;  t15
4266*c0909341SAndroid Build Coastguard Worker    mova                 m2, [r6+32*2]      ;  t14
4267*c0909341SAndroid Build Coastguard Worker    paddd               m12, m7, m1         ; -out13
4268*c0909341SAndroid Build Coastguard Worker    psubd                m7, m1             ;  t15a
4269*c0909341SAndroid Build Coastguard Worker    psubd               m11, m6, m2         ;  t14a
4270*c0909341SAndroid Build Coastguard Worker    paddd                m2, m6             ;  out2
4271*c0909341SAndroid Build Coastguard Worker    pmaxsd               m7, m13
4272*c0909341SAndroid Build Coastguard Worker    pmaxsd              m11, m13
4273*c0909341SAndroid Build Coastguard Worker    pminsd               m7, m14
4274*c0909341SAndroid Build Coastguard Worker    pminsd              m11, m14
4275*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*2], m12 ; park -out13
4276*c0909341SAndroid Build Coastguard Worker    pminsd               m1, m14, [r6+32*0] ;  t10a
4277*c0909341SAndroid Build Coastguard Worker    pminsd              m12, m14, [r6+32*1] ;  t11a
4278*c0909341SAndroid Build Coastguard Worker    psubd                m6, m4, m1         ;  t10
4279*c0909341SAndroid Build Coastguard Worker    paddd                m1, m4             ; -out1
4280*c0909341SAndroid Build Coastguard Worker    psubd                m4, m5, m12        ;  t11
4281*c0909341SAndroid Build Coastguard Worker    paddd                m5, m12            ;  out14
4282*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [pd_1448]
4283*c0909341SAndroid Build Coastguard Worker    pmaxsd               m6, m13
4284*c0909341SAndroid Build Coastguard Worker    pmaxsd               m4, m13
4285*c0909341SAndroid Build Coastguard Worker    pminsd               m6, m14
4286*c0909341SAndroid Build Coastguard Worker    pminsd               m4, m14
; Scale the eight middle-stage values by 1448. Seven are in registers; the
; eighth (t6) was spilled, so m12 is multiplied by it straight from memory,
; after which the slot is free to receive out14.
4287*c0909341SAndroid Build Coastguard Worker    REPX    {pmulld x, m12}, m9, m10, m8, m7, m11, m6, m4
4288*c0909341SAndroid Build Coastguard Worker    pmulld              m12, [r6-32*3]      ;  t6
4289*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*3], m5 ; park out14
4290*c0909341SAndroid Build Coastguard Worker    paddd                m5, m11, m7        ; -out5  (unshifted)
4291*c0909341SAndroid Build Coastguard Worker    psubd               m11, m7             ;  out10 (unshifted)
4292*c0909341SAndroid Build Coastguard Worker    paddd                m7, m9, m10        ; -out7  (unshifted)
4293*c0909341SAndroid Build Coastguard Worker    psubd                m9, m10            ;  out8  (unshifted)
4294*c0909341SAndroid Build Coastguard Worker    psubd               m10, m6, m4         ; -out9  (unshifted)
4295*c0909341SAndroid Build Coastguard Worker    paddd                m6, m4             ;  out6  (unshifted)
4296*c0909341SAndroid Build Coastguard Worker    paddd                m4, m12, m8        ;  out4  (unshifted)
4297*c0909341SAndroid Build Coastguard Worker    psubd               m12, m8             ; -out11 (unshifted)
4298*c0909341SAndroid Build Coastguard Worker    ret
; .main_part1: first half of the ADST.
;   In:  m0-m7 = first group of scaled input rows, m12 = pd_2048,
;        m13/m14 = clip min/max, r6 = scratch base.
;   Out: eight intermediates stored to the scratch slots:
;        [r6-32*4] = t2    [r6-32*3] = t3    [r6-32*2] = m2   [r6-32*1] = m3
;        [r6+32*0] = t10a  [r6+32*1] = t11a  [r6+32*2] = m6   [r6+32*3] = m7
;        (.main_part2 reads the m2/m3/m6/m7 slots back as t6a/t7a/t14/t15.)
;        t2, t3, t10a and t11a are stored with only the lower clip applied.
4299*c0909341SAndroid Build Coastguard Worker.main_part1:
4300*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         1, 0, 8, 9, 10, 12,  995, 3973
4301*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         3, 2, 8, 9, 10, 12, 2440, 3290
4302*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         5, 4, 8, 9, 10, 12, 3513, 2106
4303*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         7, 6, 8, 9, 10, 12, 4052,  601
4304*c0909341SAndroid Build Coastguard Worker    psubd                m8, m0, m4 ; t10a
4305*c0909341SAndroid Build Coastguard Worker    paddd                m0, m4     ; t2a
4306*c0909341SAndroid Build Coastguard Worker    psubd                m4, m1, m5 ; t11a
4307*c0909341SAndroid Build Coastguard Worker    paddd                m1, m5     ; t3a
4308*c0909341SAndroid Build Coastguard Worker    psubd                m5, m2, m6 ; t14a
4309*c0909341SAndroid Build Coastguard Worker    paddd                m2, m6     ; t6a
4310*c0909341SAndroid Build Coastguard Worker    psubd                m6, m3, m7 ; t15a
4311*c0909341SAndroid Build Coastguard Worker    paddd                m7, m3     ; t7a
4312*c0909341SAndroid Build Coastguard Worker    REPX    {pmaxsd x, m13}, m8, m4, m5, m6, m0, m1, m2, m7
4313*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m14}, m8, m4, m5, m6, m0, m1, m2, m7
4314*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m11, [pd_2276]
4315*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m10, [pd_3406]
4316*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         8, 4, 3, 9, _, 12, 10, 11
4317*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         6, 5, 3, 9, _, 12, 11, 10
4318*c0909341SAndroid Build Coastguard Worker    psubd                m3, m0, m2 ; t6
4319*c0909341SAndroid Build Coastguard Worker    paddd                m0, m2     ; t2
4320*c0909341SAndroid Build Coastguard Worker    psubd                m2, m1, m7 ; t7
4321*c0909341SAndroid Build Coastguard Worker    paddd                m1, m7     ; t3
4322*c0909341SAndroid Build Coastguard Worker    psubd                m7, m4, m6 ; t14a
4323*c0909341SAndroid Build Coastguard Worker    paddd                m4, m6     ; t10a
4324*c0909341SAndroid Build Coastguard Worker    psubd                m6, m8, m5 ; t15a
4325*c0909341SAndroid Build Coastguard Worker    paddd                m5, m8     ; t11a
4326*c0909341SAndroid Build Coastguard Worker    REPX    {pmaxsd x, m13}, m3, m2, m7, m6, m0, m1, m4, m5
4327*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m14}, m3, m2, m7, m6 ; clip the rest later
4328*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m11, [pd_1567]
4329*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m10, [pd_3784]
4330*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         2, 3, 8, 9, _, 12, 10, 11
4331*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         6, 7, 8, 9, _, 12, 10, 11
4332*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*4], m0
4333*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*3], m1
4334*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*0], m4
4335*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*1], m5
4336*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*2], m2
4337*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*1], m3
4338*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*2], m6
4339*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*3], m7
4340*c0909341SAndroid Build Coastguard Worker    ret
4341*c0909341SAndroid Build Coastguard Worker
; Instantiate INV_TXFM_16X8_FN with flipadst as the first type, paired with
; each of four second types. No bitdepth argument is given, so the macro
; default (10) applies.
4342*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN flipadst, dct
4343*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN flipadst, adst
4344*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN flipadst, flipadst
4345*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN flipadst, identity
4346*c0909341SAndroid Build Coastguard Worker
4347*c0909341SAndroid Build Coastguard Workercglobal iflipadst_16x8_internal_10bpc, 0, 7, 16, 32*8, dst, stride, c, eob, tx2
4348*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_18b_min]
4349*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m14, [clip_18b_max]
4350*c0909341SAndroid Build Coastguard Worker.pass1:
4351*c0909341SAndroid Build Coastguard Worker    lea                  r6, [rsp+32*4]
4352*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x8_internal_10bpc).main
4353*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m14, [pd_3072]
4354*c0909341SAndroid Build Coastguard Worker    psrld               m15, 11
4355*c0909341SAndroid Build Coastguard Worker    psubd               m13, m14, m15
4356*c0909341SAndroid Build Coastguard Worker    call .pass1_rotations
4357*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_16x8_internal_10bpc).pass1_end
4358*c0909341SAndroid Build Coastguard Worker.pass2:
4359*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_10bpc).transpose
4360*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x8_internal_8bpc).main
4361*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x8_internal_8bpc).main_pass2_end
4362*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m10, [pw_2048]
4363*c0909341SAndroid Build Coastguard Worker    pxor                m11, m11
4364*c0909341SAndroid Build Coastguard Worker    psubw               m11, m10
4365*c0909341SAndroid Build Coastguard Worker    mova                m12, m0
4366*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m7, m11
4367*c0909341SAndroid Build Coastguard Worker    mova                 m7, m1
4368*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m6, m10
4369*c0909341SAndroid Build Coastguard Worker    mova                 m6, m2
4370*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m2, m5, m11
4371*c0909341SAndroid Build Coastguard Worker    mova                 m5, m3
4372*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m3, m4, m10
4373*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_10bpc).write_16x4_start
4374*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m5, m11
4375*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m6, m10
4376*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m2, m7, m11
4377*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m3, m12, m10
4378*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_10bpc).write_16x4_zero
4379*c0909341SAndroid Build Coastguard Worker    RET
4380*c0909341SAndroid Build Coastguard WorkerALIGN function_align
4381*c0909341SAndroid Build Coastguard Worker.pass1_rotations:
4382*c0909341SAndroid Build Coastguard Worker    psubd                m8, m13, m7
4383*c0909341SAndroid Build Coastguard Worker    paddd                m7, m14, m9
4384*c0909341SAndroid Build Coastguard Worker    paddd                m9, m14, m6
4385*c0909341SAndroid Build Coastguard Worker    psubd                m6, m13, m10
4386*c0909341SAndroid Build Coastguard Worker    psubd               m10, m13, m5
4387*c0909341SAndroid Build Coastguard Worker    paddd                m5, m14, m11
4388*c0909341SAndroid Build Coastguard Worker    paddd               m11, m14, m4
4389*c0909341SAndroid Build Coastguard Worker    psubd                m4, m13, m12
4390*c0909341SAndroid Build Coastguard Worker    psubd               m12, m15, m3
4391*c0909341SAndroid Build Coastguard Worker    paddd                m3, m15, [r6-32*1]
4392*c0909341SAndroid Build Coastguard Worker    paddd               m13, m15, m2
4393*c0909341SAndroid Build Coastguard Worker    psubd                m2, m15, [r6-32*2]
4394*c0909341SAndroid Build Coastguard Worker    psubd               m14, m15, m1
4395*c0909341SAndroid Build Coastguard Worker    mova                 m1, m15
4396*c0909341SAndroid Build Coastguard Worker    paddd               m15, m0
4397*c0909341SAndroid Build Coastguard Worker    psubd                m0, m1, [r6-32*4]
4398*c0909341SAndroid Build Coastguard Worker    paddd                m1,     [r6-32*3]
4399*c0909341SAndroid Build Coastguard Worker    ret
4400*c0909341SAndroid Build Coastguard Worker
; Inverse identity 16x8 transform, 10 bpc build.
; The four INV_TXFM_16X8_FN lines instantiate the identity/{dct, adst,
; flipadst, identity} combinations through a macro defined outside this
; excerpt.
4401*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN identity, dct
4402*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN identity, adst
4403*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN identity, flipadst
4404*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN identity, identity
4405*c0909341SAndroid Build Coastguard Worker
; .pass1 is also the entry used by iidentity_16x8_internal_12bpc, which jumps
; here directly. It applies two fixed-point scalings to sixteen 32-byte
; vectors of dword coefficients read from cq:
;   x = (x * 2896 + 2048) >> 12
;   x = (x * 5793 + 3072) >> 12
; All 16 vector registers hold data, so whichever register is needed for a
; constant is temporarily spilled to [rsp].
4406*c0909341SAndroid Build Coastguard Workercglobal iidentity_16x8_internal_10bpc, 0, 7, 16, 32*8, dst, stride, c, eob, tx2
4407*c0909341SAndroid Build Coastguard Worker.pass1:
; Step 1: load and multiply by pd_2896 in one instruction per vector.
; m15 holds the multiplier and is consumed last.
4408*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m15, [pd_2896]
4409*c0909341SAndroid Build Coastguard Worker    pmulld               m0, m15, [cq+32* 0]
4410*c0909341SAndroid Build Coastguard Worker    pmulld               m1, m15, [cq+32* 1]
4411*c0909341SAndroid Build Coastguard Worker    pmulld               m2, m15, [cq+32* 2]
4412*c0909341SAndroid Build Coastguard Worker    pmulld               m3, m15, [cq+32* 3]
4413*c0909341SAndroid Build Coastguard Worker    pmulld               m4, m15, [cq+32* 4]
4414*c0909341SAndroid Build Coastguard Worker    pmulld               m5, m15, [cq+32* 5]
4415*c0909341SAndroid Build Coastguard Worker    pmulld               m6, m15, [cq+32* 6]
4416*c0909341SAndroid Build Coastguard Worker    pmulld               m7, m15, [cq+32* 7]
4417*c0909341SAndroid Build Coastguard Worker    pmulld               m8, m15, [cq+32* 8]
4418*c0909341SAndroid Build Coastguard Worker    pmulld               m9, m15, [cq+32* 9]
4419*c0909341SAndroid Build Coastguard Worker    pmulld              m10, m15, [cq+32*10]
4420*c0909341SAndroid Build Coastguard Worker    pmulld              m11, m15, [cq+32*11]
4421*c0909341SAndroid Build Coastguard Worker    pmulld              m12, m15, [cq+32*12]
4422*c0909341SAndroid Build Coastguard Worker    pmulld              m13, m15, [cq+32*13]
4423*c0909341SAndroid Build Coastguard Worker    pmulld              m14, m15, [cq+32*14]
4424*c0909341SAndroid Build Coastguard Worker    pmulld              m15,      [cq+32*15]
; Step 2: add the pd_2048 rounding bias. m7 is spilled so it can carry the
; constant; adding the spilled value back into m7 gives m7's own result.
4425*c0909341SAndroid Build Coastguard Worker    mova              [rsp], m7
4426*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m7, [pd_2048]
4427*c0909341SAndroid Build Coastguard Worker    REPX    {paddd  x, m7 }, m0,  m1,  m2,  m3,  m4,  m5,  m6, \
4428*c0909341SAndroid Build Coastguard Worker                             m8,  m9,  m10, m11, m12, m13, m14, m15
4429*c0909341SAndroid Build Coastguard Worker    paddd                m7, [rsp]
4430*c0909341SAndroid Build Coastguard Worker    REPX    {psrad  x, 12 }, m0,  m1,  m2,  m3,  m4,  m5,  m6,  m7, \
4431*c0909341SAndroid Build Coastguard Worker                             m8,  m9,  m10, m11, m12, m13, m14, m15
; Step 3: multiply by pd_5793, this time spilling m15 for the constant.
4432*c0909341SAndroid Build Coastguard Worker    mova              [rsp], m15
4433*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m15, [pd_5793]
4434*c0909341SAndroid Build Coastguard Worker    REPX    {pmulld x, m15}, m0,  m1,  m2,  m3,  m4,  m5,  m6,  m7, \
4435*c0909341SAndroid Build Coastguard Worker                             m8,  m9,  m10, m11, m12, m13, m14
4436*c0909341SAndroid Build Coastguard Worker    pmulld              m15, [rsp]
; Step 4: add the pd_3072 bias (same m7 spill trick as step 2) and shift
; right arithmetically by 12.
4437*c0909341SAndroid Build Coastguard Worker    mova              [rsp], m7
4438*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m7, [pd_3072]
4439*c0909341SAndroid Build Coastguard Worker    REPX    {paddd  x, m7 }, m0,  m1,  m2,  m3,  m4,  m5,  m6, \
4440*c0909341SAndroid Build Coastguard Worker                             m8,  m9,  m10, m11, m12, m13, m14, m15
4441*c0909341SAndroid Build Coastguard Worker    paddd                m7, [rsp]
4442*c0909341SAndroid Build Coastguard Worker    REPX    {psrad  x, 12 }, m0,  m1,  m2,  m3,  m4,  m5,  m6,  m7, \
4443*c0909341SAndroid Build Coastguard Worker                             m8,  m9,  m10, m11, m12, m13, m14, m15
; Continue with the second-pass routine whose address is held in tx2q.
4444*c0909341SAndroid Build Coastguard Worker    jmp                tx2q
4445*c0909341SAndroid Build Coastguard Worker.pass2:
; Pass 2: transpose, load pw_4096 into m10 and finish in the DCT's shared
; .end code. NOTE(review): m10 is presumably the pmulhrsw scale used by
; .end -- that label is outside this excerpt, confirm there.
4446*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_10bpc).transpose
4447*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m10, [pw_4096]
4448*c0909341SAndroid Build Coastguard Worker    jmp m(idct_16x8_internal_10bpc).end
4449*c0909341SAndroid Build Coastguard Worker
; Inverse DCT 16x8 transform, 12 bpc build.
; The four INV_TXFM_16X8_FN lines instantiate the dct/{dct, identity, adst,
; flipadst} combinations; the trailing 12 is passed through to the macro
; (defined outside this excerpt) to select the 12 bpc variant.
4450*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN dct, dct,      12
4451*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN dct, identity, 12
4452*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN dct, adst,     12
4453*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN dct, flipadst, 12
4454*c0909341SAndroid Build Coastguard Worker
; Pass 1 is shared with the 10 bpc code: only the clip range differs, so
; load clip_20b_min/max into m12/m13 and jump past the 10 bpc routine's own
; constant setup to its .pass1 label.
4455*c0909341SAndroid Build Coastguard Workercglobal idct_16x8_internal_12bpc, 0, 7, 16, 32*8, dst, stride, c, eob, tx2
4456*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [clip_20b_min]
4457*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_20b_max]
4458*c0909341SAndroid Build Coastguard Worker    jmp m(idct_16x8_internal_10bpc).pass1
4459*c0909341SAndroid Build Coastguard Worker.pass2:
; .pass2_main falls through into .end, whose final tail-jump returns here.
4460*c0909341SAndroid Build Coastguard Worker    call .pass2_main
4461*c0909341SAndroid Build Coastguard Worker    RET
4462*c0909341SAndroid Build Coastguard WorkerALIGN function_align
4463*c0909341SAndroid Build Coastguard Worker.pass2_main:
; Pass 2 is done as two 8-point IDCT batches, staying in 32-bit precision.
; The first batch works on m0..m7 as left by the transpose; its results are
; parked at [cq+32*8 .. 32*15] while the second batch processes the vectors
; loaded from [cq+32*0 .. 32*7].
4464*c0909341SAndroid Build Coastguard Worker    call m(idct_8x16_internal_12bpc).transpose
; m12/m13 = 18-bit clip range, m11 = pd_2048, set up before calling the
; 10 bpc 8x8 .main kernel; inputs are clamped to [m12, m13] first.
4465*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [clip_18b_min]
4466*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_18b_max]
4467*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m11, [pd_2048]
4468*c0909341SAndroid Build Coastguard Worker    REPX    {pmaxsd x, m12}, m0, m1, m2, m3, m4, m5, m6, m7
4469*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m0, m1, m2, m3, m4, m5, m6, m7
4470*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).main
4471*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_12bpc).round_shift4
; Park the first batch's results in the upper half of the coefficient buffer.
4472*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 8], m0
4473*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 9], m1
4474*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*10], m2
4475*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*11], m3
4476*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*12], m4
4477*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*13], m5
4478*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*14], m6
4479*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*15], m7
; Load the second batch and apply the lower clamp in the same instruction
; (max of m12 and the memory operand); the upper clamp follows.
4480*c0909341SAndroid Build Coastguard Worker    pmaxsd               m0, m12, [cq+32*0]
4481*c0909341SAndroid Build Coastguard Worker    pmaxsd               m1, m12, [cq+32*1]
4482*c0909341SAndroid Build Coastguard Worker    pmaxsd               m2, m12, [cq+32*2]
4483*c0909341SAndroid Build Coastguard Worker    pmaxsd               m3, m12, [cq+32*3]
4484*c0909341SAndroid Build Coastguard Worker    pmaxsd               m4, m12, [cq+32*4]
4485*c0909341SAndroid Build Coastguard Worker    pmaxsd               m5, m12, [cq+32*5]
4486*c0909341SAndroid Build Coastguard Worker    pmaxsd               m6, m12, [cq+32*6]
4487*c0909341SAndroid Build Coastguard Worker    pmaxsd               m7, m12, [cq+32*7]
4488*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m0, m1, m2, m3, m4, m5, m6, m7
4489*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).main
4490*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_12bpc).round_shift4
4491*c0909341SAndroid Build Coastguard Worker.end:
; Shared output stage; also called by iadst_16x8_internal_12bpc.
;   In: m0..m7 = second batch (dwords), [cq+32*8 .. 32*15] = first batch.
; packssdw narrows both batches to signed-saturated words, with the register
; operand in the low half and the memory operand in the high half of each
; 128-bit lane; vpermq q3120 then reorders the qwords as 0,2,1,3.
4492*c0909341SAndroid Build Coastguard Worker    packssdw             m0, [cq+32* 8]
4493*c0909341SAndroid Build Coastguard Worker    packssdw             m1, [cq+32* 9]
4494*c0909341SAndroid Build Coastguard Worker    packssdw             m2, [cq+32*10]
4495*c0909341SAndroid Build Coastguard Worker    packssdw             m3, [cq+32*11]
4496*c0909341SAndroid Build Coastguard Worker    packssdw             m4, [cq+32*12]
4497*c0909341SAndroid Build Coastguard Worker    packssdw             m5, [cq+32*13]
4498*c0909341SAndroid Build Coastguard Worker    packssdw             m6, [cq+32*14]
4499*c0909341SAndroid Build Coastguard Worker    packssdw             m7, [cq+32*15]
4500*c0909341SAndroid Build Coastguard Worker    REPX {vpermq x, x, q3120}, m0, m1, m2, m3
; First four rows: set up the 12 bpc write constants, then hand over to the
; 10 bpc writer.
4501*c0909341SAndroid Build Coastguard Worker    call .write_16x4_start
4502*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_10bpc).write_16x4_zero
; Last four rows: the writer takes its input in m0..m3, so permute m4..m7
; into them and tail-jump (the writer's ret returns to .end's caller).
4503*c0909341SAndroid Build Coastguard Worker    vpermq               m0, m4, q3120
4504*c0909341SAndroid Build Coastguard Worker    vpermq               m1, m5, q3120
4505*c0909341SAndroid Build Coastguard Worker    vpermq               m2, m6, q3120
4506*c0909341SAndroid Build Coastguard Worker    vpermq               m3, m7, q3120
4507*c0909341SAndroid Build Coastguard Worker    jmp m(idct_16x8_internal_10bpc).write_16x4_zero
4508*c0909341SAndroid Build Coastguard WorkerALIGN function_align
4509*c0909341SAndroid Build Coastguard Worker.write_16x4_start:
; 12 bpc write setup, also used by the 12 bpc flipadst and identity code:
;   m9 = pixel_12bpc_max (broadcast), r3 = stride*3, m8 = 0.
; NOTE(review): presumably these are the clamp bounds and row offset that
; write_16x4_zero expects -- that routine is outside this excerpt.
4510*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m9, [pixel_12bpc_max]
4511*c0909341SAndroid Build Coastguard Worker    lea                  r3, [strideq*3]
4512*c0909341SAndroid Build Coastguard Worker    pxor                 m8, m8
4513*c0909341SAndroid Build Coastguard Worker    ret
4514*c0909341SAndroid Build Coastguard Worker
; Inverse ADST 16x8 transform, 12 bpc build.
; The four INV_TXFM_16X8_FN lines instantiate the adst/{dct, adst, flipadst,
; identity} combinations; the trailing 12 is passed through to the macro
; (defined outside this excerpt) to select the 12 bpc variant.
4515*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN adst, dct,      12
4516*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN adst, adst,     12
4517*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN adst, flipadst, 12
4518*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN adst, identity, 12
4519*c0909341SAndroid Build Coastguard Worker
; Pass 1 is shared with the 10 bpc code: load the 20-bit clip range into
; m13/m14 (the registers the 10 bpc entry would have filled with the 18-bit
; range) and jump to its .pass1 label.
4520*c0909341SAndroid Build Coastguard Workercglobal iadst_16x8_internal_12bpc, 0, 7, 16, 32*8, dst, stride, c, eob, tx2
4521*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_20b_min]
4522*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m14, [clip_20b_max]
4523*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_16x8_internal_10bpc).pass1
4524*c0909341SAndroid Build Coastguard Worker.pass2:
; Compute both batches, then reuse the DCT's pack-and-write output stage.
4525*c0909341SAndroid Build Coastguard Worker    call .pass2_main
4526*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_12bpc).end
4527*c0909341SAndroid Build Coastguard Worker    RET
4528*c0909341SAndroid Build Coastguard WorkerALIGN function_align
4529*c0909341SAndroid Build Coastguard Worker.pass2_main:
; Two 8-point ADST batches in 32-bit precision; also called by the 12 bpc
; flipadst code.
;   Out: m0..m7 = second batch, [cq+32*8 .. 32*15] = first batch.
4530*c0909341SAndroid Build Coastguard Worker    call m(idct_8x16_internal_12bpc).transpose
; m12/m13 = 18-bit clip range, m11 = pd_2048; clamp the first batch.
4531*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [clip_18b_min]
4532*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_18b_max]
4533*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m11, [pd_2048]
4534*c0909341SAndroid Build Coastguard Worker    REPX    {pmaxsd x, m12}, m0, m1, m2, m3, m4, m5, m6, m7
4535*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m0, m1, m2, m3, m4, m5, m6, m7
4536*c0909341SAndroid Build Coastguard Worker    call m(iadst_8x8_internal_12bpc).pass2_main2
; Park the first batch's results in the upper half of the coefficient buffer.
4537*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 8], m0
4538*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 9], m1
4539*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*10], m2
4540*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*11], m3
4541*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*12], m4
4542*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*13], m5
4543*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*14], m6
4544*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*15], m7
; Load the second batch with the lower clamp folded into the load, then
; apply the upper clamp. This relies on m12/m13 still holding the clip
; range after the pass2_main2 call above.
4545*c0909341SAndroid Build Coastguard Worker    pmaxsd               m0, m12, [cq+32*0]
4546*c0909341SAndroid Build Coastguard Worker    pmaxsd               m1, m12, [cq+32*1]
4547*c0909341SAndroid Build Coastguard Worker    pmaxsd               m2, m12, [cq+32*2]
4548*c0909341SAndroid Build Coastguard Worker    pmaxsd               m3, m12, [cq+32*3]
4549*c0909341SAndroid Build Coastguard Worker    pmaxsd               m4, m12, [cq+32*4]
4550*c0909341SAndroid Build Coastguard Worker    pmaxsd               m5, m12, [cq+32*5]
4551*c0909341SAndroid Build Coastguard Worker    pmaxsd               m6, m12, [cq+32*6]
4552*c0909341SAndroid Build Coastguard Worker    pmaxsd               m7, m12, [cq+32*7]
4553*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m0, m1, m2, m3, m4, m5, m6, m7
4554*c0909341SAndroid Build Coastguard Worker    call m(iadst_8x8_internal_12bpc).pass2_main2
4555*c0909341SAndroid Build Coastguard Worker    ret
4556*c0909341SAndroid Build Coastguard Worker
; Inverse flipped-ADST 16x8 transform, 12 bpc build.
; The four INV_TXFM_16X8_FN lines instantiate the flipadst/{dct, adst,
; flipadst, identity} combinations; the trailing 12 is passed through to the
; macro (defined outside this excerpt) to select the 12 bpc variant.
4557*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN flipadst, dct,      12
4558*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN flipadst, adst,     12
4559*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN flipadst, flipadst, 12
4560*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN flipadst, identity, 12
4561*c0909341SAndroid Build Coastguard Worker
; Pass 1 is shared with the 10 bpc flipadst code: load the 20-bit clip range
; into m13/m14 and jump to its .pass1 label.
4562*c0909341SAndroid Build Coastguard Workercglobal iflipadst_16x8_internal_12bpc, 0, 7, 16, 32*8, dst, stride, c, eob, tx2
4563*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_20b_min]
4564*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m14, [clip_20b_max]
4565*c0909341SAndroid Build Coastguard Worker    jmp m(iflipadst_16x8_internal_10bpc).pass1
4566*c0909341SAndroid Build Coastguard Worker.pass2:
; Same computation as the non-flipped ADST: second batch in m0..m7, first
; batch at [cq+32*8 .. 32*15].
4567*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x8_internal_12bpc).pass2_main
; The flip happens while packing to words: row k (m<k> paired with
; [cq+32*(8+k)]) lands in the register for row 7-k. Rows 0..3 go to
; m13..m10 and rows 4..7 go to m3..m0. Sources are read before being
; overwritten (m0..m3 are consumed by the first four packs).
4568*c0909341SAndroid Build Coastguard Worker    packssdw            m13, m0, [cq+32* 8]
4569*c0909341SAndroid Build Coastguard Worker    packssdw            m12, m1, [cq+32* 9]
4570*c0909341SAndroid Build Coastguard Worker    packssdw            m11, m2, [cq+32*10]
4571*c0909341SAndroid Build Coastguard Worker    packssdw            m10, m3, [cq+32*11]
4572*c0909341SAndroid Build Coastguard Worker    packssdw             m3, m4, [cq+32*12]
4573*c0909341SAndroid Build Coastguard Worker    packssdw             m2, m5, [cq+32*13]
4574*c0909341SAndroid Build Coastguard Worker    packssdw             m1, m6, [cq+32*14]
4575*c0909341SAndroid Build Coastguard Worker    packssdw             m0, m7, [cq+32*15]
; First four output rows (the reversed rows 7..4 now in m0..m3).
4576*c0909341SAndroid Build Coastguard Worker    REPX {vpermq x, x, q3120}, m0, m1, m2, m3
4577*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_12bpc).write_16x4_start
4578*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_10bpc).write_16x4_zero
; Last four output rows (the reversed rows 3..0 held in m10..m13).
4579*c0909341SAndroid Build Coastguard Worker    vpermq               m0, m10, q3120
4580*c0909341SAndroid Build Coastguard Worker    vpermq               m1, m11, q3120
4581*c0909341SAndroid Build Coastguard Worker    vpermq               m2, m12, q3120
4582*c0909341SAndroid Build Coastguard Worker    vpermq               m3, m13, q3120
4583*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_10bpc).write_16x4_zero
4584*c0909341SAndroid Build Coastguard Worker    RET
4585*c0909341SAndroid Build Coastguard Worker
; Inverse identity 16x8 transform, 12 bpc build.
; The four INV_TXFM_16X8_FN lines instantiate the identity/{dct, adst,
; flipadst, identity} combinations; the trailing 12 is passed through to the
; macro (defined outside this excerpt) to select the 12 bpc variant.
4586*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN identity, dct,      12
4587*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN identity, adst,     12
4588*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN identity, flipadst, 12
4589*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X8_FN identity, identity, 12
4590*c0909341SAndroid Build Coastguard Worker
; Pass 1 is the 10 bpc identity code unchanged (it loads no clip constants,
; so nothing needs to be set up before the jump).
4591*c0909341SAndroid Build Coastguard Workercglobal iidentity_16x8_internal_12bpc, 0, 7, 16, 32*8, dst, stride, c, eob, tx2
4592*c0909341SAndroid Build Coastguard Worker    jmp m(iidentity_16x8_internal_10bpc).pass1
4593*c0909341SAndroid Build Coastguard Worker.pass2:
; Pass 2 mirrors the 10 bpc identity .pass2 but uses the 12 bpc write setup.
; pmulhrsw by pw_4096 is a rounding right shift by 3: (x + 4) >> 3.
4594*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_10bpc).transpose2
4595*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m10, [pw_4096]
4596*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m10
4597*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m10
4598*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m2, m10
4599*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m3, m10
; Write the first four rows here, then let the 10 bpc code finish from its
; .end2 label. NOTE(review): .end2 presumably scales m4..m7 by m10 and
; writes the remaining rows -- it is outside this excerpt, confirm there.
4600*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_12bpc).write_16x4_start
4601*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_10bpc).write_16x4_zero
4602*c0909341SAndroid Build Coastguard Worker    jmp m(idct_16x8_internal_10bpc).end2
4603*c0909341SAndroid Build Coastguard Worker
; INV_TXFM_16X16_FN type1, type2 [, eob_offset = 0 [, bitdepth = 10]]
; Instantiates one 16x16 inverse-transform entry point by forwarding to
; INV_TXFM_FN (defined outside this excerpt) with the block size fixed to
; 16x16. For the dct/dct combination it also emits a DC-only shortcut.
4604*c0909341SAndroid Build Coastguard Worker%macro INV_TXFM_16X16_FN 2-4 0,10 ; type1, type2, eob_offset, bitdepth
4605*c0909341SAndroid Build Coastguard Worker    INV_TXFM_FN          %1, %2, %3, 16x16, %4
4606*c0909341SAndroid Build Coastguard Worker%ifidn %1_%2, dct_dct
; DC-only path: r6d = ([cq] * 181 + 640) >> 10 (arithmetic shift), with the
; coefficient slot overwritten by eobd (noted as 0 below). m3 receives the
; dconly_<bitdepth>bpc constant.
; NOTE(review): r3d |= 16 presumably passes the row count to .dconly3, and
; the code between INV_TXFM_FN and this point decides when the shortcut is
; taken -- both live outside this excerpt, confirm there.
4607*c0909341SAndroid Build Coastguard Worker    imul                r6d, [cq], 181
4608*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m3, [dconly_%4bpc]
4609*c0909341SAndroid Build Coastguard Worker    mov                [cq], eobd ; 0
4610*c0909341SAndroid Build Coastguard Worker    or                  r3d, 16
4611*c0909341SAndroid Build Coastguard Worker    add                 r6d, 640
4612*c0909341SAndroid Build Coastguard Worker    sar                 r6d, 10
; The shared tail always lives in the 10 bpc 16x4 function, whatever
; bitdepth this macro was invoked with.
4613*c0909341SAndroid Build Coastguard Worker    jmp m(inv_txfm_add_dct_dct_16x4_10bpc).dconly3
4614*c0909341SAndroid Build Coastguard Worker%endif
4615*c0909341SAndroid Build Coastguard Worker%endmacro
4616*c0909341SAndroid Build Coastguard Worker
; 16x16 entry points whose first transform type is dct, at the macro's
; default bitdepth of 10. dct/dct additionally gets the DC-only shortcut
; emitted by INV_TXFM_16X16_FN; dct/identity passes an eob_offset of 28,
; the others use the default of 0.
4617*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X16_FN dct, dct
4618*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X16_FN dct, identity, 28
4619*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X16_FN dct, adst
4620*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X16_FN dct, flipadst
4621*c0909341SAndroid Build Coastguard Worker
4622*c0909341SAndroid Build Coastguard Workercglobal idct_16x16_internal_10bpc, 0, 7, 16, 32*24, dst, stride, c, eob, tx2
4623*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [clip_18b_min]
4624*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_18b_max]
4625*c0909341SAndroid Build Coastguard Worker.pass1:
4626*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m11, [pd_2048]
4627*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m14, [pd_2896]
4628*c0909341SAndroid Build Coastguard Worker    lea                  r6, [rsp+32*4]
4629*c0909341SAndroid Build Coastguard Worker    sub                eobd, 36
4630*c0909341SAndroid Build Coastguard Worker    jl .fast
4631*c0909341SAndroid Build Coastguard Worker    add                  cq, 32
4632*c0909341SAndroid Build Coastguard Worker    call .main
4633*c0909341SAndroid Build Coastguard Worker    sub                  cq, 32
4634*c0909341SAndroid Build Coastguard Worker    mova                m10, [r6-32*4]
4635*c0909341SAndroid Build Coastguard Worker    mova                 m9, [r6-32*3]
4636*c0909341SAndroid Build Coastguard Worker    mova                 m8, [r6-32*2]
4637*c0909341SAndroid Build Coastguard Worker    psubd               m15, m0, m10 ; out15
4638*c0909341SAndroid Build Coastguard Worker    paddd                m0, m10     ; out0
4639*c0909341SAndroid Build Coastguard Worker    psubd               m10, m1, m9  ; out14
4640*c0909341SAndroid Build Coastguard Worker    paddd                m1, m9      ; out1
4641*c0909341SAndroid Build Coastguard Worker    psubd                m9, m2, m8  ; out13
4642*c0909341SAndroid Build Coastguard Worker    paddd                m2, m8      ; out2
4643*c0909341SAndroid Build Coastguard Worker    REPX       {psrad x, 2}, m0, m1, m2
4644*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*4], m0
4645*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*3], m1
4646*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*2], m2
4647*c0909341SAndroid Build Coastguard Worker    mova                 m2, [r6-32*1]
4648*c0909341SAndroid Build Coastguard Worker    mova                 m1, [r6+32*0]
4649*c0909341SAndroid Build Coastguard Worker    mova                 m0, [r6+32*1]
4650*c0909341SAndroid Build Coastguard Worker    REPX       {psrad x, 2}, m9, m10, m15
4651*c0909341SAndroid Build Coastguard Worker    psubd                m8, m3, m2 ; out12
4652*c0909341SAndroid Build Coastguard Worker    paddd                m3, m2     ; out3
4653*c0909341SAndroid Build Coastguard Worker    psubd                m2, m4, m1 ; out11
4654*c0909341SAndroid Build Coastguard Worker    paddd                m4, m1     ; out4
4655*c0909341SAndroid Build Coastguard Worker    psubd                m1, m5, m0 ; out10
4656*c0909341SAndroid Build Coastguard Worker    paddd                m5, m0     ; out5
4657*c0909341SAndroid Build Coastguard Worker    REPX       {psrad x, 2}, m3, m4, m5
4658*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*1], m3
4659*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*0], m4
4660*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*1], m5
4661*c0909341SAndroid Build Coastguard Worker    mova                 m4, [r6+32*2]
4662*c0909341SAndroid Build Coastguard Worker    mova                 m3, [r6+32*3]
4663*c0909341SAndroid Build Coastguard Worker    REPX       {psrad x, 2}, m1, m2, m8
4664*c0909341SAndroid Build Coastguard Worker    psubd                m5, m6, m4 ; out9
4665*c0909341SAndroid Build Coastguard Worker    paddd                m6, m4     ; out6
4666*c0909341SAndroid Build Coastguard Worker    psubd                m4, m7, m3 ; out8
4667*c0909341SAndroid Build Coastguard Worker    paddd                m7, m3     ; out7
4668*c0909341SAndroid Build Coastguard Worker    REPX       {psrad x, 2}, m6, m7, m4, m5
4669*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*2], m6
4670*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*3], m7
4671*c0909341SAndroid Build Coastguard Worker    add                  r6, 32*8
4672*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*4], m4
4673*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*3], m5
4674*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*2], m1
4675*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*1], m2
4676*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*0], m8
4677*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*1], m9
4678*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*2], m10
4679*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*3], m15
4680*c0909341SAndroid Build Coastguard Worker.fast:
4681*c0909341SAndroid Build Coastguard Worker    add                  r6, 32*8
4682*c0909341SAndroid Build Coastguard Worker    call .main
4683*c0909341SAndroid Build Coastguard Worker    mova                m14, [r6-32*4]
4684*c0909341SAndroid Build Coastguard Worker    mova                m13, [r6-32*3]
4685*c0909341SAndroid Build Coastguard Worker    mova                m12, [r6-32*2]
4686*c0909341SAndroid Build Coastguard Worker    mova                m11, [r6-32*1]
4687*c0909341SAndroid Build Coastguard Worker    mova                m10, [r6+32*0]
4688*c0909341SAndroid Build Coastguard Worker    mova                 m9, [r6+32*1]
4689*c0909341SAndroid Build Coastguard Worker    mova                 m8, [r6+32*2]
4690*c0909341SAndroid Build Coastguard Worker    psubd               m15, m0, m14       ; out15
4691*c0909341SAndroid Build Coastguard Worker    paddd                m0, m14           ; out0
4692*c0909341SAndroid Build Coastguard Worker    psubd               m14, m1, m13       ; out14
4693*c0909341SAndroid Build Coastguard Worker    paddd                m1, m13           ; out1
4694*c0909341SAndroid Build Coastguard Worker    psubd               m13, m2, m12       ; out13
4695*c0909341SAndroid Build Coastguard Worker    paddd                m2, m12           ; out2
4696*c0909341SAndroid Build Coastguard Worker    psubd               m12, m3, m11       ; out12
4697*c0909341SAndroid Build Coastguard Worker    paddd                m3, m11           ; out3
4698*c0909341SAndroid Build Coastguard Worker    psubd               m11, m4, m10       ; out11
4699*c0909341SAndroid Build Coastguard Worker    paddd                m4, m10           ; out4
4700*c0909341SAndroid Build Coastguard Worker    psubd               m10, m5, m9        ; out10
4701*c0909341SAndroid Build Coastguard Worker    paddd                m5, m9            ; out5
4702*c0909341SAndroid Build Coastguard Worker    psubd                m9, m6, m8        ; out9
4703*c0909341SAndroid Build Coastguard Worker    paddd                m6, m8            ; out6
4704*c0909341SAndroid Build Coastguard Worker    psubd                m8, m7, [r6+32*3] ; out8
4705*c0909341SAndroid Build Coastguard Worker    paddd                m7, [r6+32*3]     ; out7
4706*c0909341SAndroid Build Coastguard Worker    sub                  r6, 32*8
4707*c0909341SAndroid Build Coastguard Worker    REPX       {psrad x, 2}, m0,  m1,  m2,  m3,  m4,  m5,  m6,  m7, \
4708*c0909341SAndroid Build Coastguard Worker                             m8,  m9,  m10, m11, m12, m13, m14, m15
4709*c0909341SAndroid Build Coastguard Worker    jmp                tx2q
4710*c0909341SAndroid Build Coastguard Worker.pass2:
4711*c0909341SAndroid Build Coastguard Worker    call .transpose
4712*c0909341SAndroid Build Coastguard Worker    lea                  r6, [pw_5+128]
4713*c0909341SAndroid Build Coastguard Worker    mova              [rsp], m15
4714*c0909341SAndroid Build Coastguard Worker    call m(idct_16x16_internal_8bpc).main
4715*c0909341SAndroid Build Coastguard Worker    mova                 m1, [rsp+32*1]
4716*c0909341SAndroid Build Coastguard Worker.end:
4717*c0909341SAndroid Build Coastguard Worker    call .write_16x16
4718*c0909341SAndroid Build Coastguard Worker    RET
; .write_16x16: scale the sixteen word rows in m0-m15 and output them four
; rows at a time through the idct_16x8 write helpers (defined elsewhere).
;   In:  m0-m15 = rows 0-15
;   Stack slots are addressed as rsp+gprsize+... because this routine is
;   entered with `call`, which pushes a return address.
;   pmulhrsw by pw_2048 computes (x*2048 + 0x4000) >> 15, i.e. (x + 8) >> 4.
; .write_16x16_2 is a secondary entry point that starts at rows 4-7; it
; expects m12 = pw_2048 and rows 8, 9 and 12 already in the three stack slots.
4719*c0909341SAndroid Build Coastguard WorkerALIGN function_align
4720*c0909341SAndroid Build Coastguard Worker.write_16x16:
; m12 is saved because it is about to hold the scale constant. m8/m9 are
; presumably saved because the write helpers use them as scratch - TODO confirm.
4721*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize+32*0], m8
4722*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize+32*1], m9
4723*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize+32*2], m12
4724*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [pw_2048]
; Rows 0-3 (scaled in place).
4725*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m12
4726*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m12
4727*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m2, m12
4728*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m3, m12
4729*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_10bpc).write_16x4_start
4730*c0909341SAndroid Build Coastguard Worker.write_16x16_2:
; Rows 4-7.
4731*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m12, m4
4732*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m12, m5
4733*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m2, m12, m6
4734*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m3, m12, m7
4735*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_10bpc).write_16x4_zero
; Rows 8-11 (rows 8 and 9 come from their stack copies).
4736*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m12, [rsp+gprsize+32*0]
4737*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m12, [rsp+gprsize+32*1]
4738*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m2, m12, m10
4739*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m3, m12, m11
4740*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_10bpc).write_16x4_zero
; Rows 12-15 (row 12 comes from its stack copy). The helper is tail-called,
; so its ret returns directly to the caller of .write_16x16.
4741*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m12, [rsp+gprsize+32*2]
4742*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m12, m13
4743*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m2, m12, m14
4744*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m3, m12, m15
4745*c0909341SAndroid Build Coastguard Worker    jmp m(idct_16x8_internal_10bpc).write_16x4_zero
; .transpose: convert the first-pass output (32-bit values, half in m0-m15 and
; half in sixteen 32-byte stack slots around r6) into sixteen registers of
; 16-bit values, transposed for the second pass. Result is left in m0-m15.
;   In:  m0-m15, r6 (stack slots [r6-32*4 .. r6+32*3] and the eight below),
;        eobd (sign selects the path)
;   Out: m0-m15; r6 is lowered by 32*8 on the full path
; When eobd is negative the routine branches to .transpose_fast instead. The
; visible first passes that call this routine bias eobd (`sub eobd, 36`) and
; skip filling the stack slots in that case.
; Shared with the adst/flipadst second passes later in this file.
4746*c0909341SAndroid Build Coastguard WorkerALIGN function_align
4747*c0909341SAndroid Build Coastguard Worker.transpose:
4748*c0909341SAndroid Build Coastguard Worker    test               eobd, eobd
4749*c0909341SAndroid Build Coastguard Worker    jl .transpose_fast
; Pack dwords to words with signed saturation: each register is combined with
; the matching stack slot, m8-m15 with the upper eight slots first.
4750*c0909341SAndroid Build Coastguard Worker    packssdw             m8, [r6-32*4]
4751*c0909341SAndroid Build Coastguard Worker    packssdw             m9, [r6-32*3]
4752*c0909341SAndroid Build Coastguard Worker    packssdw            m10, [r6-32*2]
4753*c0909341SAndroid Build Coastguard Worker    packssdw            m11, [r6-32*1]
4754*c0909341SAndroid Build Coastguard Worker    packssdw            m12, [r6+32*0]
4755*c0909341SAndroid Build Coastguard Worker    packssdw            m13, [r6+32*1]
4756*c0909341SAndroid Build Coastguard Worker    packssdw            m14, [r6+32*2]
4757*c0909341SAndroid Build Coastguard Worker    packssdw            m15, [r6+32*3]
; Step down to the lower eight slots and pack m0-m7 the same way.
4758*c0909341SAndroid Build Coastguard Worker    sub                  r6, 32*8
4759*c0909341SAndroid Build Coastguard Worker    packssdw             m0, [r6-32*4]
4760*c0909341SAndroid Build Coastguard Worker    packssdw             m1, [r6-32*3]
4761*c0909341SAndroid Build Coastguard Worker    packssdw             m2, [r6-32*2]
4762*c0909341SAndroid Build Coastguard Worker    packssdw             m3, [r6-32*1]
4763*c0909341SAndroid Build Coastguard Worker    packssdw             m4, [r6+32*0]
4764*c0909341SAndroid Build Coastguard Worker    packssdw             m5, [r6+32*1]
4765*c0909341SAndroid Build Coastguard Worker    packssdw             m6, [r6+32*2]
4766*c0909341SAndroid Build Coastguard Worker    packssdw             m7, [r6+32*3]
; [r6] has already been consumed above, so it is reused as a spill slot:
; packed m8 is parked there while m8 serves as a temporary.
4767*c0909341SAndroid Build Coastguard Worker    mova               [r6], m8
; Word -> dword -> qword interleave of m0-m7 (m8 as the ninth register).
4768*c0909341SAndroid Build Coastguard Worker    punpckhwd            m8, m0, m1
4769*c0909341SAndroid Build Coastguard Worker    punpcklwd            m0, m1
4770*c0909341SAndroid Build Coastguard Worker    punpcklwd            m1, m2, m3
4771*c0909341SAndroid Build Coastguard Worker    punpckhwd            m2, m3
4772*c0909341SAndroid Build Coastguard Worker    punpckhwd            m3, m6, m7
4773*c0909341SAndroid Build Coastguard Worker    punpcklwd            m6, m7
4774*c0909341SAndroid Build Coastguard Worker    punpcklwd            m7, m4, m5
4775*c0909341SAndroid Build Coastguard Worker    punpckhwd            m4, m5
4776*c0909341SAndroid Build Coastguard Worker    punpckldq            m5, m8, m2
4777*c0909341SAndroid Build Coastguard Worker    punpckhdq            m8, m2
4778*c0909341SAndroid Build Coastguard Worker    punpckhdq            m2, m0, m1
4779*c0909341SAndroid Build Coastguard Worker    punpckldq            m0, m1
4780*c0909341SAndroid Build Coastguard Worker    punpckhdq            m1, m7, m6
4781*c0909341SAndroid Build Coastguard Worker    punpckldq            m7, m6
4782*c0909341SAndroid Build Coastguard Worker    punpckhdq            m6, m4, m3
4783*c0909341SAndroid Build Coastguard Worker    punpckldq            m4, m3
4784*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m3, m2, m1
4785*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m2, m1
4786*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m1, m0, m7
4787*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m0, m7
4788*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m7, m8, m6
4789*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m8, m6
4790*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m6, m5, m4
4791*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m5, m4
; Swap through the spill slot: the parked (packed) m8 comes back in m4, and
; the finished vector currently in m8 is parked until the lane merge below.
4792*c0909341SAndroid Build Coastguard Worker    mova                 m4, [r6]
4793*c0909341SAndroid Build Coastguard Worker    mova               [r6], m8
; Same interleave sequence for the second group (m4 standing in for m8,
; together with m9-m15).
4794*c0909341SAndroid Build Coastguard Worker    punpcklwd            m8, m4, m9
4795*c0909341SAndroid Build Coastguard Worker    punpckhwd            m4, m9
4796*c0909341SAndroid Build Coastguard Worker    punpcklwd            m9, m10, m11
4797*c0909341SAndroid Build Coastguard Worker    punpckhwd           m10, m11
4798*c0909341SAndroid Build Coastguard Worker    punpckhwd           m11, m14, m15
4799*c0909341SAndroid Build Coastguard Worker    punpcklwd           m14, m15
4800*c0909341SAndroid Build Coastguard Worker    punpckhwd           m15, m12, m13
4801*c0909341SAndroid Build Coastguard Worker    punpcklwd           m12, m13
4802*c0909341SAndroid Build Coastguard Worker    punpckldq           m13, m4, m10
4803*c0909341SAndroid Build Coastguard Worker    punpckhdq            m4, m10
4804*c0909341SAndroid Build Coastguard Worker    punpckhdq           m10, m8, m9
4805*c0909341SAndroid Build Coastguard Worker    punpckldq            m8, m9
4806*c0909341SAndroid Build Coastguard Worker    punpckhdq            m9, m12, m14
4807*c0909341SAndroid Build Coastguard Worker    punpckldq           m12, m14
4808*c0909341SAndroid Build Coastguard Worker    punpckhdq           m14, m15, m11
4809*c0909341SAndroid Build Coastguard Worker    punpckldq           m15, m11
4810*c0909341SAndroid Build Coastguard Worker    punpckhqdq          m11, m10, m9
4811*c0909341SAndroid Build Coastguard Worker    punpcklqdq          m10, m9
4812*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m9, m8, m12
4813*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m8, m12
4814*c0909341SAndroid Build Coastguard Worker    punpcklqdq          m12, m13, m15
4815*c0909341SAndroid Build Coastguard Worker    punpckhqdq          m13, m15
4816*c0909341SAndroid Build Coastguard Worker    punpckhqdq          m15, m4, m14
4817*c0909341SAndroid Build Coastguard Worker    punpcklqdq          m14, m4, m14
; Merge 128-bit lanes across the two groups so that each output register
; combines one lane from the first group with one from the second.
4818*c0909341SAndroid Build Coastguard Worker    vperm2i128           m4, m0, m8, 0x31
4819*c0909341SAndroid Build Coastguard Worker    vinserti128          m0, xm8, 1
4820*c0909341SAndroid Build Coastguard Worker    vinserti128          m8, m5, xm12, 1
4821*c0909341SAndroid Build Coastguard Worker    vperm2i128          m12, m5, 0x13
4822*c0909341SAndroid Build Coastguard Worker    vperm2i128           m5, m1, m9, 0x31
4823*c0909341SAndroid Build Coastguard Worker    vinserti128          m1, xm9, 1
4824*c0909341SAndroid Build Coastguard Worker    vinserti128          m9, m6, xm13, 1
4825*c0909341SAndroid Build Coastguard Worker    vperm2i128          m13, m6, 0x13
4826*c0909341SAndroid Build Coastguard Worker    vperm2i128           m6, m2, m10, 0x31
4827*c0909341SAndroid Build Coastguard Worker    vinserti128          m2, xm10, 1
4828*c0909341SAndroid Build Coastguard Worker    vinserti128         m10, m7, xm14, 1
4829*c0909341SAndroid Build Coastguard Worker    vperm2i128          m14, m7, 0x13
4830*c0909341SAndroid Build Coastguard Worker    vperm2i128           m7, m3, m11, 0x31
4831*c0909341SAndroid Build Coastguard Worker    vinserti128          m3, xm11, 1
; The vector parked at [r6] is merged last: its low 16 bytes become the low
; lane of m11, its high 16 bytes the low lane of m15.
4832*c0909341SAndroid Build Coastguard Worker    mova               xm11, [r6]
4833*c0909341SAndroid Build Coastguard Worker    vinserti128         m11, xm15, 1
4834*c0909341SAndroid Build Coastguard Worker    vinserti128         m15, [r6+16], 0
4835*c0909341SAndroid Build Coastguard Worker    ret
; Reduced path for negative eobd: the stack slots are not read. The 16x8
; transpose helper (defined elsewhere) handles the register data and the
; remaining eight rows m8-m15 are cleared to zero.
4836*c0909341SAndroid Build Coastguard Worker.transpose_fast:
4837*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_10bpc).transpose2
4838*c0909341SAndroid Build Coastguard Worker    pxor                 m8, m8
4839*c0909341SAndroid Build Coastguard Worker    REPX       {mova x, m8}, m9, m10, m11, m12, m13, m14, m15
4840*c0909341SAndroid Build Coastguard Worker    ret
; .main: first-pass 16-point inverse DCT on one 32-byte-wide column group of
; the coefficient buffer (rows are 64 bytes apart; each load fetches 32 bytes).
; The odd-indexed rows go through the 8x16 odd-half helper, the even-indexed
; rows through the 8x8 kernel plus the 8x16 even-half helper; all three
; helpers are defined elsewhere in the file.
;   In:  cq = coefficient pointer for this column group
;   Out: m0-m7 with a rounding bias of 2 added; the remaining outputs are
;        left wherever the helpers put them (not visible in this chunk).
4841*c0909341SAndroid Build Coastguard WorkerALIGN function_align
4842*c0909341SAndroid Build Coastguard Worker.main:
; Odd rows 1, 3, ..., 15.
4843*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+64* 1]
4844*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+64* 3]
4845*c0909341SAndroid Build Coastguard Worker    mova                 m2, [cq+64* 5]
4846*c0909341SAndroid Build Coastguard Worker    mova                 m3, [cq+64* 7]
4847*c0909341SAndroid Build Coastguard Worker    mova                 m4, [cq+64* 9]
4848*c0909341SAndroid Build Coastguard Worker    mova                 m5, [cq+64*11]
4849*c0909341SAndroid Build Coastguard Worker    mova                 m6, [cq+64*13]
4850*c0909341SAndroid Build Coastguard Worker    mova                 m7, [cq+64*15]
4851*c0909341SAndroid Build Coastguard Worker    call m(idct_8x16_internal_10bpc).main_oddhalf
; Even rows 0, 2, ..., 14.
4852*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+64* 0]
4853*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+64* 2]
4854*c0909341SAndroid Build Coastguard Worker    mova                 m2, [cq+64* 4]
4855*c0909341SAndroid Build Coastguard Worker    mova                 m3, [cq+64* 6]
4856*c0909341SAndroid Build Coastguard Worker    mova                 m4, [cq+64* 8]
4857*c0909341SAndroid Build Coastguard Worker    mova                 m5, [cq+64*10]
4858*c0909341SAndroid Build Coastguard Worker    mova                 m6, [cq+64*12]
4859*c0909341SAndroid Build Coastguard Worker    mova                 m7, [cq+64*14]
4860*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).main
4861*c0909341SAndroid Build Coastguard Worker    call m(idct_8x16_internal_10bpc).main_evenhalf
; Derive the pd_2 rounding constant by shifting m11 right by 10 (m11 is
; presumably pd_2048 at this point - TODO confirm) and pre-add it to m0-m7 so
; the caller can finish with a plain arithmetic shift.
4862*c0909341SAndroid Build Coastguard Worker    psrld               m10, m11, 10 ; pd_2
4863*c0909341SAndroid Build Coastguard Worker    REPX    {paddd  x, m10}, m0, m1, m2, m3, m4, m5, m6, m7
4864*c0909341SAndroid Build Coastguard Worker    ret
4865*c0909341SAndroid Build Coastguard Worker
; Instantiate the 16x16 entry points whose first argument is adst. The macro
; is defined outside this chunk; presumably the generated functions dispatch
; into the internal routine below - verify against the macro.
4866*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X16_FN adst, dct
4867*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X16_FN adst, adst
4868*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X16_FN adst, flipadst
4869*c0909341SAndroid Build Coastguard Worker
; iadst_16x16_internal_10bpc: first pass of the 16x16 inverse ADST.
;   cglobal arguments: 0 parameters auto-loaded, 7 GPRs, 16 vector registers,
;   32*24 bytes of stack; named registers dst, stride, c, eob, tx2.
;   m13/m14 = 18-bit clip bounds, m15 = pd_2896, r6 = rsp+32*4.
; The coefficients are processed as two 32-byte-wide column groups. When
; eob >= 36 the group at cq+32 is transformed first and its sixteen result
; vectors are stored to the stack; the group at cq is always transformed and
; left in m0-m15. Control then leaves through tx2q (the second pass).
; In both groups every other output is negated (constant - x instead of
; x + constant), and two rounding schemes are used: +2 then >>2 for outputs
; 0-3 and 12-15, +5120 then >>13 for outputs 4-11. The negated outputs use
; 5119 as the constant.
4870*c0909341SAndroid Build Coastguard Workercglobal iadst_16x16_internal_10bpc, 0, 7, 16, 32*24, dst, stride, c, eob, tx2
4871*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_18b_min]
4872*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m14, [clip_18b_max]
4873*c0909341SAndroid Build Coastguard Worker.pass1:
4874*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m15, [pd_2896]
4875*c0909341SAndroid Build Coastguard Worker    lea                  r6, [rsp+32*4]
; eobd keeps this biased value; its sign is what .transpose tests in pass 2
; (assuming nothing in between modifies it).
4876*c0909341SAndroid Build Coastguard Worker    sub                eobd, 36
4877*c0909341SAndroid Build Coastguard Worker    jl .fast
; Full path: transform the column group at cq+32 first.
4878*c0909341SAndroid Build Coastguard Worker    add                  cq, 32
4879*c0909341SAndroid Build Coastguard Worker    call .main
4880*c0909341SAndroid Build Coastguard Worker    sub                  cq, 32
; Outputs 4-11: bias, alternate negation, then shift right by 13.
4881*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m8, [pd_5120]
4882*c0909341SAndroid Build Coastguard Worker    paddd                m4, m8
4883*c0909341SAndroid Build Coastguard Worker    paddd                m6, m8
4884*c0909341SAndroid Build Coastguard Worker    paddd                m9, m8
4885*c0909341SAndroid Build Coastguard Worker    paddd               m11, m8
4886*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m8, [pd_5119]
4887*c0909341SAndroid Build Coastguard Worker    psubd                m5, m8, m5
4888*c0909341SAndroid Build Coastguard Worker    psubd                m7, m8, m7
4889*c0909341SAndroid Build Coastguard Worker    psubd               m10, m8, m10
4890*c0909341SAndroid Build Coastguard Worker    psubd               m12, m8, m12
4891*c0909341SAndroid Build Coastguard Worker    REPX      {psrad x, 13}, m4, m5, m6, m7, m9, m10, m11, m12
4892*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*0], m4
4893*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*1], m5
4894*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*2], m6
4895*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*3], m7
; Outputs 0-3 (registers) and 12-15 (read back in reverse order from the four
; slots below r6): bias by 2, alternate negation, shift right by 2.
4896*c0909341SAndroid Build Coastguard Worker    psrld                m4, m15, 10 ; pd_2
4897*c0909341SAndroid Build Coastguard Worker    paddd                m0, m4
4898*c0909341SAndroid Build Coastguard Worker    psubd                m1, m4, m1
4899*c0909341SAndroid Build Coastguard Worker    paddd                m2, m4
4900*c0909341SAndroid Build Coastguard Worker    psubd                m3, m4, m3
4901*c0909341SAndroid Build Coastguard Worker    psubd                m7, m4, [r6-32*4]
4902*c0909341SAndroid Build Coastguard Worker    paddd                m6, m4, [r6-32*3]
4903*c0909341SAndroid Build Coastguard Worker    psubd                m5, m4, [r6-32*2]
4904*c0909341SAndroid Build Coastguard Worker    paddd                m4,     [r6-32*1]
4905*c0909341SAndroid Build Coastguard Worker    REPX      {psrad x, 2 }, m0, m1, m2, m3, m4, m5, m6, m7
; The slots just read are now free and receive outputs 0-3.
4906*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*4], m0
4907*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*3], m1
4908*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*2], m2
4909*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*1], m3
; Advance to the next eight slots for outputs 8-15.
4910*c0909341SAndroid Build Coastguard Worker    add                  r6, 32*8
4911*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*4], m9
4912*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*3], m10
4913*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*2], m11
4914*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*1], m12
4915*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*0], m4
4916*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*1], m5
4917*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*2], m6
4918*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*3], m7
; Column group at cq. r6 is moved past the saved results so that .main's
; stack outputs do not overwrite them.
4919*c0909341SAndroid Build Coastguard Worker.fast:
4920*c0909341SAndroid Build Coastguard Worker    add                  r6, 32*8
4921*c0909341SAndroid Build Coastguard Worker    call .main
; The clip bounds in m13/m14 are replaced by the rounding constants here.
4922*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m14, [pd_5120]
4923*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [pd_5119]
4924*c0909341SAndroid Build Coastguard Worker    psrld               m15, 10 ; pd_2
4925*c0909341SAndroid Build Coastguard Worker    paddd                m0, m15
4926*c0909341SAndroid Build Coastguard Worker    psubd                m1, m15, m1
4927*c0909341SAndroid Build Coastguard Worker    paddd                m2, m15
4928*c0909341SAndroid Build Coastguard Worker    psubd                m3, m15, m3
4929*c0909341SAndroid Build Coastguard Worker    paddd                m4, m14
4930*c0909341SAndroid Build Coastguard Worker    psubd                m5, m13, m5
4931*c0909341SAndroid Build Coastguard Worker    paddd                m6, m14
4932*c0909341SAndroid Build Coastguard Worker    psubd                m7, m13, m7
; .main's m9-m12 are renumbered to m8-m11 so the outputs end up contiguous.
4933*c0909341SAndroid Build Coastguard Worker    paddd                m8, m14, m9
4934*c0909341SAndroid Build Coastguard Worker    psubd                m9, m13, m10
4935*c0909341SAndroid Build Coastguard Worker    paddd               m10, m14, m11
4936*c0909341SAndroid Build Coastguard Worker    psubd               m11, m13, m12
; Outputs 12-15 come from the stack in reverse slot order. m13-m15 are
; overwritten as their constants stop being needed.
4937*c0909341SAndroid Build Coastguard Worker    paddd               m12, m15, [r6-32*1]
4938*c0909341SAndroid Build Coastguard Worker    psubd               m13, m15, [r6-32*2]
4939*c0909341SAndroid Build Coastguard Worker    paddd               m14, m15, [r6-32*3]
4940*c0909341SAndroid Build Coastguard Worker    psubd               m15,      [r6-32*4]
; Shared tail (iflipadst also jumps here): final shifts, step r6 back by eight
; slots and continue with the second pass selected by tx2q.
4941*c0909341SAndroid Build Coastguard Worker.pass1_end:
4942*c0909341SAndroid Build Coastguard Worker    REPX      {psrad x, 2 }, m0,  m1,  m2,  m3,  m12, m13, m14, m15
4943*c0909341SAndroid Build Coastguard Worker    REPX      {psrad x, 13}, m4,  m5,  m6,  m7,  m8,  m9,  m10, m11
4944*c0909341SAndroid Build Coastguard Worker    sub                  r6, 32*8
4945*c0909341SAndroid Build Coastguard Worker    jmp                tx2q
; Second pass of the 16x16 inverse ADST: pack/transpose the first-pass data,
; run the 8bpc ADST kernel on the 16-bit values, then scale and output the
; rows in four groups of four. Even rows are multiplied by +2048 and odd rows
; by -2048 (pmulhrsw), so odd rows are negated while being rounded and
; shifted right by 4.
4946*c0909341SAndroid Build Coastguard Worker.pass2:
4947*c0909341SAndroid Build Coastguard Worker    call m(idct_16x16_internal_10bpc).transpose
; NOTE(review): r6 = pw_5+128 and the m15 spill to [rsp] look like the calling
; convention of the 8bpc kernel - confirm against iadst_16x16_internal_8bpc.
4948*c0909341SAndroid Build Coastguard Worker    lea                  r6, [pw_5+128]
4949*c0909341SAndroid Build Coastguard Worker    mova              [rsp], m15
4950*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x16_internal_8bpc).main
4951*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x16_internal_8bpc).main_pass2_end
; Rows 8, 12 and 13 go to the stack: m12/m13 are needed for the constants,
; and m8 is presumably clobbered by the write helpers - TODO confirm.
4952*c0909341SAndroid Build Coastguard Worker    mova         [rsp+32*0], m8
4953*c0909341SAndroid Build Coastguard Worker    mova         [rsp+32*2], m12
4954*c0909341SAndroid Build Coastguard Worker    mova         [rsp+32*3], m13
; m12 = +2048, m13 = 0 - m12 = -2048 in every word.
4955*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [pw_2048]
4956*c0909341SAndroid Build Coastguard Worker    pxor                m13, m13
4957*c0909341SAndroid Build Coastguard Worker    psubw               m13, m12
4958*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m12
; Row 1 is read from stack slot 1 (presumably left there by the 8bpc kernel)
; before that slot is reused for row 9 on the next line; the order matters.
4959*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m13, [rsp+32*1]
4960*c0909341SAndroid Build Coastguard Worker    mova         [rsp+32*1], m9
4961*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m2, m12
4962*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m3, m13
4963*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_10bpc).write_16x4_start
; Rows 4-7.
4964*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m12, m4
4965*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m13, m5
4966*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m2, m12, m6
4967*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m3, m13, m7
4968*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_10bpc).write_16x4_zero
; Rows 8-11 (8 and 9 from the stack).
4969*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m12, [rsp+32*0]
4970*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m13, [rsp+32*1]
4971*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m2, m12, m10
4972*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m3, m13, m11
4973*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_10bpc).write_16x4_zero
; Rows 12-15 (12 and 13 from the stack).
4974*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m12, [rsp+32*2]
4975*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m13, [rsp+32*3]
4976*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m2, m12, m14
4977*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m3, m13, m15
4978*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_10bpc).write_16x4_zero
4979*c0909341SAndroid Build Coastguard Worker    RET
; .main: first-pass 16-point inverse ADST on one 32-byte-wide column group
; (rows are 64 bytes apart). The work is delegated to the two halves of the
; 16x8 ADST kernel defined elsewhere: rows 2,13,6,9,10,5,14,1 feed main_part1
; and rows 0,15,4,11,8,7,12,3 feed main_part2.
;   In:  cq = coefficient pointer for this column group. The callers in this
;        chunk also set m13/m14 (clip bounds), m15 (pd_2896) and r6 (stack
;        pointer) beforehand, presumably for the helpers - TODO confirm.
;   Out: as used by the callers in this chunk, results are in m0-m7, m9-m12
;        and the four stack slots [r6-32*4 .. r6-32*1].
; Also called by iflipadst_16x16_internal_10bpc.
4980*c0909341SAndroid Build Coastguard WorkerALIGN function_align
4981*c0909341SAndroid Build Coastguard Worker.main:
4982*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+64* 2]
4983*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+64*13]
4984*c0909341SAndroid Build Coastguard Worker    mova                 m2, [cq+64* 6]
4985*c0909341SAndroid Build Coastguard Worker    mova                 m3, [cq+64* 9]
4986*c0909341SAndroid Build Coastguard Worker    mova                 m4, [cq+64*10]
4987*c0909341SAndroid Build Coastguard Worker    mova                 m5, [cq+64* 5]
4988*c0909341SAndroid Build Coastguard Worker    mova                 m6, [cq+64*14]
4989*c0909341SAndroid Build Coastguard Worker    mova                 m7, [cq+64* 1]
; m12 = pd_2048 is set up for the helper (presumably its rounding constant).
4990*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [pd_2048]
4991*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x8_internal_10bpc).main_part1
4992*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+64* 0]
4993*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+64*15]
4994*c0909341SAndroid Build Coastguard Worker    mova                 m2, [cq+64* 4]
4995*c0909341SAndroid Build Coastguard Worker    mova                 m3, [cq+64*11]
4996*c0909341SAndroid Build Coastguard Worker    mova                 m4, [cq+64* 8]
4997*c0909341SAndroid Build Coastguard Worker    mova                 m5, [cq+64* 7]
4998*c0909341SAndroid Build Coastguard Worker    mova                 m6, [cq+64*12]
4999*c0909341SAndroid Build Coastguard Worker    mova                 m7, [cq+64* 3]
; Tail call: main_part2's ret returns directly to the caller of .main.
5000*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_16x8_internal_10bpc).main_part2
5001*c0909341SAndroid Build Coastguard Worker
; Instantiate the 16x16 entry points whose first argument is flipadst. The
; macro is defined outside this chunk; presumably the generated functions
; dispatch into the internal routine below - verify against the macro.
5002*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X16_FN flipadst, dct
5003*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X16_FN flipadst, adst
5004*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X16_FN flipadst, flipadst
5005*c0909341SAndroid Build Coastguard Worker
; iflipadst_16x16_internal_10bpc: first pass of the 16x16 inverse flipped
; ADST. It follows the same structure as iadst_16x16_internal_10bpc and
; reuses that function's .main and .pass1_end, but assigns the sixteen
; outputs to registers and stack slots in reverse order.
;   cglobal arguments: 0 parameters auto-loaded, 7 GPRs, 16 vector registers,
;   32*24 bytes of stack; named registers dst, stride, c, eob, tx2.
5006*c0909341SAndroid Build Coastguard Workercglobal iflipadst_16x16_internal_10bpc, 0, 7, 16, 32*24, dst, stride, c, eob, tx2
5007*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_18b_min]
5008*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m14, [clip_18b_max]
5009*c0909341SAndroid Build Coastguard Worker.pass1:
5010*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m15, [pd_2896]
5011*c0909341SAndroid Build Coastguard Worker    lea                  r6, [rsp+32*4]
; eob < 36: skip the column group at cq+32 (eobd stays negative).
5012*c0909341SAndroid Build Coastguard Worker    sub                eobd, 36
5013*c0909341SAndroid Build Coastguard Worker    jl .fast
5014*c0909341SAndroid Build Coastguard Worker    add                  cq, 32
5015*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x16_internal_10bpc).main
5016*c0909341SAndroid Build Coastguard Worker    sub                  cq, 32
; Middle outputs: +5120 or 5119-x, then shift right by 13.
5017*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m8, [pd_5120]
5018*c0909341SAndroid Build Coastguard Worker    paddd               m11, m8
5019*c0909341SAndroid Build Coastguard Worker    paddd                m9, m8
5020*c0909341SAndroid Build Coastguard Worker    paddd                m6, m8
5021*c0909341SAndroid Build Coastguard Worker    paddd                m4, m8
5022*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m8, [pd_5119]
5023*c0909341SAndroid Build Coastguard Worker    psubd               m12, m8, m12
5024*c0909341SAndroid Build Coastguard Worker    psubd               m10, m8, m10
5025*c0909341SAndroid Build Coastguard Worker    psubd                m7, m8, m7
5026*c0909341SAndroid Build Coastguard Worker    psubd                m5, m8, m5
5027*c0909341SAndroid Build Coastguard Worker    REPX      {psrad x, 13}, m12, m11, m10, m9, m7, m6, m5, m4
; Stored in descending register order (m12..m9), the reverse of the adst path.
5028*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*0], m12
5029*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*1], m11
5030*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*2], m10
5031*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*3], m9
; Outer outputs: +2 or 2-x, then shift right by 2. m9 is free now and holds
; the constant.
5032*c0909341SAndroid Build Coastguard Worker    psrld                m9, m15, 10 ; pd_2
5033*c0909341SAndroid Build Coastguard Worker    psubd                m3, m9, m3
5034*c0909341SAndroid Build Coastguard Worker    paddd                m2, m9
5035*c0909341SAndroid Build Coastguard Worker    psubd                m1, m9, m1
5036*c0909341SAndroid Build Coastguard Worker    paddd                m0, m9
5037*c0909341SAndroid Build Coastguard Worker    psubd               m12, m9, [r6-32*4]
5038*c0909341SAndroid Build Coastguard Worker    paddd               m11, m9, [r6-32*3]
5039*c0909341SAndroid Build Coastguard Worker    psubd               m10, m9, [r6-32*2]
5040*c0909341SAndroid Build Coastguard Worker    paddd                m9,     [r6-32*1]
5041*c0909341SAndroid Build Coastguard Worker    REPX      {psrad x, 2 }, m12, m11, m10, m9, m3, m2, m1, m0
5042*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*4], m12
5043*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*3], m11
5044*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*2], m10
5045*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*1], m9
; Next eight slots, again in descending register order.
5046*c0909341SAndroid Build Coastguard Worker    add                  r6, 32*8
5047*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*4], m7
5048*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*3], m6
5049*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*2], m5
5050*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*1], m4
5051*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*0], m3
5052*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*1], m2
5053*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*2], m1
5054*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*3], m0
; Column group at cq: results stay in registers, reordered so that the shared
; .pass1_end shifts apply correctly (m0-m3 and m12-m15 carry the +/-2 bias,
; m4-m11 the 5120/5119 bias).
5055*c0909341SAndroid Build Coastguard Worker.fast:
5056*c0909341SAndroid Build Coastguard Worker    add                  r6, 32*8
5057*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x16_internal_10bpc).main
5058*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m14, [pd_5120]
5059*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [pd_5119]
5060*c0909341SAndroid Build Coastguard Worker    psrld               m15, 10 ; pd_2
; Each destination is a register whose old value has already been consumed
; (or is consumed by this very instruction), so the ordering below matters.
5061*c0909341SAndroid Build Coastguard Worker    psubd                m8, m13, m7
5062*c0909341SAndroid Build Coastguard Worker    paddd                m7, m14, m9
5063*c0909341SAndroid Build Coastguard Worker    paddd                m9, m14, m6
5064*c0909341SAndroid Build Coastguard Worker    psubd                m6, m13, m10
5065*c0909341SAndroid Build Coastguard Worker    psubd               m10, m13, m5
5066*c0909341SAndroid Build Coastguard Worker    paddd                m5, m14, m11
5067*c0909341SAndroid Build Coastguard Worker    paddd               m11, m14, m4
5068*c0909341SAndroid Build Coastguard Worker    psubd                m4, m13, m12
5069*c0909341SAndroid Build Coastguard Worker    psubd               m12, m15, m3
5070*c0909341SAndroid Build Coastguard Worker    paddd                m3, m15, [r6-32*1]
5071*c0909341SAndroid Build Coastguard Worker    paddd               m13, m15, m2
5072*c0909341SAndroid Build Coastguard Worker    psubd                m2, m15, [r6-32*2]
5073*c0909341SAndroid Build Coastguard Worker    psubd               m14, m15, m1
; m1 has been consumed; keep a copy of pd_2 in it because m15 is overwritten
; with a result on the next line.
5074*c0909341SAndroid Build Coastguard Worker    mova                 m1, m15
5075*c0909341SAndroid Build Coastguard Worker    paddd               m15, m0
5076*c0909341SAndroid Build Coastguard Worker    psubd                m0, m1, [r6-32*4]
5077*c0909341SAndroid Build Coastguard Worker    paddd                m1,     [r6-32*3]
5078*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_16x16_internal_10bpc).pass1_end
; Second pass of the 16x16 inverse flipped ADST: same kernel calls as the
; adst second pass, but the sixteen result rows are emitted in reverse order
; (row 15 first). m12 = +2048 and m13 = -2048 are the pmulhrsw scale factors;
; the negative factor is applied to the first and third row of each group.
5079*c0909341SAndroid Build Coastguard Worker.pass2:
5080*c0909341SAndroid Build Coastguard Worker    call m(idct_16x16_internal_10bpc).transpose
; NOTE(review): r6 = pw_5+128 and the m15 spill to [rsp] look like the calling
; convention of the 8bpc kernel - confirm against iadst_16x16_internal_8bpc.
5081*c0909341SAndroid Build Coastguard Worker    lea                  r6, [pw_5+128]
5082*c0909341SAndroid Build Coastguard Worker    mova              [rsp], m15
5083*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x16_internal_8bpc).main
5084*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x16_internal_8bpc).main_pass2_end
; Rows 3, 2 and 0 are parked on the stack for the last output group. Slot 1
; is not written here; row 1 is presumably already there from the 8bpc kernel
; (the adst second pass reads it from the same slot) - TODO confirm.
5085*c0909341SAndroid Build Coastguard Worker    mova         [rsp+32*3], m3
5086*c0909341SAndroid Build Coastguard Worker    mova         [rsp+32*2], m2
5087*c0909341SAndroid Build Coastguard Worker    mova         [rsp+32*0], m0
; Rows 13 and 12 move to m2/m3 before m12/m13 are loaded with the constants.
5088*c0909341SAndroid Build Coastguard Worker    mova                 m2, m13
5089*c0909341SAndroid Build Coastguard Worker    mova                 m3, m12
5090*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [pw_2048]
5091*c0909341SAndroid Build Coastguard Worker    pxor                m13, m13
5092*c0909341SAndroid Build Coastguard Worker    psubw               m13, m12
; First output group: rows 15, 14, 13, 12.
5093*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m13, m15
5094*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m12, m14
5095*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m2, m13
5096*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m3, m12
; Rows 8 and 9 are copied into the now-free m14/m15, presumably because the
; write helpers clobber m8/m9 - TODO confirm.
5097*c0909341SAndroid Build Coastguard Worker    mova                m14, m8
5098*c0909341SAndroid Build Coastguard Worker    mova                m15, m9
5099*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_10bpc).write_16x4_start
; Rows 11, 10, 9, 8.
5100*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m13, m11
5101*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m12, m10
5102*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m2, m13, m15
5103*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m3, m12, m14
5104*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_10bpc).write_16x4_zero
; Rows 7, 6, 5, 4.
5105*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m13, m7
5106*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m12, m6
5107*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m2, m13, m5
5108*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m3, m12, m4
5109*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_10bpc).write_16x4_zero
; Rows 3, 2, 1, 0 from the stack slots.
5110*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m13, [rsp+32*3]
5111*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m12, [rsp+32*2]
5112*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m2, m13, [rsp+32*1]
5113*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m3, m12, [rsp+32*0]
5114*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_10bpc).write_16x4_zero
5115*c0909341SAndroid Build Coastguard Worker    RET
5116*c0909341SAndroid Build Coastguard Worker
; Instantiate the 16x16 entry points whose first argument is identity. The
; macro is defined outside this chunk.
; NOTE(review): the meaning of the third argument (-92) is not visible here -
; confirm against the INV_TXFM_16X16_FN definition.
5117*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X16_FN identity, dct, -92
5118*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X16_FN identity, identity
5119*c0909341SAndroid Build Coastguard Worker
5120*c0909341SAndroid Build Coastguard Workercglobal iidentity_16x16_internal_10bpc, 0, 7, 16, 32*24, dst, stride, c, eob, tx2
5121*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m15, [pd_5793]
5122*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m7, [pd_5120]
5123*c0909341SAndroid Build Coastguard Worker    lea                  r6, [rsp+32*4]
5124*c0909341SAndroid Build Coastguard Worker    sub                eobd, 36
5125*c0909341SAndroid Build Coastguard Worker    jl .fast
5126*c0909341SAndroid Build Coastguard Worker    mov                  r3, -32*8*4
5127*c0909341SAndroid Build Coastguard Worker.righthalf:
5128*c0909341SAndroid Build Coastguard Worker    pmulld               m0, m15, [cq+r3+32*33]
5129*c0909341SAndroid Build Coastguard Worker    pmulld               m1, m15, [cq+r3+32*35]
5130*c0909341SAndroid Build Coastguard Worker    pmulld               m2, m15, [cq+r3+32*37]
5131*c0909341SAndroid Build Coastguard Worker    pmulld               m3, m15, [cq+r3+32*39]
5132*c0909341SAndroid Build Coastguard Worker    add                  r6, 32*4
5133*c0909341SAndroid Build Coastguard Worker    REPX      {paddd x, m7}, m0, m1, m2, m3
5134*c0909341SAndroid Build Coastguard Worker    REPX      {psrad x, 13}, m0, m1, m2, m3
5135*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*0], m0
5136*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*1], m1
5137*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*2], m2
5138*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*3], m3
5139*c0909341SAndroid Build Coastguard Worker    add                  r3, 32*8
5140*c0909341SAndroid Build Coastguard Worker    jl .righthalf
5141*c0909341SAndroid Build Coastguard Worker.fast:
5142*c0909341SAndroid Build Coastguard Worker    pmulld               m0, m15, [cq+64* 0]
5143*c0909341SAndroid Build Coastguard Worker    pmulld               m1, m15, [cq+64* 1]
5144*c0909341SAndroid Build Coastguard Worker    pmulld               m2, m15, [cq+64* 2]
5145*c0909341SAndroid Build Coastguard Worker    pmulld               m3, m15, [cq+64* 3]
5146*c0909341SAndroid Build Coastguard Worker    pmulld               m4, m15, [cq+64* 4]
5147*c0909341SAndroid Build Coastguard Worker    pmulld               m5, m15, [cq+64* 5]
5148*c0909341SAndroid Build Coastguard Worker    pmulld               m6, m15, [cq+64* 6]
5149*c0909341SAndroid Build Coastguard Worker    pmulld               m8, m15, [cq+64* 7]
5150*c0909341SAndroid Build Coastguard Worker    mova               [cq], m8
5151*c0909341SAndroid Build Coastguard Worker    pmulld               m8, m15, [cq+64* 8]
5152*c0909341SAndroid Build Coastguard Worker    pmulld               m9, m15, [cq+64* 9]
5153*c0909341SAndroid Build Coastguard Worker    pmulld              m10, m15, [cq+64*10]
5154*c0909341SAndroid Build Coastguard Worker    pmulld              m11, m15, [cq+64*11]
5155*c0909341SAndroid Build Coastguard Worker    pmulld              m12, m15, [cq+64*12]
5156*c0909341SAndroid Build Coastguard Worker    pmulld              m13, m15, [cq+64*13]
5157*c0909341SAndroid Build Coastguard Worker    pmulld              m14, m15, [cq+64*14]
5158*c0909341SAndroid Build Coastguard Worker    pmulld              m15,      [cq+64*15]
5159*c0909341SAndroid Build Coastguard Worker    REPX      {paddd x, m7}, m0,  m1,  m2,  m3,  m4,  m5,  m6, \
5160*c0909341SAndroid Build Coastguard Worker                             m8,  m9,  m10, m11, m12, m13, m14, m15
5161*c0909341SAndroid Build Coastguard Worker    paddd                m7, [cq]
5162*c0909341SAndroid Build Coastguard Worker    REPX      {psrad x, 13}, m0,  m1,  m2,  m3,  m4,  m5,  m6,  m7, \
5163*c0909341SAndroid Build Coastguard Worker                             m8,  m9,  m10, m11, m12, m13, m14, m15
5164*c0909341SAndroid Build Coastguard Worker    jmp                tx2q
5165*c0909341SAndroid Build Coastguard Worker.pass2:
5166*c0909341SAndroid Build Coastguard Worker    call m(idct_16x16_internal_10bpc).transpose
5167*c0909341SAndroid Build Coastguard Worker
5168*c0909341SAndroid Build Coastguard Worker    mova          [cq+32*0], m15
5169*c0909341SAndroid Build Coastguard Worker    mova          [cq+32*1], m0
5170*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m15, [pw_1697x16]
5171*c0909341SAndroid Build Coastguard Worker
5172*c0909341SAndroid Build Coastguard Worker    REPX  {IDTX16 x, 0, 15},  1,  2,  3,  4,  5,  6,  7, \
5173*c0909341SAndroid Build Coastguard Worker                              8,  9, 10, 11, 12, 13, 14
5174*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+32*1]
5175*c0909341SAndroid Build Coastguard Worker    mova          [cq+32*1], m1
5176*c0909341SAndroid Build Coastguard Worker    IDTX16                0, 1, 15
5177*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+32*0]
5178*c0909341SAndroid Build Coastguard Worker    pmulhrsw            m15, m1
5179*c0909341SAndroid Build Coastguard Worker    paddsw               m1, m1
5180*c0909341SAndroid Build Coastguard Worker    paddsw              m15, m1
5181*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+32*1]
5182*c0909341SAndroid Build Coastguard Worker    jmp m(idct_16x16_internal_10bpc).end
5183*c0909341SAndroid Build Coastguard Worker
; Instantiations for the 12 bpc transform pairs whose first pass is the DCT
; kernel below.
; NOTE(review): INV_TXFM_16X16_FN is defined outside this chunk; the 3rd
; argument (0 / 28) is presumably an eob-related offset and the 4th (12) the
; bit depth -- confirm against the macro definition.
5184*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X16_FN dct, dct,       0, 12
5185*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X16_FN dct, identity, 28, 12
5186*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X16_FN dct, adst,      0, 12
5187*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X16_FN dct, flipadst,  0, 12
5188*c0909341SAndroid Build Coastguard Worker
; idct_16x16_internal_12bpc
;   Entry: loads clip_20b_min/clip_20b_max into m12/m13 and reuses the 10 bpc
;   first pass (presumably that code reads m12/m13 as its clamp bounds).
;   .pass2:       runs .pass2_main once on m0-m7 and once on the saved m8-m15,
;                 then finishes in iadst_16x16_internal_12bpc.end.
;   .write_16x16: output helper (no caller inside this chunk).
;   .pass2_main:  one 8-column slice of the second-pass DCT on 32-bit data.
;   cglobal arguments follow the x86inc convention: 0 auto-loaded arguments,
;   7 GPRs, 16 vector registers, 32*24 bytes of stack.
5189*c0909341SAndroid Build Coastguard Workercglobal idct_16x16_internal_12bpc, 0, 7, 16, 32*24, dst, stride, c, eob, tx2
5190*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [clip_20b_min]
5191*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_20b_max]
5192*c0909341SAndroid Build Coastguard Worker    jmp m(idct_16x16_internal_10bpc).pass1
5193*c0909341SAndroid Build Coastguard Worker.pass2:
; Park m8-m15 in cq+32*8..15; .pass2_main consumes m0-m7 and clobbers the rest.
5194*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 8], m8
5195*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 9], m9
5196*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*10], m10
5197*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*11], m11
5198*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*12], m12
5199*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*13], m13
5200*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*14], m14
5201*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*15], m15
5202*c0909341SAndroid Build Coastguard Worker    call .pass2_main
; Narrow the sixteen 32-bit result vectors to eight 16-bit vectors (signed
; saturation) and store them at r6-32*4 .. r6+32*3.
5203*c0909341SAndroid Build Coastguard Worker    packssdw             m0,  m1
5204*c0909341SAndroid Build Coastguard Worker    packssdw             m1,  m2,  m3
5205*c0909341SAndroid Build Coastguard Worker    packssdw             m2,  m4,  m5
5206*c0909341SAndroid Build Coastguard Worker    packssdw             m3,  m6,  m7
5207*c0909341SAndroid Build Coastguard Worker    packssdw             m4,  m8,  m9
5208*c0909341SAndroid Build Coastguard Worker    packssdw             m5, m10, m11
5209*c0909341SAndroid Build Coastguard Worker    packssdw             m6, m12, m13
5210*c0909341SAndroid Build Coastguard Worker    packssdw             m7, m14, m15
5211*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*4], m0
5212*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*3], m1
5213*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*2], m2
5214*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*1], m3
5215*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*0], m4
5216*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*1], m5
5217*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*2], m6
5218*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*3], m7
; Second slice: bring the parked m8-m15 back as m0-m7.
5219*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+32* 8]
5220*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+32* 9]
5221*c0909341SAndroid Build Coastguard Worker    mova                 m2, [cq+32*10]
5222*c0909341SAndroid Build Coastguard Worker    mova                 m3, [cq+32*11]
5223*c0909341SAndroid Build Coastguard Worker    mova                 m4, [cq+32*12]
5224*c0909341SAndroid Build Coastguard Worker    mova                 m5, [cq+32*13]
5225*c0909341SAndroid Build Coastguard Worker    mova                 m6, [cq+32*14]
5226*c0909341SAndroid Build Coastguard Worker    mova                 m7, [cq+32*15]
; r5 remembers where the packed first slice was stored (the .end code reads
; [r5-32*4] .. [r5+32*3]); r6 is moved on by 32*16 for the second slice.
5227*c0909341SAndroid Build Coastguard Worker    mov                  r5, r6
5228*c0909341SAndroid Build Coastguard Worker    add                  r6, 32*16
5229*c0909341SAndroid Build Coastguard Worker    call .pass2_main
5230*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_16x16_internal_12bpc).end
5231*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; .write_16x16: saves m8, m9 and m12 just above the return address, scales
; m0-m3 with pmulhrsw by pw_16384 (a rounding shift right by one bit if the
; constant holds 16384 as its name suggests), writes the first four rows via
; the 12 bpc start helper plus the shared write_16x4_zero, and continues in
; the 10 bpc write_16x16_2 code.
; NOTE(review): the helpers are outside this chunk; presumably they rely on
; the three saved registers being at [rsp+gprsize+32*0..2] -- confirm.
5232*c0909341SAndroid Build Coastguard Worker.write_16x16:
5233*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize+32*0], m8
5234*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize+32*1], m9
5235*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize+32*2], m12
5236*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [pw_16384]
5237*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m12
5238*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m12
5239*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m2, m12
5240*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m3, m12
5241*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_12bpc).write_16x4_start
5242*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_10bpc).write_16x4_zero
5243*c0909341SAndroid Build Coastguard Worker    jmp m(idct_16x16_internal_10bpc).write_16x16_2
5244*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; .pass2_main
;   In:  m0-m7 = eight 32-bit vectors; eobd; r6 = stack pointer used by the
;        slow path; cq = scratch for intermediate vectors.
;   Out: m0-m15 = sixteen 32-bit vectors, each already shifted right by 4
;        after adding the rounding value held in m11.
;   r6 is lowered by 32*8 on the slow path and left untouched on the fast one.
5245*c0909341SAndroid Build Coastguard Worker.pass2_main:
5246*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_12bpc).transpose_8x8
; Even-numbered transposed vectors wait in cq+32*0..3; the odd-numbered ones
; are clamped to the 18-bit clip bounds and become the inputs of main_oddhalf.
5247*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 0], m0
5248*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 1], m2
5249*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 2], m4
5250*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 3], m6
5251*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [clip_18b_min]
5252*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_18b_max]
5253*c0909341SAndroid Build Coastguard Worker    pmaxsd               m0, m12, m1
5254*c0909341SAndroid Build Coastguard Worker    pmaxsd               m1, m12, m3
5255*c0909341SAndroid Build Coastguard Worker    pmaxsd               m2, m12, m5
5256*c0909341SAndroid Build Coastguard Worker    pmaxsd               m3, m12, m7
5257*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m0, m1, m2, m3
; A non-negative eobd selects the slow path, which also reads eight vectors
; from the stack; otherwise m4-m7 are simply zeroed.
; NOTE(review): presumably the first pass subtracted a threshold from eobd (as
; the identity kernel does with "sub eobd, 36") -- that code is not visible.
5258*c0909341SAndroid Build Coastguard Worker    test               eobd, eobd
5259*c0909341SAndroid Build Coastguard Worker    jge .pass2_slow
5260*c0909341SAndroid Build Coastguard Worker    pxor                 m4, m4
5261*c0909341SAndroid Build Coastguard Worker    REPX       {mova x, m4}, m5, m6, m7
5262*c0909341SAndroid Build Coastguard Worker    jmp .pass2_fast
5263*c0909341SAndroid Build Coastguard Worker.pass2_slow:
5264*c0909341SAndroid Build Coastguard Worker    sub                  r6, 32*8
5265*c0909341SAndroid Build Coastguard Worker    mova                 m8, [r6-32*4]
5266*c0909341SAndroid Build Coastguard Worker    mova                 m4, [r6-32*3]
5267*c0909341SAndroid Build Coastguard Worker    mova                m10, [r6-32*2]
5268*c0909341SAndroid Build Coastguard Worker    mova                 m5, [r6-32*1]
5269*c0909341SAndroid Build Coastguard Worker    mova                m12, [r6+32*0]
5270*c0909341SAndroid Build Coastguard Worker    mova                 m6, [r6+32*1]
5271*c0909341SAndroid Build Coastguard Worker    mova                m14, [r6+32*2]
5272*c0909341SAndroid Build Coastguard Worker    mova                 m7, [r6+32*3]
5273*c0909341SAndroid Build Coastguard Worker    TRANSPOSE_8X8_DWORD 8, 4, 10, 5, 12, 6, 14, 7, 9, 11, 13, 15
; m8/m10/m12/m14 wait in cq+32*4..7 for .pass2_slow2; m4-m7 are used now.
5274*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 4], m8
5275*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 5], m10
5276*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 6], m12
5277*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 7], m14
; m12/m13 were overwritten above (m12 by a load, m13 as a transpose operand),
; so the clip bounds are loaded again.
5278*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [clip_18b_min]
5279*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_18b_max]
5280*c0909341SAndroid Build Coastguard Worker    REPX    {pmaxsd x, m12}, m4, m5, m6, m7
5281*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m4, m5, m6, m7
5282*c0909341SAndroid Build Coastguard Worker.pass2_fast:
5283*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m11, [pd_2048]
5284*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m14, [pd_2896]
5285*c0909341SAndroid Build Coastguard Worker    call m(idct_8x16_internal_10bpc).main_oddhalf
; Fetch and clamp the parked even-numbered vectors; the code relies on m12/m13
; still holding the clip bounds after main_oddhalf.
5286*c0909341SAndroid Build Coastguard Worker    pmaxsd               m0, m12, [cq+32* 0]
5287*c0909341SAndroid Build Coastguard Worker    pmaxsd               m1, m12, [cq+32* 1]
5288*c0909341SAndroid Build Coastguard Worker    pmaxsd               m2, m12, [cq+32* 2]
5289*c0909341SAndroid Build Coastguard Worker    pmaxsd               m3, m12, [cq+32* 3]
5290*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m0, m1, m2, m3
5291*c0909341SAndroid Build Coastguard Worker    test               eobd, eobd
5292*c0909341SAndroid Build Coastguard Worker    jge .pass2_slow2
5293*c0909341SAndroid Build Coastguard Worker    pxor                 m4, m4
5294*c0909341SAndroid Build Coastguard Worker    REPX       {mova x, m4}, m5, m6, m7
5295*c0909341SAndroid Build Coastguard Worker    jmp .pass2_fast2
5296*c0909341SAndroid Build Coastguard Worker.pass2_slow2:
5297*c0909341SAndroid Build Coastguard Worker    pmaxsd               m4, m12, [cq+32* 4]
5298*c0909341SAndroid Build Coastguard Worker    pmaxsd               m5, m12, [cq+32* 5]
5299*c0909341SAndroid Build Coastguard Worker    pmaxsd               m6, m12, [cq+32* 6]
5300*c0909341SAndroid Build Coastguard Worker    pmaxsd               m7, m12, [cq+32* 7]
5301*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m4, m5, m6, m7
5302*c0909341SAndroid Build Coastguard Worker.pass2_fast2:
5303*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).main
5304*c0909341SAndroid Build Coastguard Worker    call m(idct_8x16_internal_10bpc).main_evenhalf
; Turn the pd_2048 constant into the rounding term for the final ">> 4".
5305*c0909341SAndroid Build Coastguard Worker    psrad               m11, 8  ; pd_8
5306*c0909341SAndroid Build Coastguard Worker    REPX    {paddd  x, m11}, m0, m1, m2, m3, m4, m5, m6, m7
5307*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_10bpc).pass1_rotations
5308*c0909341SAndroid Build Coastguard Worker    REPX       {psrad x, 4}, m0, m1, m2,  m3,  m4,  m5,  m6,  m7, \
5309*c0909341SAndroid Build Coastguard Worker                             m8, m9, m10, m11, m12, m13, m14, m15
5310*c0909341SAndroid Build Coastguard Worker    ret
5311*c0909341SAndroid Build Coastguard Worker
; Instantiations for the 12 bpc transform pairs whose first pass is the ADST
; kernel below.
; NOTE(review): INV_TXFM_16X16_FN is defined outside this chunk; the 3rd
; argument (0) is presumably an eob-related offset and the 4th (12) the bit
; depth -- confirm against the macro definition.
5312*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X16_FN adst, dct,      0, 12
5313*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X16_FN adst, adst,     0, 12
5314*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X16_FN adst, flipadst, 0, 12
5315*c0909341SAndroid Build Coastguard Worker
; iadst_16x16_internal_12bpc
;   Entry: loads clip_20b_min/clip_20b_max into m13/m14 and reuses the 10 bpc
;   first pass (presumably that code reads m13/m14 as its clamp bounds).
;   .pass2:       two slices, each = .pass2_part1/.pass2_part2 followed by the
;                 shared pass1_rotations helper.
;   .pass2_part3: final shifts for the second slice; also the jump target of
;                 iflipadst_16x16_internal_12bpc.pass2.
;   .end:         packs to 16 bit, merges with the first slice stored at r5
;                 and writes the block; also the jump target of
;                 idct_16x16_internal_12bpc.pass2.
;   cglobal arguments follow the x86inc convention: 0 auto-loaded arguments,
;   7 GPRs, 16 vector registers, 32*24 bytes of stack.
5316*c0909341SAndroid Build Coastguard Workercglobal iadst_16x16_internal_12bpc, 0, 7, 16, 32*24, dst, stride, c, eob, tx2
5317*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_20b_min]
5318*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m14, [clip_20b_max]
5319*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_16x16_internal_10bpc).pass1
5320*c0909341SAndroid Build Coastguard Worker.pass2:
5321*c0909341SAndroid Build Coastguard Worker    call .pass2_part1
5322*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x8_internal_10bpc).pass1_rotations
5323*c0909341SAndroid Build Coastguard Worker    call .pass2_part2
5324*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x8_internal_10bpc).pass1_rotations
; Same pair of shifts as at the top of .pass2_part2: 4 bits for m0-m3 and
; m12-m15, 15 bits for m4-m11.
; NOTE(review): the differing amounts presumably match the pd_8 and
; pd_17407/pd_17408 constants prepared at the end of .pass2_main -- the
; rotations helper that consumes them is outside this chunk.
5325*c0909341SAndroid Build Coastguard Worker.pass2_part3:
5326*c0909341SAndroid Build Coastguard Worker    REPX      {psrad x, 4 }, m0, m1, m2, m3, m12, m13, m14, m15
5327*c0909341SAndroid Build Coastguard Worker    REPX      {psrad x, 15}, m4, m5, m6, m7, m8,  m9,  m10, m11
5328*c0909341SAndroid Build Coastguard Worker.end:
; Narrow the second slice to 16 bit. Pairs are packed from the top down so
; that each destination register has already been consumed as a source; the
; results end up in m6, m7 and m10-m15.
5329*c0909341SAndroid Build Coastguard Worker    packssdw            m15, m14
5330*c0909341SAndroid Build Coastguard Worker    packssdw            m14, m13, m12
5331*c0909341SAndroid Build Coastguard Worker    packssdw            m13, m11, m10
5332*c0909341SAndroid Build Coastguard Worker    packssdw            m12,  m9,  m8
5333*c0909341SAndroid Build Coastguard Worker    packssdw            m11,  m7,  m6
5334*c0909341SAndroid Build Coastguard Worker    packssdw            m10,  m5,  m4
5335*c0909341SAndroid Build Coastguard Worker    packssdw             m7,  m3,  m2
5336*c0909341SAndroid Build Coastguard Worker    packssdw             m6,  m1,  m0
; Four groups follow, each producing m0-m3 for one 16x4 write. vpblendd with
; mask 0x33 takes dwords 0,1,4,5 from the first slice in memory at r5 and the
; remaining dwords from the register; mask 0xcc is the complementary choice.
; The vpermq that follows reorders the quadwords of the blended vector.
5337*c0909341SAndroid Build Coastguard Worker    vpblendd             m0, m6, [r5-32*4], 0x33
5338*c0909341SAndroid Build Coastguard Worker    vpblendd             m1, m6, [r5-32*4], 0xcc
5339*c0909341SAndroid Build Coastguard Worker    vpblendd             m2, m7, [r5-32*3], 0x33
5340*c0909341SAndroid Build Coastguard Worker    vpblendd             m3, m7, [r5-32*3], 0xcc
5341*c0909341SAndroid Build Coastguard Worker    vpermq               m0, m0, q3120
5342*c0909341SAndroid Build Coastguard Worker    vpermq               m1, m1, q2031
5343*c0909341SAndroid Build Coastguard Worker    vpermq               m2, m2, q3120
5344*c0909341SAndroid Build Coastguard Worker    vpermq               m3, m3, q2031
; Only the first group calls write_16x4_start before write_16x4_zero.
; NOTE(review): both helpers are outside this chunk; presumably _start sets up
; state that the later write_16x4_zero calls reuse -- confirm.
5345*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_12bpc).write_16x4_start
5346*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_10bpc).write_16x4_zero
5347*c0909341SAndroid Build Coastguard Worker    vpblendd             m0, m10, [r5-32*2], 0x33
5348*c0909341SAndroid Build Coastguard Worker    vpblendd             m1, m10, [r5-32*2], 0xcc
5349*c0909341SAndroid Build Coastguard Worker    vpblendd             m2, m11, [r5-32*1], 0x33
5350*c0909341SAndroid Build Coastguard Worker    vpblendd             m3, m11, [r5-32*1], 0xcc
5351*c0909341SAndroid Build Coastguard Worker    vpermq               m0, m0, q3120
5352*c0909341SAndroid Build Coastguard Worker    vpermq               m1, m1, q2031
5353*c0909341SAndroid Build Coastguard Worker    vpermq               m2, m2, q3120
5354*c0909341SAndroid Build Coastguard Worker    vpermq               m3, m3, q2031
5355*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_10bpc).write_16x4_zero
5356*c0909341SAndroid Build Coastguard Worker    vpblendd             m0, m12, [r5+32*0], 0x33
5357*c0909341SAndroid Build Coastguard Worker    vpblendd             m1, m12, [r5+32*0], 0xcc
5358*c0909341SAndroid Build Coastguard Worker    vpblendd             m2, m13, [r5+32*1], 0x33
5359*c0909341SAndroid Build Coastguard Worker    vpblendd             m3, m13, [r5+32*1], 0xcc
5360*c0909341SAndroid Build Coastguard Worker    vpermq               m0, m0, q3120
5361*c0909341SAndroid Build Coastguard Worker    vpermq               m1, m1, q2031
5362*c0909341SAndroid Build Coastguard Worker    vpermq               m2, m2, q3120
5363*c0909341SAndroid Build Coastguard Worker    vpermq               m3, m3, q2031
5364*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_10bpc).write_16x4_zero
5365*c0909341SAndroid Build Coastguard Worker    vpblendd             m0, m14, [r5+32*2], 0x33
5366*c0909341SAndroid Build Coastguard Worker    vpblendd             m1, m14, [r5+32*2], 0xcc
5367*c0909341SAndroid Build Coastguard Worker    vpblendd             m2, m15, [r5+32*3], 0x33
5368*c0909341SAndroid Build Coastguard Worker    vpblendd             m3, m15, [r5+32*3], 0xcc
5369*c0909341SAndroid Build Coastguard Worker    vpermq               m0, m0, q3120
5370*c0909341SAndroid Build Coastguard Worker    vpermq               m1, m1, q2031
5371*c0909341SAndroid Build Coastguard Worker    vpermq               m2, m2, q3120
5372*c0909341SAndroid Build Coastguard Worker    vpermq               m3, m3, q2031
5373*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_10bpc).write_16x4_zero
5374*c0909341SAndroid Build Coastguard Worker    RET
5375*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; .pass2_part1: parks m8-m15 in cq+32*8..15 (they are reloaded as the second
; slice by .pass2_part2) and falls through into .pass2_main for m0-m7.
5376*c0909341SAndroid Build Coastguard Worker.pass2_part1:
5377*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 8], m8
5378*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 9], m9
5379*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*10], m10
5380*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*11], m11
5381*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*12], m12
5382*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*13], m13
5383*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*14], m14
5384*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*15], m15
; .pass2_main
;   In:  m0-m7 = eight 32-bit vectors; eobd; r6 = stack pointer used by the
;        slow path; cq = scratch for intermediate vectors.
;   Out: whatever main_part2 leaves in the data registers, plus
;        m13 = pd_17407, m14 = pd_17408, m15 = pd_8 (see the tail below).
;   r6 is lowered by 32*8 on the slow path and left untouched on the fast one.
5385*c0909341SAndroid Build Coastguard Worker.pass2_main:
5386*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_12bpc).transpose_8x8
; Transposed vectors 0, 3, 4 and 7 wait in cq+32*0..3; vectors 2, 6, 5 and 1
; are clamped to the 18-bit clip bounds into m0, m2, m5 and m7 for main_part1.
5387*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 0], m0
5388*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 1], m3
5389*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 2], m4
5390*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 3], m7
5391*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_18b_min]
5392*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m14, [clip_18b_max]
5393*c0909341SAndroid Build Coastguard Worker    pmaxsd               m0, m13, m2
5394*c0909341SAndroid Build Coastguard Worker    pmaxsd               m2, m13, m6
5395*c0909341SAndroid Build Coastguard Worker    pmaxsd               m5, m13, m5
5396*c0909341SAndroid Build Coastguard Worker    pmaxsd               m7, m13, m1
5397*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m14}, m0, m2, m5, m7
; A non-negative eobd selects the slow path, which also reads eight vectors
; from the stack; otherwise m1, m3, m4 and m6 are simply zeroed.
; NOTE(review): presumably the first pass subtracted a threshold from eobd --
; that code is not visible in this chunk.
5398*c0909341SAndroid Build Coastguard Worker    test               eobd, eobd
5399*c0909341SAndroid Build Coastguard Worker    jge .pass2_slow
5400*c0909341SAndroid Build Coastguard Worker    pxor                 m1, m1
5401*c0909341SAndroid Build Coastguard Worker    REPX       {mova x, m1}, m3, m4, m6
5402*c0909341SAndroid Build Coastguard Worker    jmp .pass2_fast
5403*c0909341SAndroid Build Coastguard Worker.pass2_slow:
5404*c0909341SAndroid Build Coastguard Worker    sub                  r6, 32*8
5405*c0909341SAndroid Build Coastguard Worker    mova                 m8, [r6-32*4]
5406*c0909341SAndroid Build Coastguard Worker    mova                 m3, [r6-32*3]
5407*c0909341SAndroid Build Coastguard Worker    mova                 m4, [r6-32*2]
5408*c0909341SAndroid Build Coastguard Worker    mova                m11, [r6-32*1]
5409*c0909341SAndroid Build Coastguard Worker    mova                m12, [r6+32*0]
5410*c0909341SAndroid Build Coastguard Worker    mova                 m1, [r6+32*1]
5411*c0909341SAndroid Build Coastguard Worker    mova                 m6, [r6+32*2]
5412*c0909341SAndroid Build Coastguard Worker    mova                m15, [r6+32*3]
5413*c0909341SAndroid Build Coastguard Worker    TRANSPOSE_8X8_DWORD 8, 3, 4, 11, 12, 1, 6, 15, 13, 9, 10, 14
; m8/m11/m12/m15 wait in cq+32*4..7 for .pass2_slow2; m1, m3, m4, m6 are used
; now.
5414*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 4], m8
5415*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 5], m11
5416*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 6], m12
5417*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 7], m15
; m13/m14 were passed to the transpose macro above, so the clip bounds are
; loaded again.
5418*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_18b_min]
5419*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m14, [clip_18b_max]
5420*c0909341SAndroid Build Coastguard Worker    REPX    {pmaxsd x, m13}, m1, m3, m4, m6
5421*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m14}, m1, m3, m4, m6
5422*c0909341SAndroid Build Coastguard Worker.pass2_fast:
5423*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [pd_2048]
5424*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m15, [pd_2896]
5425*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x8_internal_10bpc).main_part1
; Fetch and clamp the parked vectors; the code relies on m13/m14 still holding
; the clip bounds after main_part1.
5426*c0909341SAndroid Build Coastguard Worker    pmaxsd               m0, m13, [cq+32* 0] ;  0
5427*c0909341SAndroid Build Coastguard Worker    pmaxsd               m7, m13, [cq+32* 1] ;  3
5428*c0909341SAndroid Build Coastguard Worker    pmaxsd               m2, m13, [cq+32* 2] ;  4
5429*c0909341SAndroid Build Coastguard Worker    pmaxsd               m5, m13, [cq+32* 3] ;  7
5430*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m14}, m0, m2, m5, m7
5431*c0909341SAndroid Build Coastguard Worker    test               eobd, eobd
5432*c0909341SAndroid Build Coastguard Worker    jge .pass2_slow2
5433*c0909341SAndroid Build Coastguard Worker    pxor                 m1, m1
5434*c0909341SAndroid Build Coastguard Worker    REPX       {mova x, m1}, m3, m4, m6
5435*c0909341SAndroid Build Coastguard Worker    jmp .pass2_fast2
5436*c0909341SAndroid Build Coastguard Worker.pass2_slow2:
5437*c0909341SAndroid Build Coastguard Worker    pmaxsd               m4, m13, [cq+32* 4] ;  8
5438*c0909341SAndroid Build Coastguard Worker    pmaxsd               m3, m13, [cq+32* 5] ; 11
5439*c0909341SAndroid Build Coastguard Worker    pmaxsd               m6, m13, [cq+32* 6] ; 12
5440*c0909341SAndroid Build Coastguard Worker    pmaxsd               m1, m13, [cq+32* 7] ; 15
5441*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m14}, m1, m3, m4, m6
5442*c0909341SAndroid Build Coastguard Worker.pass2_fast2:
5443*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x8_internal_10bpc).main_part2
; Derive three constants from one load plus the pd_2896 value that the
; existing comments imply is still in m15: 2896 >> 11 = 1, 17408 - 1 = 17407,
; 1 << 3 = 8.
; NOTE(review): presumably these are the rounding terms used by the
; pass1_rotations helper that every caller invokes next -- confirm there.
5444*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m14, [pd_17408]
5445*c0909341SAndroid Build Coastguard Worker    psrld               m15, 11              ; pd_1
5446*c0909341SAndroid Build Coastguard Worker    psubd               m13, m14, m15        ; pd_17407
5447*c0909341SAndroid Build Coastguard Worker    pslld               m15, 3               ; pd_8
5448*c0909341SAndroid Build Coastguard Worker    ret
5449*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; .pass2_part2: finishes the first slice (shifts, pack to 16 bit, store at
; r6-32*4 .. r6+32*3), reloads the parked m8-m15 as m0-m7, records the store
; location in r5 for .end, moves r6 on by 32*16 and tail-jumps into
; .pass2_main, whose ret therefore returns to the caller of .pass2_part2.
5450*c0909341SAndroid Build Coastguard Worker.pass2_part2:
5451*c0909341SAndroid Build Coastguard Worker    REPX      {psrad x, 4 }, m0, m1, m2, m3, m12, m13, m14, m15
5452*c0909341SAndroid Build Coastguard Worker    REPX      {psrad x, 15}, m4, m5, m6, m7, m8,  m9,  m10, m11
5453*c0909341SAndroid Build Coastguard Worker    packssdw             m0,  m1
5454*c0909341SAndroid Build Coastguard Worker    packssdw             m1,  m2,  m3
5455*c0909341SAndroid Build Coastguard Worker    packssdw             m2,  m4,  m5
5456*c0909341SAndroid Build Coastguard Worker    packssdw             m3,  m6,  m7
5457*c0909341SAndroid Build Coastguard Worker    packssdw             m4,  m8,  m9
5458*c0909341SAndroid Build Coastguard Worker    packssdw             m5, m10, m11
5459*c0909341SAndroid Build Coastguard Worker    packssdw             m6, m12, m13
5460*c0909341SAndroid Build Coastguard Worker    packssdw             m7, m14, m15
5461*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*4], m0
5462*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*3], m1
5463*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*2], m2
5464*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*1], m3
5465*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*0], m4
5466*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*1], m5
5467*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*2], m6
5468*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*3], m7
5469*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+32* 8]
5470*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+32* 9]
5471*c0909341SAndroid Build Coastguard Worker    mova                 m2, [cq+32*10]
5472*c0909341SAndroid Build Coastguard Worker    mova                 m3, [cq+32*11]
5473*c0909341SAndroid Build Coastguard Worker    mova                 m4, [cq+32*12]
5474*c0909341SAndroid Build Coastguard Worker    mova                 m5, [cq+32*13]
5475*c0909341SAndroid Build Coastguard Worker    mova                 m6, [cq+32*14]
5476*c0909341SAndroid Build Coastguard Worker    mova                 m7, [cq+32*15]
5477*c0909341SAndroid Build Coastguard Worker    mov                  r5, r6
5478*c0909341SAndroid Build Coastguard Worker    add                  r6, 32*16
5479*c0909341SAndroid Build Coastguard Worker    jmp .pass2_main
5480*c0909341SAndroid Build Coastguard Worker
; Instantiations for the 12 bpc transform pairs whose first pass is the
; flipped-ADST kernel below.
; NOTE(review): INV_TXFM_16X16_FN is defined outside this chunk; the 3rd
; argument (0) is presumably an eob-related offset and the 4th (12) the bit
; depth -- confirm against the macro definition.
5481*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X16_FN flipadst, dct,      0, 12
5482*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X16_FN flipadst, adst,     0, 12
5483*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X16_FN flipadst, flipadst, 0, 12
5484*c0909341SAndroid Build Coastguard Worker
; iflipadst_16x16_internal_12bpc
;   Entry: loads clip_20b_min/clip_20b_max into m13/m14 and reuses the 10 bpc
;   flipadst first pass.
;   .pass2: the same call sequence as iadst_16x16_internal_12bpc.pass2, reusing
;   its .pass2_part1/.pass2_part2/.pass2_part3 code; the only difference is
;   that the iflipadst pass1_rotations helper is called in place of the iadst
;   one.
5485*c0909341SAndroid Build Coastguard Workercglobal iflipadst_16x16_internal_12bpc, 0, 7, 16, 32*24, dst, stride, c, eob, tx2
5486*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_20b_min]
5487*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m14, [clip_20b_max]
5488*c0909341SAndroid Build Coastguard Worker    jmp m(iflipadst_16x16_internal_10bpc).pass1
5489*c0909341SAndroid Build Coastguard Worker.pass2:
5490*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x16_internal_12bpc).pass2_part1
5491*c0909341SAndroid Build Coastguard Worker    call m(iflipadst_16x8_internal_10bpc).pass1_rotations
5492*c0909341SAndroid Build Coastguard Worker    call m(iadst_16x16_internal_12bpc).pass2_part2
5493*c0909341SAndroid Build Coastguard Worker    call m(iflipadst_16x8_internal_10bpc).pass1_rotations
; Tail-jump: the final shifts, packing and output are shared with the iadst
; routine, which also executes the RET.
5494*c0909341SAndroid Build Coastguard Worker    jmp m(iadst_16x16_internal_12bpc).pass2_part3
5495*c0909341SAndroid Build Coastguard Worker
5496*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X16_FN identity, dct,    -92, 12
5497*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X16_FN identity, identity, 0, 12
5498*c0909341SAndroid Build Coastguard Worker
; IDTX16_12BPC src
;   In-place update of register m<src> on packed 32-bit lanes:
;       m<src> = (m<src> + ((m<src> * m7 + m15) >> 12)) >> 1
;   with arithmetic right shifts.
;   Expects the multiplier in m7 and the rounding addend in m15; clobbers m6.
;   <src> must therefore not be 6, 7 or 15.
;   NOTE(review): the routine defined right after this macro loads pd_1697
;   into m7 and pd_5120 into m15; presumably those are the values in effect
;   where the macro is expanded -- the expansion sites are outside this chunk.
5499*c0909341SAndroid Build Coastguard Worker%macro IDTX16_12BPC 1 ; src
5500*c0909341SAndroid Build Coastguard Worker    pmulld               m6, m7, m%1
5501*c0909341SAndroid Build Coastguard Worker    paddd                m6, m15
5502*c0909341SAndroid Build Coastguard Worker    psrad                m6, 12
5503*c0909341SAndroid Build Coastguard Worker    paddd                m6, m%1
5504*c0909341SAndroid Build Coastguard Worker    psrad               m%1, m6, 1
5505*c0909341SAndroid Build Coastguard Worker%endmacro
5506*c0909341SAndroid Build Coastguard Worker
;-----------------------------------------------------------------------
; iidentity_16x16_internal_12bpc -- 16x16 identity transform, 12 bpc.
;
; Pass 1 (runs from the top, leaves through `jmp tx2q`):
;   every 32-bit coefficient x is replaced by
;       (x + ((x * m7 + m15) >> 12)) >> 1
;   with m7 / m15 broadcast from pd_1697 / pd_5120.
;   * The first 32 bytes of each 64-byte row n (n = 0..15) end up in
;     m0..m15 (row n in m<n>).
;   * Unless eob < 36, the second 32 bytes of each row are scaled as well
;     and parked on the stack, row n at [rsp+32*(8+n)].
; Pass 2 (.pass2):
;   runs pass2_main plus a transpose on the register half and, when the
;   stack half was produced, on that half too; finishes via write_16x16.
;
; Register notes:
;   eobd  after `sub eobd, 36` only its sign is used: negative selects the
;         fast paths. On the full path r3 is reused as the loop counter and
;         ends at 0 (non-negative), which is what .pass2 tests for.
;         NOTE(review): this assumes r3 is the register behind the 4th
;         named argument `eob` (x86inc naming) -- confirm in x86inc.asm.
;   r6    cursor into the stack scratch area.
;   m6    scratch of IDTX16_12BPC; m7 / m15 hold the constants until the
;         last row, where they are consumed.
; NOTE(review): .pass2 assumes eobd and r6 survive the jump through tx2q and
; the helper calls; those routines are outside this chunk.
;-----------------------------------------------------------------------
5507*c0909341SAndroid Build Coastguard Workercglobal iidentity_16x16_internal_12bpc, 0, 7, 16, 32*24, dst, stride, c, eob, tx2
5508*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m7, [pd_1697]
5509*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m15, [pd_5120]
5510*c0909341SAndroid Build Coastguard Worker    lea                  r6, [rsp+32*4]
    ; eobd = eob - 36; negative => skip the second 32 bytes of every row
5511*c0909341SAndroid Build Coastguard Worker    sub                eobd, 36
5512*c0909341SAndroid Build Coastguard Worker    jl .fast
    ; r3 = negative byte offset, stepped by 32*8 up to 0: four iterations
5513*c0909341SAndroid Build Coastguard Worker    mov                  r3, -32*8*4
5514*c0909341SAndroid Build Coastguard Worker.righthalf:
    ; With r3 = -32*32 + 32*8*i the addresses below are cq + 32*(8*i + 1/3/5/7),
    ; i.e. the second 32 bytes of the 64-byte rows 4*i .. 4*i+3.
5515*c0909341SAndroid Build Coastguard Worker    mova                m10, [cq+r3+32*33]
5516*c0909341SAndroid Build Coastguard Worker    mova                m11, [cq+r3+32*35]
5517*c0909341SAndroid Build Coastguard Worker    mova                m12, [cq+r3+32*37]
5518*c0909341SAndroid Build Coastguard Worker    mova                m13, [cq+r3+32*39]
    ; r6 is advanced before the stores: first iteration writes rsp+32*8..11
5519*c0909341SAndroid Build Coastguard Worker    add                  r6, 32*4
    ; same arithmetic as IDTX16_12BPC, done on four registers side by side
5520*c0909341SAndroid Build Coastguard Worker    pmulld               m0, m7, m10
5521*c0909341SAndroid Build Coastguard Worker    pmulld               m1, m7, m11
5522*c0909341SAndroid Build Coastguard Worker    pmulld               m2, m7, m12
5523*c0909341SAndroid Build Coastguard Worker    pmulld               m3, m7, m13
5524*c0909341SAndroid Build Coastguard Worker    REPX     {paddd x, m15}, m0, m1, m2, m3
5525*c0909341SAndroid Build Coastguard Worker    REPX     {psrad x, 12 }, m0, m1, m2, m3
5526*c0909341SAndroid Build Coastguard Worker    paddd                m0, m10
5527*c0909341SAndroid Build Coastguard Worker    paddd                m1, m11
5528*c0909341SAndroid Build Coastguard Worker    paddd                m2, m12
5529*c0909341SAndroid Build Coastguard Worker    paddd                m3, m13
5530*c0909341SAndroid Build Coastguard Worker    REPX     {psrad x, 1  }, m0, m1, m2, m3
5531*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*0], m0
5532*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*1], m1
5533*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*2], m2
5534*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*3], m3
5535*c0909341SAndroid Build Coastguard Worker    add                  r3, 32*8
5536*c0909341SAndroid Build Coastguard Worker    jl .righthalf
    ; loop exit: r3 == 0 and r6 == rsp+32*20
5537*c0909341SAndroid Build Coastguard Worker.fast:
    ; First 32 bytes of rows 0..7. m6/m7 are busy (scratch / multiplier), so
    ; rows 6 and 7 are processed in m8/m9 for now.
5538*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+64* 0]
5539*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+64* 1]
5540*c0909341SAndroid Build Coastguard Worker    mova                 m2, [cq+64* 2]
5541*c0909341SAndroid Build Coastguard Worker    mova                 m3, [cq+64* 3]
5542*c0909341SAndroid Build Coastguard Worker    mova                 m4, [cq+64* 4]
5543*c0909341SAndroid Build Coastguard Worker    mova                 m5, [cq+64* 5]
5544*c0909341SAndroid Build Coastguard Worker    mova                 m8, [cq+64* 6]
5545*c0909341SAndroid Build Coastguard Worker    mova                 m9, [cq+64* 7]
5546*c0909341SAndroid Build Coastguard Worker    REPX   {IDTX16_12BPC x}, 0, 1, 2, 3, 4, 5, 8, 9
    ; park the scaled rows 6/7 in the coefficient slots of rows 0/1, which
    ; have already been loaded into m0/m1
5547*c0909341SAndroid Build Coastguard Worker    mova          [cq+64*0], m8
5548*c0909341SAndroid Build Coastguard Worker    mova          [cq+64*1], m9
5549*c0909341SAndroid Build Coastguard Worker    mova                 m8, [cq+64* 8]
5550*c0909341SAndroid Build Coastguard Worker    mova                 m9, [cq+64* 9]
5551*c0909341SAndroid Build Coastguard Worker    mova                m10, [cq+64*10]
5552*c0909341SAndroid Build Coastguard Worker    mova                m11, [cq+64*11]
5553*c0909341SAndroid Build Coastguard Worker    mova                m12, [cq+64*12]
5554*c0909341SAndroid Build Coastguard Worker    mova                m13, [cq+64*13]
5555*c0909341SAndroid Build Coastguard Worker    mova                m14, [cq+64*14]
5556*c0909341SAndroid Build Coastguard Worker    REPX   {IDTX16_12BPC x}, 8, 9, 10, 11, 12, 13, 14
    ; Row 15 is done by hand: m6 holds the input, the multiplier in m7 and the
    ; bias in m15 are consumed (no longer needed), the result lands in m15.
    ; The parked rows 6/7 are reloaded into m6/m7 in between.
5557*c0909341SAndroid Build Coastguard Worker    mova                 m6, [cq+64*15]
5558*c0909341SAndroid Build Coastguard Worker    pmulld               m7, m6
5559*c0909341SAndroid Build Coastguard Worker    paddd                m7, m15
5560*c0909341SAndroid Build Coastguard Worker    psrad                m7, 12
5561*c0909341SAndroid Build Coastguard Worker    paddd                m7, m6
5562*c0909341SAndroid Build Coastguard Worker    mova                 m6, [cq+64*0]
5563*c0909341SAndroid Build Coastguard Worker    psrad               m15, m7, 1
5564*c0909341SAndroid Build Coastguard Worker    mova                 m7, [cq+64*1]
    ; continue with the second pass selected by the caller
5565*c0909341SAndroid Build Coastguard Worker    jmp                tx2q
5566*c0909341SAndroid Build Coastguard Worker.pass2:
    ; NOTE(review): pass2_main / transpose_fast are defined elsewhere; from the
    ; code below they leave the half to be kept in m0..m7 -- confirm.
5567*c0909341SAndroid Build Coastguard Worker    call m(iidentity_8x16_internal_12bpc).pass2_main
5568*c0909341SAndroid Build Coastguard Worker    call m(idct_16x16_internal_10bpc).transpose_fast
    ; negative eobd: pass 1 took the fast path, no stack half to process
5569*c0909341SAndroid Build Coastguard Worker    test               eobd, eobd
5570*c0909341SAndroid Build Coastguard Worker    jl .pass2_fast
    ; save the first result (m0..m7) in the coefficient buffer
5571*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 8], m0
5572*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 9], m1
5573*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*10], m2
5574*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*11], m3
5575*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*12], m4
5576*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*13], m5
5577*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*14], m6
5578*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*15], m7
    ; reload the half stored by .righthalf; with r6 == rsp+32*20 these are
    ; the stack slots of rows 8..15 ...
5579*c0909341SAndroid Build Coastguard Worker    mova                 m8, [r6-32*4]
5580*c0909341SAndroid Build Coastguard Worker    mova                 m9, [r6-32*3]
5581*c0909341SAndroid Build Coastguard Worker    mova                m10, [r6-32*2]
5582*c0909341SAndroid Build Coastguard Worker    mova                m11, [r6-32*1]
5583*c0909341SAndroid Build Coastguard Worker    mova                m12, [r6+32*0]
5584*c0909341SAndroid Build Coastguard Worker    mova                m13, [r6+32*1]
5585*c0909341SAndroid Build Coastguard Worker    mova                m14, [r6+32*2]
5586*c0909341SAndroid Build Coastguard Worker    mova                m15, [r6+32*3]
    ; ... and, 8 slots lower, those of rows 0..7
5587*c0909341SAndroid Build Coastguard Worker    sub                  r6, 32*8
5588*c0909341SAndroid Build Coastguard Worker    mova                 m0, [r6-32*4]
5589*c0909341SAndroid Build Coastguard Worker    mova                 m1, [r6-32*3]
5590*c0909341SAndroid Build Coastguard Worker    mova                 m2, [r6-32*2]
5591*c0909341SAndroid Build Coastguard Worker    mova                 m3, [r6-32*1]
5592*c0909341SAndroid Build Coastguard Worker    mova                 m4, [r6+32*0]
5593*c0909341SAndroid Build Coastguard Worker    mova                 m5, [r6+32*1]
5594*c0909341SAndroid Build Coastguard Worker    mova                 m6, [r6+32*2]
5595*c0909341SAndroid Build Coastguard Worker    mova                 m7, [r6+32*3]
5596*c0909341SAndroid Build Coastguard Worker    call m(iidentity_8x16_internal_12bpc).pass2_main
5597*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_10bpc).transpose2
    ; second result goes to m8..m15, first result is restored into m0..m7
5598*c0909341SAndroid Build Coastguard Worker    mova                 m8, m0
5599*c0909341SAndroid Build Coastguard Worker    mova                 m9, m1
5600*c0909341SAndroid Build Coastguard Worker    mova                m10, m2
5601*c0909341SAndroid Build Coastguard Worker    mova                m11, m3
5602*c0909341SAndroid Build Coastguard Worker    mova                m12, m4
5603*c0909341SAndroid Build Coastguard Worker    mova                m13, m5
5604*c0909341SAndroid Build Coastguard Worker    mova                m14, m6
5605*c0909341SAndroid Build Coastguard Worker    mova                m15, m7
5606*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+32* 8]
5607*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+32* 9]
5608*c0909341SAndroid Build Coastguard Worker    mova                 m2, [cq+32*10]
5609*c0909341SAndroid Build Coastguard Worker    mova                 m3, [cq+32*11]
5610*c0909341SAndroid Build Coastguard Worker    mova                 m4, [cq+32*12]
5611*c0909341SAndroid Build Coastguard Worker    mova                 m5, [cq+32*13]
5612*c0909341SAndroid Build Coastguard Worker    mova                 m6, [cq+32*14]
5613*c0909341SAndroid Build Coastguard Worker    mova                 m7, [cq+32*15]
5614*c0909341SAndroid Build Coastguard Worker.pass2_fast:
    ; shared output routine of the 12 bpc 16x16 transforms (defined elsewhere)
5615*c0909341SAndroid Build Coastguard Worker    call m(idct_16x16_internal_12bpc).write_16x16
5616*c0909341SAndroid Build Coastguard Worker    RET
5617*c0909341SAndroid Build Coastguard Worker
; IDCT32_END in/out1, out2, tmp1, tmp2, tmp3, shift [, pack = 1]
;   Final combining step of a 32-point inverse DCT for one register.
;   With a = m<in/out1> on entry and n = the number passed as in/out1
;   (it doubles as the memory slot index):
;       b = [r6+32*(n-4)]    c = [r5+32*(3-n)]    d = [r4+32*(n-4)]
;       s = clamp(a + b, m12, m13) + m11
;       t = clamp(a - b, m12, m13) + m11
;       m<in/out1> = (s + c) >> shift        ("out0  + n")
;       m<tmp2>    = (s - c) >> shift        ("out31 - n")
;       m<out2>    = (t + d) >> shift        ("out15 - n")
;       m<tmp1>    = (t - d) >> shift        ("out16 + n")
;   If bit 0 of pack is set, the results are narrowed with signed saturation:
;       m<in/out1> = packssdw(m<in/out1>, m<tmp1>)
;       m<out2>    = packssdw(m<out2>,    m<tmp2>)
;   Expects: r4, r5, r6 = base pointers of the three memory operands,
;            m11 = rounding bias, m12 / m13 = clamp minimum / maximum.
;   Clobbers: m<tmp1>, m<tmp2>, m<tmp3>.
;   NOTE(review): which transform stage each pointer refers to is not visible
;   in this chunk; the "idct16 out" / "out" names come from the inline comments.
5618*c0909341SAndroid Build Coastguard Worker%macro IDCT32_END 6-7 1 ; in/out1, out2, tmp[1-3], shift, pack
5619*c0909341SAndroid Build Coastguard Worker    mova                m%4, [r6+32*(%1-4)]
5620*c0909341SAndroid Build Coastguard Worker    mova                m%2, [r5+32*(3-%1)]
5621*c0909341SAndroid Build Coastguard Worker    mova                m%5, [r4+32*(%1-4)]
5622*c0909341SAndroid Build Coastguard Worker    psubd               m%3, m%1, m%4 ; idct16 out15 - n
5623*c0909341SAndroid Build Coastguard Worker    paddd               m%1, m%4      ; idct16 out0  + n
    ; clamp the two intermediate values to the range given by m12 / m13
5624*c0909341SAndroid Build Coastguard Worker    pmaxsd              m%1, m12
5625*c0909341SAndroid Build Coastguard Worker    pmaxsd              m%3, m12
5626*c0909341SAndroid Build Coastguard Worker    pminsd              m%1, m13
5627*c0909341SAndroid Build Coastguard Worker    pminsd              m%3, m13
    ; add the rounding bias once here; it carries into all four outputs
5628*c0909341SAndroid Build Coastguard Worker    paddd               m%1, m11
5629*c0909341SAndroid Build Coastguard Worker    paddd               m%3, m11
5630*c0909341SAndroid Build Coastguard Worker    psubd               m%4, m%1, m%2 ; out31 - n
5631*c0909341SAndroid Build Coastguard Worker    paddd               m%1, m%2      ; out0  + n
5632*c0909341SAndroid Build Coastguard Worker    paddd               m%2, m%3, m%5 ; out15 - n
5633*c0909341SAndroid Build Coastguard Worker    psubd               m%3, m%5      ; out16 + n
5634*c0909341SAndroid Build Coastguard Worker    REPX      {psrad x, %6}, m%1, m%3, m%2, m%4
5635*c0909341SAndroid Build Coastguard Worker%if %7 & 1
5636*c0909341SAndroid Build Coastguard Worker    packssdw            m%1, m%3      ; out0  + n, out16 + n
5637*c0909341SAndroid Build Coastguard Worker    packssdw            m%2, m%4      ; out15 - n, out31 - n
5638*c0909341SAndroid Build Coastguard Worker%endif
5639*c0909341SAndroid Build Coastguard Worker%endmacro
5640*c0909341SAndroid Build Coastguard Worker
;-----------------------------------------------------------------------
; inv_txfm_add_dct_dct_8x32_10bpc(dst, stride, c, eob)
;
; eob == 0 takes the .dconly shortcut. Otherwise .pass1_main is called one
; to four times (eob thresholds 43, 107, 171). Each call reads one 32-byte
; wide slice of the coefficients (cq advances by 32) and returns four
; registers of packed 16-bit results; slices that are not computed are
; replaced by zeroed registers. The second pass is delegated to the 8 bpc
; routine (.main_fast for one or two slices, .main for three or four).
; .end then scales with pw_2048 (pmulhrsw) and outputs through eight
; write_8x4 calls.
;
;   r4   copy of the original cq, restored at .end
;   r6   scratch pointer: rsp+32*8 while collecting pass-1 results,
;        deint_shuf+128 for the 8 bpc routine
;   m11..m14  pass-1 constants (pd_2048, clip_18b_min, clip_18b_max,
;        idct32_shuf); some of them are reused for data before the 8 bpc call
; NOTE(review): the register/stack layout expected by
; m(inv_txfm_add_dct_dct_8x32_8bpc).main / .main_fast and by write_8x4 is
; defined outside this chunk; the "out*" labels are the existing comments.
;-----------------------------------------------------------------------
5641*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_dct_dct_8x32_10bpc, 4, 7, 0, dst, stride, c, eob
5642*c0909341SAndroid Build Coastguard Worker    test               eobd, eobd
5643*c0909341SAndroid Build Coastguard Worker    jz .dconly
    ; full path only: second prologue requesting 16 vector registers and
    ; 32*12 bytes of stack
5644*c0909341SAndroid Build Coastguard Worker    PROLOGUE              0, 7, 16, 32*12, dst, stride, c, eob
5645*c0909341SAndroid Build Coastguard Worker%undef cmp
5646*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m11, [pd_2048]
5647*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [clip_18b_min]
5648*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_18b_max]
5649*c0909341SAndroid Build Coastguard Worker    vbroadcasti128      m14, [idct32_shuf]
    ; .pass1_main advances cq; keep the original pointer in r4
5650*c0909341SAndroid Build Coastguard Worker    mov                  r4, cq
    ; slice 1: m0/m1 stay in registers, m2/m3 go to [rsp+32*0] / [rsp+32*1]
5651*c0909341SAndroid Build Coastguard Worker    call .pass1_main
5652*c0909341SAndroid Build Coastguard Worker    mova         [rsp+32*0], m2
5653*c0909341SAndroid Build Coastguard Worker    mova         [rsp+32*1], m3
5654*c0909341SAndroid Build Coastguard Worker    cmp                eobd, 43
5655*c0909341SAndroid Build Coastguard Worker    jge .eob43
    ; eob < 43: only slice 1 exists, zero everything slice 2 would provide
5656*c0909341SAndroid Build Coastguard Worker    pxor                 m4, m4
5657*c0909341SAndroid Build Coastguard Worker    REPX       {mova x, m4}, [rsp+32*2], m2, m3, m11
5658*c0909341SAndroid Build Coastguard Worker    jmp .pass1_end_fast
5659*c0909341SAndroid Build Coastguard Worker.eob43:
    ; stash slice 1's m0/m1 at rsp+32*4 / rsp+32*5 and compute slice 2
5660*c0909341SAndroid Build Coastguard Worker    lea                  r6, [rsp+32*8]
5661*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*4], m0
5662*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*3], m1
5663*c0909341SAndroid Build Coastguard Worker    call .pass1_main
5664*c0909341SAndroid Build Coastguard Worker    mova         [rsp+32*2], m2
5665*c0909341SAndroid Build Coastguard Worker    cmp                eobd, 107
5666*c0909341SAndroid Build Coastguard Worker    jge .eob107
    ; 43 <= eob < 107: two slices. Slice 2 moves to m2/m3/m11 (+[rsp+32*2]),
    ; slice 1 returns to m0/m1; m4 = 0 seeds the zero registers below.
5667*c0909341SAndroid Build Coastguard Worker    mova                m11, m3
5668*c0909341SAndroid Build Coastguard Worker    mova                 m2, m0
5669*c0909341SAndroid Build Coastguard Worker    mova                 m3, m1
5670*c0909341SAndroid Build Coastguard Worker    mova                 m0, [r6-32*4]
5671*c0909341SAndroid Build Coastguard Worker    mova                 m1, [r6-32*3]
5672*c0909341SAndroid Build Coastguard Worker    pxor                 m4, m4
5673*c0909341SAndroid Build Coastguard Worker.pass1_end_fast:
    ; m4 is zero on both ways in; m5..m7 are cleared as well
5674*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m10, [pw_2048]
5675*c0909341SAndroid Build Coastguard Worker    lea                  r6, [deint_shuf+128]
5676*c0909341SAndroid Build Coastguard Worker    REPX       {mova x, m4}, m5, m6, m7
5677*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_8x32_8bpc).main_fast
5678*c0909341SAndroid Build Coastguard Worker    jmp .end
5679*c0909341SAndroid Build Coastguard Worker.eob107:
    ; finish stashing slice 2, then compute slice 3
5680*c0909341SAndroid Build Coastguard Worker    mova         [rsp+32*3], m3
5681*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*2], m0
5682*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*1], m1
5683*c0909341SAndroid Build Coastguard Worker    call .pass1_main
5684*c0909341SAndroid Build Coastguard Worker    cmp                eobd, 171
5685*c0909341SAndroid Build Coastguard Worker    jge .eob171
    ; 107 <= eob < 171: three slices. Slice 3 goes to m4/m5/m12/m13 (same
    ; placement as in .eob171 below); the slice-4 registers are zeroed.
5686*c0909341SAndroid Build Coastguard Worker    pshufd              m12, m2, q1032
5687*c0909341SAndroid Build Coastguard Worker    pshufd              m13, m3, q1032
5688*c0909341SAndroid Build Coastguard Worker    mova                 m4, m0
5689*c0909341SAndroid Build Coastguard Worker    mova                 m5, m1
5690*c0909341SAndroid Build Coastguard Worker    pxor                 m6, m6
5691*c0909341SAndroid Build Coastguard Worker    REPX       {mova x, m6}, m7, m14, m15
5692*c0909341SAndroid Build Coastguard Worker    jmp .pass1_end
5693*c0909341SAndroid Build Coastguard Worker.eob171:
    ; eob >= 171: stash slice 3 and compute slice 4
5694*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*0], m0
5695*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*1], m1
5696*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*2], m2
5697*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*3], m3
5698*c0909341SAndroid Build Coastguard Worker    call .pass1_main
5699*c0909341SAndroid Build Coastguard Worker    pshufd              m12, [r6+32*2], q1032 ; out19 out17
5700*c0909341SAndroid Build Coastguard Worker    pshufd              m13, [r6+32*3], q1032 ; out23 out21
5701*c0909341SAndroid Build Coastguard Worker    mova                 m4, [r6+32*0]        ; out16 out18
5702*c0909341SAndroid Build Coastguard Worker    mova                 m5, [r6+32*1]        ; out20 out22
5703*c0909341SAndroid Build Coastguard Worker    pshufd              m14, m2, q1032        ; out27 out25
5704*c0909341SAndroid Build Coastguard Worker    pshufd              m15, m3, q1032        ; out31 out29
5705*c0909341SAndroid Build Coastguard Worker    mova                 m6, m0               ; out24 out26
5706*c0909341SAndroid Build Coastguard Worker    mova                 m7, m1               ; out28 out30
5707*c0909341SAndroid Build Coastguard Worker.pass1_end:
    ; reload the stashed slices 1 and 2 before r6 is repurposed
5708*c0909341SAndroid Build Coastguard Worker    mova                 m0, [r6-32*4]        ; out0  out2
5709*c0909341SAndroid Build Coastguard Worker    mova                 m1, [r6-32*3]        ; out4  out6
5710*c0909341SAndroid Build Coastguard Worker    mova                 m2, [r6-32*2]        ; out8  out10
5711*c0909341SAndroid Build Coastguard Worker    mova                 m3, [r6-32*1]        ; out12 out14
5712*c0909341SAndroid Build Coastguard Worker    lea                  r6, [deint_shuf+128]
5713*c0909341SAndroid Build Coastguard Worker    mova                m11, [rsp+32*3]       ; out13 out15
5714*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m10, [pw_2048]
5715*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_8x32_8bpc).main
5716*c0909341SAndroid Build Coastguard Worker.end: ; [rsp+0*32] = m12
    ; m12 is reloaded as the pmulhrsw factor, which is why the value that would
    ; be in m12 is read from [rsp+32*0] further down
5717*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [pw_2048]
5718*c0909341SAndroid Build Coastguard Worker    mov                  cq, r4
    ; spill m8..m11; they are consumed below as memory operands
5719*c0909341SAndroid Build Coastguard Worker    mova         [rsp+32*1], m8
5720*c0909341SAndroid Build Coastguard Worker    mova         [rsp+32*2], m9
5721*c0909341SAndroid Build Coastguard Worker    mova         [rsp+32*3], m10
5722*c0909341SAndroid Build Coastguard Worker    mova         [rsp+32*4], m11
    ; Eight identical output steps, one per register pair: permute the 64-bit
    ; quarters (q3120 / q2031), scale with pmulhrsw by m12, hand m0/m1 to the
    ; write helper. Pairs in order: m0/m1, m2/m3, m4/m5, m6/m7, m8/m9,
    ; m10/m11, m12(stack)/m13, m14/m15.
5723*c0909341SAndroid Build Coastguard Worker    vpermq               m0, m0, q3120
5724*c0909341SAndroid Build Coastguard Worker    vpermq               m1, m1, q2031
5725*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m12
5726*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m12
5727*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).write_8x4_start
5728*c0909341SAndroid Build Coastguard Worker    vpermq               m0, m2, q3120
5729*c0909341SAndroid Build Coastguard Worker    vpermq               m1, m3, q2031
5730*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m12
5731*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m12
5732*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).write_8x4
5733*c0909341SAndroid Build Coastguard Worker    vpermq               m0, m4, q3120
5734*c0909341SAndroid Build Coastguard Worker    vpermq               m1, m5, q2031
5735*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m12
5736*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m12
5737*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).write_8x4
5738*c0909341SAndroid Build Coastguard Worker    vpermq               m0, m6, q3120
5739*c0909341SAndroid Build Coastguard Worker    vpermq               m1, m7, q2031
5740*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m12
5741*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m12
5742*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).write_8x4
5743*c0909341SAndroid Build Coastguard Worker    vpermq               m0, [rsp+32*1], q3120
5744*c0909341SAndroid Build Coastguard Worker    vpermq               m1, [rsp+32*2], q2031
5745*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m12
5746*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m12
5747*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).write_8x4
5748*c0909341SAndroid Build Coastguard Worker    vpermq               m0, [rsp+32*3], q3120
5749*c0909341SAndroid Build Coastguard Worker    vpermq               m1, [rsp+32*4], q2031
5750*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m12
5751*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m12
5752*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).write_8x4
5753*c0909341SAndroid Build Coastguard Worker    vpermq               m0, [rsp+32*0], q3120
5754*c0909341SAndroid Build Coastguard Worker    vpermq               m1, m13, q2031
5755*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m12
5756*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m12
5757*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).write_8x4
5758*c0909341SAndroid Build Coastguard Worker    vpermq               m0, m14, q3120
5759*c0909341SAndroid Build Coastguard Worker    vpermq               m1, m15, q2031
5760*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m12
5761*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m12
5762*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).write_8x4
5763*c0909341SAndroid Build Coastguard Worker    RET
5764*c0909341SAndroid Build Coastguard Worker.dconly:
    ; eob == 0: r6d = ([cq] * 181 + 640) >> 10, the coefficient is cleared
    ; (eobd is known to be 0 here), then the shared 8x8 DC-only tail runs.
    ; NOTE(review): r3 is presumably the eob register (4th named argument),
    ; so `or r3d, 32` leaves 32 in it -- likely the row count expected by
    ; .dconly3; confirm against that routine.
5765*c0909341SAndroid Build Coastguard Worker    imul                r6d, [cq], 181
5766*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m2, [dconly_10bpc]
5767*c0909341SAndroid Build Coastguard Worker    mov                [cq], eobd ; 0
5768*c0909341SAndroid Build Coastguard Worker    or                  r3d, 32
5769*c0909341SAndroid Build Coastguard Worker    add                 r6d, 640
5770*c0909341SAndroid Build Coastguard Worker    sar                 r6d, 10
5771*c0909341SAndroid Build Coastguard Worker    jmp m(inv_txfm_add_dct_dct_8x8_10bpc).dconly3
; .pass1_main_part1
;   Loads eight registers from [cq+128*0] .. [cq+128*7], runs
;   m(idct_8x8_internal_10bpc).main on them and applies the final
;   add/subtract stage with a rounding bias of m11 >> 10 (pd_2 when
;   m11 = pd_2048, as set up by inv_txfm_add_dct_dct_8x32_10bpc) followed by
;   an arithmetic right shift by 2.
;   In:  cq = coefficient pointer, m11 = pd_2048
;   Out: m0..m7 = out0..out7 (32-bit lanes)
;   NOTE(review): depends on .main leaving its intermediate values in
;   m0, m3..m9 exactly as consumed below; that routine (and the registers it
;   clobbers) is outside this chunk.
5772*c0909341SAndroid Build Coastguard WorkerALIGN function_align
5773*c0909341SAndroid Build Coastguard Worker.pass1_main_part1:
5774*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+128*0]
5775*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+128*1]
5776*c0909341SAndroid Build Coastguard Worker    mova                 m2, [cq+128*2]
5777*c0909341SAndroid Build Coastguard Worker    mova                 m3, [cq+128*3]
5778*c0909341SAndroid Build Coastguard Worker    mova                 m4, [cq+128*4]
5779*c0909341SAndroid Build Coastguard Worker    mova                 m5, [cq+128*5]
5780*c0909341SAndroid Build Coastguard Worker    mova                 m6, [cq+128*6]
5781*c0909341SAndroid Build Coastguard Worker    mova                 m7, [cq+128*7]
5782*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).main
    ; the bias is added to one operand of each pair only, so it reaches both
    ; the sum and the difference exactly once
5783*c0909341SAndroid Build Coastguard Worker    psrld                m1, m11, 10 ; pd_2
5784*c0909341SAndroid Build Coastguard Worker    REPX      {paddd x, m1}, m0, m6, m5, m3
5785*c0909341SAndroid Build Coastguard Worker    paddd                m1, m6, m7  ; out1
5786*c0909341SAndroid Build Coastguard Worker    psubd                m6, m7      ; out6
5787*c0909341SAndroid Build Coastguard Worker    psubd                m7, m0, m9  ; out7
5788*c0909341SAndroid Build Coastguard Worker    paddd                m0, m9      ; out0
5789*c0909341SAndroid Build Coastguard Worker    paddd                m2, m5, m4  ; out2
5790*c0909341SAndroid Build Coastguard Worker    psubd                m5, m4      ; out5
5791*c0909341SAndroid Build Coastguard Worker    psubd                m4, m3, m8  ; out4
5792*c0909341SAndroid Build Coastguard Worker    paddd                m3, m8      ; out3
5793*c0909341SAndroid Build Coastguard Worker    REPX      {psrad x, 2 }, m0, m1, m2, m3, m4, m5, m6, m7
5794*c0909341SAndroid Build Coastguard Worker    ret
; .pass1_main
;   One first-pass step: runs .pass1_main_part1 on the current slice,
;   advances cq by 32 bytes, narrows the eight 32-bit result registers to
;   16 bits (signed saturation) and regroups them into four registers.
;   In:  cq, m11 (see .pass1_main_part1), m14 = pshufb control
;        (idct32_shuf in inv_txfm_add_dct_dct_8x32_10bpc)
;   Out: m0 = "0 2", m1 = "4 6", m2 = "1 3", m3 = "5 7"
;        (labels as in the inline comments); cq += 32
;   Clobbers: m4..m7 in addition to whatever part1 clobbers.
5795*c0909341SAndroid Build Coastguard WorkerALIGN function_align
5796*c0909341SAndroid Build Coastguard Worker.pass1_main:
5797*c0909341SAndroid Build Coastguard Worker    call .pass1_main_part1
    ; next call will read the following 32-byte slice
5798*c0909341SAndroid Build Coastguard Worker    add                  cq, 32
5799*c0909341SAndroid Build Coastguard Worker    packssdw             m0, m1
5800*c0909341SAndroid Build Coastguard Worker    packssdw             m2, m3
5801*c0909341SAndroid Build Coastguard Worker    packssdw             m4, m5
5802*c0909341SAndroid Build Coastguard Worker    packssdw             m6, m7
5803*c0909341SAndroid Build Coastguard Worker    pshufb               m0, m14
5804*c0909341SAndroid Build Coastguard Worker    pshufb               m2, m14
5805*c0909341SAndroid Build Coastguard Worker    pshufb               m4, m14
5806*c0909341SAndroid Build Coastguard Worker    pshufb               m6, m14
    ; interleave dwords of the four packed registers, then recombine the
    ; 128-bit halves into the output order
5807*c0909341SAndroid Build Coastguard Worker    punpckhdq            m3, m0, m2
5808*c0909341SAndroid Build Coastguard Worker    punpckldq            m0, m2
5809*c0909341SAndroid Build Coastguard Worker    punpckldq            m2, m4, m6
5810*c0909341SAndroid Build Coastguard Worker    punpckhdq            m4, m6
5811*c0909341SAndroid Build Coastguard Worker    vperm2i128           m1, m0, m2, 0x31 ; 4 6
5812*c0909341SAndroid Build Coastguard Worker    vinserti128          m0, xm2, 1       ; 0 2
5813*c0909341SAndroid Build Coastguard Worker    vinserti128          m2, m3, xm4, 1   ; 1 3
5814*c0909341SAndroid Build Coastguard Worker    vperm2i128           m3, m4, 0x31     ; 5 7
5815*c0909341SAndroid Build Coastguard Worker    ret
; .main_oddhalf_part1 and its entry variants (t16..t19 / t28..t31 group).
; All four entry points share the tail starting at .main_oddhalf_part1_fast2:
;   .main_oddhalf_part1_fast_rect2  m0..m3 = (m0..m3 + m11) >> 12, then falls
;                                   into _fast
;   .main_oddhalf_part1_fast        only m0..m3 are inputs ("lower half
;                                   zero"); each is multiplied by two
;                                   broadcast constants instead of a rotation
;   .main_oddhalf_part1_rect2       m0..m7 = (m0..m7 + m11) >> 12, then falls
;                                   into the full version
;   .main_oddhalf_part1             full version: four ITX_MULSUB_2D
;                                   rotations on m0..m7
; In:  m0..m7 (or m0..m3) = inputs, m11 = rounding bias for the >> 12 steps,
;      m12 / m13 = clamp minimum / maximum, r6 = output pointer
; Out: eight registers stored at [r6-32*4] .. [r6+32*3]
; Clobbers: m0..m10, m15.
; NOTE(review): ITX_MULSUB_2D is defined outside this chunk; the t* names
; below are the existing inline labels, not re-derived here.
5816*c0909341SAndroid Build Coastguard Worker.main_oddhalf_part1_fast_rect2:
5817*c0909341SAndroid Build Coastguard Worker    REPX     {paddd x, m11}, m0, m1, m2, m3
5818*c0909341SAndroid Build Coastguard Worker    REPX     {psrad x, 12 }, m0, m1, m2, m3
5819*c0909341SAndroid Build Coastguard Worker.main_oddhalf_part1_fast: ; lower half zero
    ; constants are loaded into the registers the full version would produce
    ; its second outputs in (m7, m6, m5, m4) and into temporaries
5820*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m7, [pd_4091]
5821*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m8, [pd_201]
5822*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m6, [pd_m1380]
5823*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m9, [pd_3857]
5824*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m5, [pd_3703]
5825*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m10, [pd_1751]
5826*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m4, [pd_m2751]
5827*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m15, [pd_3035]
    ; each input feeds two products; the first of each pair must be computed
    ; before the input register itself is overwritten by the second
5828*c0909341SAndroid Build Coastguard Worker    pmulld               m7, m0
5829*c0909341SAndroid Build Coastguard Worker    pmulld               m0, m8
5830*c0909341SAndroid Build Coastguard Worker    pmulld               m6, m1
5831*c0909341SAndroid Build Coastguard Worker    pmulld               m1, m9
5832*c0909341SAndroid Build Coastguard Worker    pmulld               m5, m2
5833*c0909341SAndroid Build Coastguard Worker    pmulld               m2, m10
5834*c0909341SAndroid Build Coastguard Worker    pmulld               m4, m3
5835*c0909341SAndroid Build Coastguard Worker    pmulld               m3, m15
5836*c0909341SAndroid Build Coastguard Worker    jmp .main_oddhalf_part1_fast2
5837*c0909341SAndroid Build Coastguard Worker.main_oddhalf_part1_rect2:
5838*c0909341SAndroid Build Coastguard Worker    REPX     {paddd x, m11}, m0, m1, m2, m3, m4, m5, m6, m7
5839*c0909341SAndroid Build Coastguard Worker    REPX     {psrad x, 12 }, m0, m1, m2, m3, m4, m5, m6, m7
5840*c0909341SAndroid Build Coastguard Worker.main_oddhalf_part1: ; in1, in7, in9, in15, in17, in23, in25, in31
5841*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         0, 7, 8, 9, 10, _,  201, 4091 ; t16a, t31a
5842*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         6, 1, 8, 9, 10, _, 3857, 1380 ; t19a, t28a
5843*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         2, 5, 8, 9, 10, _, 1751, 3703 ; t18a, t29a
5844*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         4, 3, 8, 9, 10, _, 3035, 2751 ; t17a, t30a
5845*c0909341SAndroid Build Coastguard Worker.main_oddhalf_part1_fast2:
    ; common tail: round and shift all eight products by 12 (both REPX lists
    ; name the same eight registers, only in a different order)
5846*c0909341SAndroid Build Coastguard Worker    REPX     {paddd x, m11}, m0, m7, m6, m1, m2, m5, m4, m3
5847*c0909341SAndroid Build Coastguard Worker    REPX     {psrad x, 12 }, m0, m4, m6, m2, m1, m5, m7, m3
5848*c0909341SAndroid Build Coastguard Worker    psubd                m8, m0, m4 ; t17
5849*c0909341SAndroid Build Coastguard Worker    paddd                m0, m4     ; t16
5850*c0909341SAndroid Build Coastguard Worker    psubd                m4, m6, m2 ; t18
5851*c0909341SAndroid Build Coastguard Worker    paddd                m6, m2     ; t19
5852*c0909341SAndroid Build Coastguard Worker    psubd                m2, m1, m5 ; t29
5853*c0909341SAndroid Build Coastguard Worker    paddd                m1, m5     ; t28
5854*c0909341SAndroid Build Coastguard Worker    psubd                m5, m7, m3 ; t30
5855*c0909341SAndroid Build Coastguard Worker    paddd                m7, m3     ; t31
    ; clamp every intermediate to [m12, m13] before the next multiply stage
5856*c0909341SAndroid Build Coastguard Worker    REPX    {pmaxsd x, m12}, m8, m5, m4, m2, m0, m6, m1, m7
5857*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m8, m5, m4, m2, m0, m6, m1, m7
5858*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m15, [pd_4017]
5859*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m10, [pd_799]
5860*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         5, 8, 3, 9, _, 11, 10, 15    ; t17a, t30a
5861*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         2, 4, 3, 9, _, 11, 10, 15, 2 ; t29a, t18a
5862*c0909341SAndroid Build Coastguard Worker    psubd                m3, m0, m6 ; t19a
5863*c0909341SAndroid Build Coastguard Worker    paddd                m0, m6     ; t16a
5864*c0909341SAndroid Build Coastguard Worker    psubd                m6, m7, m1 ; t28a
5865*c0909341SAndroid Build Coastguard Worker    paddd                m7, m1     ; t31a
5866*c0909341SAndroid Build Coastguard Worker    psubd                m1, m5, m4 ; t18
5867*c0909341SAndroid Build Coastguard Worker    paddd                m5, m4     ; t17
5868*c0909341SAndroid Build Coastguard Worker    psubd                m4, m8, m2 ; t29
5869*c0909341SAndroid Build Coastguard Worker    paddd                m8, m2     ; t30
5870*c0909341SAndroid Build Coastguard Worker    REPX    {pmaxsd x, m12}, m3, m6, m1, m4, m0, m7, m5, m8
5871*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m3, m6, m1, m4, m0, m7, m5, m8
5872*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m15, [pd_3784]
5873*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m10, [pd_1567]
5874*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         4, 1, 2, 9, _, 11, 10, 15 ; t18a, t29a
5875*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         6, 3, 2, 9, _, 11, 10, 15 ; t19,  t28
    ; store order following the labels above:
    ; t16a, t17, t18a, t19, t28, t29a, t30, t31a
5876*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*4], m0
5877*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*3], m5
5878*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*2], m4
5879*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*1], m6
5880*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*0], m3
5881*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*1], m1
5882*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*2], m8
5883*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*3], m7
5884*c0909341SAndroid Build Coastguard Worker    ret
; ---------------------------------------------------------------------------
; .main_oddhalf_part2_fast_rect2 / .main_oddhalf_part2_fast
; Reduced-input entry points of the "part2" odd-half step: only four inputs
; are supplied (see the "lower half zero" note on the second label).
;   in:       m0-m3 = inputs
;             m11   = rounding constant added before every >>12
;                     (pd_2048 in the 12bpc caller visible later in this file)
;   out:      m0-m7 = unrounded products, consumed by .main_oddhalf_part2_fast2
;   clobbers: m8, m9, m10, m15 (hold broadcast constants)
; The _rect2 entry first applies (x + m11) >> 12 to m0-m3 and then falls
; through into the plain entry.
; NOTE(review): presumably the caller has already multiplied the inputs by
; the rect2 scale factor before entering here - confirm at the call sites.
; ---------------------------------------------------------------------------
5885*c0909341SAndroid Build Coastguard Worker.main_oddhalf_part2_fast_rect2:
5886*c0909341SAndroid Build Coastguard Worker    REPX     {paddd x, m11}, m0, m1, m2, m3
5887*c0909341SAndroid Build Coastguard Worker    REPX     {psrad x, 12 }, m0, m1, m2, m3
5888*c0909341SAndroid Build Coastguard Worker.main_oddhalf_part2_fast: ; lower half zero
    ; Broadcast the eight multipliers. The same magnitudes appear as the
    ; constant pairs of the ITX_MULSUB_2D calls in .main_oddhalf_part2.
    ; NOTE(review): pd_m601 / pd_m2106 are presumably the negated constants;
    ; their definitions are outside this chunk.
5889*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m7, [pd_m601]
5890*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m8, [pd_4052]
5891*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m6, [pd_3973]
5892*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m9, [pd_995]
5893*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m5, [pd_m2106]
5894*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m10, [pd_3513]
5895*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m4, [pd_3290]
5896*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m15, [pd_2440]
    ; Each input yields two products. The first multiply of every pair writes
    ; to the constant's register so the input is still intact for the second:
    ;   m7 = m0*pd_m601   m0 = m0*pd_4052
    ;   m6 = m1*pd_3973   m1 = m1*pd_995
    ;   m5 = m2*pd_m2106  m2 = m2*pd_3513
    ;   m4 = m3*pd_3290   m3 = m3*pd_2440
5897*c0909341SAndroid Build Coastguard Worker    pmulld               m7, m0
5898*c0909341SAndroid Build Coastguard Worker    pmulld               m0, m8
5899*c0909341SAndroid Build Coastguard Worker    pmulld               m6, m1
5900*c0909341SAndroid Build Coastguard Worker    pmulld               m1, m9
5901*c0909341SAndroid Build Coastguard Worker    pmulld               m5, m2
5902*c0909341SAndroid Build Coastguard Worker    pmulld               m2, m10
5903*c0909341SAndroid Build Coastguard Worker    pmulld               m4, m3
5904*c0909341SAndroid Build Coastguard Worker    pmulld               m3, m15
    ; No rounding or shift here: the shared tail starts with +m11, >>12.
5905*c0909341SAndroid Build Coastguard Worker    jmp .main_oddhalf_part2_fast2
; ---------------------------------------------------------------------------
; .main_oddhalf_part2_rect2 / .main_oddhalf_part2
; Full-input entry points of the "part2" odd-half step.
;   in:  m0-m7 = inputs (see the in3 ... in29 list on the second label)
;        m11   = rounding constant used for the (x + m11) >> 12 pre-scale
;   out: m0-m7 = values handed to .main_oddhalf_part2_fast2 (fall-through)
; The _rect2 entry applies (x + m11) >> 12 to all eight inputs and then
; falls through into the plain entry.
; ITX_MULSUB_2D is defined outside this chunk; registers 8, 9 and 10 are
; passed in its third to fifth operands (presumably as scratch registers).
; NOTE(review): the "_" in the sixth operand presumably makes the macro skip
; its own rounding, which would match the paddd/psrad pair at the top of
; .main_oddhalf_part2_fast2 - confirm against the macro definition.
; ---------------------------------------------------------------------------
5906*c0909341SAndroid Build Coastguard Worker.main_oddhalf_part2_rect2:
5907*c0909341SAndroid Build Coastguard Worker    REPX     {paddd x, m11}, m0, m1, m2, m3, m4, m5, m6, m7
5908*c0909341SAndroid Build Coastguard Worker    REPX     {psrad x, 12 }, m0, m1, m2, m3, m4, m5, m6, m7
5909*c0909341SAndroid Build Coastguard Worker.main_oddhalf_part2: ; in3, in5, in11, in13, in19, in21, in27, in29
5910*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         7, 0, 8, 9, 10, _, 4052,  601 ; t23a, t24a
5911*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         1, 6, 8, 9, 10, _,  995, 3973 ; t20a, t27a
5912*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         5, 2, 8, 9, 10, _, 3513, 2106 ; t21a, t26a
5913*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         3, 4, 8, 9, 10, _, 2440, 3290 ; t22a, t25a
    ; falls through into .main_oddhalf_part2_fast2
; ---------------------------------------------------------------------------
; .main_oddhalf_part2_fast2
; Shared tail of all .main_oddhalf_part2* entry points.
;   in:  m0-m7 = products still awaiting rounding
;        m11   = rounding constant, m12/m13 = clamp minimum/maximum,
;        m14   = multiplier for the last stage (pd_2896 in the 12bpc caller
;                visible later in this file)
;        [r6-32*4 .. r6+32*3] = t16a, t17, t18a, t19, t28, t29a, t30, t31a
;                (per the load comments below)
;   out: [r4-32*4 .. r4+32*3] = t16, t17a, t18, t19a, t20, t21a, t22, t23a
;        [r5-32*4 .. r5+32*3] = t24a, t25, t26a, t27, t28a, t29, t30a, t31
;        r4 = incoming r6, r5 = r4 + 32*8, r6 = r4 + 32*16
;   clobbers: m0-m10, m15
; ---------------------------------------------------------------------------
5914*c0909341SAndroid Build Coastguard Worker.main_oddhalf_part2_fast2:
    ; Round and shift the eight products. Both REPX lines cover the same
    ; registers m0-m7, just listed in a different order.
5915*c0909341SAndroid Build Coastguard Worker    REPX     {paddd x, m11}, m0, m7, m6, m1, m2, m5, m4, m3
5916*c0909341SAndroid Build Coastguard Worker    REPX     {psrad x, 12 }, m0, m4, m6, m2, m1, m5, m7, m3
    ; First butterfly stage, each result clamped to [m12, m13].
5917*c0909341SAndroid Build Coastguard Worker    psubd                m8, m0, m4 ; t25
5918*c0909341SAndroid Build Coastguard Worker    paddd                m0, m4     ; t24
5919*c0909341SAndroid Build Coastguard Worker    psubd                m4, m6, m2 ; t26
5920*c0909341SAndroid Build Coastguard Worker    paddd                m6, m2     ; t27
5921*c0909341SAndroid Build Coastguard Worker    psubd                m2, m1, m5 ; t21
5922*c0909341SAndroid Build Coastguard Worker    paddd                m1, m5     ; t20
5923*c0909341SAndroid Build Coastguard Worker    psubd                m5, m7, m3 ; t22
5924*c0909341SAndroid Build Coastguard Worker    paddd                m7, m3     ; t23
5925*c0909341SAndroid Build Coastguard Worker    REPX    {pmaxsd x, m12}, m8, m5, m4, m2, m0, m6, m1, m7
5926*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m8, m5, m4, m2, m0, m6, m1, m7
    ; Rotations with the 2276/3406 constant pair (m15/m10 hold the constants,
    ; m11 is passed as the macro's rounding operand).
5927*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m15, [pd_2276]
5928*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m10, [pd_3406]
5929*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         4, 2, 3, 9, _, 11, 10, 15    ; t21a, t26a
5930*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         8, 5, 3, 9, _, 11, 10, 15, 2 ; t25a, t22a
    ; Second butterfly stage, again clamped to [m12, m13].
5931*c0909341SAndroid Build Coastguard Worker    psubd                m3, m0, m6 ; t27a
5932*c0909341SAndroid Build Coastguard Worker    paddd                m0, m6     ; t24a
5933*c0909341SAndroid Build Coastguard Worker    psubd                m6, m7, m1 ; t20a
5934*c0909341SAndroid Build Coastguard Worker    paddd                m7, m1     ; t23a
5935*c0909341SAndroid Build Coastguard Worker    psubd                m1, m5, m4 ; t21
5936*c0909341SAndroid Build Coastguard Worker    paddd                m5, m4     ; t22
5937*c0909341SAndroid Build Coastguard Worker    psubd                m4, m8, m2 ; t26
5938*c0909341SAndroid Build Coastguard Worker    paddd                m8, m2     ; t25
5939*c0909341SAndroid Build Coastguard Worker    REPX    {pmaxsd x, m12}, m3, m6, m1, m4, m0, m7, m5, m8
5940*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m3, m6, m1, m4, m0, m7, m5, m8
    ; Rotations with the 3784/1567 constant pair.
5941*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m15, [pd_3784]
5942*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m10, [pd_1567]
5943*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         4, 1, 2, 9, _, 11, 10, 15, 2 ; t26a, t21a
5944*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         3, 6, 2, 9, _, 11, 10, 15, 2 ; t27,  t20
    ; Combine with the values already sitting in the scratch buffer at r6,
    ; two slots at a time. Sums are written back in place; differences stay
    ; in registers for the final m14 stage further down.
5945*c0909341SAndroid Build Coastguard Worker    mova                 m9, [r6-32*4] ; t16a
5946*c0909341SAndroid Build Coastguard Worker    mova                m10, [r6-32*3] ; t17
5947*c0909341SAndroid Build Coastguard Worker    psubd                m2, m9, m7    ; t23
5948*c0909341SAndroid Build Coastguard Worker    paddd                m9, m7        ; t16
5949*c0909341SAndroid Build Coastguard Worker    psubd                m7, m10, m5   ; t22a
5950*c0909341SAndroid Build Coastguard Worker    paddd               m10, m5        ; t17a
5951*c0909341SAndroid Build Coastguard Worker    REPX    {pmaxsd x, m12}, m9, m10, m2, m7
5952*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m9, m10, m2, m7
5953*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*4], m9
5954*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*3], m10
5955*c0909341SAndroid Build Coastguard Worker    mova                 m9, [r6-32*2] ; t18a
5956*c0909341SAndroid Build Coastguard Worker    mova                m10, [r6-32*1] ; t19
5957*c0909341SAndroid Build Coastguard Worker    psubd                m5, m9, m1    ; t21
5958*c0909341SAndroid Build Coastguard Worker    paddd                m9, m1        ; t18
5959*c0909341SAndroid Build Coastguard Worker    psubd                m1, m10, m6   ; t20a
5960*c0909341SAndroid Build Coastguard Worker    paddd               m10, m6        ; t19a
5961*c0909341SAndroid Build Coastguard Worker    REPX    {pmaxsd x, m12}, m9, m10, m5, m1
5962*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m9, m10, m5, m1
5963*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*2], m9
5964*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*1], m10
5965*c0909341SAndroid Build Coastguard Worker    mova                 m9, [r6+32*0] ; t28
5966*c0909341SAndroid Build Coastguard Worker    mova                m10, [r6+32*1] ; t29a
5967*c0909341SAndroid Build Coastguard Worker    psubd                m6, m9, m3    ; t27a
5968*c0909341SAndroid Build Coastguard Worker    paddd                m9, m3        ; t28a
5969*c0909341SAndroid Build Coastguard Worker    psubd                m3, m10, m4   ; t26
5970*c0909341SAndroid Build Coastguard Worker    paddd               m10, m4        ; t29
5971*c0909341SAndroid Build Coastguard Worker    REPX    {pmaxsd x, m12}, m9, m10, m6, m3
5972*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m9, m10, m6, m3
    ; Final stage for t20/t27 and t21a/t26a: all four operands are scaled by
    ; m14, the rounding constant is added to one operand of each pair only
    ; (so it ends up once in both the sum and the difference), then >>12.
5973*c0909341SAndroid Build Coastguard Worker    REPX    {pmulld x, m14}, m6, m3, m1, m5
5974*c0909341SAndroid Build Coastguard Worker    paddd                m6, m11
5975*c0909341SAndroid Build Coastguard Worker    paddd                m3, m11
5976*c0909341SAndroid Build Coastguard Worker    psubd                m4, m6, m1    ; t20
5977*c0909341SAndroid Build Coastguard Worker    paddd                m6, m1        ; t27
5978*c0909341SAndroid Build Coastguard Worker    psubd                m1, m3, m5    ; t21a
5979*c0909341SAndroid Build Coastguard Worker    paddd                m3, m5        ; t26a
5980*c0909341SAndroid Build Coastguard Worker    REPX    {psrad  x, 12 }, m4, m1, m3, m6
5981*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*0], m4
5982*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*1], m1
5983*c0909341SAndroid Build Coastguard Worker    mova                 m4, [r6+32*2] ; t30
5984*c0909341SAndroid Build Coastguard Worker    mova                 m1, [r6+32*3] ; t31a
5985*c0909341SAndroid Build Coastguard Worker    psubd                m5, m4, m8    ; t25a
5986*c0909341SAndroid Build Coastguard Worker    paddd                m4, m8        ; t30a
5987*c0909341SAndroid Build Coastguard Worker    psubd                m8, m1, m0    ; t24
5988*c0909341SAndroid Build Coastguard Worker    paddd                m1, m0        ; t31
5989*c0909341SAndroid Build Coastguard Worker    REPX    {pmaxsd x, m12}, m8, m5, m4, m1
5990*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m8, m5, m4, m1
    ; Same m14 scaling scheme for t22/t25 and t23a/t24a.
5991*c0909341SAndroid Build Coastguard Worker    REPX    {pmulld x, m14}, m5, m8, m7, m2
5992*c0909341SAndroid Build Coastguard Worker    paddd                m5, m11
5993*c0909341SAndroid Build Coastguard Worker    paddd                m8, m11
5994*c0909341SAndroid Build Coastguard Worker    psubd                m0, m5, m7    ; t22
5995*c0909341SAndroid Build Coastguard Worker    paddd                m5, m7        ; t25
5996*c0909341SAndroid Build Coastguard Worker    psubd                m7, m8, m2    ; t23a
5997*c0909341SAndroid Build Coastguard Worker    paddd                m2, m8        ; t24a
5998*c0909341SAndroid Build Coastguard Worker    REPX    {psrad  x, 12 }, m0, m7, m2, m5
5999*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*2], m0
6000*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*3], m7
    ; r4 keeps the base of the first eight slots; r6 then moves on 32*8 bytes
    ; so the next eight results can use the same -32*4..+32*3 offsets.
6001*c0909341SAndroid Build Coastguard Worker    mov                  r4, r6
6002*c0909341SAndroid Build Coastguard Worker    add                  r6, 32*8
6003*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*4], m2
6004*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*3], m5
6005*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*2], m3
6006*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*1], m6
6007*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*0], m9
6008*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*1], m10
6009*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*2], m4
6010*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*3], m1
    ; r5 keeps the base of the second eight slots; r6 is left pointing a
    ; further 32*8 bytes on.
6011*c0909341SAndroid Build Coastguard Worker    mov                  r5, r6
6012*c0909341SAndroid Build Coastguard Worker    add                  r6, 32*8
6013*c0909341SAndroid Build Coastguard Worker    ret
; ---------------------------------------------------------------------------
; .main_end
; Runs IDCT32_END on m0-m7, interleaves the results as 16-bit words and
; falls through into .transpose.
;   in:  m0-m7; m11 is shifted right by 10 below and labelled pd_2, so it is
;        expected to hold pd_2048 on entry; r5 = base of the 8-slot buffer
;        that receives rows 16-31
;   out: rows 0-15 (as word pairs) stay in registers:
;          m0 = 0 1, m2 = 2 3, m4 = 4 5,   m6 = 6 7,
;          m7 = 8 9, m5 = 10 11, m3 = 12 13, m1 = 14 15
;        rows 16-31 are stored to [r5-32*4 .. r5+32*3] (see store comments)
;   clobbers: m8-m11, m14, m15
; IDCT32_END is defined outside this chunk.
; NOTE(review): judging by how the registers are used afterwards, each call
; presumably leaves its two results in the registers named by its first two
; operands, uses 8/9/10 as temporaries and takes the shift amount as the last
; operand - confirm against the macro definition.
; ---------------------------------------------------------------------------
6014*c0909341SAndroid Build Coastguard WorkerALIGN function_align
6015*c0909341SAndroid Build Coastguard Worker.main_end:
6016*c0909341SAndroid Build Coastguard Worker    psrld               m11, 10 ; pd_2
6017*c0909341SAndroid Build Coastguard Worker    IDCT32_END            0, 15, 8, 9, 10, 2
6018*c0909341SAndroid Build Coastguard Worker    IDCT32_END            1, 14, 8, 9, 10, 2
6019*c0909341SAndroid Build Coastguard Worker    punpckhwd            m8, m0, m1   ; 16 17
6020*c0909341SAndroid Build Coastguard Worker    punpcklwd            m0, m1       ;  0  1
6021*c0909341SAndroid Build Coastguard Worker    punpcklwd            m1, m14, m15 ; 14 15
6022*c0909341SAndroid Build Coastguard Worker    punpckhwd           m14, m15      ; 30 31
    ; Spill the upper-row pairs so m8/m14/m15 can be reused by the next pair.
6023*c0909341SAndroid Build Coastguard Worker    mova          [r5+32*3], m8
6024*c0909341SAndroid Build Coastguard Worker    mova          [r5+32*2], m14
6025*c0909341SAndroid Build Coastguard Worker    IDCT32_END            2, 15, 8, 9, 10, 2
6026*c0909341SAndroid Build Coastguard Worker    IDCT32_END            3, 14, 8, 9, 10, 2
6027*c0909341SAndroid Build Coastguard Worker    punpckhwd            m8, m2, m3   ; 18 19
6028*c0909341SAndroid Build Coastguard Worker    punpcklwd            m2, m3       ;  2  3
6029*c0909341SAndroid Build Coastguard Worker    punpcklwd            m3, m14, m15 ; 12 13
6030*c0909341SAndroid Build Coastguard Worker    punpckhwd           m14, m15      ; 28 29
6031*c0909341SAndroid Build Coastguard Worker    mova          [r5+32*1], m8
6032*c0909341SAndroid Build Coastguard Worker    mova          [r5+32*0], m14
6033*c0909341SAndroid Build Coastguard Worker    IDCT32_END            4, 15, 8, 9, 10, 2
6034*c0909341SAndroid Build Coastguard Worker    IDCT32_END            5, 14, 8, 9, 10, 2
6035*c0909341SAndroid Build Coastguard Worker    punpckhwd            m8, m4, m5   ; 20 21
6036*c0909341SAndroid Build Coastguard Worker    punpcklwd            m4, m5       ;  4  5
6037*c0909341SAndroid Build Coastguard Worker    punpcklwd            m5, m14, m15 ; 10 11
6038*c0909341SAndroid Build Coastguard Worker    punpckhwd           m14, m15      ; 26 27
6039*c0909341SAndroid Build Coastguard Worker    mova          [r5-32*1], m8
6040*c0909341SAndroid Build Coastguard Worker    mova          [r5-32*2], m14
6041*c0909341SAndroid Build Coastguard Worker    IDCT32_END            6, 15, 8, 9, 10, 2
6042*c0909341SAndroid Build Coastguard Worker    IDCT32_END            7, 14, 8, 9, 10, 2
6043*c0909341SAndroid Build Coastguard Worker    punpckhwd            m8, m6, m7   ; 22 23
6044*c0909341SAndroid Build Coastguard Worker    punpcklwd            m6, m7       ;  6  7
6045*c0909341SAndroid Build Coastguard Worker    punpcklwd            m7, m14, m15 ;  8  9
6046*c0909341SAndroid Build Coastguard Worker    punpckhwd           m14, m15      ; 24 25
6047*c0909341SAndroid Build Coastguard Worker    mova          [r5-32*3], m8
6048*c0909341SAndroid Build Coastguard Worker    mova          [r5-32*4], m14
    ; falls through into .transpose
; ---------------------------------------------------------------------------
; .transpose
; Register-only shuffle network over m0-m7: a dword interleave, then a qword
; interleave, then an exchange of 128-bit lanes. Reached by fall-through
; from .main_end, where the inputs have already been interleaved as words.
;   in:       m0-m7
;   out:      m0-m7 (rearranged)
;   clobbers: m15 (scratch)
; No memory is accessed and no general-purpose register is modified.
; ---------------------------------------------------------------------------
6049*c0909341SAndroid Build Coastguard Worker.transpose:
    ; 32-bit interleave of the register pairs (3,1) (4,6) (0,2) (7,5).
6050*c0909341SAndroid Build Coastguard Worker    punpckhdq           m15, m3, m1
6051*c0909341SAndroid Build Coastguard Worker    punpckldq            m3, m1
6052*c0909341SAndroid Build Coastguard Worker    punpckhdq            m1, m4, m6
6053*c0909341SAndroid Build Coastguard Worker    punpckldq            m4, m6
6054*c0909341SAndroid Build Coastguard Worker    punpckhdq            m6, m0, m2
6055*c0909341SAndroid Build Coastguard Worker    punpckldq            m0, m2
6056*c0909341SAndroid Build Coastguard Worker    punpckhdq            m2, m7, m5
6057*c0909341SAndroid Build Coastguard Worker    punpckldq            m7, m5
    ; 64-bit interleave of the intermediate results.
6058*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m5, m2, m15
6059*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m2, m15
6060*c0909341SAndroid Build Coastguard Worker    punpckhqdq          m15, m7, m3
6061*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m7, m3
6062*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m3, m6, m1
6063*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m6, m1
6064*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m1, m0, m4
6065*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m0, m4
    ; 128-bit lane exchange: vperm2i128 with 0x31 gathers the two high lanes
    ; of its sources, vinserti128 ..., 1 places a low lane into the high half.
    ; Every vperm2i128 reads its sources before the matching vinserti128
    ; overwrites one of them, so the instruction order matters.
6066*c0909341SAndroid Build Coastguard Worker    vperm2i128           m4, m0, m7, 0x31
6067*c0909341SAndroid Build Coastguard Worker    vinserti128          m0, xm7, 1
6068*c0909341SAndroid Build Coastguard Worker    vperm2i128           m7, m3, m2, 0x31
6069*c0909341SAndroid Build Coastguard Worker    vinserti128          m3, xm2, 1
6070*c0909341SAndroid Build Coastguard Worker    vinserti128          m2, m6, xm5, 1
6071*c0909341SAndroid Build Coastguard Worker    vperm2i128           m6, m5, 0x31
6072*c0909341SAndroid Build Coastguard Worker    vperm2i128           m5, m1, m15, 0x31
6073*c0909341SAndroid Build Coastguard Worker    vinserti128          m1, xm15, 1
6074*c0909341SAndroid Build Coastguard Worker    ret
6075*c0909341SAndroid Build Coastguard Worker
; ---------------------------------------------------------------------------
; inv_txfm_add_identity_identity_8x32_10bpc(dst, stride, c, eob)
; Adds the identity/identity residual to the destination rows.
; Declared via x86inc cglobal with 4 arguments, 7 general-purpose registers
; and 8 vector registers.
;   dst    = destination pixels (16-bit words, 8 per row are touched)
;   stride = destination row pitch in bytes
;   c      = 32-bit coefficients; they are zeroed after being read
;   eob    = end-of-block value, used only to pick the iteration count
; Register roles set up here:
;   m5 = pw_5 (rounding bias), m6 = 0, m7 = pixel maximum
;   r6 = stride*3, r5 = stride*5, r4 = stride*7
; NOTE(review): .pass1 is a separate label right after the m7 load,
; presumably so another bit depth can enter with its own pixel maximum -
; confirm at the callers.
; ---------------------------------------------------------------------------
6076*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_identity_identity_8x32_10bpc, 4, 7, 8, dst, stride, c, eob
6077*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m7, [pixel_10bpc_max]
6078*c0909341SAndroid Build Coastguard Worker.pass1:
6079*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m5, [pw_5]
6080*c0909341SAndroid Build Coastguard Worker    pxor                 m6, m6
    ; Bias eob by 21 so the thresholds 43/107/171 land on multiples of 64;
    ; the loop below then runs 1 + eob/64 times. The add is on the low byte
    ; only: if it carries out, the unbiased value saved in r6d is restored.
6081*c0909341SAndroid Build Coastguard Worker    mov                 r6d, eobd
6082*c0909341SAndroid Build Coastguard Worker    add                eobb, 21
6083*c0909341SAndroid Build Coastguard Worker    cmovc              eobd, r6d ; 43, 107, 171 -> 64, 128, 192
    ; lea leaves the flags alone; r6 is reused here as stride*3 now that the
    ; saved eob copy is no longer needed.
6084*c0909341SAndroid Build Coastguard Worker    lea                  r6, [strideq*3]
6085*c0909341SAndroid Build Coastguard Worker    lea                  r5, [strideq*5]
6086*c0909341SAndroid Build Coastguard Worker    lea                  r4, [strideq+r6*2] ; strideq*7
6087*c0909341SAndroid Build Coastguard Worker.loop:
    ; Load eight 32-byte coefficient vectors spaced 128 bytes apart and pack
    ; each pair from dwords to words with signed saturation.
6088*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+128*0]
6089*c0909341SAndroid Build Coastguard Worker    packssdw             m0, [cq+128*1]
6090*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+128*2]
6091*c0909341SAndroid Build Coastguard Worker    packssdw             m1, [cq+128*3]
6092*c0909341SAndroid Build Coastguard Worker    mova                 m2, [cq+128*4]
6093*c0909341SAndroid Build Coastguard Worker    packssdw             m2, [cq+128*5]
6094*c0909341SAndroid Build Coastguard Worker    mova                 m3, [cq+128*6]
6095*c0909341SAndroid Build Coastguard Worker    packssdw             m3, [cq+128*7]
    ; (x + 5) >> 3 with a saturating add and an arithmetic shift.
6096*c0909341SAndroid Build Coastguard Worker    REPX     {paddsw x, m5}, m0, m1, m2, m3
6097*c0909341SAndroid Build Coastguard Worker    REPX     {psraw  x, 3 }, m0, m1, m2, m3
6098*c0909341SAndroid Build Coastguard Worker    call .main_zero
    ; Next 32-byte coefficient column and next group of eight output rows.
6099*c0909341SAndroid Build Coastguard Worker    add                  cq, 32
6100*c0909341SAndroid Build Coastguard Worker    lea                dstq, [dstq+strideq*8]
6101*c0909341SAndroid Build Coastguard Worker    sub                eobd, 64
6102*c0909341SAndroid Build Coastguard Worker    jge .loop
6103*c0909341SAndroid Build Coastguard Worker    RET
6104*c0909341SAndroid Build Coastguard WorkerALIGN function_align
    ; .main_zero: clear the eight coefficient vectors that were just read
    ; (m6 is zero), then fall through into .main.
6105*c0909341SAndroid Build Coastguard Worker.main_zero:
6106*c0909341SAndroid Build Coastguard Worker    REPX {mova [cq+128*x], m6}, 0, 1, 2, 3, 4, 5, 6, 7
    ; .main: shuffle m0-m3 (16-bit words) into row order, add them to eight
    ; destination rows, clamp and store.
    ;   in:  m0-m3 = residual words, m6 = lower clamp, m7 = upper clamp,
    ;        r6/r5/r4 = stride*3/5/7, dstq = first of the eight rows
    ;   clobbers: m0-m4
    ; Each register ends up with row k in its low 128 bits and row k+4 in
    ; its high 128 bits (m0: rows 0/4, m1: 1/5, m2: 2/6, m3: 3/7), which is
    ; the pairing used by the loads and stores below.
6107*c0909341SAndroid Build Coastguard Worker.main:
6108*c0909341SAndroid Build Coastguard Worker    punpckhwd            m4, m0, m1
6109*c0909341SAndroid Build Coastguard Worker    punpcklwd            m0, m1
6110*c0909341SAndroid Build Coastguard Worker    punpckhwd            m1, m2, m3
6111*c0909341SAndroid Build Coastguard Worker    punpcklwd            m2, m3
6112*c0909341SAndroid Build Coastguard Worker    punpckhwd            m3, m0, m4
6113*c0909341SAndroid Build Coastguard Worker    punpcklwd            m0, m4
6114*c0909341SAndroid Build Coastguard Worker    punpckhwd            m4, m2, m1
6115*c0909341SAndroid Build Coastguard Worker    punpcklwd            m2, m1
6116*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m1, m0, m2
6117*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m0, m2
6118*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m2, m3, m4
6119*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m3, m4
    ; Gather two destination rows per register through m4 and add them.
6120*c0909341SAndroid Build Coastguard Worker    mova                xm4, [dstq+strideq*0]
6121*c0909341SAndroid Build Coastguard Worker    vinserti128          m4, [dstq+strideq*4], 1
6122*c0909341SAndroid Build Coastguard Worker    paddw                m0, m4
6123*c0909341SAndroid Build Coastguard Worker    mova                xm4, [dstq+strideq*1]
6124*c0909341SAndroid Build Coastguard Worker    vinserti128          m4, [dstq+r5       ], 1
6125*c0909341SAndroid Build Coastguard Worker    paddw                m1, m4
6126*c0909341SAndroid Build Coastguard Worker    mova                xm4, [dstq+strideq*2]
6127*c0909341SAndroid Build Coastguard Worker    vinserti128          m4, [dstq+r6*2     ], 1
6128*c0909341SAndroid Build Coastguard Worker    paddw                m2, m4
6129*c0909341SAndroid Build Coastguard Worker    mova                xm4, [dstq+r6       ]
6130*c0909341SAndroid Build Coastguard Worker    vinserti128          m4, [dstq+r4       ], 1
6131*c0909341SAndroid Build Coastguard Worker    paddw                m3, m4
    ; Clamp to [m6, m7], i.e. [0, pixel maximum] when entered from above.
6132*c0909341SAndroid Build Coastguard Worker    REPX     {pmaxsw x, m6}, m0, m1, m2, m3
6133*c0909341SAndroid Build Coastguard Worker    REPX     {pminsw x, m7}, m0, m1, m2, m3
6134*c0909341SAndroid Build Coastguard Worker    mova         [dstq+strideq*0], xm0
6135*c0909341SAndroid Build Coastguard Worker    vextracti128 [dstq+strideq*4], m0, 1
6136*c0909341SAndroid Build Coastguard Worker    mova         [dstq+strideq*1], xm1
6137*c0909341SAndroid Build Coastguard Worker    vextracti128 [dstq+r5       ], m1, 1
6138*c0909341SAndroid Build Coastguard Worker    mova         [dstq+strideq*2], xm2
6139*c0909341SAndroid Build Coastguard Worker    vextracti128 [dstq+r6*2     ], m2, 1
6140*c0909341SAndroid Build Coastguard Worker    mova         [dstq+r6       ], xm3
6141*c0909341SAndroid Build Coastguard Worker    vextracti128 [dstq+r4       ], m3, 1
6142*c0909341SAndroid Build Coastguard Worker    ret
6143*c0909341SAndroid Build Coastguard Worker
6144*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_dct_dct_8x32_12bpc, 4, 7, 0, dst, stride, c, eob
6145*c0909341SAndroid Build Coastguard Worker    test               eobd, eobd
6146*c0909341SAndroid Build Coastguard Worker    jz .dconly
6147*c0909341SAndroid Build Coastguard Worker    PROLOGUE              0, 7, 16, 32*24, dst, stride, c, eob
6148*c0909341SAndroid Build Coastguard Worker%undef cmp
6149*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m11, [pd_2048]
6150*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [clip_20b_min]
6151*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_20b_max]
6152*c0909341SAndroid Build Coastguard Worker    mov                  r4, cq
6153*c0909341SAndroid Build Coastguard Worker    lea                  r6, [rsp+32*4]
6154*c0909341SAndroid Build Coastguard Worker    call .pass1_main
6155*c0909341SAndroid Build Coastguard Worker    cmp                eobd, 43
6156*c0909341SAndroid Build Coastguard Worker    jge .eob43
6157*c0909341SAndroid Build Coastguard Worker    jmp .pass2_fast
6158*c0909341SAndroid Build Coastguard Worker.eob43:
6159*c0909341SAndroid Build Coastguard Worker    call .pass1_main
6160*c0909341SAndroid Build Coastguard Worker    cmp                eobd, 107
6161*c0909341SAndroid Build Coastguard Worker    jge .eob107
6162*c0909341SAndroid Build Coastguard Worker.pass2_fast:
6163*c0909341SAndroid Build Coastguard Worker    mov                  cq, r4
6164*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [clip_18b_min]
6165*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_18b_max]
6166*c0909341SAndroid Build Coastguard Worker    pmaxsd               m0, m12, [cq+128*1+ 0]
6167*c0909341SAndroid Build Coastguard Worker    pmaxsd               m1, m12, [cq+128*7+ 0]
6168*c0909341SAndroid Build Coastguard Worker    pmaxsd               m2, m12, [cq+128*1+32]
6169*c0909341SAndroid Build Coastguard Worker    pmaxsd               m3, m12, [cq+128*7+32]
6170*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m0, m1, m2, m3
6171*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m14, [pd_2896]
6172*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_8x32_10bpc).main_oddhalf_part1_fast
6173*c0909341SAndroid Build Coastguard Worker    pmaxsd               m0, m12, [cq+128*3+ 0]
6174*c0909341SAndroid Build Coastguard Worker    pmaxsd               m1, m12, [cq+128*5+ 0]
6175*c0909341SAndroid Build Coastguard Worker    pmaxsd               m2, m12, [cq+128*3+32]
6176*c0909341SAndroid Build Coastguard Worker    pmaxsd               m3, m12, [cq+128*5+32]
6177*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m0, m1, m2, m3
6178*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_8x32_10bpc).main_oddhalf_part2_fast
6179*c0909341SAndroid Build Coastguard Worker    pmaxsd               m0, m12, [cq+128*2+ 0]
6180*c0909341SAndroid Build Coastguard Worker    pmaxsd               m1, m12, [cq+128*6+ 0]
6181*c0909341SAndroid Build Coastguard Worker    pmaxsd               m2, m12, [cq+128*2+32]
6182*c0909341SAndroid Build Coastguard Worker    pmaxsd               m3, m12, [cq+128*6+32]
6183*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m0, m1, m2, m3
6184*c0909341SAndroid Build Coastguard Worker    call m(idct_8x16_internal_10bpc).main_oddhalf_fast
6185*c0909341SAndroid Build Coastguard Worker    pmaxsd               m0, m12, [cq+128*0+ 0]
6186*c0909341SAndroid Build Coastguard Worker    pmaxsd               m1, m12, [cq+128*4+ 0]
6187*c0909341SAndroid Build Coastguard Worker    pmaxsd               m2, m12, [cq+128*0+32]
6188*c0909341SAndroid Build Coastguard Worker    pmaxsd               m3, m12, [cq+128*4+32]
6189*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m0, m1, m2, m3
6190*c0909341SAndroid Build Coastguard Worker    pxor                 m4, m4
6191*c0909341SAndroid Build Coastguard Worker    REPX       {mova x, m4}, m5, m6, m7
6192*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).main
6193*c0909341SAndroid Build Coastguard Worker    call m(idct_8x16_internal_10bpc).main_evenhalf
6194*c0909341SAndroid Build Coastguard Worker    jmp .pass2_end
6195*c0909341SAndroid Build Coastguard Worker.eob107:
6196*c0909341SAndroid Build Coastguard Worker    call .pass1_main
6197*c0909341SAndroid Build Coastguard Worker    cmp                eobd, 171
6198*c0909341SAndroid Build Coastguard Worker    jge .eob171
6199*c0909341SAndroid Build Coastguard Worker    jmp .pass2
6200*c0909341SAndroid Build Coastguard Worker.eob171:
6201*c0909341SAndroid Build Coastguard Worker    call .pass1_main
6202*c0909341SAndroid Build Coastguard Worker.pass2:
6203*c0909341SAndroid Build Coastguard Worker    mov                  cq, r4
6204*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [clip_18b_min]
6205*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_18b_max]
6206*c0909341SAndroid Build Coastguard Worker    pmaxsd               m0, m12, [cq+128*1+ 0]
6207*c0909341SAndroid Build Coastguard Worker    pmaxsd               m1, m12, [cq+128*7+ 0]
6208*c0909341SAndroid Build Coastguard Worker    pmaxsd               m2, m12, [cq+128*1+32]
6209*c0909341SAndroid Build Coastguard Worker    pmaxsd               m3, m12, [cq+128*7+32]
6210*c0909341SAndroid Build Coastguard Worker    pmaxsd               m4, m12, [cq+128*1+64]
6211*c0909341SAndroid Build Coastguard Worker    pmaxsd               m5, m12, [cq+128*7+64]
6212*c0909341SAndroid Build Coastguard Worker    pmaxsd               m6, m12, [cq+128*1+96]
6213*c0909341SAndroid Build Coastguard Worker    pmaxsd               m7, m12, [cq+128*7+96]
6214*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m0, m1, m2, m3, m4, m5, m6, m7
6215*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m14, [pd_2896]
6216*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_8x32_10bpc).main_oddhalf_part1
6217*c0909341SAndroid Build Coastguard Worker    pmaxsd               m0, m12, [cq+128*3+ 0]
6218*c0909341SAndroid Build Coastguard Worker    pmaxsd               m1, m12, [cq+128*5+ 0]
6219*c0909341SAndroid Build Coastguard Worker    pmaxsd               m2, m12, [cq+128*3+32]
6220*c0909341SAndroid Build Coastguard Worker    pmaxsd               m3, m12, [cq+128*5+32]
6221*c0909341SAndroid Build Coastguard Worker    pmaxsd               m4, m12, [cq+128*3+64]
6222*c0909341SAndroid Build Coastguard Worker    pmaxsd               m5, m12, [cq+128*5+64]
6223*c0909341SAndroid Build Coastguard Worker    pmaxsd               m6, m12, [cq+128*3+96]
6224*c0909341SAndroid Build Coastguard Worker    pmaxsd               m7, m12, [cq+128*5+96]
6225*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m0, m1, m2, m3, m4, m5, m6, m7
6226*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_8x32_10bpc).main_oddhalf_part2
6227*c0909341SAndroid Build Coastguard Worker    pmaxsd               m0, m12, [cq+128*2+ 0]
6228*c0909341SAndroid Build Coastguard Worker    pmaxsd               m1, m12, [cq+128*6+ 0]
6229*c0909341SAndroid Build Coastguard Worker    pmaxsd               m2, m12, [cq+128*2+32]
6230*c0909341SAndroid Build Coastguard Worker    pmaxsd               m3, m12, [cq+128*6+32]
6231*c0909341SAndroid Build Coastguard Worker    pmaxsd               m4, m12, [cq+128*2+64]
6232*c0909341SAndroid Build Coastguard Worker    pmaxsd               m5, m12, [cq+128*6+64]
6233*c0909341SAndroid Build Coastguard Worker    pmaxsd               m6, m12, [cq+128*2+96]
6234*c0909341SAndroid Build Coastguard Worker    pmaxsd               m7, m12, [cq+128*6+96]
6235*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m0, m1, m2, m3, m4, m5, m6, m7
6236*c0909341SAndroid Build Coastguard Worker    call m(idct_8x16_internal_10bpc).main_oddhalf
6237*c0909341SAndroid Build Coastguard Worker    pmaxsd               m0, m12, [cq+128*0+ 0]
6238*c0909341SAndroid Build Coastguard Worker    pmaxsd               m1, m12, [cq+128*4+ 0]
6239*c0909341SAndroid Build Coastguard Worker    pmaxsd               m2, m12, [cq+128*0+32]
6240*c0909341SAndroid Build Coastguard Worker    pmaxsd               m3, m12, [cq+128*4+32]
6241*c0909341SAndroid Build Coastguard Worker    pmaxsd               m4, m12, [cq+128*0+64]
6242*c0909341SAndroid Build Coastguard Worker    pmaxsd               m5, m12, [cq+128*4+64]
6243*c0909341SAndroid Build Coastguard Worker    pmaxsd               m6, m12, [cq+128*0+96]
6244*c0909341SAndroid Build Coastguard Worker    pmaxsd               m7, m12, [cq+128*4+96]
6245*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m0, m1, m2, m3, m4, m5, m6, m7
6246*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).main
6247*c0909341SAndroid Build Coastguard Worker    call m(idct_8x16_internal_10bpc).main_evenhalf
6248*c0909341SAndroid Build Coastguard Worker.pass2_end:
6249*c0909341SAndroid Build Coastguard Worker    psrld               m11, 8 ; pd_8
6250*c0909341SAndroid Build Coastguard Worker    IDCT32_END            0, 15, 8, 9, 10, 4
6251*c0909341SAndroid Build Coastguard Worker    IDCT32_END            1, 14, 8, 9, 10, 4
6252*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m8, m0, m1   ; 16 17 (interleaved)
6253*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m0, m1       ;  0  1 (interleaved)
6254*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m1, m14, m15 ; 14 15 (interleaved)
6255*c0909341SAndroid Build Coastguard Worker    punpckhqdq          m14, m15      ; 30 31 (interleaved)
6256*c0909341SAndroid Build Coastguard Worker    mova          [r5+32*3], m8
6257*c0909341SAndroid Build Coastguard Worker    mova          [r5+32*2], m14
6258*c0909341SAndroid Build Coastguard Worker    IDCT32_END            2, 15, 8, 9, 10, 4
6259*c0909341SAndroid Build Coastguard Worker    IDCT32_END            3, 14, 8, 9, 10, 4
6260*c0909341SAndroid Build Coastguard Worker    punpckhqdq            m8, m2, m3   ; 18 19 (interleaved)
6261*c0909341SAndroid Build Coastguard Worker    punpcklqdq            m2, m3       ;  2  3 (interleaved)
6262*c0909341SAndroid Build Coastguard Worker    punpcklqdq            m3, m14, m15 ; 12 13 (interleaved)
6263*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m14, m15      ; 28 29 (interleaved)
6264*c0909341SAndroid Build Coastguard Worker    mova          [r5+32*1], m8
6265*c0909341SAndroid Build Coastguard Worker    mova          [r5+32*0], m14
6266*c0909341SAndroid Build Coastguard Worker    IDCT32_END            4, 15, 8, 9, 10, 4
6267*c0909341SAndroid Build Coastguard Worker    IDCT32_END            5, 14, 8, 9, 10, 4
6268*c0909341SAndroid Build Coastguard Worker    punpckhqdq            m8, m4, m5   ; 20 21 (interleaved)
6269*c0909341SAndroid Build Coastguard Worker    punpcklqdq            m4, m5       ;  4  5 (interleaved)
6270*c0909341SAndroid Build Coastguard Worker    punpcklqdq            m5, m14, m15 ; 10 11 (interleaved)
6271*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m14, m15      ; 26 27 (interleaved)
6272*c0909341SAndroid Build Coastguard Worker    mova          [r5-32*1], m8
6273*c0909341SAndroid Build Coastguard Worker    mova          [r5-32*2], m14
6274*c0909341SAndroid Build Coastguard Worker    IDCT32_END            6, 15, 8, 9, 10, 4
6275*c0909341SAndroid Build Coastguard Worker    IDCT32_END            7, 14, 8, 9, 10, 4
6276*c0909341SAndroid Build Coastguard Worker    punpckhqdq            m8, m6, m7   ; 22 23 (interleaved)
6277*c0909341SAndroid Build Coastguard Worker    punpcklqdq            m6, m7       ;  6  7 (interleaved)
6278*c0909341SAndroid Build Coastguard Worker    punpcklqdq            m7, m14, m15 ;  8  9 (interleaved)
6279*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m14, m15      ; 24 25 (interleaved)
6280*c0909341SAndroid Build Coastguard Worker    mova          [r5-32*3], m8
6281*c0909341SAndroid Build Coastguard Worker    mova          [r5-32*4], m14
6282*c0909341SAndroid Build Coastguard Worker    mova                m15, m1
6283*c0909341SAndroid Build Coastguard Worker.end:
6284*c0909341SAndroid Build Coastguard Worker    vpermq               m0, m0, q3120
6285*c0909341SAndroid Build Coastguard Worker    vpermq               m1, m2, q3120
6286*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_12bpc).write_8x4_start
6287*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).write_8x4
6288*c0909341SAndroid Build Coastguard Worker    vpermq               m0, m4, q3120
6289*c0909341SAndroid Build Coastguard Worker    vpermq               m1, m6, q3120
6290*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).write_8x4
6291*c0909341SAndroid Build Coastguard Worker    vpermq               m0, m7, q3120
6292*c0909341SAndroid Build Coastguard Worker    vpermq               m1, m5, q3120
6293*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).write_8x4
6294*c0909341SAndroid Build Coastguard Worker    vpermq               m0, m3, q3120
6295*c0909341SAndroid Build Coastguard Worker    vpermq               m1, m15, q3120
6296*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).write_8x4
6297*c0909341SAndroid Build Coastguard Worker    vpermq               m0, [r5+32*3], q3120
6298*c0909341SAndroid Build Coastguard Worker    vpermq               m1, [r5+32*1], q3120
6299*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).write_8x4
6300*c0909341SAndroid Build Coastguard Worker    vpermq               m0, [r5-32*1], q3120
6301*c0909341SAndroid Build Coastguard Worker    vpermq               m1, [r5-32*3], q3120
6302*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).write_8x4
6303*c0909341SAndroid Build Coastguard Worker    vpermq               m0, [r5-32*4], q3120
6304*c0909341SAndroid Build Coastguard Worker    vpermq               m1, [r5-32*2], q3120
6305*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).write_8x4
6306*c0909341SAndroid Build Coastguard Worker    vpermq               m0, [r5+32*0], q3120
6307*c0909341SAndroid Build Coastguard Worker    vpermq               m1, [r5+32*2], q3120
6308*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).write_8x4
6309*c0909341SAndroid Build Coastguard Worker    RET
6310*c0909341SAndroid Build Coastguard Worker.dconly:
6311*c0909341SAndroid Build Coastguard Worker    imul                r6d, [cq], 181
6312*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m2, [dconly_12bpc]
6313*c0909341SAndroid Build Coastguard Worker    mov                [cq], eobd ; 0
6314*c0909341SAndroid Build Coastguard Worker    or                  r3d, 32
6315*c0909341SAndroid Build Coastguard Worker    add                 r6d, 640
6316*c0909341SAndroid Build Coastguard Worker    sar                 r6d, 10
6317*c0909341SAndroid Build Coastguard Worker    jmp m(inv_txfm_add_dct_dct_8x8_10bpc).dconly3
6318*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; .pass1_main: local subroutine (entered via call, leaves via ret).
; Runs the 10bpc 8x32 pass1_main_part1 helper, transposes m0-m7 as an 8x8 block
; of dwords, stores the eight registers to the coefficient buffer at a 128-byte
; stride, then advances cq by 32 bytes.
6319*c0909341SAndroid Build Coastguard Worker.pass1_main:
6320*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_8x32_10bpc).pass1_main_part1
; The last four macro arguments (8, 9, 10, 15) are presumably scratch registers
; for the transpose - TODO confirm against the TRANSPOSE_8X8_DWORD definition.
6321*c0909341SAndroid Build Coastguard Worker    TRANSPOSE_8X8_DWORD   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15
6322*c0909341SAndroid Build Coastguard Worker    mova         [cq+128*0], m0
6323*c0909341SAndroid Build Coastguard Worker    mova         [cq+128*1], m1
6324*c0909341SAndroid Build Coastguard Worker    mova         [cq+128*2], m2
6325*c0909341SAndroid Build Coastguard Worker    mova         [cq+128*3], m3
6326*c0909341SAndroid Build Coastguard Worker    mova         [cq+128*4], m4
6327*c0909341SAndroid Build Coastguard Worker    mova         [cq+128*5], m5
6328*c0909341SAndroid Build Coastguard Worker    mova         [cq+128*6], m6
6329*c0909341SAndroid Build Coastguard Worker    mova         [cq+128*7], m7
; Step cq forward by one 32-byte vector so a following call stores next to this one.
6330*c0909341SAndroid Build Coastguard Worker    add                  cq, 32
6331*c0909341SAndroid Build Coastguard Worker    ret
6332*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; .main_end: local subroutine of inv_txfm_add_dct_dct_8x32_12bpc (entered via
; call, leaves via ret).
; Applies IDCT32_END to register pairs (0,15), (1,14), (2,14), (3,14), (4,14),
; (5,10), (6,9), (7,8) and stores the two temporaries of each invocation to
; cq+32*16..23 (ascending) and cq+32*31..24 (descending).
; NOTE(review): IDCT32_END is defined outside this view; that its 3rd/4th
; arguments receive the values stored here is inferred from the stores below.
; On return m0-m2 hold what they held before being borrowed as temporaries,
; m11 holds the m14 value left by the fifth invocation, and m12/m13/m14 are
; reloaded from cq+32*12/13/14.
6333*c0909341SAndroid Build Coastguard Worker.main_end:
; Shift m11 right by 10 bits; per the existing note the result is pd_2
; (so m11 presumably held pd_2048 on entry - confirm against callers).
6334*c0909341SAndroid Build Coastguard Worker    psrld               m11, 10 ; pd_2
6335*c0909341SAndroid Build Coastguard Worker    IDCT32_END            0, 15, 8, 9, 10, 2, 0
6336*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*16], m8
6337*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*31], m9
6338*c0909341SAndroid Build Coastguard Worker    IDCT32_END            1, 14, 8, 9, 10, 2, 0
6339*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*17], m8
6340*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*30], m9
; m14 is reused as the second operand of the next invocations, so its current
; value is parked in the coefficient buffer (slots 14, 13, 12) each time.
6341*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*14], m14
6342*c0909341SAndroid Build Coastguard Worker    IDCT32_END            2, 14, 8, 9, 10, 2, 0
6343*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*18], m8
6344*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*29], m9
6345*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*13], m14
6346*c0909341SAndroid Build Coastguard Worker    IDCT32_END            3, 14, 8, 9, 10, 2, 0
6347*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*19], m8
6348*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*28], m9
6349*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*12], m14
6350*c0909341SAndroid Build Coastguard Worker    IDCT32_END            4, 14, 8, 9, 10, 2, 0
6351*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*20], m8
6352*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*27], m9
; Spill m0-m2 so they can serve as the temporaries of the remaining three
; invocations (whose second operands are m10, m9 and m8 themselves).
6353*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 0], m0
6354*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 1], m1
6355*c0909341SAndroid Build Coastguard Worker    mova         [cq+32* 2], m2
6356*c0909341SAndroid Build Coastguard Worker    IDCT32_END            5, 10, 0, 1, 2, 2, 0
6357*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*21], m0
6358*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*26], m1
6359*c0909341SAndroid Build Coastguard Worker    IDCT32_END            6, 9, 0, 1, 2, 2, 0
6360*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*22], m0
6361*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*25], m1
6362*c0909341SAndroid Build Coastguard Worker    IDCT32_END            7, 8, 0, 1, 2, 2, 0
6363*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*23], m0
6364*c0909341SAndroid Build Coastguard Worker    mova         [cq+32*24], m1
; Restore the spilled m0-m2.
6365*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+32* 0]
6366*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+32* 1]
6367*c0909341SAndroid Build Coastguard Worker    mova                 m2, [cq+32* 2]
; Move the last m14 value to m11, then reload the three parked values so that
; m11..m14 hold four consecutive results.
6368*c0909341SAndroid Build Coastguard Worker    mova                m11, m14
6369*c0909341SAndroid Build Coastguard Worker    mova                m12, [cq+32*12]
6370*c0909341SAndroid Build Coastguard Worker    mova                m13, [cq+32*13]
6371*c0909341SAndroid Build Coastguard Worker    mova                m14, [cq+32*14]
6372*c0909341SAndroid Build Coastguard Worker    ret
6373*c0909341SAndroid Build Coastguard Worker
; 12bpc entry point for the identity/identity 8x32 inverse transform + add.
; Arguments: dst, stride, c, eob.
; Only the constant in m7 differs from the 10bpc variant: it is loaded with a
; broadcast of pixel_12bpc_max, after which control transfers to the shared
; 10bpc code at its .pass1 label (tail jump, no return to this function).
6374*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_identity_identity_8x32_12bpc, 4, 7, 8, dst, stride, c, eob
6375*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m7, [pixel_12bpc_max]
6376*c0909341SAndroid Build Coastguard Worker    jmp m(inv_txfm_add_identity_identity_8x32_10bpc).pass1
6377*c0909341SAndroid Build Coastguard Worker
; 10bpc DCT/DCT 32x8 inverse transform + add.
; Arguments: dst, stride, c, eob.
;   eob == 0 : DC-only shortcut (.dconly / .dconly2 / .dconly_loop); a single
;              value derived from c[0] is added to 8 rows of 64 bytes each.
;   eob != 0 : .full - first pass via .pass1 + the 8x32 .main_end helper, then
;              two calls to .pass2, the second one at dst+32 bytes.
; .dconly is also a jump target for the 12bpc variant, which enters with its
; own constant in m3.
6378*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_dct_dct_32x8_10bpc, 4, 7, 0, dst, stride, c, eob
6379*c0909341SAndroid Build Coastguard Worker    test               eobd, eobd
6380*c0909341SAndroid Build Coastguard Worker    jnz .full
; DC-only: r6d = c[0] * 181, and the coefficient is cleared (eobd is 0 here).
6381*c0909341SAndroid Build Coastguard Worker    imul                r6d, [cq], 181
6382*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m3, [dconly_10bpc]
6383*c0909341SAndroid Build Coastguard Worker    mov                [cq], eobd ; 0
; r3 is the eob argument register under x86inc's r0..rN argument numbering and
; eob is zero on this path, so this leaves r3d = 8: the row counter of the loop.
6384*c0909341SAndroid Build Coastguard Worker    or                  r3d, 8
6385*c0909341SAndroid Build Coastguard Worker.dconly:
; r6d = (r6d + 640) >> 10
6386*c0909341SAndroid Build Coastguard Worker    add                 r6d, 640
6387*c0909341SAndroid Build Coastguard Worker    sar                 r6d, 10
6388*c0909341SAndroid Build Coastguard Worker.dconly2:
; r6d = (r6d * 181 + 2176) >> 12, then broadcast as a word after a saturating
; add of the constant in m3.
6389*c0909341SAndroid Build Coastguard Worker    imul                r6d, 181
6390*c0909341SAndroid Build Coastguard Worker    add                 r6d, 2176
6391*c0909341SAndroid Build Coastguard Worker    sar                 r6d, 12
6392*c0909341SAndroid Build Coastguard Worker    movd                xm0, r6d
6393*c0909341SAndroid Build Coastguard Worker    paddsw              xm0, xm3
6394*c0909341SAndroid Build Coastguard Worker    vpbroadcastw         m0, xm0
6395*c0909341SAndroid Build Coastguard Worker.dconly_loop:
; One row per iteration: two 32-byte vectors of dst get m0 added with signed
; saturation, then m3 subtracted with unsigned saturation.
; NOTE(review): the bias-add / unsigned-subtract pair presumably implements the
; clamp to the valid pixel range - confirm against the dconly_10bpc constant.
6396*c0909341SAndroid Build Coastguard Worker    paddsw               m1, m0, [dstq+32*0]
6397*c0909341SAndroid Build Coastguard Worker    paddsw               m2, m0, [dstq+32*1]
6398*c0909341SAndroid Build Coastguard Worker    psubusw              m1, m3
6399*c0909341SAndroid Build Coastguard Worker    psubusw              m2, m3
6400*c0909341SAndroid Build Coastguard Worker    mova        [dstq+32*0], m1
6401*c0909341SAndroid Build Coastguard Worker    mova        [dstq+32*1], m2
6402*c0909341SAndroid Build Coastguard Worker    add                dstq, strideq
6403*c0909341SAndroid Build Coastguard Worker    dec                 r3d
6404*c0909341SAndroid Build Coastguard Worker    jg .dconly_loop
6405*c0909341SAndroid Build Coastguard Worker    RET
6406*c0909341SAndroid Build Coastguard Worker.full:
; Full transform: the prologue is re-issued here requesting 16 vector registers
; and 32*24 bytes of stack, which the DC-only path above does not need.
6407*c0909341SAndroid Build Coastguard Worker    PROLOGUE              0, 7, 16, 32*24, dst, stride, c, eob
; r6 points into the stack area; presumably used as scratch by the called
; helpers - TODO confirm.
6408*c0909341SAndroid Build Coastguard Worker    lea                  r6, [rsp+32*4]
6409*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [clip_18b_min]
6410*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_18b_max]
6411*c0909341SAndroid Build Coastguard Worker    call .pass1
6412*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_8x32_10bpc).main_end
; Second pass setup: r6 is repointed at deint_shuf+128, m11 = pw_2048 (the
; pmulhrsw multiplier used in .pass2), and dstq is saved in r4.
6413*c0909341SAndroid Build Coastguard Worker    lea                  r6, [deint_shuf+128]
6414*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m11, [pw_2048]
6415*c0909341SAndroid Build Coastguard Worker    mov                  r4, dstq
6416*c0909341SAndroid Build Coastguard Worker    call .pass2
; Reload the remaining first-pass output from r5-relative slots.
; NOTE(review): r5 is not written in this block; it is presumably left pointing
; at this data by one of the helpers called above - confirm.
6417*c0909341SAndroid Build Coastguard Worker    mova                 m0, [r5+32*3] ; 16 17
6418*c0909341SAndroid Build Coastguard Worker    mova                 m1, [r5+32*2] ; 30 31
6419*c0909341SAndroid Build Coastguard Worker    mova                 m2, [r5+32*1] ; 18 19
6420*c0909341SAndroid Build Coastguard Worker    mova                 m3, [r5+32*0] ; 28 29
6421*c0909341SAndroid Build Coastguard Worker    mova                 m4, [r5-32*1] ; 20 21
6422*c0909341SAndroid Build Coastguard Worker    mova                 m5, [r5-32*2] ; 26 27
6423*c0909341SAndroid Build Coastguard Worker    mova                 m6, [r5-32*3] ; 22 23
6424*c0909341SAndroid Build Coastguard Worker    mova                 m7, [r5-32*4] ; 24 25
6425*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_8x32_10bpc).transpose
; Second .pass2 call writes 32 bytes to the right of the original dst.
6426*c0909341SAndroid Build Coastguard Worker    lea                dstq, [r4+32]
6427*c0909341SAndroid Build Coastguard Worker    call .pass2
6428*c0909341SAndroid Build Coastguard Worker    RET
6429*c0909341SAndroid Build Coastguard WorkerALIGN function_align
6430*c0909341SAndroid Build Coastguard Worker.pass2:
; Second-pass helper: runs the 8bpc idct_16x8 main routine, scales m0-m3 by m11
; (pmulhrsw) and hands them to write_16x4_start; then m4-m7 are scaled into
; m0-m3 and written by write_16x4_zero, reached by a tail jump (its ret returns
; to this helper's caller).
6431*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_8bpc).main
6432*c0909341SAndroid Build Coastguard Worker    REPX  {pmulhrsw x, m11}, m0, m1, m2, m3
6433*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_10bpc).write_16x4_start
6434*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m11, m4
6435*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m11, m5
6436*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m2, m11, m6
6437*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m3, m11, m7
6438*c0909341SAndroid Build Coastguard Worker    jmp m(idct_16x8_internal_10bpc).write_16x4_zero
6439*c0909341SAndroid Build Coastguard WorkerALIGN function_align
6440*c0909341SAndroid Build Coastguard Worker.pass1:
; First-pass helper (also called by the 12bpc variant). The 32 coefficient
; vectors at cq+32*N are fed to four helpers, grouped by index N:
;   1, 7, 9, 15, 17, 23, 25, 31  -> 8x32 main_oddhalf_part1
;   3, 5, 11, 13, 19, 21, 27, 29 -> 8x32 main_oddhalf_part2
;   2, 6, 10, ..., 30            -> idct_8x16 main_oddhalf
;   0, 4, 8, ..., 28             -> idct_8x8 main, then idct_8x16 main_evenhalf
; m11 = pd_2048 and m14 = pd_2896 are set up before the first helper call;
; m12/m13 are not written here, so each caller's own clip constants stay in use.
6441*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+32* 1]
6442*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+32* 7]
6443*c0909341SAndroid Build Coastguard Worker    mova                 m2, [cq+32* 9]
6444*c0909341SAndroid Build Coastguard Worker    mova                 m3, [cq+32*15]
6445*c0909341SAndroid Build Coastguard Worker    mova                 m4, [cq+32*17]
6446*c0909341SAndroid Build Coastguard Worker    mova                 m5, [cq+32*23]
6447*c0909341SAndroid Build Coastguard Worker    mova                 m6, [cq+32*25]
6448*c0909341SAndroid Build Coastguard Worker    mova                 m7, [cq+32*31]
6449*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m11, [pd_2048]
6450*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m14, [pd_2896]
6451*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_8x32_10bpc).main_oddhalf_part1
6452*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+32* 3]
6453*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+32* 5]
6454*c0909341SAndroid Build Coastguard Worker    mova                 m2, [cq+32*11]
6455*c0909341SAndroid Build Coastguard Worker    mova                 m3, [cq+32*13]
6456*c0909341SAndroid Build Coastguard Worker    mova                 m4, [cq+32*19]
6457*c0909341SAndroid Build Coastguard Worker    mova                 m5, [cq+32*21]
6458*c0909341SAndroid Build Coastguard Worker    mova                 m6, [cq+32*27]
6459*c0909341SAndroid Build Coastguard Worker    mova                 m7, [cq+32*29]
6460*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_8x32_10bpc).main_oddhalf_part2
6461*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+32* 2]
6462*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+32* 6]
6463*c0909341SAndroid Build Coastguard Worker    mova                 m2, [cq+32*10]
6464*c0909341SAndroid Build Coastguard Worker    mova                 m3, [cq+32*14]
6465*c0909341SAndroid Build Coastguard Worker    mova                 m4, [cq+32*18]
6466*c0909341SAndroid Build Coastguard Worker    mova                 m5, [cq+32*22]
6467*c0909341SAndroid Build Coastguard Worker    mova                 m6, [cq+32*26]
6468*c0909341SAndroid Build Coastguard Worker    mova                 m7, [cq+32*30]
6469*c0909341SAndroid Build Coastguard Worker    call m(idct_8x16_internal_10bpc).main_oddhalf
6470*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+32* 0]
6471*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+32* 4]
6472*c0909341SAndroid Build Coastguard Worker    mova                 m2, [cq+32* 8]
6473*c0909341SAndroid Build Coastguard Worker    mova                 m3, [cq+32*12]
6474*c0909341SAndroid Build Coastguard Worker    mova                 m4, [cq+32*16]
6475*c0909341SAndroid Build Coastguard Worker    mova                 m5, [cq+32*20]
6476*c0909341SAndroid Build Coastguard Worker    mova                 m6, [cq+32*24]
6477*c0909341SAndroid Build Coastguard Worker    mova                 m7, [cq+32*28]
6478*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).main
6479*c0909341SAndroid Build Coastguard Worker    call m(idct_8x16_internal_10bpc).main_evenhalf
6480*c0909341SAndroid Build Coastguard Worker    ret
6481*c0909341SAndroid Build Coastguard Worker
; 10bpc identity/identity 32x8 inverse transform + add.
; Arguments: dst, stride, c, eob.
; m7 is loaded with a broadcast of pixel_10bpc_max; the 12bpc variant loads its
; own maximum into m7 and then jumps to .pass1, so everything from .pass1 on is
; shared between both bit depths.
6482*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_identity_identity_32x8_10bpc, 4, 7, 8, dst, stride, c, eob
6483*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m7, [pixel_10bpc_max]
6484*c0909341SAndroid Build Coastguard Worker.pass1:
; m5 = pw_4096 (pmulhrsw multiplier), m6 = 0 (used to clear consumed coefs).
6485*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m5, [pw_4096]
6486*c0909341SAndroid Build Coastguard Worker    pxor                 m6, m6
; Bias eob: add 21 to its low byte; if that byte addition carries out, cmovc
; restores the original eob saved in r6d. The result drives the loop count.
6487*c0909341SAndroid Build Coastguard Worker    mov                 r6d, eobd
6488*c0909341SAndroid Build Coastguard Worker    add                eobb, 21
6489*c0909341SAndroid Build Coastguard Worker    cmovc              eobd, r6d
; Stride multiples (x3, x5, x7) are precomputed here; they are not used in this
; block, so they are presumably consumed by the shared .main helper - TODO confirm.
6490*c0909341SAndroid Build Coastguard Worker    lea                  r6, [strideq*3]
6491*c0909341SAndroid Build Coastguard Worker    lea                  r5, [strideq*5]
6492*c0909341SAndroid Build Coastguard Worker    lea                  r4, [strideq+r6*2] ; strideq*7
6493*c0909341SAndroid Build Coastguard Worker.loop:
; Each iteration consumes eight 32-byte coefficient vectors: pairs are packed
; from dwords to words with signed saturation into m0-m3, the source memory is
; zeroed from m6, and the packed values are scaled with pmulhrsw by m5.
6494*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+32*0]
6495*c0909341SAndroid Build Coastguard Worker    packssdw             m0, [cq+32*1]
6496*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+32*2]
6497*c0909341SAndroid Build Coastguard Worker    packssdw             m1, [cq+32*3]
6498*c0909341SAndroid Build Coastguard Worker    REPX {mova [cq+32*x], m6}, 0, 1, 2, 3
; cq is advanced first, so the second half is addressed with negative offsets.
6499*c0909341SAndroid Build Coastguard Worker    add                  cq, 32*8
6500*c0909341SAndroid Build Coastguard Worker    mova                 m2, [cq-32*4]
6501*c0909341SAndroid Build Coastguard Worker    packssdw             m2, [cq-32*3]
6502*c0909341SAndroid Build Coastguard Worker    mova                 m3, [cq-32*2]
6503*c0909341SAndroid Build Coastguard Worker    packssdw             m3, [cq-32*1]
6504*c0909341SAndroid Build Coastguard Worker    REPX   {pmulhrsw x, m5}, m0, m1, m2, m3
6505*c0909341SAndroid Build Coastguard Worker    REPX {mova [cq+32*x], m6}, -4, -3, -2, -1
6506*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_identity_identity_8x32_10bpc).main
; Move 16 bytes to the right in dst and continue while eobd - 64 stays >= 0.
6507*c0909341SAndroid Build Coastguard Worker    add                dstq, 16
6508*c0909341SAndroid Build Coastguard Worker    sub                eobd, 64
6509*c0909341SAndroid Build Coastguard Worker    jge .loop
6510*c0909341SAndroid Build Coastguard Worker    RET
6511*c0909341SAndroid Build Coastguard Worker
; 12bpc DCT/DCT 32x8 inverse transform + add.
; Arguments: dst, stride, c, eob.
;   eob == 0 : DC-only; same setup as the 10bpc variant but with dconly_12bpc
;              in m3, then a jump into the shared 10bpc .dconly code.
;   eob != 0 : .full - shares the 10bpc .pass1 helper, but uses the 20-bit clip
;              constants, the 12bpc 8x32 .main_end and the 12bpc idct_16x8
;              pass2_main for the second pass.
6512*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_dct_dct_32x8_12bpc, 4, 7, 0, dst, stride, c, eob
6513*c0909341SAndroid Build Coastguard Worker    test               eobd, eobd
6514*c0909341SAndroid Build Coastguard Worker    jnz .full
6515*c0909341SAndroid Build Coastguard Worker    imul                r6d, [cq], 181
6516*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m3, [dconly_12bpc]
6517*c0909341SAndroid Build Coastguard Worker    mov                [cq], eobd ; 0
; eob is zero on this path, so r3d (the eob register under x86inc argument
; numbering) becomes 8, the row counter expected by the shared DC-only loop.
6518*c0909341SAndroid Build Coastguard Worker    or                  r3d, 8
6519*c0909341SAndroid Build Coastguard Worker    jmp m(inv_txfm_add_dct_dct_32x8_10bpc).dconly
6520*c0909341SAndroid Build Coastguard Worker.full:
; Re-issue the prologue with 16 vector registers and 32*24 bytes of stack.
6521*c0909341SAndroid Build Coastguard Worker    PROLOGUE              0, 7, 16, 32*24, dst, stride, c, eob
6522*c0909341SAndroid Build Coastguard Worker    lea                  r6, [rsp+32*4]
6523*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [clip_20b_min]
6524*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_20b_max]
6525*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_32x8_10bpc).pass1
6526*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_8x32_12bpc).main_end
; dstq is saved in r4 so the second half can be written at dst+32 bytes below.
6527*c0909341SAndroid Build Coastguard Worker    mov                  r4, dstq
6528*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_12bpc).pass2_main
; Load m0-m15 from the first 16 vectors of the coefficient buffer; the existing
; notes label them as rows 16-31 of the first-pass output.
6529*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+32* 0] ; 16
6530*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+32* 1] ; 17
6531*c0909341SAndroid Build Coastguard Worker    mova                 m2, [cq+32* 2] ; 18
6532*c0909341SAndroid Build Coastguard Worker    mova                 m3, [cq+32* 3] ; 19
6533*c0909341SAndroid Build Coastguard Worker    mova                 m4, [cq+32* 4] ; 20
6534*c0909341SAndroid Build Coastguard Worker    mova                 m5, [cq+32* 5] ; 21
6535*c0909341SAndroid Build Coastguard Worker    mova                 m6, [cq+32* 6] ; 22
6536*c0909341SAndroid Build Coastguard Worker    mova                 m7, [cq+32* 7] ; 23
6537*c0909341SAndroid Build Coastguard Worker    mova                 m8, [cq+32* 8] ; 24
6538*c0909341SAndroid Build Coastguard Worker    mova                 m9, [cq+32* 9] ; 25
6539*c0909341SAndroid Build Coastguard Worker    mova                m10, [cq+32*10] ; 26
6540*c0909341SAndroid Build Coastguard Worker    mova                m11, [cq+32*11] ; 27
6541*c0909341SAndroid Build Coastguard Worker    mova                m12, [cq+32*12] ; 28
6542*c0909341SAndroid Build Coastguard Worker    mova                m13, [cq+32*13] ; 29
6543*c0909341SAndroid Build Coastguard Worker    mova                m14, [cq+32*14] ; 30
6544*c0909341SAndroid Build Coastguard Worker    mova                m15, [cq+32*15] ; 31
6545*c0909341SAndroid Build Coastguard Worker    lea                dstq, [r4+32]
6546*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_12bpc).pass2_main
6547*c0909341SAndroid Build Coastguard Worker    RET
6548*c0909341SAndroid Build Coastguard Worker
; 12bpc entry point for the identity/identity 32x8 inverse transform + add.
; Arguments: dst, stride, c, eob.
; Loads a broadcast of pixel_12bpc_max into m7 (the 10bpc variant loads
; pixel_10bpc_max instead) and tail-jumps into the shared 10bpc .pass1 code.
6549*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_identity_identity_32x8_12bpc, 4, 7, 8, dst, stride, c, eob
6550*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m7, [pixel_12bpc_max]
6551*c0909341SAndroid Build Coastguard Worker    jmp m(inv_txfm_add_identity_identity_32x8_10bpc).pass1
6552*c0909341SAndroid Build Coastguard Worker
; IDCT32_PASS2_END: final butterfly + add-to-destination step for one pair of rows.
;   %1 = register number holding the first coefficient (overwritten with the difference row)
;   %2 = memory address of the second coefficient
;   %3 = register number receiving the sum row
;   %4 = register number used as a temporary for the value loaded from [%2]
;   %5 = byte offset from dstq for the sum row
;   %6 = byte offset from r2 for the difference row
; Register expectations taken from the body: m15 is the pmulhrsw multiplier,
; m6 the lower clamp bound and m7 the upper clamp bound.
; NOTE(review): r2 must hold a destination pointer set up by the caller; that
; setup is not visible in this part of the file.
6553*c0909341SAndroid Build Coastguard Worker%macro IDCT32_PASS2_END 6 ; coefs[1-2], tmp[1-2], offset[1-2]
6554*c0909341SAndroid Build Coastguard Worker    mova                m%4, [%2]
; Saturating sum into m%3 and saturating difference back into m%1.
6555*c0909341SAndroid Build Coastguard Worker    paddsw              m%3, m%1, m%4
6556*c0909341SAndroid Build Coastguard Worker    psubsw              m%1, m%4
; Only the invocation whose first coefficient register is m0 zeroes m6, which
; then serves as the lower clamp bound below.
6557*c0909341SAndroid Build Coastguard Worker%if %1 == 0
6558*c0909341SAndroid Build Coastguard Worker    pxor                 m6, m6
6559*c0909341SAndroid Build Coastguard Worker%endif
6560*c0909341SAndroid Build Coastguard Worker    pmulhrsw            m%3, m15
6561*c0909341SAndroid Build Coastguard Worker    pmulhrsw            m%1, m15
; Add the existing destination words, clamp to [m6, m7] and store back.
6562*c0909341SAndroid Build Coastguard Worker    paddw               m%3, [dstq+%5]
6563*c0909341SAndroid Build Coastguard Worker    paddw               m%1, [r2+%6]
6564*c0909341SAndroid Build Coastguard Worker    pmaxsw              m%3, m6
6565*c0909341SAndroid Build Coastguard Worker    pmaxsw              m%1, m6
6566*c0909341SAndroid Build Coastguard Worker    pminsw              m%3, m7
6567*c0909341SAndroid Build Coastguard Worker    pminsw              m%1, m7
6568*c0909341SAndroid Build Coastguard Worker    mova          [dstq+%5], m%3
6569*c0909341SAndroid Build Coastguard Worker    mova            [r2+%6], m%1
6570*c0909341SAndroid Build Coastguard Worker%endmacro
6571*c0909341SAndroid Build Coastguard Worker
6572*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_dct_dct_16x32_10bpc, 4, 7, 0, dst, stride, c, eob
6573*c0909341SAndroid Build Coastguard Worker    test               eobd, eobd
6574*c0909341SAndroid Build Coastguard Worker    jz .dconly
6575*c0909341SAndroid Build Coastguard Worker    PROLOGUE              0, 8, 16, 32*36, dst, stride, c, eob
6576*c0909341SAndroid Build Coastguard Worker%undef cmp
6577*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m11, [pd_2048]
6578*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [clip_18b_min]
6579*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_18b_max]
6580*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m14, [pd_2896]
6581*c0909341SAndroid Build Coastguard Worker    lea                  r6, [rsp+32*16]
6582*c0909341SAndroid Build Coastguard Worker    lea                  r4, [r6+32*8]
6583*c0909341SAndroid Build Coastguard Worker    lea                  r5, [r6+32*16]
6584*c0909341SAndroid Build Coastguard Worker    call .main
6585*c0909341SAndroid Build Coastguard Worker    sub                eobd, 44
6586*c0909341SAndroid Build Coastguard Worker    jge .eob44
6587*c0909341SAndroid Build Coastguard Worker    vperm2i128           m2, m0, m3, 0x31 ;  5
6588*c0909341SAndroid Build Coastguard Worker    vinserti128          m0, xm3, 1       ;  1
6589*c0909341SAndroid Build Coastguard Worker    vperm2i128           m3, m1, m4, 0x31 ;  7
6590*c0909341SAndroid Build Coastguard Worker    vinserti128          m1, xm4, 1       ;  3
6591*c0909341SAndroid Build Coastguard Worker    pxor                 m4, m4
6592*c0909341SAndroid Build Coastguard Worker    REPX       {mova x, m4}, m5, m6, m7
6593*c0909341SAndroid Build Coastguard Worker    REPX {mova [r6+32*x], m4}, 0, 1, 2, 3
6594*c0909341SAndroid Build Coastguard Worker    jmp .fast
6595*c0909341SAndroid Build Coastguard Worker.dconly:
6596*c0909341SAndroid Build Coastguard Worker    imul                r6d, [cq], 181
6597*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m3, [dconly_10bpc]
6598*c0909341SAndroid Build Coastguard Worker    mov                [cq], eobd ; 0
6599*c0909341SAndroid Build Coastguard Worker    or                  r3d, 32
6600*c0909341SAndroid Build Coastguard Worker    add                 r6d, 128
6601*c0909341SAndroid Build Coastguard Worker    sar                 r6d, 8
6602*c0909341SAndroid Build Coastguard Worker    imul                r6d, 181
6603*c0909341SAndroid Build Coastguard Worker    jmp m(inv_txfm_add_dct_dct_16x4_10bpc).dconly2
6604*c0909341SAndroid Build Coastguard Worker.eob44:
6605*c0909341SAndroid Build Coastguard Worker    mova          [r4+16*0], xm0
6606*c0909341SAndroid Build Coastguard Worker    mova          [r4+16*1], xm3
6607*c0909341SAndroid Build Coastguard Worker    mova          [r4+16*2], xm1
6608*c0909341SAndroid Build Coastguard Worker    mova          [r4+16*3], xm4
6609*c0909341SAndroid Build Coastguard Worker    vextracti128  [r4+16*4], m0, 1
6610*c0909341SAndroid Build Coastguard Worker    vextracti128  [r4+16*5], m3, 1
6611*c0909341SAndroid Build Coastguard Worker    vextracti128  [r4+16*6], m1, 1
6612*c0909341SAndroid Build Coastguard Worker    vextracti128  [r4+16*7], m4, 1
6613*c0909341SAndroid Build Coastguard Worker    call .main
6614*c0909341SAndroid Build Coastguard Worker    sub                eobd, 107
6615*c0909341SAndroid Build Coastguard Worker    jge .eob151
6616*c0909341SAndroid Build Coastguard Worker    vperm2i128           m7, m1, m4, 0x31 ; 15
6617*c0909341SAndroid Build Coastguard Worker    vinserti128          m5, m1, xm4, 1   ; 11
6618*c0909341SAndroid Build Coastguard Worker    vperm2i128           m6, m0, m3, 0x31 ; 13
6619*c0909341SAndroid Build Coastguard Worker    vinserti128          m4, m0, xm3, 1   ;  9
6620*c0909341SAndroid Build Coastguard Worker    mova                 m0, [r4+32*0]
6621*c0909341SAndroid Build Coastguard Worker    mova                 m1, [r4+32*1]
6622*c0909341SAndroid Build Coastguard Worker    mova                 m2, [r4+32*2]
6623*c0909341SAndroid Build Coastguard Worker    mova                 m3, [r4+32*3]
6624*c0909341SAndroid Build Coastguard Worker.fast:
6625*c0909341SAndroid Build Coastguard Worker    lea                  r6, [pw_5+128]
6626*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_16x32_8bpc).main_oddhalf_fast
6627*c0909341SAndroid Build Coastguard Worker    pxor                 m8, m8
6628*c0909341SAndroid Build Coastguard Worker    REPX       {mova x, m8}, m9, m10, m11, m12, m13, m14, m15
6629*c0909341SAndroid Build Coastguard Worker    jmp .idct16
6630*c0909341SAndroid Build Coastguard Worker.eob151:
6631*c0909341SAndroid Build Coastguard Worker    mova          [r4-16*8], xm0
6632*c0909341SAndroid Build Coastguard Worker    mova          [r4-16*7], xm3
6633*c0909341SAndroid Build Coastguard Worker    mova          [r4-16*6], xm1
6634*c0909341SAndroid Build Coastguard Worker    mova          [r4-16*5], xm4
6635*c0909341SAndroid Build Coastguard Worker    vextracti128  [r4-16*4], m0, 1
6636*c0909341SAndroid Build Coastguard Worker    vextracti128  [r4-16*3], m3, 1
6637*c0909341SAndroid Build Coastguard Worker    vextracti128  [r4-16*2], m1, 1
6638*c0909341SAndroid Build Coastguard Worker    vextracti128  [r4-16*1], m4, 1
6639*c0909341SAndroid Build Coastguard Worker    call .main
6640*c0909341SAndroid Build Coastguard Worker    sub                eobd, 128
6641*c0909341SAndroid Build Coastguard Worker    jge .eob279
6642*c0909341SAndroid Build Coastguard Worker    vperm2i128          m10, m0, m3, 0x31 ; 21
6643*c0909341SAndroid Build Coastguard Worker    vinserti128          m8, m0, xm3, 1   ; 17
6644*c0909341SAndroid Build Coastguard Worker    vperm2i128          m11, m1, m4, 0x31 ; 23
6645*c0909341SAndroid Build Coastguard Worker    vinserti128          m9, m1, xm4, 1   ; 19
6646*c0909341SAndroid Build Coastguard Worker    pxor                m12, m12
6647*c0909341SAndroid Build Coastguard Worker    REPX      {mova x, m12}, m13, m14, m15
6648*c0909341SAndroid Build Coastguard Worker    REPX {mova [r6+32*x], m12}, 0, 1, 2, 3
6649*c0909341SAndroid Build Coastguard Worker    jmp .full
6650*c0909341SAndroid Build Coastguard Worker.eob279:
6651*c0909341SAndroid Build Coastguard Worker    mova          [r5+16*0], xm0
6652*c0909341SAndroid Build Coastguard Worker    mova          [r5+16*1], xm3
6653*c0909341SAndroid Build Coastguard Worker    mova          [r5+16*2], xm1
6654*c0909341SAndroid Build Coastguard Worker    mova          [r5+16*3], xm4
6655*c0909341SAndroid Build Coastguard Worker    vextracti128  [r5+16*4], m0, 1
6656*c0909341SAndroid Build Coastguard Worker    vextracti128  [r5+16*5], m3, 1
6657*c0909341SAndroid Build Coastguard Worker    vextracti128  [r5+16*6], m1, 1
6658*c0909341SAndroid Build Coastguard Worker    vextracti128  [r5+16*7], m4, 1
6659*c0909341SAndroid Build Coastguard Worker    call .main
6660*c0909341SAndroid Build Coastguard Worker    vperm2i128          m14, m0, m3, 0x31 ; 29
6661*c0909341SAndroid Build Coastguard Worker    vinserti128         m12, m0, xm3, 1   ; 25
6662*c0909341SAndroid Build Coastguard Worker    vperm2i128          m15, m1, m4, 0x31 ; 31
6663*c0909341SAndroid Build Coastguard Worker    vinserti128         m13, m1, xm4, 1   ; 27
6664*c0909341SAndroid Build Coastguard Worker    mova                 m8, [r5+32*0]
6665*c0909341SAndroid Build Coastguard Worker    mova                 m9, [r5+32*1]
6666*c0909341SAndroid Build Coastguard Worker    mova                m10, [r5+32*2]
6667*c0909341SAndroid Build Coastguard Worker    mova                m11, [r5+32*3]
6668*c0909341SAndroid Build Coastguard Worker.full:
6669*c0909341SAndroid Build Coastguard Worker    mova                 m0, [r4+32*0]
6670*c0909341SAndroid Build Coastguard Worker    mova                 m1, [r4+32*1]
6671*c0909341SAndroid Build Coastguard Worker    mova                 m2, [r4+32*2]
6672*c0909341SAndroid Build Coastguard Worker    mova                 m3, [r4+32*3]
6673*c0909341SAndroid Build Coastguard Worker    mova                 m4, [r4-32*4]
6674*c0909341SAndroid Build Coastguard Worker    mova                 m5, [r4-32*3]
6675*c0909341SAndroid Build Coastguard Worker    mova                 m6, [r4-32*2]
6676*c0909341SAndroid Build Coastguard Worker    mova                 m7, [r4-32*1]
6677*c0909341SAndroid Build Coastguard Worker    lea                  r6, [pw_5 + 128]
6678*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_16x32_8bpc).main_oddhalf
6679*c0909341SAndroid Build Coastguard Worker    lea                  r3, [rsp+32*8]
6680*c0909341SAndroid Build Coastguard Worker    mova                 m8, [r3+32*0]
6681*c0909341SAndroid Build Coastguard Worker    mova                 m9, [r3+32*1]
6682*c0909341SAndroid Build Coastguard Worker    mova                m10, [r3+32*2]
6683*c0909341SAndroid Build Coastguard Worker    mova                m11, [r3+32*3]
6684*c0909341SAndroid Build Coastguard Worker    mova                m12, [r3-32*4]
6685*c0909341SAndroid Build Coastguard Worker    mova                m13, [r3-32*3]
6686*c0909341SAndroid Build Coastguard Worker    mova                m14, [r3-32*2]
6687*c0909341SAndroid Build Coastguard Worker    mova                m15, [r3-32*1]
6688*c0909341SAndroid Build Coastguard Worker.idct16:
6689*c0909341SAndroid Build Coastguard Worker    lea                  r3, [rsp+32*16]
6690*c0909341SAndroid Build Coastguard Worker    mova                 m0, [r3+32*0]
6691*c0909341SAndroid Build Coastguard Worker    mova                 m1, [r3+32*1]
6692*c0909341SAndroid Build Coastguard Worker    mova                 m2, [r3+32*2]
6693*c0909341SAndroid Build Coastguard Worker    mova                 m3, [r3+32*3]
6694*c0909341SAndroid Build Coastguard Worker    mova                 m4, [r3-32*4]
6695*c0909341SAndroid Build Coastguard Worker    mova                 m5, [r3-32*3]
6696*c0909341SAndroid Build Coastguard Worker    mova                 m6, [r3-32*2]
6697*c0909341SAndroid Build Coastguard Worker    mova                 m7, [r3-32*1]
6698*c0909341SAndroid Build Coastguard Worker    mova              [rsp], m15
6699*c0909341SAndroid Build Coastguard Worker    call m(idct_16x16_internal_8bpc).main
6700*c0909341SAndroid Build Coastguard Worker    imul                 r2, strideq, 19
6701*c0909341SAndroid Build Coastguard Worker    lea                  r3, [strideq*3]
6702*c0909341SAndroid Build Coastguard Worker    add                  r2, dstq
6703*c0909341SAndroid Build Coastguard Worker    call .pass2_end
6704*c0909341SAndroid Build Coastguard Worker    RET
6705*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; .main -- local subroutine of the enclosing function (its cglobal header lies
; above this excerpt). Transforms one slice of the coefficient buffer, rounds
; and packs the 32-bit results to 16-bit words, transposes them, and clears the
; coefficients it consumed.
;
; Inputs, as used by the code below:
;   cq   coefficient pointer; 16 rows are read at a 128-byte pitch
;   m14  multiplier applied to every coefficient as it is loaded (set up by
;        the caller, not here)
;   m11  shifted right by 11 to form the rounding term; the existing comment
;        calls the result pd_1, so m11 presumably holds pd_2048 -- TODO confirm
;   r6   scratch pointer: [r6-32*4 .. r6+32*3] is read after the helper calls
;        (presumably filled by main_oddhalf_rect2 -- TODO confirm) and
;        [r6+16*0 .. r6+16*7] is written at the end
;
; Results:
;   [r6+16*0..3] / [r6+16*4..7]  low / high 128-bit halves of m5, m6, m7, m8
;   m0, m1, m3, m4               remaining four transposed vectors, left in
;                                registers for the caller to store or combine
;   cq += 32, r6 -= 32*4
;   rows 0..15 (32 bytes each) at the incoming cq are zeroed
;
; r7d, m2, m9, m10 and m15 are overwritten here; the called helpers are defined
; elsewhere and may clobber more.
6706*c0909341SAndroid Build Coastguard Worker.main:
    ; Odd-numbered rows, pre-scaled by m14, go to the odd-half helper.
6707*c0909341SAndroid Build Coastguard Worker    pmulld               m0, m14, [cq+128* 1]
6708*c0909341SAndroid Build Coastguard Worker    pmulld               m1, m14, [cq+128* 3]
6709*c0909341SAndroid Build Coastguard Worker    pmulld               m2, m14, [cq+128* 5]
6710*c0909341SAndroid Build Coastguard Worker    pmulld               m3, m14, [cq+128* 7]
6711*c0909341SAndroid Build Coastguard Worker    pmulld               m4, m14, [cq+128* 9]
6712*c0909341SAndroid Build Coastguard Worker    pmulld               m5, m14, [cq+128*11]
6713*c0909341SAndroid Build Coastguard Worker    pmulld               m6, m14, [cq+128*13]
6714*c0909341SAndroid Build Coastguard Worker    pmulld               m7, m14, [cq+128*15]
6715*c0909341SAndroid Build Coastguard Worker    call m(idct_8x16_internal_10bpc).main_oddhalf_rect2
    ; Even-numbered rows, pre-scaled the same way, go to the 8-point core and
    ; then the even-half helper.
6716*c0909341SAndroid Build Coastguard Worker    pmulld               m0, m14, [cq+128* 0]
6717*c0909341SAndroid Build Coastguard Worker    pmulld               m1, m14, [cq+128* 2]
6718*c0909341SAndroid Build Coastguard Worker    pmulld               m2, m14, [cq+128* 4]
6719*c0909341SAndroid Build Coastguard Worker    pmulld               m3, m14, [cq+128* 6]
6720*c0909341SAndroid Build Coastguard Worker    pmulld               m4, m14, [cq+128* 8]
6721*c0909341SAndroid Build Coastguard Worker    pmulld               m5, m14, [cq+128*10]
6722*c0909341SAndroid Build Coastguard Worker    pmulld               m6, m14, [cq+128*12]
6723*c0909341SAndroid Build Coastguard Worker    pmulld               m7, m14, [cq+128*14]
6724*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).main_rect2
6725*c0909341SAndroid Build Coastguard Worker    call m(idct_8x16_internal_10bpc).main_evenhalf
    ; Final butterfly: m0..m7 get the rounding term added once, then each is
    ; combined (sum and difference) with a vector loaded from the r6 buffer.
    ; Every result is arithmetically shifted right by 1 and pairs are packed to
    ; signed-saturated words. m8/m9 are reloaded two steps ahead of their use.
6726*c0909341SAndroid Build Coastguard Worker    psrld               m15, m11, 11 ; pd_1
6727*c0909341SAndroid Build Coastguard Worker    mova                 m8, [r6-32*4]
6728*c0909341SAndroid Build Coastguard Worker    mova                 m9, [r6-32*3]
6729*c0909341SAndroid Build Coastguard Worker    REPX     {paddd x, m15}, m0, m1, m2, m3, m4, m5, m6, m7
6730*c0909341SAndroid Build Coastguard Worker    psubd               m10, m0, m8 ; out15
6731*c0909341SAndroid Build Coastguard Worker    paddd                m0, m8     ; out0
6732*c0909341SAndroid Build Coastguard Worker    mova                 m8, [r6-32*2]
    ; m15 is free again from here on (rounding already applied) and is reused
    ; as a temporary for the odd-numbered outputs.
6733*c0909341SAndroid Build Coastguard Worker    paddd               m15, m1, m9 ; out1
6734*c0909341SAndroid Build Coastguard Worker    psubd                m1, m9     ; out14
6735*c0909341SAndroid Build Coastguard Worker    mova                 m9, [r6-32*1]
6736*c0909341SAndroid Build Coastguard Worker    REPX       {psrad x, 1}, m0, m15, m10, m1
6737*c0909341SAndroid Build Coastguard Worker    packssdw             m0, m15
6738*c0909341SAndroid Build Coastguard Worker    packssdw             m1, m10
6739*c0909341SAndroid Build Coastguard Worker    psubd               m10, m2, m8 ; out13
6740*c0909341SAndroid Build Coastguard Worker    paddd                m2, m8     ; out2
6741*c0909341SAndroid Build Coastguard Worker    mova                 m8, [r6+32*0]
6742*c0909341SAndroid Build Coastguard Worker    paddd               m15, m3, m9 ; out3
6743*c0909341SAndroid Build Coastguard Worker    psubd                m3, m9     ; out12
6744*c0909341SAndroid Build Coastguard Worker    mova                 m9, [r6+32*1]
6745*c0909341SAndroid Build Coastguard Worker    REPX       {psrad x, 1}, m2, m15, m10, m3
6746*c0909341SAndroid Build Coastguard Worker    packssdw             m2, m15
6747*c0909341SAndroid Build Coastguard Worker    packssdw             m3, m10
6748*c0909341SAndroid Build Coastguard Worker    psubd               m10, m4, m8 ; out11
6749*c0909341SAndroid Build Coastguard Worker    paddd                m4, m8     ; out4
6750*c0909341SAndroid Build Coastguard Worker    mova                 m8, [r6+32*2]
6751*c0909341SAndroid Build Coastguard Worker    paddd               m15, m5, m9 ; out5
6752*c0909341SAndroid Build Coastguard Worker    psubd                m5, m9     ; out10
6753*c0909341SAndroid Build Coastguard Worker    mova                 m9, [r6+32*3]
6754*c0909341SAndroid Build Coastguard Worker    REPX       {psrad x, 1}, m4, m10, m15, m5
6755*c0909341SAndroid Build Coastguard Worker    packssdw             m4, m15
6756*c0909341SAndroid Build Coastguard Worker    packssdw             m5, m10
6757*c0909341SAndroid Build Coastguard Worker    psubd               m10, m6, m8 ; out9
6758*c0909341SAndroid Build Coastguard Worker    paddd                m6, m8     ; out6
6759*c0909341SAndroid Build Coastguard Worker    paddd               m15, m7, m9 ; out7
6760*c0909341SAndroid Build Coastguard Worker    psubd                m7, m9     ; out8
6761*c0909341SAndroid Build Coastguard Worker    REPX       {psrad x, 1}, m6, m10, m15, m7
6762*c0909341SAndroid Build Coastguard Worker    packssdw             m6, m15
6763*c0909341SAndroid Build Coastguard Worker    packssdw             m7, m10
    ; First word-interleave stage of the transpose. m5 is consumed here so it
    ; can serve as the zero register for the clearing loop that follows.
6764*c0909341SAndroid Build Coastguard Worker    punpckhwd            m8, m0, m2
6765*c0909341SAndroid Build Coastguard Worker    punpcklwd            m0, m2
6766*c0909341SAndroid Build Coastguard Worker    punpckhwd            m2, m3, m1
6767*c0909341SAndroid Build Coastguard Worker    punpcklwd            m3, m1
6768*c0909341SAndroid Build Coastguard Worker    punpckhwd            m1, m4, m6
6769*c0909341SAndroid Build Coastguard Worker    punpcklwd            m4, m6
6770*c0909341SAndroid Build Coastguard Worker    punpcklwd            m6, m7, m5
6771*c0909341SAndroid Build Coastguard Worker    punpckhwd            m7, m5
6772*c0909341SAndroid Build Coastguard Worker    pxor                 m5, m5
    ; Clear the 16 rows just read. r7 takes the values 128*13, 128*9, 128*5 and
    ; 128*1; each pass zeroes four consecutive rows (r7-128 .. r7+256), so the
    ; loop runs four times and exits once the subtraction goes negative.
6773*c0909341SAndroid Build Coastguard Worker    mov                 r7d, 128*13
6774*c0909341SAndroid Build Coastguard Worker.main_zero_loop:
6775*c0909341SAndroid Build Coastguard Worker    mova      [cq+r7-128*1], m5
6776*c0909341SAndroid Build Coastguard Worker    mova      [cq+r7+128*0], m5
6777*c0909341SAndroid Build Coastguard Worker    mova      [cq+r7+128*1], m5
6778*c0909341SAndroid Build Coastguard Worker    mova      [cq+r7+128*2], m5
6779*c0909341SAndroid Build Coastguard Worker    sub                 r7d, 128*4
6780*c0909341SAndroid Build Coastguard Worker    jg .main_zero_loop
    ; Step cq by 32 bytes so the next call reads the adjacent slice of each row.
6781*c0909341SAndroid Build Coastguard Worker    add                  cq, 32
    ; Remaining word- and qword-interleave stages of the transpose.
6782*c0909341SAndroid Build Coastguard Worker    punpcklwd            m5, m3, m2
6783*c0909341SAndroid Build Coastguard Worker    punpckhwd            m3, m2
6784*c0909341SAndroid Build Coastguard Worker    punpcklwd            m2, m4, m1
6785*c0909341SAndroid Build Coastguard Worker    punpckhwd            m4, m1
6786*c0909341SAndroid Build Coastguard Worker    punpckhwd            m1, m0, m8
6787*c0909341SAndroid Build Coastguard Worker    punpcklwd            m0, m8
6788*c0909341SAndroid Build Coastguard Worker    punpckhwd            m8, m6, m7
6789*c0909341SAndroid Build Coastguard Worker    punpcklwd            m6, m7
6790*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m7, m1, m4
6791*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m1, m4
6792*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m4, m8, m3
6793*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m8, m3
6794*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m3, m6, m5
6795*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m6, m5
6796*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m5, m0, m2
6797*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m0, m2
    ; Store m5..m8 regrouped by lane: the four low 128-bit halves first, then
    ; the four high halves. m0, m1, m3, m4 stay in registers.
6798*c0909341SAndroid Build Coastguard Worker    mova          [r6+16*0], xm5
6799*c0909341SAndroid Build Coastguard Worker    mova          [r6+16*1], xm6
6800*c0909341SAndroid Build Coastguard Worker    mova          [r6+16*2], xm7
6801*c0909341SAndroid Build Coastguard Worker    mova          [r6+16*3], xm8
6802*c0909341SAndroid Build Coastguard Worker    vextracti128  [r6+16*4], m5, 1
6803*c0909341SAndroid Build Coastguard Worker    vextracti128  [r6+16*5], m6, 1
6804*c0909341SAndroid Build Coastguard Worker    vextracti128  [r6+16*6], m7, 1
6805*c0909341SAndroid Build Coastguard Worker    vextracti128  [r6+16*7], m8, 1
    ; Leave r6 pointing 32*4 bytes lower on return.
6806*c0909341SAndroid Build Coastguard Worker    sub                  r6, 32*4
6807*c0909341SAndroid Build Coastguard Worker    ret
6808*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; .pass2_end -- local subroutine that finishes the second pass by invoking the
; IDCT32_PASS2_END macro sixteen times, in four groups of four. The macro is
; defined elsewhere in the file; its exact effect is not visible here.
;
; Inputs, as used by the code below:
;   m0..m15            sixteen input vectors; m6, m7 and m15 are spilled first
;                      because m7 and m15 are reloaded with constants and m6 is
;                      named as an extra operand of the first macro invocation
;   [rsp+gprsize+32*1] read but never written here -- presumably left by the
;                      preceding transform call; TODO confirm
;   r4, r5             base pointers for the macro's memory operand
;   dstq, r2, strideq  dstq moves forward and r2 backward by one stride after
;                      each of the first three groups
;   r3                 the caller sets this to strideq*3, so r3*4 is 12 strides
;
; All offsets from rsp include gprsize because this code is reached by `call`,
; which has pushed a return address.
;
; The first macro argument runs 0,4,8,12 / 1,5,9,13 / 2,6,10,14 / 3,7,11,15
; across the four groups; 6, 7 and 15 are fed through m1/m2 after being
; reloaded from their spill slots.
6809*c0909341SAndroid Build Coastguard Worker.pass2_end:
6810*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize+32*0], m6
6811*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize+32*2], m7
6812*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize+32*3], m15
    ; Constants for the macro, named after the symbols they are loaded from.
6813*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m15, [pw_2048]
6814*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m7, [pixel_10bpc_max]
    ; Group 0: inputs 0, 4, 8, 12.
6815*c0909341SAndroid Build Coastguard Worker    IDCT32_PASS2_END      0, r5+32*3, 1, 6, strideq*0, r3*4
6816*c0909341SAndroid Build Coastguard Worker    IDCT32_PASS2_END      4, r5-32*1, 0, 1, strideq*4, strideq*8
6817*c0909341SAndroid Build Coastguard Worker    IDCT32_PASS2_END      8, r4+32*3, 0, 4, strideq*8, strideq*4
6818*c0909341SAndroid Build Coastguard Worker    IDCT32_PASS2_END     12, r4-32*1, 0, 4, r3*4,      strideq*0
6819*c0909341SAndroid Build Coastguard Worker    add                dstq, strideq
6820*c0909341SAndroid Build Coastguard Worker    sub                  r2, strideq
    ; Group 1: inputs 1, 5, 9, 13. m1 was used as an operand in group 0, so
    ; input 1 is fetched from stack slot 1.
6821*c0909341SAndroid Build Coastguard Worker    mova                 m1, [rsp+gprsize+32*1]
6822*c0909341SAndroid Build Coastguard Worker    IDCT32_PASS2_END      1, r5+32*2, 0, 4, strideq*0, r3*4
6823*c0909341SAndroid Build Coastguard Worker    IDCT32_PASS2_END      5, r5-32*2, 0, 4, strideq*4, strideq*8
6824*c0909341SAndroid Build Coastguard Worker    IDCT32_PASS2_END      9, r4+32*2, 0, 4, strideq*8, strideq*4
6825*c0909341SAndroid Build Coastguard Worker    IDCT32_PASS2_END     13, r4-32*2, 0, 4, r3*4,      strideq*0
6826*c0909341SAndroid Build Coastguard Worker    add                dstq, strideq
6827*c0909341SAndroid Build Coastguard Worker    sub                  r2, strideq
    ; Group 2: inputs 2, 6, 10, 14. The spilled m6 comes back in m1.
6828*c0909341SAndroid Build Coastguard Worker    mova                 m1, [rsp+gprsize+32*0]
6829*c0909341SAndroid Build Coastguard Worker    IDCT32_PASS2_END      2, r5+32*1, 0, 4, strideq*0, r3*4
6830*c0909341SAndroid Build Coastguard Worker    IDCT32_PASS2_END      1, r5-32*3, 0, 4, strideq*4, strideq*8
6831*c0909341SAndroid Build Coastguard Worker    IDCT32_PASS2_END     10, r4+32*1, 0, 4, strideq*8, strideq*4
6832*c0909341SAndroid Build Coastguard Worker    IDCT32_PASS2_END     14, r4-32*3, 0, 4, r3*4,      strideq*0
6833*c0909341SAndroid Build Coastguard Worker    add                dstq, strideq
6834*c0909341SAndroid Build Coastguard Worker    sub                  r2, strideq
    ; Group 3: inputs 3, 7, 11, 15. The spilled m7 and m15 come back in m1, m2.
6835*c0909341SAndroid Build Coastguard Worker    mova                 m1, [rsp+gprsize+32*2]
6836*c0909341SAndroid Build Coastguard Worker    mova                 m2, [rsp+gprsize+32*3]
6837*c0909341SAndroid Build Coastguard Worker    IDCT32_PASS2_END      3, r5+32*0, 0, 4, strideq*0, r3*4
6838*c0909341SAndroid Build Coastguard Worker    IDCT32_PASS2_END      1, r5-32*4, 0, 4, strideq*4, strideq*8
6839*c0909341SAndroid Build Coastguard Worker    IDCT32_PASS2_END     11, r4+32*0, 0, 4, strideq*8, strideq*4
6840*c0909341SAndroid Build Coastguard Worker    IDCT32_PASS2_END      2, r4-32*4, 0, 4, r3*4,      strideq*0
6841*c0909341SAndroid Build Coastguard Worker    ret
6842*c0909341SAndroid Build Coastguard Worker
; inv_txfm_add_identity_identity_16x32_10bpc(dst, stride, c, eob)
;
; Identity/identity inverse transform for a 16x32 block, processed as up to
; eight tiles. Each call to .main consumes eight coefficient rows (128-byte
; pitch, 32 bytes per row), scales them, clears them in the coefficient buffer
; and hands the transposed result to the shared write_2x8x2 helper.
;
; Tile order and early exits (eob is tested cumulatively):
;   tile 0                      always
;   tiles 1-2                   only if eob >= 36
;   tiles 3-4                   only if eob >= 143
;   tiles 5-6                   only if eob >= 271
;   tile 7                      only if eob >= 399
; Odd-numbered tiles are written 16 bytes to the right of the current row
; base; even-numbered tiles start a new row base 8 strides further down.
;
; Register roles set up here:
;   m6   zero, used to clear consumed coefficients
;   m7   broadcast of pixel_10bpc_max (presumably the clamp ceiling used by
;        write_2x8x2 -- TODO confirm against that helper)
;   m8   pw_2896x8, m9 pw_1697x16, m10 pw_16384, m11 pw_8192
;   r5   destination row base for the current pair of tiles
;   r6   strideq*5; not referenced in this function, presumably consumed by
;        write_2x8x2 -- TODO confirm
;
; .pass1 is also the entry point used by the 12 bpc variant, which arrives with
; a different value in m7.
6843*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_identity_identity_16x32_10bpc, 4, 7, 12, dst, stride, c, eob
6844*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m7, [pixel_10bpc_max]
6845*c0909341SAndroid Build Coastguard Worker.pass1:
6846*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m8, [pw_2896x8]
6847*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m9, [pw_1697x16]
6848*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m11, [pw_8192]
6849*c0909341SAndroid Build Coastguard Worker    lea                  r6, [strideq*5]
6850*c0909341SAndroid Build Coastguard Worker    pxor                 m6, m6
6851*c0909341SAndroid Build Coastguard Worker    paddw               m10, m11, m11 ; pw_16384
6852*c0909341SAndroid Build Coastguard Worker    mov                  r5, dstq
    ; Tile 0 (top-left).
6853*c0909341SAndroid Build Coastguard Worker    call .main
6854*c0909341SAndroid Build Coastguard Worker    sub                eobd, 36
6855*c0909341SAndroid Build Coastguard Worker    jl .ret
    ; Tile 1: coefficient rows 8..15 of the same slice, written 16 bytes right.
6856*c0909341SAndroid Build Coastguard Worker    add                  cq, 128*8
6857*c0909341SAndroid Build Coastguard Worker    lea                dstq, [r5+16]
6858*c0909341SAndroid Build Coastguard Worker    call .main
    ; Tile 2: back to rows 0..7 but 32 bytes further into each row; output
    ; moves down 8 strides and that becomes the new row base.
6859*c0909341SAndroid Build Coastguard Worker    sub                  cq, 128*8-32
6860*c0909341SAndroid Build Coastguard Worker    lea                dstq, [r5+strideq*8]
6861*c0909341SAndroid Build Coastguard Worker    mov                  r5, dstq
6862*c0909341SAndroid Build Coastguard Worker    call .main
6863*c0909341SAndroid Build Coastguard Worker    sub                eobd, 107 ; eob < 143
6864*c0909341SAndroid Build Coastguard Worker    jl .ret
    ; Tiles 3 and 4 follow the same right / down pattern.
6865*c0909341SAndroid Build Coastguard Worker    add                  cq, 128*8
6866*c0909341SAndroid Build Coastguard Worker    lea                dstq, [r5+16]
6867*c0909341SAndroid Build Coastguard Worker    call .main
6868*c0909341SAndroid Build Coastguard Worker    sub                  cq, 128*8-32
6869*c0909341SAndroid Build Coastguard Worker    lea                dstq, [r5+strideq*8]
6870*c0909341SAndroid Build Coastguard Worker    mov                  r5, dstq
6871*c0909341SAndroid Build Coastguard Worker    call .main
6872*c0909341SAndroid Build Coastguard Worker    sub                eobd, 128 ; eob < 271
6873*c0909341SAndroid Build Coastguard Worker    jl .ret
    ; Tiles 5 and 6.
6874*c0909341SAndroid Build Coastguard Worker    add                  cq, 128*8
6875*c0909341SAndroid Build Coastguard Worker    lea                dstq, [r5+16]
6876*c0909341SAndroid Build Coastguard Worker    call .main
6877*c0909341SAndroid Build Coastguard Worker    sub                  cq, 128*8-32
6878*c0909341SAndroid Build Coastguard Worker    lea                dstq, [r5+strideq*8]
6879*c0909341SAndroid Build Coastguard Worker    mov                  r5, dstq
6880*c0909341SAndroid Build Coastguard Worker    call .main
6881*c0909341SAndroid Build Coastguard Worker    sub                eobd, 128 ; eob < 399
6882*c0909341SAndroid Build Coastguard Worker    jl .ret
    ; Tile 7 (bottom-right), then fall through to the common return.
6883*c0909341SAndroid Build Coastguard Worker    add                  cq, 128*8
6884*c0909341SAndroid Build Coastguard Worker    lea                dstq, [r5+16]
6885*c0909341SAndroid Build Coastguard Worker    call .main
6886*c0909341SAndroid Build Coastguard Worker.ret:
6887*c0909341SAndroid Build Coastguard Worker    RET
6888*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; .main -- process one tile: load, narrow, scale, clear, transpose, write.
; Reads cq, m6 and m8..m11; overwrites m0..m4. cq itself is left unchanged by
; the code visible here.
6889*c0909341SAndroid Build Coastguard Worker.main:
    ; Load eight rows of 32-bit coefficients and narrow adjacent row pairs to
    ; signed-saturated 16-bit words (rows 0|1 -> m0, 2|3 -> m1, ...).
6890*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+128*0]
6891*c0909341SAndroid Build Coastguard Worker    packssdw             m0, [cq+128*1]
6892*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+128*2]
6893*c0909341SAndroid Build Coastguard Worker    packssdw             m1, [cq+128*3]
6894*c0909341SAndroid Build Coastguard Worker    mova                 m2, [cq+128*4]
6895*c0909341SAndroid Build Coastguard Worker    packssdw             m2, [cq+128*5]
6896*c0909341SAndroid Build Coastguard Worker    mova                 m3, [cq+128*6]
6897*c0909341SAndroid Build Coastguard Worker    packssdw             m3, [cq+128*7]
    ; Three scaling steps: rounded high-multiply by m8 (pw_2896x8), the IDTX16
    ; macro (defined elsewhere; given scratch register 4 and constants m9/m10),
    ; then rounded high-multiply by m11 (pw_8192).
6898*c0909341SAndroid Build Coastguard Worker    REPX  {pmulhrsw x, m8 }, m0, m1, m2, m3
6899*c0909341SAndroid Build Coastguard Worker    REPX {IDTX16 x, 4, 9, 10}, 0, 1, 2, 3
6900*c0909341SAndroid Build Coastguard Worker    REPX  {pmulhrsw x, m11}, m0, m1, m2, m3
    ; Zero the eight coefficient rows that were just consumed.
6901*c0909341SAndroid Build Coastguard Worker    REPX {mova [cq+128*x], m6}, 0, 1, 2, 3, 4, 5, 6, 7
; .main2 -- transpose m0..m3 (word then qword interleaves) and write the result
; in two halves through write_2x8x2. The second call is a tail jump, so that
; helper's ret returns directly to .main's caller.
6902*c0909341SAndroid Build Coastguard Worker.main2:
6903*c0909341SAndroid Build Coastguard Worker    punpckhwd            m4, m0, m1
6904*c0909341SAndroid Build Coastguard Worker    punpcklwd            m0, m1
6905*c0909341SAndroid Build Coastguard Worker    punpckhwd            m1, m2, m3
6906*c0909341SAndroid Build Coastguard Worker    punpcklwd            m2, m3
6907*c0909341SAndroid Build Coastguard Worker    punpckhwd            m3, m0, m4
6908*c0909341SAndroid Build Coastguard Worker    punpcklwd            m0, m4
6909*c0909341SAndroid Build Coastguard Worker    punpcklwd            m4, m2, m1
6910*c0909341SAndroid Build Coastguard Worker    punpckhwd            m2, m1
6911*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m1, m0, m4
6912*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m0, m4
6913*c0909341SAndroid Build Coastguard Worker    call m(iidentity_8x8_internal_10bpc).write_2x8x2
6914*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m0, m3, m2
6915*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m1, m3, m2
6916*c0909341SAndroid Build Coastguard Worker    jmp m(iidentity_8x8_internal_10bpc).write_2x8x2
6917*c0909341SAndroid Build Coastguard Worker
; inv_txfm_add_identity_identity_16x32_12bpc(dst, stride, c, eob)
;
; 12 bpc entry point. It differs from the 10 bpc routine only in the value
; broadcast into m7 (pixel_12bpc_max instead of pixel_10bpc_max); all remaining
; work is shared by jumping to the 10 bpc routine's .pass1 label. Both
; functions use the same cglobal argument/register counts.
6918*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_identity_identity_16x32_12bpc, 4, 7, 12, dst, stride, c, eob
6919*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m7, [pixel_12bpc_max]
6920*c0909341SAndroid Build Coastguard Worker    jmp m(inv_txfm_add_identity_identity_16x32_10bpc).pass1
6921*c0909341SAndroid Build Coastguard Worker
6922*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_dct_dct_32x16_10bpc, 4, 7, 0, dst, stride, c, eob
6923*c0909341SAndroid Build Coastguard Worker    test               eobd, eobd
6924*c0909341SAndroid Build Coastguard Worker    jz .dconly
6925*c0909341SAndroid Build Coastguard Worker    PROLOGUE              0, 8, 16, 32*40, dst, stride, c, eob
6926*c0909341SAndroid Build Coastguard Worker%undef cmp
6927*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [clip_18b_min]
6928*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_18b_max]
6929*c0909341SAndroid Build Coastguard Worker    lea                  r6, [rsp+32*4]
6930*c0909341SAndroid Build Coastguard Worker    call .main
6931*c0909341SAndroid Build Coastguard Worker    cmp                eobd, 36
6932*c0909341SAndroid Build Coastguard Worker    jge .full
6933*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_8x32_10bpc).transpose
6934*c0909341SAndroid Build Coastguard Worker    pxor                 m8, m8
6935*c0909341SAndroid Build Coastguard Worker    REPX       {mova x, m8}, m9, m10, m11, m12, m13, m14, [rsp]
6936*c0909341SAndroid Build Coastguard Worker    lea                  r6, [pw_5+128]
6937*c0909341SAndroid Build Coastguard Worker    mov                  r7, dstq
6938*c0909341SAndroid Build Coastguard Worker    call m(idct_16x16_internal_8bpc).main
6939*c0909341SAndroid Build Coastguard Worker    call .write_16x16
6940*c0909341SAndroid Build Coastguard Worker    mova                 m0, [r5+32*3]
6941*c0909341SAndroid Build Coastguard Worker    mova                 m1, [r5+32*2]
6942*c0909341SAndroid Build Coastguard Worker    mova                 m2, [r5+32*1]
6943*c0909341SAndroid Build Coastguard Worker    mova                 m3, [r5+32*0]
6944*c0909341SAndroid Build Coastguard Worker    mova                 m4, [r5-32*1]
6945*c0909341SAndroid Build Coastguard Worker    mova                 m5, [r5-32*2]
6946*c0909341SAndroid Build Coastguard Worker    mova                 m6, [r5-32*3]
6947*c0909341SAndroid Build Coastguard Worker    mova                 m7, [r5-32*4]
6948*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_8x32_10bpc).transpose
6949*c0909341SAndroid Build Coastguard Worker    pxor                 m8, m8
6950*c0909341SAndroid Build Coastguard Worker    REPX       {mova x, m8}, m9, m10, m11, m12, m13, m14, [rsp]
6951*c0909341SAndroid Build Coastguard Worker    jmp .end
6952*c0909341SAndroid Build Coastguard Worker.dconly:
6953*c0909341SAndroid Build Coastguard Worker    imul                r6d, [cq], 181
6954*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m3, [dconly_10bpc]
6955*c0909341SAndroid Build Coastguard Worker    mov                [cq], eobd ; 0
6956*c0909341SAndroid Build Coastguard Worker    or                  r3d, 16
6957*c0909341SAndroid Build Coastguard Worker    add                 r6d, 128
6958*c0909341SAndroid Build Coastguard Worker    sar                 r6d, 8
6959*c0909341SAndroid Build Coastguard Worker    imul                r6d, 181
6960*c0909341SAndroid Build Coastguard Worker    add                 r6d, 384
6961*c0909341SAndroid Build Coastguard Worker    sar                 r6d, 9
6962*c0909341SAndroid Build Coastguard Worker    jmp m(inv_txfm_add_dct_dct_32x8_10bpc).dconly2
6963*c0909341SAndroid Build Coastguard Worker.full:
6964*c0909341SAndroid Build Coastguard Worker    add                  cq, 32
6965*c0909341SAndroid Build Coastguard Worker    mova          [r4+32*3], m0
6966*c0909341SAndroid Build Coastguard Worker    mova          [r4+32*2], m1
6967*c0909341SAndroid Build Coastguard Worker    mova          [r4+32*1], m2
6968*c0909341SAndroid Build Coastguard Worker    mova          [r4+32*0], m3
6969*c0909341SAndroid Build Coastguard Worker    mova          [r4-32*1], m4
6970*c0909341SAndroid Build Coastguard Worker    mova          [r4-32*2], m5
6971*c0909341SAndroid Build Coastguard Worker    mova          [r4-32*3], m6
6972*c0909341SAndroid Build Coastguard Worker    mova          [r4-32*4], m7
6973*c0909341SAndroid Build Coastguard Worker    call .main
6974*c0909341SAndroid Build Coastguard Worker    sub                  r4, 32*16 ; topleft 16x8
6975*c0909341SAndroid Build Coastguard Worker    call .transpose_16x16
6976*c0909341SAndroid Build Coastguard Worker    lea                  r6, [pw_5+128]
6977*c0909341SAndroid Build Coastguard Worker    mov                  r7, dstq
6978*c0909341SAndroid Build Coastguard Worker    call m(idct_16x16_internal_8bpc).main
6979*c0909341SAndroid Build Coastguard Worker    call .write_16x16
6980*c0909341SAndroid Build Coastguard Worker    mova                 m0, [r5+32*3]
6981*c0909341SAndroid Build Coastguard Worker    mova                 m1, [r5+32*2]
6982*c0909341SAndroid Build Coastguard Worker    mova                 m2, [r5+32*1]
6983*c0909341SAndroid Build Coastguard Worker    mova                 m3, [r5+32*0]
6984*c0909341SAndroid Build Coastguard Worker    mova                 m4, [r5-32*1]
6985*c0909341SAndroid Build Coastguard Worker    mova                 m5, [r5-32*2]
6986*c0909341SAndroid Build Coastguard Worker    mova                 m6, [r5-32*3]
6987*c0909341SAndroid Build Coastguard Worker    mova                 m7, [r5-32*4]
6988*c0909341SAndroid Build Coastguard Worker    add                  r4, 32*8 ; bottomleft 16x8
6989*c0909341SAndroid Build Coastguard Worker    call .transpose_16x16
6990*c0909341SAndroid Build Coastguard Worker.end:
6991*c0909341SAndroid Build Coastguard Worker    lea                dstq, [r7+32]
6992*c0909341SAndroid Build Coastguard Worker    call m(idct_16x16_internal_8bpc).main
6993*c0909341SAndroid Build Coastguard Worker    call .write_16x16
6994*c0909341SAndroid Build Coastguard Worker    RET
6995*c0909341SAndroid Build Coastguard WorkerALIGN function_align
6996*c0909341SAndroid Build Coastguard Worker.transpose_16x16:
6997*c0909341SAndroid Build Coastguard Worker    punpckhdq            m8, m3, m1
6998*c0909341SAndroid Build Coastguard Worker    punpckldq            m3, m1
6999*c0909341SAndroid Build Coastguard Worker    punpckhdq            m1, m0, m2
7000*c0909341SAndroid Build Coastguard Worker    punpckldq            m0, m2
7001*c0909341SAndroid Build Coastguard Worker    punpckhdq            m2, m7, m5
7002*c0909341SAndroid Build Coastguard Worker    punpckldq            m7, m5
7003*c0909341SAndroid Build Coastguard Worker    punpckhdq            m5, m4, m6
7004*c0909341SAndroid Build Coastguard Worker    punpckldq            m4, m6
7005*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m6, m0, m4
7006*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m0, m4
7007*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m4, m1, m5
7008*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m1, m5
7009*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m5, m7, m3
7010*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m7, m3
7011*c0909341SAndroid Build Coastguard Worker    punpckhqdq           m3, m2, m8
7012*c0909341SAndroid Build Coastguard Worker    punpcklqdq           m2, m8
7013*c0909341SAndroid Build Coastguard Worker    vinserti128          m8, m0, xm7, 1
7014*c0909341SAndroid Build Coastguard Worker    vperm2i128          m12, m0, m7, 0x31
7015*c0909341SAndroid Build Coastguard Worker    vinserti128          m9, m6, xm5, 1
7016*c0909341SAndroid Build Coastguard Worker    vperm2i128          m13, m6, m5, 0x31
7017*c0909341SAndroid Build Coastguard Worker    vinserti128         m10, m1, xm2, 1
7018*c0909341SAndroid Build Coastguard Worker    vperm2i128          m14, m1, m2, 0x31
7019*c0909341SAndroid Build Coastguard Worker    vinserti128         m11, m4, xm3, 1
7020*c0909341SAndroid Build Coastguard Worker    vperm2i128          m15, m4, m3, 0x31
7021*c0909341SAndroid Build Coastguard Worker    mova                 m0, [r4+32*3]
7022*c0909341SAndroid Build Coastguard Worker    mova                 m1, [r4+32*2]
7023*c0909341SAndroid Build Coastguard Worker    mova                 m2, [r4+32*1]
7024*c0909341SAndroid Build Coastguard Worker    mova                 m3, [r4+32*0]
7025*c0909341SAndroid Build Coastguard Worker    mova                 m4, [r4-32*1]
7026*c0909341SAndroid Build Coastguard Worker    mova                 m5, [r4-32*2]
7027*c0909341SAndroid Build Coastguard Worker    mova                 m6, [r4-32*3]
7028*c0909341SAndroid Build Coastguard Worker    mova                 m7, [r4-32*4]
7029*c0909341SAndroid Build Coastguard Worker    mova      [rsp+gprsize], m15
7030*c0909341SAndroid Build Coastguard Worker    jmp m(inv_txfm_add_dct_dct_8x32_10bpc).transpose
7031*c0909341SAndroid Build Coastguard WorkerALIGN function_align
7032*c0909341SAndroid Build Coastguard Worker.main:
7033*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m14, [pd_2896]
7034*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m11, [pd_2048]
7035*c0909341SAndroid Build Coastguard Worker    pmulld               m0, m14, [cq+64* 1]
7036*c0909341SAndroid Build Coastguard Worker    pmulld               m1, m14, [cq+64* 7]
7037*c0909341SAndroid Build Coastguard Worker    pmulld               m2, m14, [cq+64* 9]
7038*c0909341SAndroid Build Coastguard Worker    pmulld               m3, m14, [cq+64*15]
7039*c0909341SAndroid Build Coastguard Worker    pmulld               m4, m14, [cq+64*17]
7040*c0909341SAndroid Build Coastguard Worker    pmulld               m5, m14, [cq+64*23]
7041*c0909341SAndroid Build Coastguard Worker    pmulld               m6, m14, [cq+64*25]
7042*c0909341SAndroid Build Coastguard Worker    pmulld               m7, m14, [cq+64*31]
7043*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_8x32_10bpc).main_oddhalf_part1_rect2
7044*c0909341SAndroid Build Coastguard Worker    pmulld               m0, m14, [cq+64* 3]
7045*c0909341SAndroid Build Coastguard Worker    pmulld               m1, m14, [cq+64* 5]
7046*c0909341SAndroid Build Coastguard Worker    pmulld               m2, m14, [cq+64*11]
7047*c0909341SAndroid Build Coastguard Worker    pmulld               m3, m14, [cq+64*13]
7048*c0909341SAndroid Build Coastguard Worker    pmulld               m4, m14, [cq+64*19]
7049*c0909341SAndroid Build Coastguard Worker    pmulld               m5, m14, [cq+64*21]
7050*c0909341SAndroid Build Coastguard Worker    pmulld               m6, m14, [cq+64*27]
7051*c0909341SAndroid Build Coastguard Worker    pmulld               m7, m14, [cq+64*29]
7052*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_8x32_10bpc).main_oddhalf_part2_rect2
7053*c0909341SAndroid Build Coastguard Worker    pmulld               m0, m14, [cq+64* 2]
7054*c0909341SAndroid Build Coastguard Worker    pmulld               m1, m14, [cq+64* 6]
7055*c0909341SAndroid Build Coastguard Worker    pmulld               m2, m14, [cq+64*10]
7056*c0909341SAndroid Build Coastguard Worker    pmulld               m3, m14, [cq+64*14]
7057*c0909341SAndroid Build Coastguard Worker    pmulld               m4, m14, [cq+64*18]
7058*c0909341SAndroid Build Coastguard Worker    pmulld               m5, m14, [cq+64*22]
7059*c0909341SAndroid Build Coastguard Worker    pmulld               m6, m14, [cq+64*26]
7060*c0909341SAndroid Build Coastguard Worker    pmulld               m7, m14, [cq+64*30]
7061*c0909341SAndroid Build Coastguard Worker    call m(idct_8x16_internal_10bpc).main_oddhalf_rect2
7062*c0909341SAndroid Build Coastguard Worker    pmulld               m0, m14, [cq+64* 0]
7063*c0909341SAndroid Build Coastguard Worker    pmulld               m1, m14, [cq+64* 4]
7064*c0909341SAndroid Build Coastguard Worker    pmulld               m2, m14, [cq+64* 8]
7065*c0909341SAndroid Build Coastguard Worker    pmulld               m3, m14, [cq+64*12]
7066*c0909341SAndroid Build Coastguard Worker    pmulld               m4, m14, [cq+64*16]
7067*c0909341SAndroid Build Coastguard Worker    pmulld               m5, m14, [cq+64*20]
7068*c0909341SAndroid Build Coastguard Worker    pmulld               m6, m14, [cq+64*24]
7069*c0909341SAndroid Build Coastguard Worker    pmulld               m7, m14, [cq+64*28]
7070*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).main_rect2
7071*c0909341SAndroid Build Coastguard Worker    call m(idct_8x16_internal_10bpc).main_evenhalf
7072*c0909341SAndroid Build Coastguard Worker    pxor                 m8, m8
7073*c0909341SAndroid Build Coastguard Worker    mov                 r7d, 64*30
7074*c0909341SAndroid Build Coastguard Worker.main_zero_loop:
7075*c0909341SAndroid Build Coastguard Worker    mova       [cq+r7-64*2], m8
7076*c0909341SAndroid Build Coastguard Worker    mova       [cq+r7-64*1], m8
7077*c0909341SAndroid Build Coastguard Worker    mova       [cq+r7+64*0], m8
7078*c0909341SAndroid Build Coastguard Worker    mova       [cq+r7+64*1], m8
7079*c0909341SAndroid Build Coastguard Worker    sub                 r7d, 64*4
7080*c0909341SAndroid Build Coastguard Worker    jg .main_zero_loop
7081*c0909341SAndroid Build Coastguard Worker.main_end:
7082*c0909341SAndroid Build Coastguard Worker    psrld               m11, 11 ; pd_1
7083*c0909341SAndroid Build Coastguard Worker    IDCT32_END            0, 15, 8, 9, 10, 1
7084*c0909341SAndroid Build Coastguard Worker    IDCT32_END            1, 14, 8, 9, 10, 1
7085*c0909341SAndroid Build Coastguard Worker    punpckhwd            m8, m0, m1   ; 16 17
7086*c0909341SAndroid Build Coastguard Worker    punpcklwd            m0, m1       ;  0  1
7087*c0909341SAndroid Build Coastguard Worker    punpcklwd            m1, m14, m15 ; 14 15
7088*c0909341SAndroid Build Coastguard Worker    punpckhwd           m14, m15      ; 30 31
7089*c0909341SAndroid Build Coastguard Worker    mova          [r5+32*3], m8
7090*c0909341SAndroid Build Coastguard Worker    mova          [r5+32*2], m14
7091*c0909341SAndroid Build Coastguard Worker    IDCT32_END            2, 15, 8, 9, 10, 1
7092*c0909341SAndroid Build Coastguard Worker    IDCT32_END            3, 14, 8, 9, 10, 1
7093*c0909341SAndroid Build Coastguard Worker    punpckhwd            m8, m2, m3   ; 18 19
7094*c0909341SAndroid Build Coastguard Worker    punpcklwd            m2, m3       ;  2  3
7095*c0909341SAndroid Build Coastguard Worker    punpcklwd            m3, m14, m15 ; 12 13
7096*c0909341SAndroid Build Coastguard Worker    punpckhwd           m14, m15      ; 28 29
7097*c0909341SAndroid Build Coastguard Worker    mova          [r5+32*1], m8
7098*c0909341SAndroid Build Coastguard Worker    mova          [r5+32*0], m14
7099*c0909341SAndroid Build Coastguard Worker    IDCT32_END            4, 15, 8, 9, 10, 1
7100*c0909341SAndroid Build Coastguard Worker    IDCT32_END            5, 14, 8, 9, 10, 1
7101*c0909341SAndroid Build Coastguard Worker    punpckhwd            m8, m4, m5   ; 20 21
7102*c0909341SAndroid Build Coastguard Worker    punpcklwd            m4, m5       ;  4  5
7103*c0909341SAndroid Build Coastguard Worker    punpcklwd            m5, m14, m15 ; 10 11
7104*c0909341SAndroid Build Coastguard Worker    punpckhwd           m14, m15      ; 26 27
7105*c0909341SAndroid Build Coastguard Worker    mova          [r5-32*1], m8
7106*c0909341SAndroid Build Coastguard Worker    mova          [r5-32*2], m14
7107*c0909341SAndroid Build Coastguard Worker    IDCT32_END            6, 15, 8, 9, 10, 1
7108*c0909341SAndroid Build Coastguard Worker    IDCT32_END            7, 14, 8, 9, 10, 1
7109*c0909341SAndroid Build Coastguard Worker    punpckhwd            m8, m6, m7   ; 22 23
7110*c0909341SAndroid Build Coastguard Worker    punpcklwd            m6, m7       ;  6  7
7111*c0909341SAndroid Build Coastguard Worker    punpcklwd            m7, m14, m15 ;  8  9
7112*c0909341SAndroid Build Coastguard Worker    punpckhwd           m14, m15      ; 24 25
7113*c0909341SAndroid Build Coastguard Worker    mova          [r5-32*3], m8
7114*c0909341SAndroid Build Coastguard Worker    mova          [r5-32*4], m14
7115*c0909341SAndroid Build Coastguard Worker    ret
7116*c0909341SAndroid Build Coastguard WorkerALIGN function_align
7117*c0909341SAndroid Build Coastguard Worker.write_16x16:
7118*c0909341SAndroid Build Coastguard Worker    mova                 m1, [rsp+gprsize+32*1]
7119*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize+32*0], m8
7120*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize+32*1], m9
7121*c0909341SAndroid Build Coastguard Worker    mova [rsp+gprsize+32*2], m12
7122*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [pw_2048]
7123*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m9, [pixel_10bpc_max]
7124*c0909341SAndroid Build Coastguard Worker    lea                  r3, [strideq*3]
7125*c0909341SAndroid Build Coastguard Worker    pxor                 m8, m8
7126*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m12
7127*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m12
7128*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m2, m12
7129*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m3, m12
7130*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_10bpc).write_16x4
7131*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m12, m4
7132*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m12, m5
7133*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m2, m12, m6
7134*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m3, m12, m7
7135*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_10bpc).write_16x4
7136*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m12, [rsp+gprsize+32*0]
7137*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m12, [rsp+gprsize+32*1]
7138*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m2, m12, m10
7139*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m3, m12, m11
7140*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_10bpc).write_16x4
7141*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m0, m12, [rsp+gprsize+32*2]
7142*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m1, m12, m13
7143*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m2, m12, m14
7144*c0909341SAndroid Build Coastguard Worker    pmulhrsw             m3, m12, m15
7145*c0909341SAndroid Build Coastguard Worker    jmp m(idct_16x8_internal_10bpc).write_16x4
7146*c0909341SAndroid Build Coastguard Worker
; inv_txfm_add_identity_identity_32x16_10bpc(dst, stride, c, eob)
;
; Going by the name: identity/identity inverse transform of a 32x16 block,
; added to 10-bit pixels. cglobal declares 4 arguments, 7 GPRs and
; 11 vector registers.
;
; Register roles established before the work starts:
;   m6  = all zero (written over coefficients once they have been read)
;   m7  = pixel_10bpc_max; loaded *before* .pass1 so that the 12bpc entry
;         point can load its own maximum and then jump to .pass1
;   m8  = pw_2896x8, m9 = pw_1697x16, m10 = pw_4096
;   r5  = dstq as it was on entry (base for the later dstq recomputations)
;   r6  = strideq*5; not read again in this function, presumably consumed
;         by the shared tail that .main jumps to - TODO confirm
;
; Every .main call consumes 32 bytes from each of eight locations spaced
; 64 bytes apart starting at cq. Up to eight calls are made; eobd is
; reduced step by step and the function returns as soon as it goes
; negative, i.e. when eob < 36, 143, 271 or 399.
7147*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_identity_identity_32x16_10bpc, 4, 7, 11, dst, stride, c, eob
7148*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m7, [pixel_10bpc_max]
; Shared entry point: the 12bpc variant jumps here with its own m7.
7149*c0909341SAndroid Build Coastguard Worker.pass1:
7150*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m8, [pw_2896x8]
7151*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m9, [pw_1697x16]
7152*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m10, [pw_4096]
7153*c0909341SAndroid Build Coastguard Worker    lea                  r6, [strideq*5]
7154*c0909341SAndroid Build Coastguard Worker    pxor                 m6, m6
7155*c0909341SAndroid Build Coastguard Worker    mov                  r5, dstq
7156*c0909341SAndroid Build Coastguard Worker    call .main
    ; 36 is subtracted (not just compared) so that the later thresholds can
    ; be expressed as further subtractions from the running value.
7157*c0909341SAndroid Build Coastguard Worker    sub                eobd, 36
7158*c0909341SAndroid Build Coastguard Worker    jl .ret
    ; Second half of the same eight 64-byte slots, four rows further down.
7159*c0909341SAndroid Build Coastguard Worker    add                  cq, 32
7160*c0909341SAndroid Build Coastguard Worker    lea                dstq, [dstq+strideq*4]
7161*c0909341SAndroid Build Coastguard Worker    call .main
    ; Net cq advance per pair of calls is 64*8: skip the eight slots just
    ; consumed. dstq restarts from the saved base plus 16 bytes per group.
7162*c0909341SAndroid Build Coastguard Worker    add                  cq, 64*8-32
7163*c0909341SAndroid Build Coastguard Worker    lea                dstq, [r5+16*1]
7164*c0909341SAndroid Build Coastguard Worker    call .main
7165*c0909341SAndroid Build Coastguard Worker    sub                eobd, 107 ; eob < 143
7166*c0909341SAndroid Build Coastguard Worker    jl .ret
7167*c0909341SAndroid Build Coastguard Worker    add                  cq, 32
7168*c0909341SAndroid Build Coastguard Worker    lea                dstq, [dstq+strideq*4]
7169*c0909341SAndroid Build Coastguard Worker    call .main
7170*c0909341SAndroid Build Coastguard Worker    add                  cq, 64*8-32
7171*c0909341SAndroid Build Coastguard Worker    lea                dstq, [r5+16*2]
7172*c0909341SAndroid Build Coastguard Worker    call .main
7173*c0909341SAndroid Build Coastguard Worker    sub                eobd, 128 ; eob < 271
7174*c0909341SAndroid Build Coastguard Worker    jl .ret
7175*c0909341SAndroid Build Coastguard Worker    add                  cq, 32
7176*c0909341SAndroid Build Coastguard Worker    lea                dstq, [dstq+strideq*4]
7177*c0909341SAndroid Build Coastguard Worker    call .main
7178*c0909341SAndroid Build Coastguard Worker    add                  cq, 64*8-32
7179*c0909341SAndroid Build Coastguard Worker    lea                dstq, [r5+16*3]
7180*c0909341SAndroid Build Coastguard Worker    call .main
7181*c0909341SAndroid Build Coastguard Worker    sub                eobd, 128 ; eob < 399
7182*c0909341SAndroid Build Coastguard Worker    jl .ret
7183*c0909341SAndroid Build Coastguard Worker    add                  cq, 32
7184*c0909341SAndroid Build Coastguard Worker    lea                dstq, [dstq+strideq*4]
7185*c0909341SAndroid Build Coastguard Worker    call .main
7186*c0909341SAndroid Build Coastguard Worker.ret:
7187*c0909341SAndroid Build Coastguard Worker    RET
7188*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; .main: process one group of coefficients.
;   1. Load [cq+64*n] for n = 0..7 and pack each even/odd pair of dword
;      vectors into one word vector with signed saturation (m0-m3).
;   2. pmulhrsw by m8 (pw_2896x8), then double with saturating paddsw.
;   3. Apply the IDTX16 macro (defined elsewhere) using m9 (pw_1697x16);
;      4 is passed as its second argument, presumably a scratch register.
;   4. pmulhrsw by m10 (pw_4096).
;   5. Overwrite the eight consumed 32-byte coefficient slots with m6 (zero).
;   6. Tail-jump to the 16x32 identity routine's .main2, which is outside
;      this view; presumably it adds m0-m3 to the pixels at dstq and clamps
;      with m6/m7 - TODO confirm. Its ret returns to .main's caller.
7189*c0909341SAndroid Build Coastguard Worker.main:
7190*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+64*0]
7191*c0909341SAndroid Build Coastguard Worker    packssdw             m0, [cq+64*1]
7192*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+64*2]
7193*c0909341SAndroid Build Coastguard Worker    packssdw             m1, [cq+64*3]
7194*c0909341SAndroid Build Coastguard Worker    mova                 m2, [cq+64*4]
7195*c0909341SAndroid Build Coastguard Worker    packssdw             m2, [cq+64*5]
7196*c0909341SAndroid Build Coastguard Worker    mova                 m3, [cq+64*6]
7197*c0909341SAndroid Build Coastguard Worker    packssdw             m3, [cq+64*7]
7198*c0909341SAndroid Build Coastguard Worker    REPX  {pmulhrsw x, m8 }, m0, m1, m2, m3
7199*c0909341SAndroid Build Coastguard Worker    REPX  {paddsw   x, x  }, m0, m1, m2, m3
7200*c0909341SAndroid Build Coastguard Worker    REPX  {IDTX16 x, 4, 9, _ }, 0, 1, 2, 3
7201*c0909341SAndroid Build Coastguard Worker    REPX  {pmulhrsw x, m10}, m0, m1, m2, m3
7202*c0909341SAndroid Build Coastguard Worker    REPX {mova [cq+64*x], m6}, 0, 1, 2, 3, 4, 5, 6, 7
7203*c0909341SAndroid Build Coastguard Worker    jmp m(inv_txfm_add_identity_identity_16x32_10bpc).main2
7204*c0909341SAndroid Build Coastguard Worker
; inv_txfm_add_identity_identity_32x16_12bpc(dst, stride, c, eob)
;
; 12-bit entry point. It differs from the 10bpc function only in the value
; broadcast into m7 (pixel_12bpc_max instead of pixel_10bpc_max); all
; remaining work is done by jumping into the 10bpc function at .pass1.
; The cglobal register counts (4, 7, 11) match the 10bpc declaration, which
; the shared code relies on for its epilogue.
7205*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_identity_identity_32x16_12bpc, 4, 7, 11, dst, stride, c, eob
7206*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m7, [pixel_12bpc_max]
7207*c0909341SAndroid Build Coastguard Worker    jmp m(inv_txfm_add_identity_identity_32x16_10bpc).pass1
7208*c0909341SAndroid Build Coastguard Worker
; inv_txfm_add_dct_dct_32x32_10bpc(dst, stride, c, eob)
;
; Going by the name: DCT/DCT inverse transform of a 32x32 block, added to
; 10-bit pixels.
;
; Control flow:
;   eob == 0 -> .dconly: hand the single first coefficient to the 32x8
;               function's .dconly path.
;   otherwise   a second PROLOGUE requests 8 GPRs, 16 vector registers and
;               32*83 bytes of stack, then:
;     pass 1:  .main is called one to four times; after each call eob is
;              compared against 36 / 136 / 300 and the remaining calls are
;              skipped via .fast, which zero-fills the part of the
;              intermediate buffer that was not produced.
;     pass 2:  .pass2_oddhalf + .pass2_evenhalf feed the 8bpc 16x32/16x16
;              DCT kernels, and the 16x32 10bpc .pass2_end is called, once
;              with r3 = rsp+32*3 and once with r3 = rsp+32*11 and
;              dstq moved on by 32 bytes.
;
; NOTE(review): the first cglobal line declares 0 vector registers and only
; 7 GPRs; that declaration covers the .dconly path, which leaves before the
; inner PROLOGUE runs.
7209*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_dct_dct_32x32_10bpc, 4, 7, 0, dst, stride, c, eob
7210*c0909341SAndroid Build Coastguard Worker    test               eobd, eobd
7211*c0909341SAndroid Build Coastguard Worker    jz .dconly
7212*c0909341SAndroid Build Coastguard Worker    PROLOGUE              0, 8, 16, 32*83, dst, stride, c, eob
7213*c0909341SAndroid Build Coastguard Worker%undef cmp
    ; m12/m13 hold the clip_18b_min/max constants; they are not referenced
    ; in this function's own code, so presumably the called DCT helpers use
    ; them for intermediate clamping - TODO confirm.
7214*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [clip_18b_min]
7215*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_18b_max]
    ; r6 points into the stack buffer. It is not modified by the visible
    ; part of .main, yet .fast below treats it as a cursor that stops at
    ; rsp+32*71, so a callee presumably advances it - TODO confirm.
7216*c0909341SAndroid Build Coastguard Worker    lea                  r6, [rsp+32*7]
7217*c0909341SAndroid Build Coastguard Worker    call .main
7218*c0909341SAndroid Build Coastguard Worker    cmp                eobd, 36
7219*c0909341SAndroid Build Coastguard Worker    jl .fast
7220*c0909341SAndroid Build Coastguard Worker    call .main
7221*c0909341SAndroid Build Coastguard Worker    cmp                eobd, 136
7222*c0909341SAndroid Build Coastguard Worker    jl .fast
7223*c0909341SAndroid Build Coastguard Worker    call .main
7224*c0909341SAndroid Build Coastguard Worker    cmp                eobd, 300
7225*c0909341SAndroid Build Coastguard Worker    jl .fast
7226*c0909341SAndroid Build Coastguard Worker    call .main
7227*c0909341SAndroid Build Coastguard Worker    jmp .pass2
; DC-only path (reached only when eobd == 0):
;   r6d = [cq] * 181, m3 = dconly_10bpc, and the coefficient is cleared by
;   storing eobd, which is known to be zero here. The "or r3d, 32" then
;   yields exactly 32 if r3d is the same register as eobd (eob is the fourth
;   named argument) - presumably a row count for the shared .dconly code;
;   TODO confirm against the 32x8 function.
7228*c0909341SAndroid Build Coastguard Worker.dconly:
7229*c0909341SAndroid Build Coastguard Worker    imul                r6d, [cq], 181
7230*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m3, [dconly_10bpc]
7231*c0909341SAndroid Build Coastguard Worker    mov                [cq], eobd ; 0
7232*c0909341SAndroid Build Coastguard Worker    or                  r3d, 32
7233*c0909341SAndroid Build Coastguard Worker    jmp m(inv_txfm_add_dct_dct_32x8_10bpc).dconly
; Early-out from pass 1: fill the stack buffer with zeros in 32*8-byte
; chunks (offsets -4..3 around r6) until r6 reaches rsp+32*71, then fall
; through into pass 2.
7234*c0909341SAndroid Build Coastguard Worker.fast:
7235*c0909341SAndroid Build Coastguard Worker    lea                  r4, [rsp+32*71]
7236*c0909341SAndroid Build Coastguard Worker    pxor                 m0, m0
7237*c0909341SAndroid Build Coastguard Worker.fast_loop:
7238*c0909341SAndroid Build Coastguard Worker    REPX {mova [r6+32*x], m0}, -4, -3, -2, -1, 0, 1, 2, 3
7239*c0909341SAndroid Build Coastguard Worker    add                  r6, 32*8
7240*c0909341SAndroid Build Coastguard Worker    cmp                  r6, r4
7241*c0909341SAndroid Build Coastguard Worker    jl .fast_loop
; Pass 2. r3 is the base from which .pass2_oddhalf/.pass2_evenhalf load
; their inputs; r4 and r5 are derived from the final pass-1 value of r6;
; r6 is repointed at pw_5+128, presumably the constant-table base expected
; by the 8bpc kernels - TODO confirm.
7242*c0909341SAndroid Build Coastguard Worker.pass2:
7243*c0909341SAndroid Build Coastguard Worker    lea                  r3, [rsp+32*3]
7244*c0909341SAndroid Build Coastguard Worker    mov                  r4, r6
7245*c0909341SAndroid Build Coastguard Worker    lea                  r5, [r6+32*8]
7246*c0909341SAndroid Build Coastguard Worker    lea                  r6, [pw_5+128]
7247*c0909341SAndroid Build Coastguard Worker    call .pass2_oddhalf
7248*c0909341SAndroid Build Coastguard Worker    call .pass2_evenhalf
    ; r2 = dstq + strideq*19 and r3 = strideq*3 are the arguments handed to
    ; .pass2_end. r2 is overwritten freely here; the coefficient pointer is
    ; not used again in this function after pass 1.
7249*c0909341SAndroid Build Coastguard Worker    imul                 r2, strideq, 19
7250*c0909341SAndroid Build Coastguard Worker    lea                  r3, [strideq*3]
7251*c0909341SAndroid Build Coastguard Worker    add                  r2, dstq
7252*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_16x32_10bpc).pass2_end
    ; Second half: adjust dstq/r2 for whatever .pass2_end left in them
    ; (outside this view), step 32 bytes to the right, and read pass-2
    ; input from 32*8 bytes further into the stack buffer.
7253*c0909341SAndroid Build Coastguard Worker    sub                dstq, r3
7254*c0909341SAndroid Build Coastguard Worker    lea                  r2, [r2+r3+32]
7255*c0909341SAndroid Build Coastguard Worker    add                dstq, 32
7256*c0909341SAndroid Build Coastguard Worker    lea                  r3, [rsp+32*11]
7257*c0909341SAndroid Build Coastguard Worker    call .pass2_oddhalf
7258*c0909341SAndroid Build Coastguard Worker    call .pass2_evenhalf
7259*c0909341SAndroid Build Coastguard Worker    lea                  r3, [strideq*3]
7260*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_16x32_10bpc).pass2_end
7261*c0909341SAndroid Build Coastguard Worker    RET
7262*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; .main: one pass-1 step over a 32-byte-wide column of the coefficient
; buffer (32 rows spaced 128 bytes apart).
;   - rows 1,7,9,15,17,23,25,31  -> 8x32 main_oddhalf_part1
;   - rows 3,5,11,13,19,21,27,29 -> 8x32 main_oddhalf_part2
;   - rows 2,6,...,30            -> 8x16 main_oddhalf
;   - rows 0,4,...,28            -> 8x8 main, then 8x16 main_evenhalf
;   - 8x32 main_end combines the partial results (helper not in view)
;   - all 32 rows of this column are cleared, and cq advances by 32 bytes
;   - m0-m7 are stored around r4, eight vectors are loaded from around r5
;     in reverse slot order, transposed via the 8x32 .transpose helper, and
;     stored back around r5 in forward order
; m11 = pd_2048 and m14 = pd_2896 are loaded for the helpers.
; NOTE(review): r4 and r5 are used as store/load bases here but are not set
; on the pass-1 path in this function; presumably a callee (e.g. main_end)
; sets them - TODO confirm.
7263*c0909341SAndroid Build Coastguard Worker.main:
7264*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+128* 1]
7265*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+128* 7]
7266*c0909341SAndroid Build Coastguard Worker    mova                 m2, [cq+128* 9]
7267*c0909341SAndroid Build Coastguard Worker    mova                 m3, [cq+128*15]
7268*c0909341SAndroid Build Coastguard Worker    mova                 m4, [cq+128*17]
7269*c0909341SAndroid Build Coastguard Worker    mova                 m5, [cq+128*23]
7270*c0909341SAndroid Build Coastguard Worker    mova                 m6, [cq+128*25]
7271*c0909341SAndroid Build Coastguard Worker    mova                 m7, [cq+128*31]
7272*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m11, [pd_2048]
7273*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m14, [pd_2896]
7274*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_8x32_10bpc).main_oddhalf_part1
7275*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+128* 3]
7276*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+128* 5]
7277*c0909341SAndroid Build Coastguard Worker    mova                 m2, [cq+128*11]
7278*c0909341SAndroid Build Coastguard Worker    mova                 m3, [cq+128*13]
7279*c0909341SAndroid Build Coastguard Worker    mova                 m4, [cq+128*19]
7280*c0909341SAndroid Build Coastguard Worker    mova                 m5, [cq+128*21]
7281*c0909341SAndroid Build Coastguard Worker    mova                 m6, [cq+128*27]
7282*c0909341SAndroid Build Coastguard Worker    mova                 m7, [cq+128*29]
7283*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_8x32_10bpc).main_oddhalf_part2
7284*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+128* 2]
7285*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+128* 6]
7286*c0909341SAndroid Build Coastguard Worker    mova                 m2, [cq+128*10]
7287*c0909341SAndroid Build Coastguard Worker    mova                 m3, [cq+128*14]
7288*c0909341SAndroid Build Coastguard Worker    mova                 m4, [cq+128*18]
7289*c0909341SAndroid Build Coastguard Worker    mova                 m5, [cq+128*22]
7290*c0909341SAndroid Build Coastguard Worker    mova                 m6, [cq+128*26]
7291*c0909341SAndroid Build Coastguard Worker    mova                 m7, [cq+128*30]
7292*c0909341SAndroid Build Coastguard Worker    call m(idct_8x16_internal_10bpc).main_oddhalf
7293*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+128* 0]
7294*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+128* 4]
7295*c0909341SAndroid Build Coastguard Worker    mova                 m2, [cq+128* 8]
7296*c0909341SAndroid Build Coastguard Worker    mova                 m3, [cq+128*12]
7297*c0909341SAndroid Build Coastguard Worker    mova                 m4, [cq+128*16]
7298*c0909341SAndroid Build Coastguard Worker    mova                 m5, [cq+128*20]
7299*c0909341SAndroid Build Coastguard Worker    mova                 m6, [cq+128*24]
7300*c0909341SAndroid Build Coastguard Worker    mova                 m7, [cq+128*28]
7301*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).main
7302*c0909341SAndroid Build Coastguard Worker    call m(idct_8x16_internal_10bpc).main_evenhalf
7303*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_8x32_10bpc).main_end
    ; Clear the 32 rows just consumed. r7d starts at 128*29 and drops by
    ; 128*4 per iteration; each iteration writes rows r7/128-1 .. r7/128+2,
    ; so eight iterations cover rows 31 down to 0. The jg consumes the
    ; flags of the sub and exits once r7d is no longer positive.
7304*c0909341SAndroid Build Coastguard Worker    pxor                m15, m15
7305*c0909341SAndroid Build Coastguard Worker    mov                 r7d, 128*29
7306*c0909341SAndroid Build Coastguard Worker.main_zero_loop:
7307*c0909341SAndroid Build Coastguard Worker    mova      [cq+r7-128*1], m15
7308*c0909341SAndroid Build Coastguard Worker    mova      [cq+r7+128*0], m15
7309*c0909341SAndroid Build Coastguard Worker    mova      [cq+r7+128*1], m15
7310*c0909341SAndroid Build Coastguard Worker    mova      [cq+r7+128*2], m15
7311*c0909341SAndroid Build Coastguard Worker    sub                 r7d, 128*4
7312*c0909341SAndroid Build Coastguard Worker    jg .main_zero_loop
    ; Step to the next 32-byte column for the following .main call.
7313*c0909341SAndroid Build Coastguard Worker    add                  cq, 32
7314*c0909341SAndroid Build Coastguard Worker    mova          [r4-32*4], m0
7315*c0909341SAndroid Build Coastguard Worker    mova          [r4-32*3], m1
7316*c0909341SAndroid Build Coastguard Worker    mova          [r4-32*2], m2
7317*c0909341SAndroid Build Coastguard Worker    mova          [r4-32*1], m3
7318*c0909341SAndroid Build Coastguard Worker    mova          [r4+32*0], m4
7319*c0909341SAndroid Build Coastguard Worker    mova          [r4+32*1], m5
7320*c0909341SAndroid Build Coastguard Worker    mova          [r4+32*2], m6
7321*c0909341SAndroid Build Coastguard Worker    mova          [r4+32*3], m7
    ; Note the descending slot order (+3 down to -4) on load versus the
    ; ascending order (-4 up to +3) on the store after the transpose.
7322*c0909341SAndroid Build Coastguard Worker    mova                 m0, [r5+32*3]
7323*c0909341SAndroid Build Coastguard Worker    mova                 m1, [r5+32*2]
7324*c0909341SAndroid Build Coastguard Worker    mova                 m2, [r5+32*1]
7325*c0909341SAndroid Build Coastguard Worker    mova                 m3, [r5+32*0]
7326*c0909341SAndroid Build Coastguard Worker    mova                 m4, [r5-32*1]
7327*c0909341SAndroid Build Coastguard Worker    mova                 m5, [r5-32*2]
7328*c0909341SAndroid Build Coastguard Worker    mova                 m6, [r5-32*3]
7329*c0909341SAndroid Build Coastguard Worker    mova                 m7, [r5-32*4]
7330*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_8x32_10bpc).transpose
7331*c0909341SAndroid Build Coastguard Worker    mova          [r5-32*4], m0
7332*c0909341SAndroid Build Coastguard Worker    mova          [r5-32*3], m1
7333*c0909341SAndroid Build Coastguard Worker    mova          [r5-32*2], m2
7334*c0909341SAndroid Build Coastguard Worker    mova          [r5-32*1], m3
7335*c0909341SAndroid Build Coastguard Worker    mova          [r5+32*0], m4
7336*c0909341SAndroid Build Coastguard Worker    mova          [r5+32*1], m5
7337*c0909341SAndroid Build Coastguard Worker    mova          [r5+32*2], m6
7338*c0909341SAndroid Build Coastguard Worker    mova          [r5+32*3], m7
7339*c0909341SAndroid Build Coastguard Worker    ret
7340*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; .pass2_oddhalf: load the 16 odd-numbered inputs (per the trailing index
; comments: 1, 3, ..., 31) from the intermediate buffer at r3 into m0-m15
; and tail-jump to the 8bpc 16x32 main_oddhalf kernel. Slots come in groups
; of four at odd offsets, with consecutive groups 16 slots (32*16 bytes)
; apart.
7341*c0909341SAndroid Build Coastguard Worker.pass2_oddhalf:
7342*c0909341SAndroid Build Coastguard Worker    mova                 m0, [r3+32* 1] ;  1
7343*c0909341SAndroid Build Coastguard Worker    mova                 m1, [r3+32* 3] ;  3
7344*c0909341SAndroid Build Coastguard Worker    mova                 m2, [r3+32* 5] ;  5
7345*c0909341SAndroid Build Coastguard Worker    mova                 m3, [r3+32* 7] ;  7
7346*c0909341SAndroid Build Coastguard Worker    mova                 m4, [r3+32*17] ;  9
7347*c0909341SAndroid Build Coastguard Worker    mova                 m5, [r3+32*19] ; 11
7348*c0909341SAndroid Build Coastguard Worker    mova                 m6, [r3+32*21] ; 13
7349*c0909341SAndroid Build Coastguard Worker    mova                 m7, [r3+32*23] ; 15
7350*c0909341SAndroid Build Coastguard Worker    mova                 m8, [r3+32*33] ; 17
7351*c0909341SAndroid Build Coastguard Worker    mova                 m9, [r3+32*35] ; 19
7352*c0909341SAndroid Build Coastguard Worker    mova                m10, [r3+32*37] ; 21
7353*c0909341SAndroid Build Coastguard Worker    mova                m11, [r3+32*39] ; 23
7354*c0909341SAndroid Build Coastguard Worker    mova                m12, [r3+32*49] ; 25
7355*c0909341SAndroid Build Coastguard Worker    mova                m13, [r3+32*51] ; 27
7356*c0909341SAndroid Build Coastguard Worker    mova                m14, [r3+32*53] ; 29
7357*c0909341SAndroid Build Coastguard Worker    mova                m15, [r3+32*55] ; 31
7358*c0909341SAndroid Build Coastguard Worker    jmp m(inv_txfm_add_dct_dct_16x32_8bpc).main_oddhalf
7359*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; .pass2_evenhalf: same layout as .pass2_oddhalf but for the even-numbered
; inputs (0, 2, ..., 30). m15 (input 30) is additionally stored to
; [rsp+gprsize] before tail-jumping to the 8bpc 16x16 idct .main;
; presumably that kernel expects its last input in that stack slot -
; TODO confirm.
7360*c0909341SAndroid Build Coastguard Worker.pass2_evenhalf:
7361*c0909341SAndroid Build Coastguard Worker    mova                 m0, [r3+32* 0] ;  0
7362*c0909341SAndroid Build Coastguard Worker    mova                 m1, [r3+32* 2] ;  2
7363*c0909341SAndroid Build Coastguard Worker    mova                 m2, [r3+32* 4] ;  4
7364*c0909341SAndroid Build Coastguard Worker    mova                 m3, [r3+32* 6] ;  6
7365*c0909341SAndroid Build Coastguard Worker    mova                 m4, [r3+32*16] ;  8
7366*c0909341SAndroid Build Coastguard Worker    mova                 m5, [r3+32*18] ; 10
7367*c0909341SAndroid Build Coastguard Worker    mova                 m6, [r3+32*20] ; 12
7368*c0909341SAndroid Build Coastguard Worker    mova                 m7, [r3+32*22] ; 14
7369*c0909341SAndroid Build Coastguard Worker    mova                 m8, [r3+32*32] ; 16
7370*c0909341SAndroid Build Coastguard Worker    mova                 m9, [r3+32*34] ; 18
7371*c0909341SAndroid Build Coastguard Worker    mova                m10, [r3+32*36] ; 20
7372*c0909341SAndroid Build Coastguard Worker    mova                m11, [r3+32*38] ; 22
7373*c0909341SAndroid Build Coastguard Worker    mova                m12, [r3+32*48] ; 24
7374*c0909341SAndroid Build Coastguard Worker    mova                m13, [r3+32*50] ; 26
7375*c0909341SAndroid Build Coastguard Worker    mova                m14, [r3+32*52] ; 28
7376*c0909341SAndroid Build Coastguard Worker    mova                m15, [r3+32*54] ; 30
7377*c0909341SAndroid Build Coastguard Worker    mova      [rsp+gprsize], m15
7378*c0909341SAndroid Build Coastguard Worker    jmp m(idct_16x16_internal_8bpc).main
7379*c0909341SAndroid Build Coastguard Worker
; inv_txfm_add_identity_identity_32x32_10bpc(dst, stride, c, eob)
;
; Going by the name: identity/identity inverse transform of a 32x32 block,
; added to 10-bit pixels. cglobal declares 4 arguments, 8 GPRs and
; 8 vector registers.
;
; Register roles:
;   m5 = pw_8192 (pmulhrsw by 8192 is a rounded arithmetic shift right by 2)
;   m6 = zero
;   m7 = pixel_10bpc_max; loaded before .pass1 so the 12bpc entry point can
;        substitute its own value and jump to .pass1
;   r6 = strideq*3, r5 = strideq*5, r4 = strideq*7; none is read again in
;        this function, presumably they are consumed by the shared tail that
;        .main jumps to - TODO confirm
;   r7 = dstq saved at the start of an anti-diagonal (see below)
;
; Traversal: each .main call handles one sub-block (32 bytes from each of
; eight rows spaced 128 bytes apart). A diagonal starts with .main and
; continues with .main2, which moves cq back up eight rows and 32 bytes to
; the right, and dstq eight rows down and 16 bytes to the left. The call
; counts per diagonal are 1, 2, 3, 4, 3, 2, 1 (16 sub-blocks in total). After
; each diagonal the function returns early if eob is below 36, 136, 300,
; 535, 755 or 911 respectively. This assumes the shared tail leaves cq and
; dstq unchanged - TODO confirm.
;
; The numbered trailing comments on the instructions appear to be a
; column-wise diagram of that diagonal order, not per-line notes.
7380*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_identity_identity_32x32_10bpc, 4, 8, 8, dst, stride, c, eob
7381*c0909341SAndroid Build Coastguard Worker%undef cmp
7382*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m7, [pixel_10bpc_max]
; Shared entry point: the 12bpc variant jumps here with its own m7.
7383*c0909341SAndroid Build Coastguard Worker.pass1:
7384*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m5, [pw_8192]
7385*c0909341SAndroid Build Coastguard Worker    pxor                 m6, m6
7386*c0909341SAndroid Build Coastguard Worker    lea                  r6, [strideq*3]
7387*c0909341SAndroid Build Coastguard Worker    lea                  r5, [strideq*5]
7388*c0909341SAndroid Build Coastguard Worker    lea                  r4, [strideq+r6*2] ; strideq*7
7389*c0909341SAndroid Build Coastguard Worker    call .main                              ; 0
7390*c0909341SAndroid Build Coastguard Worker    cmp                eobd, 36
7391*c0909341SAndroid Build Coastguard Worker    jl .ret
7392*c0909341SAndroid Build Coastguard Worker    add                  cq, 128*8          ; 0 1
7393*c0909341SAndroid Build Coastguard Worker    mov                  r7, dstq           ; 1
7394*c0909341SAndroid Build Coastguard Worker    add                dstq, 16
7395*c0909341SAndroid Build Coastguard Worker    call .main
7396*c0909341SAndroid Build Coastguard Worker    call .main2
7397*c0909341SAndroid Build Coastguard Worker    cmp                eobd, 136
7398*c0909341SAndroid Build Coastguard Worker    jl .ret
7399*c0909341SAndroid Build Coastguard Worker    add                  cq, 128*16-32      ; 0 1 2
7400*c0909341SAndroid Build Coastguard Worker    lea                dstq, [r7+16*2]      ; 1 2
7401*c0909341SAndroid Build Coastguard Worker    call .main                              ; 2
7402*c0909341SAndroid Build Coastguard Worker    call .main2
7403*c0909341SAndroid Build Coastguard Worker    call .main2
7404*c0909341SAndroid Build Coastguard Worker    cmp                eobd, 300
7405*c0909341SAndroid Build Coastguard Worker    jl .ret
7406*c0909341SAndroid Build Coastguard Worker    add                  cq, 128*24-64      ; 0 1 2 3
7407*c0909341SAndroid Build Coastguard Worker    add                  r7, 16*3           ; 1 2 3
7408*c0909341SAndroid Build Coastguard Worker    mov                dstq, r7             ; 2 3
7409*c0909341SAndroid Build Coastguard Worker    call .main                              ; 3
7410*c0909341SAndroid Build Coastguard Worker    call .main2
7411*c0909341SAndroid Build Coastguard Worker    call .main2
7412*c0909341SAndroid Build Coastguard Worker    call .main2
7413*c0909341SAndroid Build Coastguard Worker    cmp                eobd, 535
7414*c0909341SAndroid Build Coastguard Worker    jl .ret
    ; From here on the diagonals shrink: the start point moves down by
    ; eight rows (strideq*8) from the previous start kept in r7 instead of
    ; moving right by 16 bytes.
7415*c0909341SAndroid Build Coastguard Worker    add                  cq, 128*24-64      ; 0 1 2 3
7416*c0909341SAndroid Build Coastguard Worker    lea                dstq, [r7+strideq*8] ; 1 2 3 4
7417*c0909341SAndroid Build Coastguard Worker    mov                  r7, dstq           ; 2 3 4
7418*c0909341SAndroid Build Coastguard Worker    call .main                              ; 3 4
7419*c0909341SAndroid Build Coastguard Worker    call .main2
7420*c0909341SAndroid Build Coastguard Worker    call .main2
7421*c0909341SAndroid Build Coastguard Worker    cmp                eobd, 755
7422*c0909341SAndroid Build Coastguard Worker    jl .ret
7423*c0909341SAndroid Build Coastguard Worker    add                  cq, 128*16-32      ; 0 1 2 3
7424*c0909341SAndroid Build Coastguard Worker    lea                dstq, [r7+strideq*8] ; 1 2 3 4
7425*c0909341SAndroid Build Coastguard Worker    call .main                              ; 2 3 4 5
7426*c0909341SAndroid Build Coastguard Worker    call .main2                             ; 3 4 5
7427*c0909341SAndroid Build Coastguard Worker    cmp                eobd, 911
7428*c0909341SAndroid Build Coastguard Worker    jl .ret
7429*c0909341SAndroid Build Coastguard Worker    add                  cq, 128*8          ; 0 1 2 3
7430*c0909341SAndroid Build Coastguard Worker    add                dstq, 16             ; 1 2 3 4
7431*c0909341SAndroid Build Coastguard Worker    call .main                              ; 2 3 4 5
7432*c0909341SAndroid Build Coastguard Worker.ret:                                       ; 3 4 5 6
7433*c0909341SAndroid Build Coastguard Worker    RET
7434*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; .main2: step to the next sub-block on the current anti-diagonal (cq: back
; eight 128-byte rows and forward 32 bytes; dstq: down eight rows, back
; 16 bytes), then fall through into .main.
7435*c0909341SAndroid Build Coastguard Worker.main2:
7436*c0909341SAndroid Build Coastguard Worker    sub                  cq, 128*8-32
7437*c0909341SAndroid Build Coastguard Worker    lea                dstq, [dstq+strideq*8-16]
; .main: load [cq+128*n] for n = 0..7, pack each even/odd pair of dword
; vectors into one word vector with signed saturation (m0-m3), scale by m5
; with pmulhrsw, and tail-jump to the 8x32 identity routine's .main_zero
; (outside this view; judging by its name it also clears the consumed
; coefficients - TODO confirm). Its ret returns to .main's caller.
7438*c0909341SAndroid Build Coastguard Worker.main:
7439*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+128*0]
7440*c0909341SAndroid Build Coastguard Worker    packssdw             m0, [cq+128*1]
7441*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+128*2]
7442*c0909341SAndroid Build Coastguard Worker    packssdw             m1, [cq+128*3]
7443*c0909341SAndroid Build Coastguard Worker    mova                 m2, [cq+128*4]
7444*c0909341SAndroid Build Coastguard Worker    packssdw             m2, [cq+128*5]
7445*c0909341SAndroid Build Coastguard Worker    mova                 m3, [cq+128*6]
7446*c0909341SAndroid Build Coastguard Worker    packssdw             m3, [cq+128*7]
7447*c0909341SAndroid Build Coastguard Worker    REPX   {pmulhrsw x, m5}, m0, m1, m2, m3
7448*c0909341SAndroid Build Coastguard Worker    jmp m(inv_txfm_add_identity_identity_8x32_10bpc).main_zero
7449*c0909341SAndroid Build Coastguard Worker
; 12 bpc entry point of the 32x32 identity/identity inverse transform.
; It only loads the 12 bpc pixel maximum into m7 and then tail-jumps to the
; .pass1 label of the 10 bpc function, which is defined outside this view and
; presumably holds the shared body that treats m7 as the clamp ceiling.
7450*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_identity_identity_32x32_12bpc, 4, 8, 8, dst, stride, c, eob
7451*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m7, [pixel_12bpc_max]
7452*c0909341SAndroid Build Coastguard Worker    jmp m(inv_txfm_add_identity_identity_32x32_10bpc).pass1
7453*c0909341SAndroid Build Coastguard Worker
; IDCT64_PART2_END: final butterflies of the 64-point pass plus the add to
; the destination picture for four output rows.
;   %1      output index n; its parity selects which of r4/r5 each operand is
;           loaded from and which of dstq/r2 is used as base d0 and d1
;   %2, %3  source registers (combined with the idct32 sums/differences)
;   %4-%6   temporaries; together with %3 they hold the four results
;   %7-%10  address offsets added to d0/d1/d0/d1 respectively
; Results, as labelled by the inline comments:
;   m%3 = out 0+n  -> [d0+%7]      m%4 = out31-n -> [d1+%8]
;   m%5 = out32+n  -> [d0+%9]      m%6 = out63-n -> [d1+%10]
; Every result is multiplied by m14 with pmulhrsw (the visible caller loads
; pw_2048 there), added to the existing pixels, clamped to
; [0, pixel_10bpc_max] and written back. m%2 is destroyed: it is reused first
; as the zero register and then as the pixel maximum.
; Note: the clamp ceiling is hard-coded to pixel_10bpc_max in this macro.
7454*c0909341SAndroid Build Coastguard Worker%macro IDCT64_PART2_END 6-10 ; out, src[1-2], tmp[1-3], (offset[1-4])
7455*c0909341SAndroid Build Coastguard Worker%if %1 & 1
7456*c0909341SAndroid Build Coastguard Worker    mova                m%5, [r5-32*(51-%1)] ; idct16 out 0+n
7457*c0909341SAndroid Build Coastguard Worker    mova                m%4, [r4-32*(14+%1)] ; idct32 out31-n
7458*c0909341SAndroid Build Coastguard Worker%else
7459*c0909341SAndroid Build Coastguard Worker    mova                m%5, [r4-32*(45-%1)]
7460*c0909341SAndroid Build Coastguard Worker    mova                m%4, [r5-32*(20+%1)]
7461*c0909341SAndroid Build Coastguard Worker%endif
    ; Saturating 16-bit butterflies.
7462*c0909341SAndroid Build Coastguard Worker    paddsw              m%6, m%5, m%4 ; idct32 out 0+n
7463*c0909341SAndroid Build Coastguard Worker    psubsw              m%5, m%4      ; idct32 out31-n
7464*c0909341SAndroid Build Coastguard Worker    paddsw              m%4, m%5, m%3 ; out31-n
7465*c0909341SAndroid Build Coastguard Worker    psubsw              m%5, m%3      ; out32+n
7466*c0909341SAndroid Build Coastguard Worker    paddsw              m%3, m%6, m%2 ; out 0+n
7467*c0909341SAndroid Build Coastguard Worker    psubsw              m%6, m%2      ; out63-n
    ; Rounded scaling of all four results by the word constant held in m14.
7468*c0909341SAndroid Build Coastguard Worker    REPX  {pmulhrsw x, m14}, m%5, m%6, m%4, m%3
    ; Odd n swaps the roles of the two destination base pointers.
7469*c0909341SAndroid Build Coastguard Worker%if %1 & 1
7470*c0909341SAndroid Build Coastguard Worker    %define %%d0 r2
7471*c0909341SAndroid Build Coastguard Worker    %define %%d1 dstq
7472*c0909341SAndroid Build Coastguard Worker%else
7473*c0909341SAndroid Build Coastguard Worker    %define %%d0 dstq
7474*c0909341SAndroid Build Coastguard Worker    %define %%d1 r2
7475*c0909341SAndroid Build Coastguard Worker%endif
    ; Add the residual to the pixels currently stored at the destination.
7476*c0909341SAndroid Build Coastguard Worker    paddw               m%3, [%%d0+%7 ]
7477*c0909341SAndroid Build Coastguard Worker    paddw               m%4, [%%d1+%8 ]
7478*c0909341SAndroid Build Coastguard Worker    paddw               m%5, [%%d0+%9 ]
7479*c0909341SAndroid Build Coastguard Worker    paddw               m%6, [%%d1+%10]
    ; Clamp to [0, pixel_10bpc_max]; m%2 is overwritten from here on.
7480*c0909341SAndroid Build Coastguard Worker    pxor                m%2, m%2
7481*c0909341SAndroid Build Coastguard Worker    REPX    {pmaxsw x, m%2}, m%3, m%4, m%5, m%6
7482*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m%2, [pixel_10bpc_max]
7483*c0909341SAndroid Build Coastguard Worker    REPX    {pminsw x, m%2}, m%3, m%4, m%5, m%6
7484*c0909341SAndroid Build Coastguard Worker    mova         [%%d0+%7 ], m%3
7485*c0909341SAndroid Build Coastguard Worker    mova         [%%d1+%8 ], m%4
7486*c0909341SAndroid Build Coastguard Worker    mova         [%%d0+%9 ], m%5
7487*c0909341SAndroid Build Coastguard Worker    mova         [%%d1+%10], m%6
7488*c0909341SAndroid Build Coastguard Worker%endmacro
7489*c0909341SAndroid Build Coastguard Worker
; 16x64 DCT/DCT inverse transform with add to the destination, 10 bpc.
; Arguments (named in the cglobal line): dst, stride, c (coefficients), eob.
; Flow:
;   eob == 0 -> .dconly: scale the DC coefficient and hand over to the shared
;               16x4 DC-only code.
;   otherwise: up to four calls of .main (first pass). Each call consumes
;               32 bytes from every one of 16 coefficient rows and writes
;               eight 32-byte vectors to the stack scratch area. eob is
;               reduced cumulatively (44, then 107, then 128) and the
;               remaining calls are skipped through .fast, which zero-fills
;               the scratch slots that were not produced.
;   .pass2:     second pass built from 8 bpc (16-bit) helper routines and
;               .main_part2_pass2, which also writes the pixels.
7490*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_dct_dct_16x64_10bpc, 4, 7, 0, dst, stride, c, eob
7491*c0909341SAndroid Build Coastguard Worker    test               eobd, eobd
7492*c0909341SAndroid Build Coastguard Worker    jz .dconly
    ; Full path: re-declare the frame with 10 GPRs, 16 vector registers and
    ; 32*98 bytes of stack scratch.
7493*c0909341SAndroid Build Coastguard Worker    PROLOGUE              0, 10, 16, 32*98, dst, stride, c, eob
    ; NOTE(review): presumably removes an x86inc wrapper around cmp so that
    ; the plain instruction is assembled below - confirm against x86inc.asm.
7494*c0909341SAndroid Build Coastguard Worker%undef cmp
    ; Constants kept live across the .main calls.
7495*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m11, [pd_2048]
7496*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [clip_18b_min]
7497*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_18b_max]
7498*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m14, [pd_2896]
    ; r6 = centre of the first 8-vector output window; .main stores to
    ; [r6-32*4 .. r6+32*3] and then advances r6 by 32*8.
7499*c0909341SAndroid Build Coastguard Worker    lea                  r6, [rsp+32*6]
7500*c0909341SAndroid Build Coastguard Worker    call .main
7501*c0909341SAndroid Build Coastguard Worker    sub                eobd, 44
7502*c0909341SAndroid Build Coastguard Worker    jl .fast
7503*c0909341SAndroid Build Coastguard Worker    call .main
7504*c0909341SAndroid Build Coastguard Worker    sub                eobd, 107
7505*c0909341SAndroid Build Coastguard Worker    jl .fast
7506*c0909341SAndroid Build Coastguard Worker    call .main
7507*c0909341SAndroid Build Coastguard Worker    sub                eobd, 128
7508*c0909341SAndroid Build Coastguard Worker    jl .fast
7509*c0909341SAndroid Build Coastguard Worker    call .main
7510*c0909341SAndroid Build Coastguard Worker    jmp .pass2
    ; DC-only path: r6d = (c[0] * 181 + 640) >> 10, and c[0] is cleared by
    ; storing eobd, which is zero on this path. r3d is OR-ed with 64 before
    ; jumping into the 16x4 function's .dconly3 label.
    ; NOTE(review): 64 is presumably the row count expected by .dconly3 -
    ; confirm against that routine.
7511*c0909341SAndroid Build Coastguard Worker.dconly:
7512*c0909341SAndroid Build Coastguard Worker    imul                r6d, [cq], 181
7513*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m3, [dconly_10bpc]
7514*c0909341SAndroid Build Coastguard Worker    mov                [cq], eobd ; 0
7515*c0909341SAndroid Build Coastguard Worker    or                  r3d, 64
7516*c0909341SAndroid Build Coastguard Worker    add                 r6d, 640
7517*c0909341SAndroid Build Coastguard Worker    sar                 r6d, 10
7518*c0909341SAndroid Build Coastguard Worker    jmp m(inv_txfm_add_dct_dct_16x4_10bpc).dconly3
    ; Early exit from the first pass: zero every 8-vector window from the
    ; current r6 up to (but excluding) the window centred on rsp+32*38.
7519*c0909341SAndroid Build Coastguard Worker.fast:
7520*c0909341SAndroid Build Coastguard Worker    lea                  r4, [rsp+32*38]
7521*c0909341SAndroid Build Coastguard Worker    pxor                 m0, m0
7522*c0909341SAndroid Build Coastguard Worker.fast_loop:
7523*c0909341SAndroid Build Coastguard Worker    REPX {mova [r6+32*x], m0}, -4, -3, -2, -1, 0, 1, 2, 3
7524*c0909341SAndroid Build Coastguard Worker    add                  r6, 32*8
7525*c0909341SAndroid Build Coastguard Worker    cmp                  r6, r4
7526*c0909341SAndroid Build Coastguard Worker    jl .fast_loop
    ; Second pass. The first-pass results occupy rsp+32*2 .. rsp+32*33; the
    ; inN comments give the input index held by each slot.
7527*c0909341SAndroid Build Coastguard Worker.pass2:
    ; NOTE(review): r6 presumably is the constant-table base the 8 bpc
    ; helpers address relative to - confirm in the 8 bpc source.
7528*c0909341SAndroid Build Coastguard Worker    lea                  r6, [pw_5+128]
7529*c0909341SAndroid Build Coastguard Worker    mova                 m0, [rsp+32* 2] ; in0
7530*c0909341SAndroid Build Coastguard Worker    mova                 m1, [rsp+32* 6] ; in4
7531*c0909341SAndroid Build Coastguard Worker    mova                 m2, [rsp+32*10] ; in8
7532*c0909341SAndroid Build Coastguard Worker    mova                 m3, [rsp+32*14] ; in12
7533*c0909341SAndroid Build Coastguard Worker    mova                 m4, [rsp+32*18] ; in16
7534*c0909341SAndroid Build Coastguard Worker    mova                 m5, [rsp+32*22] ; in20
7535*c0909341SAndroid Build Coastguard Worker    mova                 m6, [rsp+32*26] ; in24
7536*c0909341SAndroid Build Coastguard Worker    mova                 m7, [rsp+32*30] ; in28
    ; The remaining inputs of the 16-point helper (m8-m14 and the [rsp] slot)
    ; are set to zero.
7537*c0909341SAndroid Build Coastguard Worker    pxor                 m8, m8
7538*c0909341SAndroid Build Coastguard Worker    REPX       {mova x, m8}, m9, m10, m11, m12, m13, m14
7539*c0909341SAndroid Build Coastguard Worker    mova              [rsp], m8
7540*c0909341SAndroid Build Coastguard Worker    call m(idct_16x16_internal_8bpc).main
    ; NOTE(review): m1 is reloaded from [rsp+32*1]; presumably the helper
    ; leaves that output on the stack - confirm.
7541*c0909341SAndroid Build Coastguard Worker    mova                 m1, [rsp+32*1]
    ; Store the 16 helper outputs m0-m15 to rsp+32*34 .. rsp+32*49.
7542*c0909341SAndroid Build Coastguard Worker    lea                  r4, [rsp+32*38]
7543*c0909341SAndroid Build Coastguard Worker    mova          [r4-32*4], m0
7544*c0909341SAndroid Build Coastguard Worker    mova          [r4-32*3], m1
7545*c0909341SAndroid Build Coastguard Worker    mova          [r4-32*2], m2
7546*c0909341SAndroid Build Coastguard Worker    mova          [r4-32*1], m3
7547*c0909341SAndroid Build Coastguard Worker    mova          [r4+32*0], m4
7548*c0909341SAndroid Build Coastguard Worker    mova          [r4+32*1], m5
7549*c0909341SAndroid Build Coastguard Worker    mova          [r4+32*2], m6
7550*c0909341SAndroid Build Coastguard Worker    mova          [r4+32*3], m7
7551*c0909341SAndroid Build Coastguard Worker    add                  r4, 32*8
7552*c0909341SAndroid Build Coastguard Worker    mova          [r4-32*4], m8
7553*c0909341SAndroid Build Coastguard Worker    mova          [r4-32*3], m9
7554*c0909341SAndroid Build Coastguard Worker    mova          [r4-32*2], m10
7555*c0909341SAndroid Build Coastguard Worker    mova          [r4-32*1], m11
7556*c0909341SAndroid Build Coastguard Worker    mova          [r4+32*0], m12
7557*c0909341SAndroid Build Coastguard Worker    mova          [r4+32*1], m13
7558*c0909341SAndroid Build Coastguard Worker    mova          [r4+32*2], m14
7559*c0909341SAndroid Build Coastguard Worker    mova          [r4+32*3], m15
    ; Inputs 2, 6, ..., 30 go to the 16x32 odd-half helper; r4/r5 are moved
    ; to the scratch positions that helper works on.
7560*c0909341SAndroid Build Coastguard Worker    mova                 m0, [rsp+32* 4] ; in2
7561*c0909341SAndroid Build Coastguard Worker    mova                 m1, [rsp+32* 8] ; in6
7562*c0909341SAndroid Build Coastguard Worker    mova                 m2, [rsp+32*12] ; in10
7563*c0909341SAndroid Build Coastguard Worker    mova                 m3, [rsp+32*16] ; in14
7564*c0909341SAndroid Build Coastguard Worker    mova                 m4, [rsp+32*20] ; in18
7565*c0909341SAndroid Build Coastguard Worker    mova                 m5, [rsp+32*24] ; in22
7566*c0909341SAndroid Build Coastguard Worker    mova                 m6, [rsp+32*28] ; in26
7567*c0909341SAndroid Build Coastguard Worker    mova                 m7, [rsp+32*32] ; in30
7568*c0909341SAndroid Build Coastguard Worker    lea                  r5, [r4+32*16]
7569*c0909341SAndroid Build Coastguard Worker    add                  r4, 32*8
7570*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_16x32_8bpc).main_oddhalf_fast
    ; First group of odd inputs for the 8 bpc idct64 part 1; r6 is set to
    ; idct64_mul - 8 for this call and r4/r5 are advanced beforehand.
7571*c0909341SAndroid Build Coastguard Worker    mova                 m0, [rsp+32* 3] ; in1
7572*c0909341SAndroid Build Coastguard Worker    mova                 m1, [rsp+32*33] ; in31
7573*c0909341SAndroid Build Coastguard Worker    mova                 m2, [rsp+32*19] ; in17
7574*c0909341SAndroid Build Coastguard Worker    mova                 m3, [rsp+32*17] ; in15
7575*c0909341SAndroid Build Coastguard Worker    mova                 m4, [rsp+32*11] ; in9
7576*c0909341SAndroid Build Coastguard Worker    mova                 m5, [rsp+32*25] ; in23
7577*c0909341SAndroid Build Coastguard Worker    mova                 m6, [rsp+32*27] ; in25
7578*c0909341SAndroid Build Coastguard Worker    mova                 m7, [rsp+32* 9] ; in7
7579*c0909341SAndroid Build Coastguard Worker    lea                  r6, [idct64_mul - 8]
7580*c0909341SAndroid Build Coastguard Worker    add                  r4, 32*16
7581*c0909341SAndroid Build Coastguard Worker    add                  r5, 32*32
7582*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_16x64_8bpc).main_part1
    ; Second group of odd inputs; r6, r4 and r5 are adjusted again before
    ; the same helper is called a second time.
7583*c0909341SAndroid Build Coastguard Worker    mova                 m0, [rsp+32* 7] ; in5
7584*c0909341SAndroid Build Coastguard Worker    mova                 m1, [rsp+32*29] ; in27
7585*c0909341SAndroid Build Coastguard Worker    mova                 m2, [rsp+32*23] ; in21
7586*c0909341SAndroid Build Coastguard Worker    mova                 m3, [rsp+32*13] ; in11
7587*c0909341SAndroid Build Coastguard Worker    mova                 m4, [rsp+32*15] ; in13
7588*c0909341SAndroid Build Coastguard Worker    mova                 m5, [rsp+32*21] ; in19
7589*c0909341SAndroid Build Coastguard Worker    mova                 m6, [rsp+32*31] ; in29
7590*c0909341SAndroid Build Coastguard Worker    mova                 m7, [rsp+32* 5] ; in3
7591*c0909341SAndroid Build Coastguard Worker    add                  r6, 8
7592*c0909341SAndroid Build Coastguard Worker    add                  r4, 32*8
7593*c0909341SAndroid Build Coastguard Worker    sub                  r5, 32*8
7594*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_16x64_8bpc).main_part1
    ; Stride multiples used as row offsets by .main_part2_pass2:
    ; r8 = stride*4, r9 = stride*5, r3 = stride*6, r7 = stride*7.
7595*c0909341SAndroid Build Coastguard Worker    lea                  r8, [strideq*4]
7596*c0909341SAndroid Build Coastguard Worker    lea                  r9, [strideq*5]
7597*c0909341SAndroid Build Coastguard Worker    lea                  r3, [r9+strideq*1] ; stride*6
7598*c0909341SAndroid Build Coastguard Worker    lea                  r7, [r9+strideq*2] ; stride*7
7599*c0909341SAndroid Build Coastguard Worker    call .main_part2_pass2
7600*c0909341SAndroid Build Coastguard Worker    RET
; .main: first pass over one 32-byte-wide slice of 16 coefficient rows.
; In:   cq  = coefficients (row pitch 128 bytes)
;       r6  = centre of the 8-vector output window
;       m11 = rounding constant; pd_2 is derived from it with a shift by 10,
;             so it has to hold pd_2048 as loaded by the entry code
;       other registers as required by the 10 bpc helpers called here
; Out:  packed, transposed 16-bit results in [r6-32*4 .. r6+32*3];
;       r6 advanced by 32*8, cq advanced by 32; the 16 consumed coefficient
;       vectors are cleared to zero.
7601*c0909341SAndroid Build Coastguard WorkerALIGN function_align
7602*c0909341SAndroid Build Coastguard Worker.main:
    ; Odd rows 1, 3, ..., 15 feed the odd half of the 16-point transform.
7603*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+128* 1]
7604*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+128* 3]
7605*c0909341SAndroid Build Coastguard Worker    mova                 m2, [cq+128* 5]
7606*c0909341SAndroid Build Coastguard Worker    mova                 m3, [cq+128* 7]
7607*c0909341SAndroid Build Coastguard Worker    mova                 m4, [cq+128* 9]
7608*c0909341SAndroid Build Coastguard Worker    mova                 m5, [cq+128*11]
7609*c0909341SAndroid Build Coastguard Worker    mova                 m6, [cq+128*13]
7610*c0909341SAndroid Build Coastguard Worker    mova                 m7, [cq+128*15]
7611*c0909341SAndroid Build Coastguard Worker    call m(idct_8x16_internal_10bpc).main_oddhalf
    ; Even rows 0, 2, ..., 14 feed the 8-point transform and the even half.
7612*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+128* 0]
7613*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+128* 2]
7614*c0909341SAndroid Build Coastguard Worker    mova                 m2, [cq+128* 4]
7615*c0909341SAndroid Build Coastguard Worker    mova                 m3, [cq+128* 6]
7616*c0909341SAndroid Build Coastguard Worker    mova                 m4, [cq+128* 8]
7617*c0909341SAndroid Build Coastguard Worker    mova                 m5, [cq+128*10]
7618*c0909341SAndroid Build Coastguard Worker    mova                 m6, [cq+128*12]
7619*c0909341SAndroid Build Coastguard Worker    mova                 m7, [cq+128*14]
7620*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).main
7621*c0909341SAndroid Build Coastguard Worker    call m(idct_8x16_internal_10bpc).main_evenhalf
    ; Clear coefficient rows 15..0 of this slice, four rows per iteration
    ; (r7d runs through 128*13, 128*9, 128*5, 128*1).
7622*c0909341SAndroid Build Coastguard Worker    pxor                m15, m15
7623*c0909341SAndroid Build Coastguard Worker    mov                 r7d, 128*13
7624*c0909341SAndroid Build Coastguard Worker.main_zero_loop:
7625*c0909341SAndroid Build Coastguard Worker    mova      [cq+r7-128*1], m15
7626*c0909341SAndroid Build Coastguard Worker    mova      [cq+r7+128*0], m15
7627*c0909341SAndroid Build Coastguard Worker    mova      [cq+r7+128*1], m15
7628*c0909341SAndroid Build Coastguard Worker    mova      [cq+r7+128*2], m15
7629*c0909341SAndroid Build Coastguard Worker    sub                 r7d, 128*4
7630*c0909341SAndroid Build Coastguard Worker    jg .main_zero_loop
    ; Step to the next 32-byte slice for the following call.
7631*c0909341SAndroid Build Coastguard Worker    add                  cq, 32
    ; Final butterflies: m0-m7 get the rounding term pd_2 added and are then
    ; combined with the values read back from the r6 window (NOTE(review):
    ; presumably the odd-half results left there by main_oddhalf - confirm).
    ; Sum and difference are shifted right by 2 and packed to 16 bit.
    ; m15 is reused as a temporary once the rounding has been applied.
7632*c0909341SAndroid Build Coastguard Worker    psrld               m15, m11, 10 ; pd_2
7633*c0909341SAndroid Build Coastguard Worker    mova                 m8, [r6-32*4]
7634*c0909341SAndroid Build Coastguard Worker    mova                 m9, [r6+32*3]
7635*c0909341SAndroid Build Coastguard Worker    REPX     {paddd x, m15}, m0, m1, m2, m3, m4, m5, m6, m7
7636*c0909341SAndroid Build Coastguard Worker    psubd               m10, m0, m8 ; out15
7637*c0909341SAndroid Build Coastguard Worker    paddd                m0, m8     ; out0
7638*c0909341SAndroid Build Coastguard Worker    mova                 m8, [r6-32*3]
7639*c0909341SAndroid Build Coastguard Worker    psubd               m15, m7, m9 ; out8
7640*c0909341SAndroid Build Coastguard Worker    paddd                m7, m9     ; out7
7641*c0909341SAndroid Build Coastguard Worker    mova                 m9, [r6+32*2]
7642*c0909341SAndroid Build Coastguard Worker    REPX       {psrad x, 2}, m0, m15, m10, m7
7643*c0909341SAndroid Build Coastguard Worker    packssdw             m0, m15
7644*c0909341SAndroid Build Coastguard Worker    packssdw             m7, m10
7645*c0909341SAndroid Build Coastguard Worker    psubd               m10, m1, m8 ; out14
7646*c0909341SAndroid Build Coastguard Worker    paddd                m1, m8     ; out1
7647*c0909341SAndroid Build Coastguard Worker    mova                 m8, [r6-32*2]
7648*c0909341SAndroid Build Coastguard Worker    psubd               m15, m6, m9 ; out9
7649*c0909341SAndroid Build Coastguard Worker    paddd                m6, m9     ; out6
7650*c0909341SAndroid Build Coastguard Worker    mova                 m9, [r6+32*1]
7651*c0909341SAndroid Build Coastguard Worker    REPX       {psrad x, 2}, m1, m15, m10, m6
7652*c0909341SAndroid Build Coastguard Worker    packssdw             m1, m15
7653*c0909341SAndroid Build Coastguard Worker    packssdw             m6, m10
7654*c0909341SAndroid Build Coastguard Worker    psubd               m10, m2, m8 ; out13
7655*c0909341SAndroid Build Coastguard Worker    paddd                m2, m8     ; out2
7656*c0909341SAndroid Build Coastguard Worker    mova                 m8, [r6-32*1]
7657*c0909341SAndroid Build Coastguard Worker    psubd               m15, m5, m9 ; out10
7658*c0909341SAndroid Build Coastguard Worker    paddd                m5, m9     ; out5
7659*c0909341SAndroid Build Coastguard Worker    mova                 m9, [r6+32*0]
7660*c0909341SAndroid Build Coastguard Worker    REPX       {psrad x, 2}, m2, m15, m10, m5
7661*c0909341SAndroid Build Coastguard Worker    packssdw             m2, m15
7662*c0909341SAndroid Build Coastguard Worker    packssdw             m5, m10
7663*c0909341SAndroid Build Coastguard Worker    psubd               m10, m3, m8 ; out12
7664*c0909341SAndroid Build Coastguard Worker    paddd                m3, m8     ; out3
7665*c0909341SAndroid Build Coastguard Worker    psubd               m15, m4, m9 ; out11
7666*c0909341SAndroid Build Coastguard Worker    paddd                m4, m9     ; out4
7667*c0909341SAndroid Build Coastguard Worker    REPX       {psrad x, 2}, m3, m15, m10, m4
7668*c0909341SAndroid Build Coastguard Worker    packssdw             m3, m15
7669*c0909341SAndroid Build Coastguard Worker    packssdw             m4, m10
    ; Transpose the packed results, then write them over the r6 window.
7670*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_10bpc).transpose3
7671*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*4], m0
7672*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*3], m1
7673*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*2], m2
7674*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*1], m3
7675*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*0], m4
7676*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*1], m5
7677*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*2], m6
7678*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*3], m7
7679*c0909341SAndroid Build Coastguard Worker    add                  r6, 32*8
7680*c0909341SAndroid Build Coastguard Worker    ret
; .main_part2_pass2: last stage of the second pass plus the write-out.
; In:   dstq/strideq; r3, r7, r8, r9 = stride multiples (the caller visible
;       in this function passes stride*6, *7, *4 and *5 respectively);
;       r4/r5 = scratch pointers, also compared as the loop exit condition
; Each iteration calls the 8 bpc main_part2_internal helper with
; m14 = pw_m2896_2896, reloads m14 with pw_2048 for the rounding in
; IDCT64_PART2_END, and emits 16 rows through four macro invocations.
; With the stride multiples above, the first iteration addresses rows
;   0/31/32/63, 7/24/39/56, 8/23/40/55 and 15/16/47/48;
; afterwards dstq moves one row down and r2 (starting at dstq+r7) one row up.
; The loop ends once r4 == r5 (NOTE(review): r4/r5 are not modified in the
; visible code here, so presumably the helper advances them - confirm).
; Overwrites m11-m14, r6 and r2.
7681*c0909341SAndroid Build Coastguard Worker.main_part2_pass2:
7682*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m11, [pw_1567_3784]
7683*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [pw_m3784_1567]
7684*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [pw_2896_2896]
7685*c0909341SAndroid Build Coastguard Worker    lea                  r6, [pw_5+128]
7686*c0909341SAndroid Build Coastguard Worker    lea                  r2, [dstq+r7]
7687*c0909341SAndroid Build Coastguard Worker.main_part2_pass2_loop:
7688*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m14, [pw_m2896_2896]
7689*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_16x64_8bpc).main_part2_internal
7690*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m14, [pw_2048]
7691*c0909341SAndroid Build Coastguard Worker    IDCT64_PART2_END      0,  7,  0,  6,  9, 10, strideq*0, r3*4, r8*8, r7*8
7692*c0909341SAndroid Build Coastguard Worker    IDCT64_PART2_END      7,  8,  5,  0,  6,  7, strideq*0, r3*4, r8*8, r7*8
7693*c0909341SAndroid Build Coastguard Worker    IDCT64_PART2_END      8,  2,  1,  0,  6,  7, strideq*8, r8*4, r9*8, r3*8
7694*c0909341SAndroid Build Coastguard Worker    IDCT64_PART2_END     15,  3,  4,  0,  6,  7, strideq*8, r8*4, r9*8, r3*8
7695*c0909341SAndroid Build Coastguard Worker    add                dstq, strideq
7696*c0909341SAndroid Build Coastguard Worker    sub                  r2, strideq
7697*c0909341SAndroid Build Coastguard Worker    cmp                  r4, r5
7698*c0909341SAndroid Build Coastguard Worker    jne .main_part2_pass2_loop
7699*c0909341SAndroid Build Coastguard Worker    ret
; .main_part1_rect2 / .main_part1: 32-bit idct64 steps 1-5 for one group of
; four odd inputs.
; In:   m0-m3 = the four inputs (see the in.. -> t.. table below)
;       r5    = pointer to 12 dword multipliers, advanced by 4*12 on exit
;       r6    = centre of the 8-vector output window, advanced by 32*8
;       m11   = rounding term added before every shift right by 12
;       m12/m13 = lower/upper clip bounds applied with pmaxsd/pminsd
; Out:  eight intermediate vectors stored at [r6-32*4 .. r6+32*3].
; The registers m0-m10 and m15 are overwritten.
; The _rect2 entry first applies (x + m11) >> 12 to m0-m3 and then falls
; through (NOTE(review): presumably finishing a rect2 pre-scale started by
; the caller - confirm at the call sites).
7700*c0909341SAndroid Build Coastguard WorkerALIGN function_align
7701*c0909341SAndroid Build Coastguard Worker.main_part1_rect2:
7702*c0909341SAndroid Build Coastguard Worker    REPX     {paddd x, m11}, m0, m1, m2, m3
7703*c0909341SAndroid Build Coastguard Worker    REPX     {psrad x, 12 }, m0, m1, m2, m3
7704*c0909341SAndroid Build Coastguard Worker.main_part1: ; idct64 steps 1-5
7705*c0909341SAndroid Build Coastguard Worker    ; in1/31/17/15 -> t32a/33/34a/35/60/61a/62/63a
7706*c0909341SAndroid Build Coastguard Worker    ; in7/25/23/ 9 -> t56a/57/58a/59/36/37a/38/39a
7707*c0909341SAndroid Build Coastguard Worker    ; in5/27/21/11 -> t40a/41/42a/43/52/53a/54/55a
7708*c0909341SAndroid Build Coastguard Worker    ; in3/29/19/13 -> t48a/49/50a/51/44/45a/46/47a
    ; Broadcast the first eight multipliers; each input is multiplied by two.
7709*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m7, [r5+4*0]
7710*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m8, [r5+4*1]
7711*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m6, [r5+4*2]
7712*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m9, [r5+4*3]
7713*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m5, [r5+4*4]
7714*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m10, [r5+4*5]
7715*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m4, [r5+4*6]
7716*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m15, [r5+4*7]
7717*c0909341SAndroid Build Coastguard Worker    pmulld               m7, m0     ; t63a
7718*c0909341SAndroid Build Coastguard Worker    pmulld               m0, m8     ; t32a
7719*c0909341SAndroid Build Coastguard Worker    pmulld               m6, m1     ; t62a
7720*c0909341SAndroid Build Coastguard Worker    pmulld               m1, m9     ; t33a
7721*c0909341SAndroid Build Coastguard Worker    pmulld               m5, m2     ; t61a
7722*c0909341SAndroid Build Coastguard Worker    pmulld               m2, m10    ; t34a
7723*c0909341SAndroid Build Coastguard Worker    pmulld               m4, m3     ; t60a
7724*c0909341SAndroid Build Coastguard Worker    pmulld               m3, m15    ; t35a
    ; Multipliers 8 and 9 are used by the first pair of rotations.
7725*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m10, [r5+4*8]
7726*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m15, [r5+4*9]
7727*c0909341SAndroid Build Coastguard Worker    REPX     {paddd x, m11}, m7, m0, m6, m1, m5, m2, m4, m3
7728*c0909341SAndroid Build Coastguard Worker    REPX     {psrad x, 12 }, m0, m1, m7, m6, m2, m3, m5, m4
7729*c0909341SAndroid Build Coastguard Worker    psubd                m8, m0, m1 ; t33
7730*c0909341SAndroid Build Coastguard Worker    paddd                m0, m1     ; t32
7731*c0909341SAndroid Build Coastguard Worker    psubd                m1, m7, m6 ; t62
7732*c0909341SAndroid Build Coastguard Worker    paddd                m7, m6     ; t63
7733*c0909341SAndroid Build Coastguard Worker    psubd                m6, m3, m2 ; t34
7734*c0909341SAndroid Build Coastguard Worker    paddd                m3, m2     ; t35
7735*c0909341SAndroid Build Coastguard Worker    psubd                m2, m4, m5 ; t61
7736*c0909341SAndroid Build Coastguard Worker    paddd                m4, m5     ; t60
    ; Clip each butterfly result to [m12, m13] before it is used further.
7737*c0909341SAndroid Build Coastguard Worker    REPX    {pmaxsd x, m12}, m8, m1, m6, m2
7738*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m8, m1, m6, m2
7739*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         1, 8, 5, 9, _, 11, 10, 15    ; t33a, t62a
7740*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         2, 6, 5, 9, _, 11, 10, 15, 2 ; t61a, t34a
7741*c0909341SAndroid Build Coastguard Worker    REPX    {pmaxsd x, m12}, m0, m3, m7, m4
7742*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m0, m3, m7, m4
    ; Multipliers 10 and 11 are used by the second pair of rotations.
7743*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m10, [r5+4*10]
7744*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m15, [r5+4*11]
7745*c0909341SAndroid Build Coastguard Worker    psubd                m5, m0, m3 ; t35a
7746*c0909341SAndroid Build Coastguard Worker    paddd                m0, m3     ; t32a
7747*c0909341SAndroid Build Coastguard Worker    psubd                m3, m7, m4 ; t60a
7748*c0909341SAndroid Build Coastguard Worker    paddd                m7, m4     ; t63a
7749*c0909341SAndroid Build Coastguard Worker    psubd                m4, m1, m6 ; t34
7750*c0909341SAndroid Build Coastguard Worker    paddd                m1, m6     ; t33
7751*c0909341SAndroid Build Coastguard Worker    psubd                m6, m8, m2 ; t61
7752*c0909341SAndroid Build Coastguard Worker    paddd                m8, m2     ; t62
7753*c0909341SAndroid Build Coastguard Worker    REPX    {pmaxsd x, m12}, m5, m3, m4, m6
7754*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m5, m3, m4, m6
7755*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         3, 5, 2, 9, _, 11, 10, 15 ; t35,  t60
7756*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         6, 4, 2, 9, _, 11, 10, 15 ; t34a, t61a
7757*c0909341SAndroid Build Coastguard Worker    REPX    {pmaxsd x, m12}, m0, m7, m1, m8
7758*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m0, m7, m1, m8
    ; Step to the next 12 multipliers, store the eight results, and move
    ; the output window on by eight vectors.
7759*c0909341SAndroid Build Coastguard Worker    add                  r5, 4*12
7760*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*4], m0
7761*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*3], m7
7762*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*3], m1
7763*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*2], m8
7764*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*2], m6
7765*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*1], m4
7766*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*1], m3
7767*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*0], m5
7768*c0909341SAndroid Build Coastguard Worker    add                  r6, 32*8
7769*c0909341SAndroid Build Coastguard Worker    ret
; .main_part2: 32-bit idct64 steps 6-9, performed in place on the scratch
; vectors written by four preceding part-1 stages.
; In:   r6  = scratch pointer; all accesses use negative offsets from it
;       m11 = rounding term, m12/m13 = clip bounds
;       m14 = dword multiplier for the final stage; it is not loaded here,
;             so the caller must provide it
; The two pointers start 7 vectors apart (r5 = r6+32*3, then r6 -= 32*4)
; and move towards each other by one vector per iteration, so the loop
; body runs four times.
; Overwrites r5, r6, m0-m10 and m15.
7770*c0909341SAndroid Build Coastguard Worker.main_part2: ; idct64 steps 6-9
7771*c0909341SAndroid Build Coastguard Worker    lea                  r5, [r6+32*3]
7772*c0909341SAndroid Build Coastguard Worker    sub                  r6, 32*4
7773*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m10, [pd_1567]
7774*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m15, [pd_3784]
7775*c0909341SAndroid Build Coastguard Worker.main_part2_loop:
7776*c0909341SAndroid Build Coastguard Worker    mova                 m0, [r6-32*32] ; t32a
7777*c0909341SAndroid Build Coastguard Worker    mova                 m1, [r5-32*24] ; t39a
7778*c0909341SAndroid Build Coastguard Worker    mova                 m2, [r5-32*32] ; t63a
7779*c0909341SAndroid Build Coastguard Worker    mova                 m3, [r6-32*24] ; t56a
7780*c0909341SAndroid Build Coastguard Worker    mova                 m4, [r6-32*16] ; t40a
7781*c0909341SAndroid Build Coastguard Worker    mova                 m5, [r5-32* 8] ; t47a
7782*c0909341SAndroid Build Coastguard Worker    mova                 m6, [r5-32*16] ; t55a
7783*c0909341SAndroid Build Coastguard Worker    mova                 m7, [r6-32* 8] ; t48a
7784*c0909341SAndroid Build Coastguard Worker    psubd                m8, m0, m1 ; t39
7785*c0909341SAndroid Build Coastguard Worker    paddd                m0, m1     ; t32
7786*c0909341SAndroid Build Coastguard Worker    psubd                m1, m2, m3 ; t56
7787*c0909341SAndroid Build Coastguard Worker    paddd                m2, m3     ; t63
7788*c0909341SAndroid Build Coastguard Worker    psubd                m3, m5, m4 ; t40
7789*c0909341SAndroid Build Coastguard Worker    paddd                m5, m4     ; t47
7790*c0909341SAndroid Build Coastguard Worker    psubd                m4, m7, m6 ; t55
7791*c0909341SAndroid Build Coastguard Worker    paddd                m7, m6     ; t48
    ; Clip, then rotate the difference terms with the 1567/3784 pair.
7792*c0909341SAndroid Build Coastguard Worker    REPX    {pmaxsd x, m12}, m8, m1, m3, m4
7793*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m8, m1, m3, m4
7794*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         1, 8, 6, 9, _, 11, 10, 15    ; t39a, t56a
7795*c0909341SAndroid Build Coastguard Worker    ITX_MULSUB_2D         4, 3, 6, 9, _, 11, 10, 15, 2 ; t55a, t40a
7796*c0909341SAndroid Build Coastguard Worker    REPX    {pmaxsd x, m12}, m0, m2, m5, m7
7797*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m0, m5, m2, m7
7798*c0909341SAndroid Build Coastguard Worker    psubd                m6, m2, m7 ; t48a
7799*c0909341SAndroid Build Coastguard Worker    paddd                m2, m7     ; t63a
7800*c0909341SAndroid Build Coastguard Worker    psubd                m7, m0, m5 ; t47a
7801*c0909341SAndroid Build Coastguard Worker    paddd                m0, m5     ; t32a
7802*c0909341SAndroid Build Coastguard Worker    psubd                m5, m8, m4 ; t55
7803*c0909341SAndroid Build Coastguard Worker    paddd                m8, m4     ; t56
7804*c0909341SAndroid Build Coastguard Worker    psubd                m4, m1, m3 ; t40
7805*c0909341SAndroid Build Coastguard Worker    paddd                m1, m3     ; t39
    ; Final stage: the four difference terms are clipped and multiplied by
    ; m14; the rounding term is added once per output pair, and the sums and
    ; differences are shifted right by 12.
7806*c0909341SAndroid Build Coastguard Worker    REPX    {pmaxsd x, m12}, m6, m7, m5, m4
7807*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m6, m7, m5, m4
7808*c0909341SAndroid Build Coastguard Worker    REPX    {pmulld x, m14}, m6, m7, m5, m4
7809*c0909341SAndroid Build Coastguard Worker    REPX    {pmaxsd x, m12}, m2, m0, m8, m1
7810*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m2, m0, m8, m1
7811*c0909341SAndroid Build Coastguard Worker    paddd                m6, m11
7812*c0909341SAndroid Build Coastguard Worker    paddd                m5, m11
7813*c0909341SAndroid Build Coastguard Worker    psubd                m3, m6, m7 ; t47
7814*c0909341SAndroid Build Coastguard Worker    paddd                m6, m7     ; t48
7815*c0909341SAndroid Build Coastguard Worker    psubd                m7, m5, m4 ; t40a
7816*c0909341SAndroid Build Coastguard Worker    paddd                m5, m4     ; t55a
7817*c0909341SAndroid Build Coastguard Worker    REPX      {psrad x, 12}, m3, m6, m7, m5
    ; Write the eight results back into the same scratch slots that were
    ; read at the top of the loop (in a different order).
7818*c0909341SAndroid Build Coastguard Worker    mova         [r5-32* 8], m2
7819*c0909341SAndroid Build Coastguard Worker    mova         [r6-32*32], m0
7820*c0909341SAndroid Build Coastguard Worker    mova         [r6-32* 8], m8
7821*c0909341SAndroid Build Coastguard Worker    mova         [r5-32*32], m1
7822*c0909341SAndroid Build Coastguard Worker    mova         [r5-32*24], m3
7823*c0909341SAndroid Build Coastguard Worker    mova         [r6-32*16], m6
7824*c0909341SAndroid Build Coastguard Worker    mova         [r6-32*24], m7
7825*c0909341SAndroid Build Coastguard Worker    mova         [r5-32*16], m5
7826*c0909341SAndroid Build Coastguard Worker    add                  r6, 32
7827*c0909341SAndroid Build Coastguard Worker    sub                  r5, 32
7828*c0909341SAndroid Build Coastguard Worker    cmp                  r6, r5
7829*c0909341SAndroid Build Coastguard Worker    jl .main_part2_loop
7830*c0909341SAndroid Build Coastguard Worker    ret
7831*c0909341SAndroid Build Coastguard Worker
; inv_txfm_add_dct_dct_32x64_10bpc(dst, stride, c, eob)
;
; Entry point, DC-only shortcut, first-pass dispatch and second-pass loop.
;   eob == 0 : only the first coefficient is used; it is scaled here and the
;              actual pixel update is delegated to the 32x8 function's
;              .dconly2 tail.
;   eob != 0 : .main is called one to four times (thresholds 36/136/300 on
;              eob), the remaining scratch area is zero-filled in .fast, and
;              .pass2 then runs twice, moving dstq 32 bytes to the right per
;              iteration.
; The frame is first declared with 7 GPRs and no stack, then re-declared by
; PROLOGUE with 11 GPRs, 16 vector registers and 32*134 bytes of stack
; (argument order as in x86inc's cglobal/PROLOGUE).
7832*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_dct_dct_32x64_10bpc, 4, 7, 0, dst, stride, c, eob
7833*c0909341SAndroid Build Coastguard Worker    test               eobd, eobd
7834*c0909341SAndroid Build Coastguard Worker    jz .dconly
7835*c0909341SAndroid Build Coastguard Worker    PROLOGUE              0, 11, 16, 32*134, dst, stride, c, eob
7836*c0909341SAndroid Build Coastguard Worker%undef cmp
; m12/m13 hold the 18-bit clip bounds; they stay live across the .main calls.
7837*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [clip_18b_min]
7838*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_18b_max]
; r6 is the scratch pointer handed to .main. The .fast loop below continues
; from wherever r6 ends up, so the helpers .main calls presumably advance it
; (those helpers are not visible here -- confirm).
7839*c0909341SAndroid Build Coastguard Worker    lea                  r6, [rsp+32*6]
; Each .main call consumes one 32-byte-wide slice of the coefficients (it
; ends with "add cq, 32"). Further slices are processed only when eob is at
; or above the next threshold.
7840*c0909341SAndroid Build Coastguard Worker    call .main
7841*c0909341SAndroid Build Coastguard Worker    cmp                eobd, 36
7842*c0909341SAndroid Build Coastguard Worker    jl .fast
7843*c0909341SAndroid Build Coastguard Worker    call .main
7844*c0909341SAndroid Build Coastguard Worker    cmp                eobd, 136
7845*c0909341SAndroid Build Coastguard Worker    jl .fast
7846*c0909341SAndroid Build Coastguard Worker    call .main
7847*c0909341SAndroid Build Coastguard Worker    cmp                eobd, 300
7848*c0909341SAndroid Build Coastguard Worker    jl .fast
7849*c0909341SAndroid Build Coastguard Worker    call .main
7850*c0909341SAndroid Build Coastguard Worker    jmp .pass2
; DC-only path:
;   r6d = (c[0]*181 + 128) >> 8
;   r6d = (r6d*181  + 384) >> 9
; c[0] is cleared by storing eobd, which is known to be 0 on this path.
; r3d (the eob register, 0 here) becomes 64; presumably the row count
; consumed by the shared .dconly2 loop -- confirm against the 32x8 function.
7851*c0909341SAndroid Build Coastguard Worker.dconly:
7852*c0909341SAndroid Build Coastguard Worker    imul                r6d, [cq], 181
7853*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m3, [dconly_10bpc]
7854*c0909341SAndroid Build Coastguard Worker    mov                [cq], eobd ; 0
7855*c0909341SAndroid Build Coastguard Worker    or                  r3d, 64
7856*c0909341SAndroid Build Coastguard Worker    add                 r6d, 128
7857*c0909341SAndroid Build Coastguard Worker    sar                 r6d, 8
7858*c0909341SAndroid Build Coastguard Worker    imul                r6d, 181
7859*c0909341SAndroid Build Coastguard Worker    add                 r6d, 384
7860*c0909341SAndroid Build Coastguard Worker    sar                 r6d, 9
7861*c0909341SAndroid Build Coastguard Worker    jmp m(inv_txfm_add_dct_dct_32x8_10bpc).dconly2
; Reached when fewer than four slices were transformed: zero-fill the
; scratch area from r6 up to rsp+32*70, eight 32-byte rows per iteration,
; so that pass 2 reads zeros for the slices that were skipped.
7862*c0909341SAndroid Build Coastguard Worker.fast:
7863*c0909341SAndroid Build Coastguard Worker    lea                  r4, [rsp+32*70]
7864*c0909341SAndroid Build Coastguard Worker    pxor                 m0, m0
7865*c0909341SAndroid Build Coastguard Worker.fast_loop:
7866*c0909341SAndroid Build Coastguard Worker    REPX {mova [r6+32*x], m0}, -4, -3, -2, -1, 0, 1, 2, 3
7867*c0909341SAndroid Build Coastguard Worker    add                  r6, 32*8
7868*c0909341SAndroid Build Coastguard Worker    cmp                  r6, r4
7869*c0909341SAndroid Build Coastguard Worker    jl .fast_loop
; Second pass setup:
;   r6  = pw_5 + 128 (constant-table base for the *_8bpc helpers called
;         below; presumably what they expect -- confirm)
;   r10 = read cursor over the first-pass output, starting at rsp
;   r8/r9/r3/r7 = stride*4 / *5 / *6 / *7
7870*c0909341SAndroid Build Coastguard Worker.pass2:
7871*c0909341SAndroid Build Coastguard Worker    lea                  r6, [pw_5 + 128]
7872*c0909341SAndroid Build Coastguard Worker    mov                 r10, rsp
7873*c0909341SAndroid Build Coastguard Worker    lea                  r8, [strideq*4]
7874*c0909341SAndroid Build Coastguard Worker    lea                  r9, [strideq*5]
7875*c0909341SAndroid Build Coastguard Worker    lea                  r3, [r9+strideq*1] ; stride*6
7876*c0909341SAndroid Build Coastguard Worker    lea                  r7, [r9+strideq*2] ; stride*7
; One iteration of the loop handles one 32-byte-wide strip of dst.
7877*c0909341SAndroid Build Coastguard Worker.pass2_loop:
; Inputs 0,4,...,28 feed the 16-point kernel. Its other inputs (m8-m14 and
; the slot at [rsp]) are zeroed here.
7878*c0909341SAndroid Build Coastguard Worker    mova                 m0, [r10+32* 2] ; in0
7879*c0909341SAndroid Build Coastguard Worker    mova                 m1, [r10+32* 6] ; in4
7880*c0909341SAndroid Build Coastguard Worker    mova                 m2, [r10+32*18] ; in8
7881*c0909341SAndroid Build Coastguard Worker    mova                 m3, [r10+32*22] ; in12
7882*c0909341SAndroid Build Coastguard Worker    mova                 m4, [r10+32*34] ; in16
7883*c0909341SAndroid Build Coastguard Worker    mova                 m5, [r10+32*38] ; in20
7884*c0909341SAndroid Build Coastguard Worker    mova                 m6, [r10+32*50] ; in24
7885*c0909341SAndroid Build Coastguard Worker    mova                 m7, [r10+32*54] ; in28
7886*c0909341SAndroid Build Coastguard Worker    pxor                 m8, m8
7887*c0909341SAndroid Build Coastguard Worker    REPX       {mova x, m8}, m9, m10, m11, m12, m13, m14
7888*c0909341SAndroid Build Coastguard Worker    mova              [rsp], m8
7889*c0909341SAndroid Build Coastguard Worker    call m(idct_16x16_internal_8bpc).main
; m1 is reloaded from [rsp+32*1] after the call (the helper apparently
; leaves that output on the stack -- confirm). All sixteen results are then
; saved to rsp+32*66 .. rsp+32*81.
7890*c0909341SAndroid Build Coastguard Worker    mova                 m1, [rsp+32*1]
7891*c0909341SAndroid Build Coastguard Worker    lea                  r4, [rsp+32*70]
7892*c0909341SAndroid Build Coastguard Worker    mova          [r4-32*4], m0
7893*c0909341SAndroid Build Coastguard Worker    mova          [r4-32*3], m1
7894*c0909341SAndroid Build Coastguard Worker    mova          [r4-32*2], m2
7895*c0909341SAndroid Build Coastguard Worker    mova          [r4-32*1], m3
7896*c0909341SAndroid Build Coastguard Worker    mova          [r4+32*0], m4
7897*c0909341SAndroid Build Coastguard Worker    mova          [r4+32*1], m5
7898*c0909341SAndroid Build Coastguard Worker    mova          [r4+32*2], m6
7899*c0909341SAndroid Build Coastguard Worker    mova          [r4+32*3], m7
7900*c0909341SAndroid Build Coastguard Worker    add                  r4, 32*8
7901*c0909341SAndroid Build Coastguard Worker    mova          [r4-32*4], m8
7902*c0909341SAndroid Build Coastguard Worker    mova          [r4-32*3], m9
7903*c0909341SAndroid Build Coastguard Worker    mova          [r4-32*2], m10
7904*c0909341SAndroid Build Coastguard Worker    mova          [r4-32*1], m11
7905*c0909341SAndroid Build Coastguard Worker    mova          [r4+32*0], m12
7906*c0909341SAndroid Build Coastguard Worker    mova          [r4+32*1], m13
7907*c0909341SAndroid Build Coastguard Worker    mova          [r4+32*2], m14
7908*c0909341SAndroid Build Coastguard Worker    mova          [r4+32*3], m15
; Inputs 2,6,...,30 go to the 32-point odd half. r4/r5 are the scratch
; pointers passed to the helper (rsp+32*86 and rsp+32*94 at the call).
7909*c0909341SAndroid Build Coastguard Worker    mova                 m0, [r10+32* 4] ; in2
7910*c0909341SAndroid Build Coastguard Worker    mova                 m1, [r10+32* 8] ; in6
7911*c0909341SAndroid Build Coastguard Worker    mova                 m2, [r10+32*20] ; in10
7912*c0909341SAndroid Build Coastguard Worker    mova                 m3, [r10+32*24] ; in14
7913*c0909341SAndroid Build Coastguard Worker    mova                 m4, [r10+32*36] ; in18
7914*c0909341SAndroid Build Coastguard Worker    mova                 m5, [r10+32*40] ; in22
7915*c0909341SAndroid Build Coastguard Worker    mova                 m6, [r10+32*52] ; in26
7916*c0909341SAndroid Build Coastguard Worker    mova                 m7, [r10+32*56] ; in30
7917*c0909341SAndroid Build Coastguard Worker    lea                  r5, [r4+32*16]
7918*c0909341SAndroid Build Coastguard Worker    add                  r4, 32*8
7919*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_16x32_8bpc).main_oddhalf_fast
; First group of odd inputs for the 64-point stage. r6 is repointed at
; idct64_mul - 8 for main_part1.
7920*c0909341SAndroid Build Coastguard Worker    mova                 m0, [r10+32* 3] ; in1
7921*c0909341SAndroid Build Coastguard Worker    mova                 m1, [r10+32*57] ; in31
7922*c0909341SAndroid Build Coastguard Worker    mova                 m2, [r10+32*35] ; in17
7923*c0909341SAndroid Build Coastguard Worker    mova                 m3, [r10+32*25] ; in15
7924*c0909341SAndroid Build Coastguard Worker    mova                 m4, [r10+32*19] ; in9
7925*c0909341SAndroid Build Coastguard Worker    mova                 m5, [r10+32*41] ; in23
7926*c0909341SAndroid Build Coastguard Worker    mova                 m6, [r10+32*51] ; in25
7927*c0909341SAndroid Build Coastguard Worker    mova                 m7, [r10+32* 9] ; in7
7928*c0909341SAndroid Build Coastguard Worker    lea                  r6, [idct64_mul - 8]
7929*c0909341SAndroid Build Coastguard Worker    add                  r4, 32*16
7930*c0909341SAndroid Build Coastguard Worker    add                  r5, 32*32
7931*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_16x64_8bpc).main_part1
; Second group of odd inputs. r6/r4/r5 are adjusted by +8 / +32*8 / -32*8
; relative to their values after the previous call.
7932*c0909341SAndroid Build Coastguard Worker    mova                 m0, [r10+32* 7] ; in5
7933*c0909341SAndroid Build Coastguard Worker    mova                 m1, [r10+32*53] ; in27
7934*c0909341SAndroid Build Coastguard Worker    mova                 m2, [r10+32*39] ; in21
7935*c0909341SAndroid Build Coastguard Worker    mova                 m3, [r10+32*21] ; in11
7936*c0909341SAndroid Build Coastguard Worker    mova                 m4, [r10+32*23] ; in13
7937*c0909341SAndroid Build Coastguard Worker    mova                 m5, [r10+32*37] ; in19
7938*c0909341SAndroid Build Coastguard Worker    mova                 m6, [r10+32*55] ; in29
7939*c0909341SAndroid Build Coastguard Worker    mova                 m7, [r10+32* 5] ; in3
7940*c0909341SAndroid Build Coastguard Worker    add                  r6, 8
7941*c0909341SAndroid Build Coastguard Worker    add                  r4, 32*8
7942*c0909341SAndroid Build Coastguard Worker    sub                  r5, 32*8
7943*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_16x64_8bpc).main_part1
7944*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_16x64_10bpc).main_part2_pass2
; Advance the read cursor by eight rows and dst by 32 bytes. dstq is first
; moved back by r8 (stride*4); this presumably undoes a net advance made by
; main_part2_pass2 -- confirm. r4 is rewound so that, per the original
; comment, it equals rsp+32*16 and serves as the loop bound for r10.
7945*c0909341SAndroid Build Coastguard Worker    add                 r10, 32*8
7946*c0909341SAndroid Build Coastguard Worker    sub                  r4, 32*98 ; rsp+32*16
7947*c0909341SAndroid Build Coastguard Worker    sub                dstq, r8
7948*c0909341SAndroid Build Coastguard Worker    add                dstq, 32
7949*c0909341SAndroid Build Coastguard Worker    cmp                 r10, r4
7950*c0909341SAndroid Build Coastguard Worker    jl .pass2_loop
7951*c0909341SAndroid Build Coastguard Worker    RET
; .main -- first-pass worker for one 32-byte-wide slice of coefficients.
;
; In:  cq  = coefficient pointer; rows are 128 bytes apart and rows 0..31
;            are read
;      m12/m13 = clip bounds loaded by the caller
;      r6  = scratch pointer set up by the caller (used by the helpers, not
;            directly in this routine)
; Out: cq advanced by 32; the 32 bytes read from each of the 32 rows are
;      zeroed; two transposed 8-register groups are stored around r4 and r5.
;      r4/r5 are not set in this routine, so they are presumably produced by
;      the helpers called before the stores -- confirm.
; Every input row is multiplied by pd_2896 before entering a *_rect2 helper.
; This is presumably the rectangular-transform pre-scale, with rounding and
; shifting left to the helpers -- confirm.
7952*c0909341SAndroid Build Coastguard WorkerALIGN function_align
7953*c0909341SAndroid Build Coastguard Worker.main:
7954*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m14, [pd_2896]
7955*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m11, [pd_2048]
; Rows 1,7,9,15,17,23,25,31
7956*c0909341SAndroid Build Coastguard Worker    pmulld               m0, m14, [cq+128* 1]
7957*c0909341SAndroid Build Coastguard Worker    pmulld               m1, m14, [cq+128* 7]
7958*c0909341SAndroid Build Coastguard Worker    pmulld               m2, m14, [cq+128* 9]
7959*c0909341SAndroid Build Coastguard Worker    pmulld               m3, m14, [cq+128*15]
7960*c0909341SAndroid Build Coastguard Worker    pmulld               m4, m14, [cq+128*17]
7961*c0909341SAndroid Build Coastguard Worker    pmulld               m5, m14, [cq+128*23]
7962*c0909341SAndroid Build Coastguard Worker    pmulld               m6, m14, [cq+128*25]
7963*c0909341SAndroid Build Coastguard Worker    pmulld               m7, m14, [cq+128*31]
7964*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_8x32_10bpc).main_oddhalf_part1_rect2
; Rows 3,5,11,13,19,21,27,29
7965*c0909341SAndroid Build Coastguard Worker    pmulld               m0, m14, [cq+128* 3]
7966*c0909341SAndroid Build Coastguard Worker    pmulld               m1, m14, [cq+128* 5]
7967*c0909341SAndroid Build Coastguard Worker    pmulld               m2, m14, [cq+128*11]
7968*c0909341SAndroid Build Coastguard Worker    pmulld               m3, m14, [cq+128*13]
7969*c0909341SAndroid Build Coastguard Worker    pmulld               m4, m14, [cq+128*19]
7970*c0909341SAndroid Build Coastguard Worker    pmulld               m5, m14, [cq+128*21]
7971*c0909341SAndroid Build Coastguard Worker    pmulld               m6, m14, [cq+128*27]
7972*c0909341SAndroid Build Coastguard Worker    pmulld               m7, m14, [cq+128*29]
7973*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_8x32_10bpc).main_oddhalf_part2_rect2
; Rows 2,6,...,30
7974*c0909341SAndroid Build Coastguard Worker    pmulld               m0, m14, [cq+128* 2]
7975*c0909341SAndroid Build Coastguard Worker    pmulld               m1, m14, [cq+128* 6]
7976*c0909341SAndroid Build Coastguard Worker    pmulld               m2, m14, [cq+128*10]
7977*c0909341SAndroid Build Coastguard Worker    pmulld               m3, m14, [cq+128*14]
7978*c0909341SAndroid Build Coastguard Worker    pmulld               m4, m14, [cq+128*18]
7979*c0909341SAndroid Build Coastguard Worker    pmulld               m5, m14, [cq+128*22]
7980*c0909341SAndroid Build Coastguard Worker    pmulld               m6, m14, [cq+128*26]
7981*c0909341SAndroid Build Coastguard Worker    pmulld               m7, m14, [cq+128*30]
7982*c0909341SAndroid Build Coastguard Worker    call m(idct_8x16_internal_10bpc).main_oddhalf_rect2
; Rows 0,4,...,28 stay in m0-m7 for the 8-point kernel below.
7983*c0909341SAndroid Build Coastguard Worker    pmulld               m0, m14, [cq+128* 0]
7984*c0909341SAndroid Build Coastguard Worker    pmulld               m1, m14, [cq+128* 4]
7985*c0909341SAndroid Build Coastguard Worker    pmulld               m2, m14, [cq+128* 8]
7986*c0909341SAndroid Build Coastguard Worker    pmulld               m3, m14, [cq+128*12]
7987*c0909341SAndroid Build Coastguard Worker    pmulld               m4, m14, [cq+128*16]
7988*c0909341SAndroid Build Coastguard Worker    pmulld               m5, m14, [cq+128*20]
7989*c0909341SAndroid Build Coastguard Worker    pmulld               m6, m14, [cq+128*24]
7990*c0909341SAndroid Build Coastguard Worker    pmulld               m7, m14, [cq+128*28]
; Zero the 32 bytes just consumed from every row. r7d starts at row 29 and
; steps down four rows per iteration; each iteration clears rows r-1..r+2,
; so eight iterations cover rows 31 down to 0.
7991*c0909341SAndroid Build Coastguard Worker    pxor                m15, m15
7992*c0909341SAndroid Build Coastguard Worker    mov                 r7d, 128*29
7993*c0909341SAndroid Build Coastguard Worker.main_zero_loop:
7994*c0909341SAndroid Build Coastguard Worker    mova      [cq+r7-128*1], m15
7995*c0909341SAndroid Build Coastguard Worker    mova      [cq+r7+128*0], m15
7996*c0909341SAndroid Build Coastguard Worker    mova      [cq+r7+128*1], m15
7997*c0909341SAndroid Build Coastguard Worker    mova      [cq+r7+128*2], m15
7998*c0909341SAndroid Build Coastguard Worker    sub                 r7d, 128*4
7999*c0909341SAndroid Build Coastguard Worker    jg .main_zero_loop
; Step to the next slice for the following .main call.
8000*c0909341SAndroid Build Coastguard Worker    add                  cq, 32
8001*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).main_rect2
8002*c0909341SAndroid Build Coastguard Worker    call m(idct_8x16_internal_10bpc).main_evenhalf
8003*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_32x16_10bpc).main_end
; Transpose the group left in m0-m7 and store it at r4-32*4 .. r4+32*3.
8004*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_8x32_10bpc).transpose
8005*c0909341SAndroid Build Coastguard Worker    mova          [r4-32*4], m0
8006*c0909341SAndroid Build Coastguard Worker    mova          [r4-32*3], m1
8007*c0909341SAndroid Build Coastguard Worker    mova          [r4-32*2], m2
8008*c0909341SAndroid Build Coastguard Worker    mova          [r4-32*1], m3
8009*c0909341SAndroid Build Coastguard Worker    mova          [r4+32*0], m4
8010*c0909341SAndroid Build Coastguard Worker    mova          [r4+32*1], m5
8011*c0909341SAndroid Build Coastguard Worker    mova          [r4+32*2], m6
8012*c0909341SAndroid Build Coastguard Worker    mova          [r4+32*3], m7
; The group around r5 is loaded in reverse row order (r5+32*3 first), then
; transposed and written back in place in ascending order.
8013*c0909341SAndroid Build Coastguard Worker    mova                 m0, [r5+32*3]
8014*c0909341SAndroid Build Coastguard Worker    mova                 m1, [r5+32*2]
8015*c0909341SAndroid Build Coastguard Worker    mova                 m2, [r5+32*1]
8016*c0909341SAndroid Build Coastguard Worker    mova                 m3, [r5+32*0]
8017*c0909341SAndroid Build Coastguard Worker    mova                 m4, [r5-32*1]
8018*c0909341SAndroid Build Coastguard Worker    mova                 m5, [r5-32*2]
8019*c0909341SAndroid Build Coastguard Worker    mova                 m6, [r5-32*3]
8020*c0909341SAndroid Build Coastguard Worker    mova                 m7, [r5-32*4]
8021*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_8x32_10bpc).transpose
8022*c0909341SAndroid Build Coastguard Worker    mova          [r5-32*4], m0
8023*c0909341SAndroid Build Coastguard Worker    mova          [r5-32*3], m1
8024*c0909341SAndroid Build Coastguard Worker    mova          [r5-32*2], m2
8025*c0909341SAndroid Build Coastguard Worker    mova          [r5-32*1], m3
8026*c0909341SAndroid Build Coastguard Worker    mova          [r5+32*0], m4
8027*c0909341SAndroid Build Coastguard Worker    mova          [r5+32*1], m5
8028*c0909341SAndroid Build Coastguard Worker    mova          [r5+32*2], m6
8029*c0909341SAndroid Build Coastguard Worker    mova          [r5+32*3], m7
8030*c0909341SAndroid Build Coastguard Worker    ret
8031*c0909341SAndroid Build Coastguard Worker
; inv_txfm_add_dct_dct_64x16_10bpc(dst, stride, c, eob)
;
; Entry point, DC-only path and second-pass loop.
;   eob == 0 : the first coefficient is scaled and added to 16 rows of
;              128 bytes each (four 32-byte vectors per row).
;   eob != 0 : .normal runs .main + .shift_transpose once or twice
;              (threshold 36 on eob), zero-fills the remaining scratch in
;              .fast, and then .pass2 emits four 32-byte-wide strips of dst.
; .dconly and .dconly2 are labelled mid-sequence, presumably so that other
; transform sizes can jump in with their own pre-scaling and row count in
; r3d -- confirm against the callers.
8032*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_dct_dct_64x16_10bpc, 4, 7, 0, dst, stride, c, eob
8033*c0909341SAndroid Build Coastguard Worker    test               eobd, eobd
8034*c0909341SAndroid Build Coastguard Worker    jnz .normal
; r6d = c[0]*181. c[0] is cleared by storing eobd (0 here). r3d, the eob
; register, becomes the row counter for .dconly_loop (16).
8035*c0909341SAndroid Build Coastguard Worker    imul                r6d, [cq], 181
8036*c0909341SAndroid Build Coastguard Worker    mov                [cq], eobd ; 0
8037*c0909341SAndroid Build Coastguard Worker    or                  r3d, 16
; r6d = (r6d + 640) >> 10
8038*c0909341SAndroid Build Coastguard Worker.dconly:
8039*c0909341SAndroid Build Coastguard Worker    add                 r6d, 640
8040*c0909341SAndroid Build Coastguard Worker    sar                 r6d, 10
; r6d = (r6d*181 + 2176) >> 12. The result is combined with the
; dconly_10bpc word using a signed-saturating add and broadcast to m0.
8041*c0909341SAndroid Build Coastguard Worker.dconly2:
8042*c0909341SAndroid Build Coastguard Worker    vpbroadcastd         m5, [dconly_10bpc]
8043*c0909341SAndroid Build Coastguard Worker    imul                r6d, 181
8044*c0909341SAndroid Build Coastguard Worker    add                 r6d, 2176
8045*c0909341SAndroid Build Coastguard Worker    sar                 r6d, 12
8046*c0909341SAndroid Build Coastguard Worker    movd                xm0, r6d
8047*c0909341SAndroid Build Coastguard Worker    paddsw              xm0, xm5
8048*c0909341SAndroid Build Coastguard Worker    vpbroadcastw         m0, xm0
; Per row: signed-saturating add of the biased DC to the pixels, followed by
; an unsigned-saturating subtract of the bias in m5. The two saturations
; together clamp the result; the exact bounds depend on the value of
; dconly_10bpc, which is defined outside this chunk.
8049*c0909341SAndroid Build Coastguard Worker.dconly_loop:
8050*c0909341SAndroid Build Coastguard Worker    paddsw               m1, m0, [dstq+32*0]
8051*c0909341SAndroid Build Coastguard Worker    paddsw               m2, m0, [dstq+32*1]
8052*c0909341SAndroid Build Coastguard Worker    paddsw               m3, m0, [dstq+32*2]
8053*c0909341SAndroid Build Coastguard Worker    paddsw               m4, m0, [dstq+32*3]
8054*c0909341SAndroid Build Coastguard Worker    REPX    {psubusw x, m5}, m1, m2, m3, m4
8055*c0909341SAndroid Build Coastguard Worker    mova        [dstq+32*0], m1
8056*c0909341SAndroid Build Coastguard Worker    mova        [dstq+32*1], m2
8057*c0909341SAndroid Build Coastguard Worker    mova        [dstq+32*2], m3
8058*c0909341SAndroid Build Coastguard Worker    mova        [dstq+32*3], m4
8059*c0909341SAndroid Build Coastguard Worker    add                dstq, strideq
8060*c0909341SAndroid Build Coastguard Worker    dec                 r3d
8061*c0909341SAndroid Build Coastguard Worker    jg .dconly_loop
8062*c0909341SAndroid Build Coastguard Worker    RET
; Full transform. The frame is re-declared with 8 GPRs, 16 vector registers
; and 32*96 bytes of stack (argument order as in x86inc's PROLOGUE).
8063*c0909341SAndroid Build Coastguard Worker.normal:
8064*c0909341SAndroid Build Coastguard Worker    PROLOGUE              0, 8, 16, 32*96, dst, stride, c, eob
8065*c0909341SAndroid Build Coastguard Worker%undef cmp
; Constants kept live for .main and its helpers: rounding (m11), clip bounds
; (m12/m13) and pd_2896 (m14).
8066*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m11, [pd_2048]
8067*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [clip_18b_min]
8068*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_18b_max]
8069*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m14, [pd_2896]
8070*c0909341SAndroid Build Coastguard Worker    lea                  r6, [rsp+32*4]
; First 32-byte slice of coefficients. The second slice is transformed only
; when eob >= 36.
8071*c0909341SAndroid Build Coastguard Worker    call .main
8072*c0909341SAndroid Build Coastguard Worker    call .shift_transpose
8073*c0909341SAndroid Build Coastguard Worker    cmp                eobd, 36
8074*c0909341SAndroid Build Coastguard Worker    jl .fast
8075*c0909341SAndroid Build Coastguard Worker    call .main
8076*c0909341SAndroid Build Coastguard Worker    call .shift_transpose
8077*c0909341SAndroid Build Coastguard Worker    jmp .pass2
; Second slice skipped: zero-fill 4 x 8 rows of 32 bytes starting at r6-32*4
; so that pass 2 reads zeros for it. r6 advances by 32*32 in total.
8078*c0909341SAndroid Build Coastguard Worker.fast:
8079*c0909341SAndroid Build Coastguard Worker    pxor                 m0, m0
8080*c0909341SAndroid Build Coastguard Worker    mov                 r3d, 4
8081*c0909341SAndroid Build Coastguard Worker.fast_loop:
8082*c0909341SAndroid Build Coastguard Worker    REPX {mova [r6+32*x], m0}, -4, -3, -2, -1, 0, 1, 2, 3
8083*c0909341SAndroid Build Coastguard Worker    add                  r6, 32*8
8084*c0909341SAndroid Build Coastguard Worker    dec                 r3d
8085*c0909341SAndroid Build Coastguard Worker    jg .fast_loop
; Second pass setup:
;   r7 = read cursor (r6 - 32*64)
;   r4 = loop bound  (r6 - 32*32)
;   r6 = pw_5 + 128  (constant-table base, presumably as expected by the
;                     8bpc helper called below -- confirm)
;   r5 = dst position of the current strip
8086*c0909341SAndroid Build Coastguard Worker.pass2:
8087*c0909341SAndroid Build Coastguard Worker    lea                  r7, [r6-32*64]
8088*c0909341SAndroid Build Coastguard Worker    lea                  r4, [r6-32*32]
8089*c0909341SAndroid Build Coastguard Worker    lea                  r6, [pw_5+128]
8090*c0909341SAndroid Build Coastguard Worker    mov                  r5, dstq
; Each iteration loads eight rows at r7 and eight more 32*32 bytes further
; on, runs the 16-point kernel, and writes one 32-byte-wide strip. r7
; advances by a net 32*8 per iteration, so the loop runs four times.
8091*c0909341SAndroid Build Coastguard Worker.pass2_loop:
8092*c0909341SAndroid Build Coastguard Worker    mova                 m0, [r7-32*4]
8093*c0909341SAndroid Build Coastguard Worker    mova                 m1, [r7-32*3]
8094*c0909341SAndroid Build Coastguard Worker    mova                 m2, [r7-32*2]
8095*c0909341SAndroid Build Coastguard Worker    mova                 m3, [r7-32*1]
8096*c0909341SAndroid Build Coastguard Worker    mova                 m4, [r7+32*0]
8097*c0909341SAndroid Build Coastguard Worker    mova                 m5, [r7+32*1]
8098*c0909341SAndroid Build Coastguard Worker    mova                 m6, [r7+32*2]
8099*c0909341SAndroid Build Coastguard Worker    mova                 m7, [r7+32*3]
8100*c0909341SAndroid Build Coastguard Worker    add                  r7, 32*32
8101*c0909341SAndroid Build Coastguard Worker    mova                 m8, [r7-32*4]
8102*c0909341SAndroid Build Coastguard Worker    mova                 m9, [r7-32*3]
8103*c0909341SAndroid Build Coastguard Worker    mova                m10, [r7-32*2]
8104*c0909341SAndroid Build Coastguard Worker    mova                m11, [r7-32*1]
8105*c0909341SAndroid Build Coastguard Worker    mova                m12, [r7+32*0]
8106*c0909341SAndroid Build Coastguard Worker    mova                m13, [r7+32*1]
8107*c0909341SAndroid Build Coastguard Worker    mova                m14, [r7+32*2]
8108*c0909341SAndroid Build Coastguard Worker    mova                m15, [r7+32*3]
8109*c0909341SAndroid Build Coastguard Worker    sub                  r7, 32*24
; The sixteenth input is passed through [rsp], and m1 is reloaded from
; [rsp+32*1] after the call (the helper's stack convention is not visible
; here -- confirm).
8110*c0909341SAndroid Build Coastguard Worker    mova              [rsp], m15
8111*c0909341SAndroid Build Coastguard Worker    call m(idct_16x16_internal_8bpc).main
8112*c0909341SAndroid Build Coastguard Worker    mova                 m1, [rsp+32*1]
8113*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_32x16_10bpc).write_16x16
; dstq is reset from r5 after the write helper, so every strip starts from
; the top row, 32 bytes to the right of the previous one.
8114*c0909341SAndroid Build Coastguard Worker    add                  r5, 32
8115*c0909341SAndroid Build Coastguard Worker    mov                dstq, r5
8116*c0909341SAndroid Build Coastguard Worker    cmp                  r7, r4
8117*c0909341SAndroid Build Coastguard Worker    jl .pass2_loop
8118*c0909341SAndroid Build Coastguard Worker    RET
; .main -- first-pass 64-point worker for one 32-byte-wide slice.
;
; In:  cq = coefficient pointer; rows are 64 bytes apart and rows 0..31 are
;           read
;      r6 = scratch pointer
;      m11 = pd_2048, m12/m13 = clip bounds (as loaded in .normal)
; Out: cq advanced by 32; the 32 bytes read from each of the 32 rows are
;      zeroed; the combined, still unshifted outputs are stored in the
;      scratch area addressed through r5/r6.
; Additional entry points:
;   .main_end  : derives the rounding constant pd_2 from m11 and falls
;                through into .main_end2.
;   .main_end2 : uses whatever m15 already holds (presumably for callers
;                that need a different rounding constant -- confirm).
8119*c0909341SAndroid Build Coastguard WorkerALIGN function_align
8120*c0909341SAndroid Build Coastguard Worker.main:
; Odd rows, four per main_part1 call. r5 points at the idct64 multiplier
; table used by that helper.
8121*c0909341SAndroid Build Coastguard Worker    lea                  r5, [idct64_mul_16bpc]
8122*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+64* 1]
8123*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+64*31]
8124*c0909341SAndroid Build Coastguard Worker    mova                 m2, [cq+64*17]
8125*c0909341SAndroid Build Coastguard Worker    mova                 m3, [cq+64*15]
8126*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_16x64_10bpc).main_part1
8127*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+64* 7]
8128*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+64*25]
8129*c0909341SAndroid Build Coastguard Worker    mova                 m2, [cq+64*23]
8130*c0909341SAndroid Build Coastguard Worker    mova                 m3, [cq+64* 9]
8131*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_16x64_10bpc).main_part1
8132*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+64* 5]
8133*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+64*27]
8134*c0909341SAndroid Build Coastguard Worker    mova                 m2, [cq+64*21]
8135*c0909341SAndroid Build Coastguard Worker    mova                 m3, [cq+64*11]
8136*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_16x64_10bpc).main_part1
8137*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+64* 3]
8138*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+64*29]
8139*c0909341SAndroid Build Coastguard Worker    mova                 m2, [cq+64*19]
8140*c0909341SAndroid Build Coastguard Worker    mova                 m3, [cq+64*13]
8141*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_16x64_10bpc).main_part1
8142*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_16x64_10bpc).main_part2
; Rows 2,14,18,30 and 6,10,22,26 go to the 32-point odd half.
8143*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+64* 2]
8144*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+64*14]
8145*c0909341SAndroid Build Coastguard Worker    mova                 m2, [cq+64*18]
8146*c0909341SAndroid Build Coastguard Worker    mova                 m3, [cq+64*30]
8147*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_8x32_10bpc).main_oddhalf_part1_fast
8148*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+64* 6]
8149*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+64*10]
8150*c0909341SAndroid Build Coastguard Worker    mova                 m2, [cq+64*22]
8151*c0909341SAndroid Build Coastguard Worker    mova                 m3, [cq+64*26]
8152*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_8x32_10bpc).main_oddhalf_part2_fast
; Rows 4,12,20,28 go to the 16-point odd half.
8153*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+64* 4]
8154*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+64*12]
8155*c0909341SAndroid Build Coastguard Worker    mova                 m2, [cq+64*20]
8156*c0909341SAndroid Build Coastguard Worker    mova                 m3, [cq+64*28]
8157*c0909341SAndroid Build Coastguard Worker    call m(idct_8x16_internal_10bpc).main_oddhalf_fast
; Rows 0,8,16,24 stay in m0-m3 for the 8-point kernel in .main_end2.
8158*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+64* 0]
8159*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+64* 8]
8160*c0909341SAndroid Build Coastguard Worker    mova                 m2, [cq+64*16]
8161*c0909341SAndroid Build Coastguard Worker    mova                 m3, [cq+64*24]
; Zero the 32 bytes just consumed from every row. r7d starts at row 30 and
; steps down four rows per iteration; each iteration clears rows r-2..r+1,
; so eight iterations cover rows 31 down to 0.
8162*c0909341SAndroid Build Coastguard Worker    pxor                m15, m15
8163*c0909341SAndroid Build Coastguard Worker    mov                 r7d, 64*30
8164*c0909341SAndroid Build Coastguard Worker.main_zero_loop:
8165*c0909341SAndroid Build Coastguard Worker    mova       [cq+r7-64*2], m15
8166*c0909341SAndroid Build Coastguard Worker    mova       [cq+r7-64*1], m15
8167*c0909341SAndroid Build Coastguard Worker    mova       [cq+r7+64*0], m15
8168*c0909341SAndroid Build Coastguard Worker    mova       [cq+r7+64*1], m15
8169*c0909341SAndroid Build Coastguard Worker    sub                 r7d, 64*4
8170*c0909341SAndroid Build Coastguard Worker    jg .main_zero_loop
; m15 = m11 >> 10. With m11 = pd_2048 this yields 2 in every dword; it is
; added as rounding in the loop below, while the shift itself is not
; performed in this routine.
8171*c0909341SAndroid Build Coastguard Worker.main_end:
8172*c0909341SAndroid Build Coastguard Worker    psrld               m15, m11, 10 ; pd_2
8173*c0909341SAndroid Build Coastguard Worker.main_end2:
; Step to the next slice; inputs 4..7 of the 8-point kernel are zero.
8174*c0909341SAndroid Build Coastguard Worker    add                  cq, 32
8175*c0909341SAndroid Build Coastguard Worker    pxor                 m4, m4
8176*c0909341SAndroid Build Coastguard Worker    REPX       {mova x, m4}, m5, m6, m7
8177*c0909341SAndroid Build Coastguard Worker    call m(idct_8x8_internal_10bpc).main
8178*c0909341SAndroid Build Coastguard Worker    add                  r6, 32*8
8179*c0909341SAndroid Build Coastguard Worker    call m(idct_8x16_internal_10bpc).main_evenhalf
; Spill m1..m7 in descending address order. m0 stays in its register
; because the loop is entered at .main_end_loop_start, which skips the m0
; load.
8180*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*2], m1
8181*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*1], m2
8182*c0909341SAndroid Build Coastguard Worker    mova          [r6+32*0], m3
8183*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*1], m4
8184*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*2], m5
8185*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*3], m6
8186*c0909341SAndroid Build Coastguard Worker    mova          [r6-32*4], m7
8187*c0909341SAndroid Build Coastguard Worker    jmp .main_end_loop_start
; Final butterflies. r5 walks upward and r6 downward by 32 bytes per
; iteration until they meet. Each iteration merges one value from the
; 8-point stage with one 16-point, two 32-point and four 64-point partial
; results, clipping to [m12, m13] after the 16- and 32-point stages and
; adding the rounding constant m15 before the 64-point stage. Results are
; stored in place, still unshifted.
8188*c0909341SAndroid Build Coastguard Worker.main_end_loop:
8189*c0909341SAndroid Build Coastguard Worker    mova                 m0, [r6+32* 3] ; idct8  0  + n
8190*c0909341SAndroid Build Coastguard Worker.main_end_loop_start:
8191*c0909341SAndroid Build Coastguard Worker    mova                 m1, [r5+32* 4] ; idct16 15 - n
8192*c0909341SAndroid Build Coastguard Worker    mova                 m2, [r5-32*12] ; idct32 16 + n
8193*c0909341SAndroid Build Coastguard Worker    mova                 m3, [r6-32*13] ; idct32 31 - n
8194*c0909341SAndroid Build Coastguard Worker    mova                 m4, [r6-32*29] ; idct64 63 - n
8195*c0909341SAndroid Build Coastguard Worker    mova                 m5, [r5-32*28] ; idct64 48 + n
8196*c0909341SAndroid Build Coastguard Worker    mova                 m6, [r6-32*45] ; idct64 47 - n
8197*c0909341SAndroid Build Coastguard Worker    mova                 m7, [r5-32*44] ; idct64 32 + n
8198*c0909341SAndroid Build Coastguard Worker    paddd                m8, m0, m1     ; idct16 out0  + n
8199*c0909341SAndroid Build Coastguard Worker    psubd                m0, m1         ; idct16 out15 - n
8200*c0909341SAndroid Build Coastguard Worker    REPX    {pmaxsd x, m12}, m8, m0
8201*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m8, m0
8202*c0909341SAndroid Build Coastguard Worker    paddd                m1, m8, m3     ; idct32 out0  + n
8203*c0909341SAndroid Build Coastguard Worker    psubd                m8, m3         ; idct32 out31 - n
8204*c0909341SAndroid Build Coastguard Worker    paddd                m3, m0, m2     ; idct32 out15 - n
8205*c0909341SAndroid Build Coastguard Worker    psubd                m0, m2         ; idct32 out16 + n
8206*c0909341SAndroid Build Coastguard Worker    REPX    {pmaxsd x, m12}, m1, m8, m3, m0
8207*c0909341SAndroid Build Coastguard Worker    REPX    {pminsd x, m13}, m1, m3, m8, m0
8208*c0909341SAndroid Build Coastguard Worker    REPX    {paddd  x, m15}, m1, m3, m0, m8
8209*c0909341SAndroid Build Coastguard Worker    paddd                m2, m1, m4     ; idct64 out0  + n (unshifted)
8210*c0909341SAndroid Build Coastguard Worker    psubd                m1, m4         ; idct64 out63 - n (unshifted)
8211*c0909341SAndroid Build Coastguard Worker    paddd                m4, m3, m5     ; idct64 out15 - n (unshifted)
8212*c0909341SAndroid Build Coastguard Worker    psubd                m3, m5         ; idct64 out48 + n (unshifted)
8213*c0909341SAndroid Build Coastguard Worker    paddd                m5, m0, m6     ; idct64 out16 + n (unshifted)
8214*c0909341SAndroid Build Coastguard Worker    psubd                m0, m6         ; idct64 out47 - n (unshifted)
8215*c0909341SAndroid Build Coastguard Worker    paddd                m6, m8, m7     ; idct64 out31 - n (unshifted)
8216*c0909341SAndroid Build Coastguard Worker    psubd                m8, m7         ; idct64 out32 + n (unshifted)
; Each output overwrites one of the eight slots that were read at the top of
; this iteration.
8217*c0909341SAndroid Build Coastguard Worker    mova         [r5-32*44], m2
8218*c0909341SAndroid Build Coastguard Worker    mova         [r6+32* 3], m1
8219*c0909341SAndroid Build Coastguard Worker    mova         [r6-32*45], m4
8220*c0909341SAndroid Build Coastguard Worker    mova         [r5+32* 4], m3
8221*c0909341SAndroid Build Coastguard Worker    mova         [r5-32*28], m5
8222*c0909341SAndroid Build Coastguard Worker    mova         [r6-32*13], m0
8223*c0909341SAndroid Build Coastguard Worker    mova         [r6-32*29], m6
8224*c0909341SAndroid Build Coastguard Worker    mova         [r5-32*12], m8
8225*c0909341SAndroid Build Coastguard Worker    add                  r5, 32
8226*c0909341SAndroid Build Coastguard Worker    sub                  r6, 32
8227*c0909341SAndroid Build Coastguard Worker    cmp                  r5, r6
8228*c0909341SAndroid Build Coastguard Worker    jl .main_end_loop
8229*c0909341SAndroid Build Coastguard Worker    ret
8230*c0909341SAndroid Build Coastguard Worker.shift_transpose:
; IDCT64_SHIFT_TRANSPOSE shift
;   %1 = immediate count of the arithmetic right shift (psrad) applied to
;        every 32-bit lane before the values are narrowed to 16 bits.
;
; Compacts a buffer in place. Each loop iteration reads sixteen vectors
; (32-byte stride) through r6, shifts them, narrows the pairs (k, k+8) with
; signed saturation (packssdw), calls idct_16x8_internal_10bpc.transpose3
; on m0-m7 and stores the eight resulting vectors through r5. r6 advances by
; 32*16 and r5 by 32*8 per iteration, so the write pointer never overtakes
; the data that is still to be read.
;
; In:   r6 = pointer 32*48 bytes past the bias point of the first group
;            (the macro rewinds it itself)
;       r4 = end pointer; the loop runs while r5 < r4 (signed compare)
; Out:  r6 = r4
; Uses: r5, m0-m9, plus whatever transpose3 clobbers (not visible here).
8231*c0909341SAndroid Build Coastguard Worker%macro IDCT64_SHIFT_TRANSPOSE 1 ; shift
8232*c0909341SAndroid Build Coastguard Worker    sub                  r6, 32*48
8233*c0909341SAndroid Build Coastguard Worker    mov                  r5, r6
8234*c0909341SAndroid Build Coastguard Worker%%loop:
; vectors 0-3 (m0-m3) and their partners 8-11 (m4-m7) of this group
8235*c0909341SAndroid Build Coastguard Worker    mova                 m0, [r6-32* 4]
8236*c0909341SAndroid Build Coastguard Worker    mova                 m4, [r6+32* 4]
8237*c0909341SAndroid Build Coastguard Worker    mova                 m1, [r6-32* 3]
8238*c0909341SAndroid Build Coastguard Worker    mova                 m5, [r6+32* 5]
8239*c0909341SAndroid Build Coastguard Worker    mova                 m2, [r6-32* 2]
8240*c0909341SAndroid Build Coastguard Worker    mova                 m6, [r6+32* 6]
8241*c0909341SAndroid Build Coastguard Worker    mova                 m3, [r6-32* 1]
8242*c0909341SAndroid Build Coastguard Worker    mova                 m7, [r6+32* 7]
8243*c0909341SAndroid Build Coastguard Worker    REPX      {psrad x, %1}, m0, m4, m1, m5, m2, m6, m3, m7
8244*c0909341SAndroid Build Coastguard Worker    packssdw             m0, m4
8245*c0909341SAndroid Build Coastguard Worker    packssdw             m1, m5
8246*c0909341SAndroid Build Coastguard Worker    packssdw             m2, m6
8247*c0909341SAndroid Build Coastguard Worker    packssdw             m3, m7
; vectors 4-5 paired with 12-13; m6/m7 serve as temporaries here
8248*c0909341SAndroid Build Coastguard Worker    mova                 m4, [r6+32* 0]
8249*c0909341SAndroid Build Coastguard Worker    mova                 m6, [r6+32* 8]
8250*c0909341SAndroid Build Coastguard Worker    mova                 m5, [r6+32* 1]
8251*c0909341SAndroid Build Coastguard Worker    mova                 m7, [r6+32* 9]
8252*c0909341SAndroid Build Coastguard Worker    REPX      {psrad x, %1}, m4, m6, m5, m7
8253*c0909341SAndroid Build Coastguard Worker    packssdw             m4, m6
8254*c0909341SAndroid Build Coastguard Worker    packssdw             m5, m7
; vectors 6-7 paired with 14-15; m8/m9 serve as temporaries here
8255*c0909341SAndroid Build Coastguard Worker    mova                 m6, [r6+32* 2]
8256*c0909341SAndroid Build Coastguard Worker    mova                 m8, [r6+32*10]
8257*c0909341SAndroid Build Coastguard Worker    mova                 m7, [r6+32* 3]
8258*c0909341SAndroid Build Coastguard Worker    mova                 m9, [r6+32*11]
8259*c0909341SAndroid Build Coastguard Worker    REPX      {psrad x, %1}, m6, m8, m7, m9
8260*c0909341SAndroid Build Coastguard Worker    packssdw             m6, m8
8261*c0909341SAndroid Build Coastguard Worker    packssdw             m7, m9
; NOTE(review): transpose3 is defined outside this chunk; presumably it
; transposes the packed 16-bit data held in m0-m7 - confirm at its definition.
8262*c0909341SAndroid Build Coastguard Worker    call m(idct_16x8_internal_10bpc).transpose3
; store the eight packed vectors through the (slower-moving) write pointer
8263*c0909341SAndroid Build Coastguard Worker    mova          [r5-32*4], m0
8264*c0909341SAndroid Build Coastguard Worker    mova          [r5-32*3], m1
8265*c0909341SAndroid Build Coastguard Worker    mova          [r5-32*2], m2
8266*c0909341SAndroid Build Coastguard Worker    mova          [r5-32*1], m3
8267*c0909341SAndroid Build Coastguard Worker    mova          [r5+32*0], m4
8268*c0909341SAndroid Build Coastguard Worker    mova          [r5+32*1], m5
8269*c0909341SAndroid Build Coastguard Worker    mova          [r5+32*2], m6
8270*c0909341SAndroid Build Coastguard Worker    mova          [r5+32*3], m7
8271*c0909341SAndroid Build Coastguard Worker    add                  r6, 32*16
8272*c0909341SAndroid Build Coastguard Worker    add                  r5, 32*8
8273*c0909341SAndroid Build Coastguard Worker    cmp                  r5, r4
8274*c0909341SAndroid Build Coastguard Worker    jl %%loop
; leave r6 at the end pointer for the code that follows the macro
8275*c0909341SAndroid Build Coastguard Worker    mov                  r6, r4
8276*c0909341SAndroid Build Coastguard Worker%endmacro
8277*c0909341SAndroid Build Coastguard Worker    IDCT64_SHIFT_TRANSPOSE 2
8278*c0909341SAndroid Build Coastguard Worker    ret
8279*c0909341SAndroid Build Coastguard Worker
; inv_txfm_add_dct_dct_64x32_10bpc(dst, stride, c, eob)
;
; Entry point with three paths:
;   * eob == 0       -> .dconly: scale the single DC coefficient and tail-jump
;                       into the shared 64x16 .dconly2 code.
;   * eob < 36/136/300 -> run .main one, two or three times, then .fast
;                       zero-fills the remainder of the intermediate buffer.
;   * otherwise      -> run .main four times and go straight to .pass2.
; .main performs the first pass on one slice of the coefficient buffer and
; stores shifted, packed results on the stack; .pass2 consumes that buffer
; with the 8bpc 16x16/16x32 helpers and finishes through the 16x32 10bpc
; .pass2_end routine.
;
; Constant registers set up for the first pass:
;   m11 = pd_2048, m12 = clip_18b_min, m13 = clip_18b_max, m14 = pd_2896
; NOTE(review): the helpers called from here are defined outside this chunk;
; their register/stack contracts are assumed, not verified, in the notes below.
8280*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_dct_dct_64x32_10bpc, 4, 7, 0, dst, stride, c, eob
8281*c0909341SAndroid Build Coastguard Worker    test               eobd, eobd
8282*c0909341SAndroid Build Coastguard Worker    jz .dconly
; Only the non-DC path requests the larger frame: 8 GPRs, 16 vector
; registers and 32*163 bytes of stack (x86inc PROLOGUE arguments).
8283*c0909341SAndroid Build Coastguard Worker    PROLOGUE              0, 8, 16, 32*163, dst, stride, c, eob
8284*c0909341SAndroid Build Coastguard Worker%undef cmp
8285*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m11, [pd_2048]
8286*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [clip_18b_min]
8287*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_18b_max]
8288*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m14, [pd_2896]
; r6 = working pointer into the stack buffer used by .main
8289*c0909341SAndroid Build Coastguard Worker    lea                  r6, [rsp+32*7]
; Run .main up to four times; stop early once eob is below the next
; threshold (36, 136, 300) and let .fast zero the part that was not computed.
8290*c0909341SAndroid Build Coastguard Worker    call .main
8291*c0909341SAndroid Build Coastguard Worker    cmp                eobd, 36
8292*c0909341SAndroid Build Coastguard Worker    jl .fast
8293*c0909341SAndroid Build Coastguard Worker    call .main
8294*c0909341SAndroid Build Coastguard Worker    cmp                eobd, 136
8295*c0909341SAndroid Build Coastguard Worker    jl .fast
8296*c0909341SAndroid Build Coastguard Worker    call .main
8297*c0909341SAndroid Build Coastguard Worker    cmp                eobd, 300
8298*c0909341SAndroid Build Coastguard Worker    jl .fast
8299*c0909341SAndroid Build Coastguard Worker    call .main
8300*c0909341SAndroid Build Coastguard Worker    jmp .pass2
; DC-only path: r6d = ((dc*181 + 128) >> 8) * 181, then (+384) >> 9.
; eobd is zero on this path, so it doubles as the value that clears [cq];
; afterwards r3d (the eob register) is set to 32 with `or`.
; NOTE(review): 32 is presumably the row count expected by .dconly2 - confirm.
8301*c0909341SAndroid Build Coastguard Worker.dconly:
8302*c0909341SAndroid Build Coastguard Worker    imul                r6d, [cq], 181
8303*c0909341SAndroid Build Coastguard Worker    mov                [cq], eobd ; 0
8304*c0909341SAndroid Build Coastguard Worker    or                  r3d, 32
8305*c0909341SAndroid Build Coastguard Worker    add                 r6d, 128
8306*c0909341SAndroid Build Coastguard Worker    sar                 r6d, 8
8307*c0909341SAndroid Build Coastguard Worker    imul                r6d, 181
8308*c0909341SAndroid Build Coastguard Worker    add                 r6d, 384
8309*c0909341SAndroid Build Coastguard Worker    sar                 r6d, 9
8310*c0909341SAndroid Build Coastguard Worker    jmp m(inv_txfm_add_dct_dct_64x16_10bpc).dconly2
; Early-out path: zero the buffer from r6 up to r4 = rsp+32*135 in chunks of
; eight vectors, so pass 2 reads zeros for the slices .main did not produce.
8311*c0909341SAndroid Build Coastguard Worker.fast:
8312*c0909341SAndroid Build Coastguard Worker    pxor                 m0, m0
8313*c0909341SAndroid Build Coastguard Worker    lea                  r4, [rsp+32*135]
8314*c0909341SAndroid Build Coastguard Worker.fast_loop:
8315*c0909341SAndroid Build Coastguard Worker    REPX {mova [r6+32*x], m0}, -4, -3, -2, -1, 0, 1, 2, 3
8316*c0909341SAndroid Build Coastguard Worker    add                  r6, 32*8
8317*c0909341SAndroid Build Coastguard Worker    cmp                  r6, r4
8318*c0909341SAndroid Build Coastguard Worker    jl .fast_loop
; Second pass setup. The coefficient pointer and eob are no longer needed,
; so r2 and r3 are reused:
;   r7 = r6 - 32*32 (read pointer), r5 = r6 + 32*8,
;   r6 = pw_5+128 (constant base handed to the 8bpc helpers),
;   r2 = dst + stride*19, r3 = stride*3.
8319*c0909341SAndroid Build Coastguard Worker.pass2:
8320*c0909341SAndroid Build Coastguard Worker    lea                  r7, [r6-32*32]
8321*c0909341SAndroid Build Coastguard Worker    lea                  r5, [r6+32*8]
8322*c0909341SAndroid Build Coastguard Worker    lea                  r6, [pw_5+128]
8323*c0909341SAndroid Build Coastguard Worker    imul                 r2, strideq, 19
8324*c0909341SAndroid Build Coastguard Worker    lea                  r3, [strideq*3]
8325*c0909341SAndroid Build Coastguard Worker    add                  r2, dstq
8326*c0909341SAndroid Build Coastguard Worker.pass2_loop:
; First 16 inputs for main_oddhalf: the odd-offset vectors of four groups
; that lie 32 vectors apart (-99.., -67.., -35.., -3..).
8327*c0909341SAndroid Build Coastguard Worker    mova                 m0, [r7-32*99]
8328*c0909341SAndroid Build Coastguard Worker    mova                 m1, [r7-32*97]
8329*c0909341SAndroid Build Coastguard Worker    mova                 m2, [r7-32*95]
8330*c0909341SAndroid Build Coastguard Worker    mova                 m3, [r7-32*93]
8331*c0909341SAndroid Build Coastguard Worker    mova                 m4, [r7-32*67]
8332*c0909341SAndroid Build Coastguard Worker    mova                 m5, [r7-32*65]
8333*c0909341SAndroid Build Coastguard Worker    mova                 m6, [r7-32*63]
8334*c0909341SAndroid Build Coastguard Worker    mova                 m7, [r7-32*61]
8335*c0909341SAndroid Build Coastguard Worker    mova                 m8, [r7-32*35]
8336*c0909341SAndroid Build Coastguard Worker    mova                 m9, [r7-32*33]
8337*c0909341SAndroid Build Coastguard Worker    mova                m10, [r7-32*31]
8338*c0909341SAndroid Build Coastguard Worker    mova                m11, [r7-32*29]
8339*c0909341SAndroid Build Coastguard Worker    mova                m12, [r7-32* 3]
8340*c0909341SAndroid Build Coastguard Worker    mova                m13, [r7-32* 1]
8341*c0909341SAndroid Build Coastguard Worker    mova                m14, [r7+32* 1]
8342*c0909341SAndroid Build Coastguard Worker    mova                m15, [r7+32* 3]
8343*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_16x32_8bpc).main_oddhalf
; Next 16 inputs for the 16x16 core: the even-offset vectors of the same groups.
8344*c0909341SAndroid Build Coastguard Worker    mova                 m0, [r7-32*100]
8345*c0909341SAndroid Build Coastguard Worker    mova                 m1, [r7-32*98]
8346*c0909341SAndroid Build Coastguard Worker    mova                 m2, [r7-32*96]
8347*c0909341SAndroid Build Coastguard Worker    mova                 m3, [r7-32*94]
8348*c0909341SAndroid Build Coastguard Worker    mova                 m4, [r7-32*68]
8349*c0909341SAndroid Build Coastguard Worker    mova                 m5, [r7-32*66]
8350*c0909341SAndroid Build Coastguard Worker    mova                 m6, [r7-32*64]
8351*c0909341SAndroid Build Coastguard Worker    mova                 m7, [r7-32*62]
8352*c0909341SAndroid Build Coastguard Worker    mova                 m8, [r7-32*36]
8353*c0909341SAndroid Build Coastguard Worker    mova                 m9, [r7-32*34]
8354*c0909341SAndroid Build Coastguard Worker    mova                m10, [r7-32*32]
8355*c0909341SAndroid Build Coastguard Worker    mova                m11, [r7-32*30]
8356*c0909341SAndroid Build Coastguard Worker    mova                m12, [r7-32* 4]
8357*c0909341SAndroid Build Coastguard Worker    mova                m13, [r7-32* 2]
8358*c0909341SAndroid Build Coastguard Worker    mova                m14, [r7+32* 0]
8359*c0909341SAndroid Build Coastguard Worker    mova                m15, [r7+32* 2]
; advance the read pointer for the next iteration; m15 is spilled to [rsp]
; before the 16x16 core is called
8360*c0909341SAndroid Build Coastguard Worker    add                  r7, 32*8
8361*c0909341SAndroid Build Coastguard Worker    mova              [rsp], m15
8362*c0909341SAndroid Build Coastguard Worker    call m(idct_16x16_internal_8bpc).main
8363*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_16x32_10bpc).pass2_end
; Step both destination pointers 32 bytes to the right for the next slice.
; NOTE(review): the -r3 / +r3 corrections presumably undo pointer movement
; done inside pass2_end - confirm at its definition.
8364*c0909341SAndroid Build Coastguard Worker    sub                dstq, r3
8365*c0909341SAndroid Build Coastguard Worker    lea                  r2, [r2+r3+32]
8366*c0909341SAndroid Build Coastguard Worker    add                dstq, 32
8367*c0909341SAndroid Build Coastguard Worker    cmp                  r7, r4
8368*c0909341SAndroid Build Coastguard Worker    jl .pass2_loop
8369*c0909341SAndroid Build Coastguard Worker    RET
8370*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; .main: first pass over one slice of the coefficient buffer.
; Rows are 128 bytes apart; every loaded row is multiplied by m14 (pd_2896)
; with pmulld before being handed to a *_rect2 helper.
; NOTE(review): the matching rounding shift for those rows presumably happens
; inside the *_rect2 helpers; only the last four rows are rounded here.
8371*c0909341SAndroid Build Coastguard Worker.main:
8372*c0909341SAndroid Build Coastguard Worker    lea                  r5, [idct64_mul_16bpc]
8373*c0909341SAndroid Build Coastguard Worker    pmulld               m0, m14, [cq+128* 1]
8374*c0909341SAndroid Build Coastguard Worker    pmulld               m1, m14, [cq+128*31]
8375*c0909341SAndroid Build Coastguard Worker    pmulld               m2, m14, [cq+128*17]
8376*c0909341SAndroid Build Coastguard Worker    pmulld               m3, m14, [cq+128*15]
8377*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_16x64_10bpc).main_part1_rect2
8378*c0909341SAndroid Build Coastguard Worker    pmulld               m0, m14, [cq+128* 7]
8379*c0909341SAndroid Build Coastguard Worker    pmulld               m1, m14, [cq+128*25]
8380*c0909341SAndroid Build Coastguard Worker    pmulld               m2, m14, [cq+128*23]
8381*c0909341SAndroid Build Coastguard Worker    pmulld               m3, m14, [cq+128* 9]
8382*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_16x64_10bpc).main_part1_rect2
8383*c0909341SAndroid Build Coastguard Worker    pmulld               m0, m14, [cq+128* 5]
8384*c0909341SAndroid Build Coastguard Worker    pmulld               m1, m14, [cq+128*27]
8385*c0909341SAndroid Build Coastguard Worker    pmulld               m2, m14, [cq+128*21]
8386*c0909341SAndroid Build Coastguard Worker    pmulld               m3, m14, [cq+128*11]
8387*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_16x64_10bpc).main_part1_rect2
8388*c0909341SAndroid Build Coastguard Worker    pmulld               m0, m14, [cq+128* 3]
8389*c0909341SAndroid Build Coastguard Worker    pmulld               m1, m14, [cq+128*29]
8390*c0909341SAndroid Build Coastguard Worker    pmulld               m2, m14, [cq+128*19]
8391*c0909341SAndroid Build Coastguard Worker    pmulld               m3, m14, [cq+128*13]
8392*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_16x64_10bpc).main_part1_rect2
; all 16 odd rows have been fed to main_part1_rect2; combine them
8393*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_16x64_10bpc).main_part2
; rows 2, 14, 18, 30 and 6, 10, 22, 26 go to the 8x32 odd-half helpers
8394*c0909341SAndroid Build Coastguard Worker    pmulld               m0, m14, [cq+128* 2]
8395*c0909341SAndroid Build Coastguard Worker    pmulld               m1, m14, [cq+128*14]
8396*c0909341SAndroid Build Coastguard Worker    pmulld               m2, m14, [cq+128*18]
8397*c0909341SAndroid Build Coastguard Worker    pmulld               m3, m14, [cq+128*30]
8398*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_8x32_10bpc).main_oddhalf_part1_fast_rect2
8399*c0909341SAndroid Build Coastguard Worker    pmulld               m0, m14, [cq+128* 6]
8400*c0909341SAndroid Build Coastguard Worker    pmulld               m1, m14, [cq+128*10]
8401*c0909341SAndroid Build Coastguard Worker    pmulld               m2, m14, [cq+128*22]
8402*c0909341SAndroid Build Coastguard Worker    pmulld               m3, m14, [cq+128*26]
8403*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_8x32_10bpc).main_oddhalf_part2_fast_rect2
; rows 4, 12, 20, 28 go to the 8x16 odd-half helper
8404*c0909341SAndroid Build Coastguard Worker    pmulld               m0, m14, [cq+128* 4]
8405*c0909341SAndroid Build Coastguard Worker    pmulld               m1, m14, [cq+128*12]
8406*c0909341SAndroid Build Coastguard Worker    pmulld               m2, m14, [cq+128*20]
8407*c0909341SAndroid Build Coastguard Worker    pmulld               m3, m14, [cq+128*28]
8408*c0909341SAndroid Build Coastguard Worker    call m(idct_8x16_internal_10bpc).main_oddhalf_fast_rect2
; rows 0, 8, 16, 24 are kept in m0-m3 for main_end2; they are loaded before
; the slice is cleared below
8409*c0909341SAndroid Build Coastguard Worker    pmulld               m0, m14, [cq+128* 0]
8410*c0909341SAndroid Build Coastguard Worker    pmulld               m1, m14, [cq+128* 8]
8411*c0909341SAndroid Build Coastguard Worker    pmulld               m2, m14, [cq+128*16]
8412*c0909341SAndroid Build Coastguard Worker    pmulld               m3, m14, [cq+128*24]
; Clear the consumed slice: one vector at each of the row offsets
; cq+128*0 .. cq+128*31, four rows per iteration, walking downwards from
; rows 28-31 (r7 = 128*29) to rows 0-3 (r7 = 128*1).
8413*c0909341SAndroid Build Coastguard Worker    pxor                m15, m15
8414*c0909341SAndroid Build Coastguard Worker    mov                 r7d, 128*29
8415*c0909341SAndroid Build Coastguard Worker.main_zero_loop:
8416*c0909341SAndroid Build Coastguard Worker    mova      [cq+r7-128*1], m15
8417*c0909341SAndroid Build Coastguard Worker    mova      [cq+r7+128*0], m15
8418*c0909341SAndroid Build Coastguard Worker    mova      [cq+r7+128*1], m15
8419*c0909341SAndroid Build Coastguard Worker    mova      [cq+r7+128*2], m15
8420*c0909341SAndroid Build Coastguard Worker    sub                 r7d, 128*4
8421*c0909341SAndroid Build Coastguard Worker    jg .main_zero_loop
; m15 = 2048 >> 11 = 1 in every lane; m0-m3 = (x*2896 + 2048) >> 12
8422*c0909341SAndroid Build Coastguard Worker    psrld               m15, m11, 11 ; pd_1
8423*c0909341SAndroid Build Coastguard Worker    REPX     {paddd x, m11}, m0, m1, m2, m3
8424*c0909341SAndroid Build Coastguard Worker    REPX     {psrad x, 12 }, m0, m1, m2, m3
8425*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_64x16_10bpc).main_end2
; shift by 1, pack to 16 bits and transpose; leaves r6 = r4 (see the macro)
8426*c0909341SAndroid Build Coastguard Worker    IDCT64_SHIFT_TRANSPOSE 1
8427*c0909341SAndroid Build Coastguard Worker    ret
8428*c0909341SAndroid Build Coastguard Worker
; inv_txfm_add_dct_dct_64x64_10bpc(dst, stride, c, eob)
;
; Same overall shape as the 64x32 entry point above it in the file:
;   * eob == 0       -> .dconly: pre-multiply the DC coefficient by 181 and
;                       tail-jump into the shared 64x16 .dconly code.
;   * eob < 36/136/300 -> run .main one, two or three times, then .fast
;                       zero-fills the remainder of the intermediate buffer.
;   * otherwise      -> run .main four times and go straight to .pass2.
; Unlike the 64x32 variant, .main loads the coefficients without the pd_2896
; multiply and finishes through the 64x16 .main_end / .shift_transpose code.
; .pass2 runs a 64-point second pass built from the 8bpc 16x16, 16x32 and
; 16x64 helpers plus the 10bpc 16x64 .main_part2_pass2 routine.
;
; Constant registers set up for the first pass:
;   m11 = pd_2048, m12 = clip_18b_min, m13 = clip_18b_max, m14 = pd_2896
; NOTE(review): the helpers called from here are defined outside this chunk;
; their register/stack contracts are assumed, not verified, in the notes below.
8429*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_dct_dct_64x64_10bpc, 4, 7, 0, dst, stride, c, eob
8430*c0909341SAndroid Build Coastguard Worker    test               eobd, eobd
8431*c0909341SAndroid Build Coastguard Worker    jz .dconly
; Only the non-DC path requests the larger frame: 11 GPRs, 16 vector
; registers and 32*195 bytes of stack (x86inc PROLOGUE arguments).
8432*c0909341SAndroid Build Coastguard Worker    PROLOGUE              0, 11, 16, 32*195, dst, stride, c, eob
8433*c0909341SAndroid Build Coastguard Worker%undef cmp
8434*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m11, [pd_2048]
8435*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m12, [clip_18b_min]
8436*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m13, [clip_18b_max]
8437*c0909341SAndroid Build Coastguard Worker    vpbroadcastd        m14, [pd_2896]
; r6 = working pointer into the stack buffer used by .main
8438*c0909341SAndroid Build Coastguard Worker    lea                  r6, [rsp+32*7]
; Run .main up to four times; stop early once eob is below the next
; threshold (36, 136, 300) and let .fast zero the part that was not computed.
8439*c0909341SAndroid Build Coastguard Worker    call .main
8440*c0909341SAndroid Build Coastguard Worker    cmp                eobd, 36
8441*c0909341SAndroid Build Coastguard Worker    jl .fast
8442*c0909341SAndroid Build Coastguard Worker    call .main
8443*c0909341SAndroid Build Coastguard Worker    cmp                eobd, 136
8444*c0909341SAndroid Build Coastguard Worker    jl .fast
8445*c0909341SAndroid Build Coastguard Worker    call .main
8446*c0909341SAndroid Build Coastguard Worker    cmp                eobd, 300
8447*c0909341SAndroid Build Coastguard Worker    jl .fast
8448*c0909341SAndroid Build Coastguard Worker    call .main
8449*c0909341SAndroid Build Coastguard Worker    jmp .pass2
; DC-only path: r6d = dc*181; eobd is zero here and is used to clear [cq];
; r3d (the eob register) is then set to 64 with `or`.
; NOTE(review): 64 is presumably the row count expected by the shared
; 64x16 .dconly code, which also finishes the scaling - confirm.
8450*c0909341SAndroid Build Coastguard Worker.dconly:
8451*c0909341SAndroid Build Coastguard Worker    imul                r6d, [cq], 181
8452*c0909341SAndroid Build Coastguard Worker    mov                [cq], eobd ; 0
8453*c0909341SAndroid Build Coastguard Worker    or                  r3d, 64
8454*c0909341SAndroid Build Coastguard Worker    jmp m(inv_txfm_add_dct_dct_64x16_10bpc).dconly
; Early-out path: zero the buffer from r6 up to r4 = rsp+32*135 in chunks of
; eight vectors, so pass 2 reads zeros for the slices .main did not produce.
8455*c0909341SAndroid Build Coastguard Worker.fast:
8456*c0909341SAndroid Build Coastguard Worker    pxor                 m0, m0
8457*c0909341SAndroid Build Coastguard Worker    lea                  r4, [rsp+32*135]
8458*c0909341SAndroid Build Coastguard Worker.fast_loop:
8459*c0909341SAndroid Build Coastguard Worker    REPX {mova [r6+32*x], m0}, -4, -3, -2, -1, 0, 1, 2, 3
8460*c0909341SAndroid Build Coastguard Worker    add                  r6, 32*8
8461*c0909341SAndroid Build Coastguard Worker    cmp                  r6, r4
8462*c0909341SAndroid Build Coastguard Worker    jl .fast_loop
; Second pass setup:
;   r10 = r6 - 32*32 (read pointer into the first-pass output),
;   r6  = pw_5+128 (constant base handed to the 8bpc helpers),
;   r8 = stride*4, r9 = stride*5, r3 = stride*6, r7 = stride*7.
8463*c0909341SAndroid Build Coastguard Worker.pass2:
8464*c0909341SAndroid Build Coastguard Worker    lea                 r10, [r6-32*32]
8465*c0909341SAndroid Build Coastguard Worker    lea                  r6, [pw_5+128]
8466*c0909341SAndroid Build Coastguard Worker    lea                  r8, [strideq*4]
8467*c0909341SAndroid Build Coastguard Worker    lea                  r9, [strideq*5]
8468*c0909341SAndroid Build Coastguard Worker    lea                  r3, [r9+strideq*1] ; stride*6
8469*c0909341SAndroid Build Coastguard Worker    lea                  r7, [r9+strideq*2] ; stride*7
8470*c0909341SAndroid Build Coastguard Worker.pass2_loop:
; Step 1: 16-point core on in0, in4, ..., in28; the upper eight inputs
; (m8-m14 and the [rsp] slot) are zeroed.
8471*c0909341SAndroid Build Coastguard Worker    mova                 m0, [r10-32*100] ; in0
8472*c0909341SAndroid Build Coastguard Worker    mova                 m1, [r10-32*96]  ; in4
8473*c0909341SAndroid Build Coastguard Worker    mova                 m2, [r10-32*68]  ; in8
8474*c0909341SAndroid Build Coastguard Worker    mova                 m3, [r10-32*64]  ; in12
8475*c0909341SAndroid Build Coastguard Worker    mova                 m4, [r10-32*36]  ; in16
8476*c0909341SAndroid Build Coastguard Worker    mova                 m5, [r10-32*32]  ; in20
8477*c0909341SAndroid Build Coastguard Worker    mova                 m6, [r10-32* 4]  ; in24
8478*c0909341SAndroid Build Coastguard Worker    mova                 m7, [r10+32* 0]  ; in28
8479*c0909341SAndroid Build Coastguard Worker    pxor                 m8, m8
8480*c0909341SAndroid Build Coastguard Worker    REPX       {mova x, m8}, m9, m10, m11, m12, m13, m14
8481*c0909341SAndroid Build Coastguard Worker    mova              [rsp], m8
8482*c0909341SAndroid Build Coastguard Worker    call m(idct_16x16_internal_8bpc).main
; m1 is reloaded from [rsp+32*1] (NOTE(review): presumably where the 16x16
; core leaves that output - confirm), then all 16 results are stored to the
; scratch area addressed by r4.
8483*c0909341SAndroid Build Coastguard Worker    mova                 m1, [rsp+32*1]
8484*c0909341SAndroid Build Coastguard Worker    mova          [r4-32*4], m0
8485*c0909341SAndroid Build Coastguard Worker    mova          [r4-32*3], m1
8486*c0909341SAndroid Build Coastguard Worker    mova          [r4-32*2], m2
8487*c0909341SAndroid Build Coastguard Worker    mova          [r4-32*1], m3
8488*c0909341SAndroid Build Coastguard Worker    mova          [r4+32*0], m4
8489*c0909341SAndroid Build Coastguard Worker    mova          [r4+32*1], m5
8490*c0909341SAndroid Build Coastguard Worker    mova          [r4+32*2], m6
8491*c0909341SAndroid Build Coastguard Worker    mova          [r4+32*3], m7
8492*c0909341SAndroid Build Coastguard Worker    add                  r4, 32*8
8493*c0909341SAndroid Build Coastguard Worker    mova          [r4-32*4], m8
8494*c0909341SAndroid Build Coastguard Worker    mova          [r4-32*3], m9
8495*c0909341SAndroid Build Coastguard Worker    mova          [r4-32*2], m10
8496*c0909341SAndroid Build Coastguard Worker    mova          [r4-32*1], m11
8497*c0909341SAndroid Build Coastguard Worker    mova          [r4+32*0], m12
8498*c0909341SAndroid Build Coastguard Worker    mova          [r4+32*1], m13
8499*c0909341SAndroid Build Coastguard Worker    mova          [r4+32*2], m14
8500*c0909341SAndroid Build Coastguard Worker    mova          [r4+32*3], m15
; Step 2: 32-point odd half on in2, in6, ..., in30 (r5 = r4 + 32*16 is taken
; before r4 is advanced by another 32*8).
8501*c0909341SAndroid Build Coastguard Worker    mova                 m0, [r10-32*98] ; in2
8502*c0909341SAndroid Build Coastguard Worker    mova                 m1, [r10-32*94] ; in6
8503*c0909341SAndroid Build Coastguard Worker    mova                 m2, [r10-32*66] ; in10
8504*c0909341SAndroid Build Coastguard Worker    mova                 m3, [r10-32*62] ; in14
8505*c0909341SAndroid Build Coastguard Worker    mova                 m4, [r10-32*34] ; in18
8506*c0909341SAndroid Build Coastguard Worker    mova                 m5, [r10-32*30] ; in22
8507*c0909341SAndroid Build Coastguard Worker    mova                 m6, [r10-32* 2] ; in26
8508*c0909341SAndroid Build Coastguard Worker    mova                 m7, [r10+32* 2] ; in30
8509*c0909341SAndroid Build Coastguard Worker    lea                  r5, [r4+32*16]
8510*c0909341SAndroid Build Coastguard Worker    add                  r4, 32*8
8511*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_16x32_8bpc).main_oddhalf_fast
; Step 3: 64-point odd half, first group of eight odd inputs.
; r6 is repurposed as the idct64_mul table pointer (biased by -8 here and
; advanced by 8 before the second group).
; NOTE(review): the next loop iteration calls idct_16x16_internal_8bpc.main
; again without reloading pw_5+128 in this function, so r6 is presumably
; restored inside main_part2_pass2 - confirm at its definition.
8512*c0909341SAndroid Build Coastguard Worker    mova                 m0, [r10-32*99] ; in1
8513*c0909341SAndroid Build Coastguard Worker    mova                 m1, [r10+32* 3] ; in31
8514*c0909341SAndroid Build Coastguard Worker    mova                 m2, [r10-32*35] ; in17
8515*c0909341SAndroid Build Coastguard Worker    mova                 m3, [r10-32*61] ; in15
8516*c0909341SAndroid Build Coastguard Worker    mova                 m4, [r10-32*67] ; in9
8517*c0909341SAndroid Build Coastguard Worker    mova                 m5, [r10-32*29] ; in23
8518*c0909341SAndroid Build Coastguard Worker    mova                 m6, [r10-32* 3] ; in25
8519*c0909341SAndroid Build Coastguard Worker    mova                 m7, [r10-32*93] ; in7
8520*c0909341SAndroid Build Coastguard Worker    lea                  r6, [idct64_mul - 8]
8521*c0909341SAndroid Build Coastguard Worker    add                  r4, 32*16
8522*c0909341SAndroid Build Coastguard Worker    add                  r5, 32*32
8523*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_16x64_8bpc).main_part1
; Step 4: 64-point odd half, second group of eight odd inputs.
8524*c0909341SAndroid Build Coastguard Worker    mova                 m0, [r10-32*95] ; in5
8525*c0909341SAndroid Build Coastguard Worker    mova                 m1, [r10-32* 1] ; in27
8526*c0909341SAndroid Build Coastguard Worker    mova                 m2, [r10-32*31] ; in21
8527*c0909341SAndroid Build Coastguard Worker    mova                 m3, [r10-32*65] ; in11
8528*c0909341SAndroid Build Coastguard Worker    mova                 m4, [r10-32*63] ; in13
8529*c0909341SAndroid Build Coastguard Worker    mova                 m5, [r10-32*33] ; in19
8530*c0909341SAndroid Build Coastguard Worker    mova                 m6, [r10+32* 1] ; in29
8531*c0909341SAndroid Build Coastguard Worker    mova                 m7, [r10-32*97] ; in3
8532*c0909341SAndroid Build Coastguard Worker    add                  r6, 8
8533*c0909341SAndroid Build Coastguard Worker    add                  r4, 32*8
8534*c0909341SAndroid Build Coastguard Worker    sub                  r5, 32*8
8535*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_16x64_8bpc).main_part1
8536*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_16x64_10bpc).main_part2_pass2
; Next slice: advance the read pointer by eight vectors, move dst 32 bytes to
; the right and rewind the scratch pointer r4 by 32*44.
; NOTE(review): the `sub dstq, r8` presumably undoes pointer movement done
; inside main_part2_pass2 - confirm at its definition.
8537*c0909341SAndroid Build Coastguard Worker    add                 r10, 32*8
8538*c0909341SAndroid Build Coastguard Worker    sub                dstq, r8
8539*c0909341SAndroid Build Coastguard Worker    sub                  r4, 32*44
8540*c0909341SAndroid Build Coastguard Worker    add                dstq, 32
8541*c0909341SAndroid Build Coastguard Worker    cmp                 r10, r4
8542*c0909341SAndroid Build Coastguard Worker    jl .pass2_loop
8543*c0909341SAndroid Build Coastguard Worker    RET
8544*c0909341SAndroid Build Coastguard WorkerALIGN function_align
; .main: first pass over one slice of the coefficient buffer.
; Rows are 128 bytes apart and are loaded unscaled (plain mova), in the same
; row order as the 64x32 .main but using the non-rect2 helpers.
8545*c0909341SAndroid Build Coastguard Worker.main:
8546*c0909341SAndroid Build Coastguard Worker    lea                  r5, [idct64_mul_16bpc]
8547*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+128* 1]
8548*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+128*31]
8549*c0909341SAndroid Build Coastguard Worker    mova                 m2, [cq+128*17]
8550*c0909341SAndroid Build Coastguard Worker    mova                 m3, [cq+128*15]
8551*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_16x64_10bpc).main_part1
8552*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+128* 7]
8553*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+128*25]
8554*c0909341SAndroid Build Coastguard Worker    mova                 m2, [cq+128*23]
8555*c0909341SAndroid Build Coastguard Worker    mova                 m3, [cq+128* 9]
8556*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_16x64_10bpc).main_part1
8557*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+128* 5]
8558*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+128*27]
8559*c0909341SAndroid Build Coastguard Worker    mova                 m2, [cq+128*21]
8560*c0909341SAndroid Build Coastguard Worker    mova                 m3, [cq+128*11]
8561*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_16x64_10bpc).main_part1
8562*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+128* 3]
8563*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+128*29]
8564*c0909341SAndroid Build Coastguard Worker    mova                 m2, [cq+128*19]
8565*c0909341SAndroid Build Coastguard Worker    mova                 m3, [cq+128*13]
8566*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_16x64_10bpc).main_part1
; all 16 odd rows have been fed to main_part1; combine them
8567*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_16x64_10bpc).main_part2
; rows 2, 14, 18, 30 and 6, 10, 22, 26 go to the 8x32 odd-half helpers
8568*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+128* 2]
8569*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+128*14]
8570*c0909341SAndroid Build Coastguard Worker    mova                 m2, [cq+128*18]
8571*c0909341SAndroid Build Coastguard Worker    mova                 m3, [cq+128*30]
8572*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_8x32_10bpc).main_oddhalf_part1_fast
8573*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+128* 6]
8574*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+128*10]
8575*c0909341SAndroid Build Coastguard Worker    mova                 m2, [cq+128*22]
8576*c0909341SAndroid Build Coastguard Worker    mova                 m3, [cq+128*26]
8577*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_8x32_10bpc).main_oddhalf_part2_fast
; rows 4, 12, 20, 28 go to the 8x16 odd-half helper
8578*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+128* 4]
8579*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+128*12]
8580*c0909341SAndroid Build Coastguard Worker    mova                 m2, [cq+128*20]
8581*c0909341SAndroid Build Coastguard Worker    mova                 m3, [cq+128*28]
8582*c0909341SAndroid Build Coastguard Worker    call m(idct_8x16_internal_10bpc).main_oddhalf_fast
; rows 0, 8, 16, 24 are kept in m0-m3 for main_end; they are loaded before
; the slice is cleared below
8583*c0909341SAndroid Build Coastguard Worker    mova                 m0, [cq+128* 0]
8584*c0909341SAndroid Build Coastguard Worker    mova                 m1, [cq+128* 8]
8585*c0909341SAndroid Build Coastguard Worker    mova                 m2, [cq+128*16]
8586*c0909341SAndroid Build Coastguard Worker    mova                 m3, [cq+128*24]
; Clear the consumed slice: one vector at each of the row offsets
; cq+128*0 .. cq+128*31, four rows per iteration, walking downwards from
; rows 28-31 (r7 = 128*29) to rows 0-3 (r7 = 128*1).
8587*c0909341SAndroid Build Coastguard Worker    pxor                m15, m15
8588*c0909341SAndroid Build Coastguard Worker    mov                 r7d, 128*29
8589*c0909341SAndroid Build Coastguard Worker.main_zero_loop:
8590*c0909341SAndroid Build Coastguard Worker    mova      [cq+r7-128*1], m15
8591*c0909341SAndroid Build Coastguard Worker    mova      [cq+r7+128*0], m15
8592*c0909341SAndroid Build Coastguard Worker    mova      [cq+r7+128*1], m15
8593*c0909341SAndroid Build Coastguard Worker    mova      [cq+r7+128*2], m15
8594*c0909341SAndroid Build Coastguard Worker    sub                 r7d, 128*4
8595*c0909341SAndroid Build Coastguard Worker    jg .main_zero_loop
; finish in the shared 64x16 code; the tail-jump means .shift_transpose's
; `ret` returns directly to the caller of .main
8596*c0909341SAndroid Build Coastguard Worker    call m(inv_txfm_add_dct_dct_64x16_10bpc).main_end
8597*c0909341SAndroid Build Coastguard Worker    jmp m(inv_txfm_add_dct_dct_64x16_10bpc).shift_transpose
8598*c0909341SAndroid Build Coastguard Worker
8599*c0909341SAndroid Build Coastguard Worker%endif ; ARCH_X86_64
8600