; Copyright © 2021, VideoLAN and dav1d authors
; Copyright © 2021, Two Orioles, LLC
; Copyright © 2021, Matthias Dressel
; All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions are met:
;
; 1. Redistributions of source code must retain the above copyright notice, this
;    list of conditions and the following disclaimer.
;
; 2. Redistributions in binary form must reproduce the above copyright notice,
;    this list of conditions and the following disclaimer in the documentation
;    and/or other materials provided with the distribution.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
; ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
; DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 20*c0909341SAndroid Build Coastguard Worker; ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21*c0909341SAndroid Build Coastguard Worker; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 22*c0909341SAndroid Build Coastguard Worker; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 23*c0909341SAndroid Build Coastguard Worker; ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24*c0909341SAndroid Build Coastguard Worker; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25*c0909341SAndroid Build Coastguard Worker; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26*c0909341SAndroid Build Coastguard Worker 27*c0909341SAndroid Build Coastguard Worker%include "config.asm" 28*c0909341SAndroid Build Coastguard Worker%include "ext/x86/x86inc.asm" 29*c0909341SAndroid Build Coastguard Worker 30*c0909341SAndroid Build Coastguard Worker%if ARCH_X86_64 31*c0909341SAndroid Build Coastguard Worker 32*c0909341SAndroid Build Coastguard WorkerSECTION_RODATA 32 33*c0909341SAndroid Build Coastguard Workeritx4_shuf: dd 0x50401600, 0xd0c09284, 0x70603422, 0xf0e0b0a6 34*c0909341SAndroid Build Coastguard Worker dd 0x50401701, 0xd0c09385, 0x70603523, 0xf0e0b1a7 35*c0909341SAndroid Build Coastguard Workeridct4_12_shuf: dd 0, 2, 4, 6, 1, 3, 5, 7 36*c0909341SAndroid Build Coastguard Workeridct4_12_shuf2: dd 2, 0, 6, 4, 3, 1, 7, 5 37*c0909341SAndroid Build Coastguard Workeriadst8_12_shuf: dd 0, 4, 1, 5, 2, 6, 3, 7 38*c0909341SAndroid Build Coastguard Workeridct16_12_shuf: dd 0, 4, 1, 5, 3, 7, 2, 6 39*c0909341SAndroid Build Coastguard Workeriadst16_12_shuf: dd 3, 7, 0, 4, 2, 6, 1, 5 40*c0909341SAndroid Build Coastguard Workerpw_2048_m2048: dw 2048, 2048, 2048, 2048, -2048, -2048, -2048, -2048 41*c0909341SAndroid Build Coastguard Workeridct4_shuf: db 0, 1, 4, 5, 12, 13, 8, 9, 2, 3, 
6, 7, 14, 15, 10, 11 42*c0909341SAndroid Build Coastguard Workeridct32_shuf: db 0, 1, 8, 9, 4, 5, 12, 13, 2, 3, 10, 11, 6, 7, 14, 15 43*c0909341SAndroid Build Coastguard Worker 44*c0909341SAndroid Build Coastguard Worker%macro COEF_PAIR 2-3 0 45*c0909341SAndroid Build Coastguard Workerpd_%1_%2: dd %1, %1, %2, %2 46*c0909341SAndroid Build Coastguard Worker%define pd_%1 (pd_%1_%2 + 4*0) 47*c0909341SAndroid Build Coastguard Worker%define pd_%2 (pd_%1_%2 + 4*2) 48*c0909341SAndroid Build Coastguard Worker%if %3 49*c0909341SAndroid Build Coastguard Workerdd -%2, -%2 50*c0909341SAndroid Build Coastguard Worker%define pd_%2_m%2 pd_%2 51*c0909341SAndroid Build Coastguard Worker%endif 52*c0909341SAndroid Build Coastguard Worker%endmacro 53*c0909341SAndroid Build Coastguard Worker 54*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 201, 995 55*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 401, 1931 56*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 799, 3406 57*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 1380, 601 58*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 1751, 2440 59*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 2598, 1189 60*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 2751, 2106 61*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 2896, 1567, 1 62*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 2896, 3784, 1 63*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 3035, 3513 64*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 3166, 3920 65*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 3703, 3290 66*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 3857, 4052 67*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 4017, 2276 68*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 4076, 3612 69*c0909341SAndroid Build Coastguard WorkerCOEF_PAIR 4091, 3973 70*c0909341SAndroid Build Coastguard Worker 71*c0909341SAndroid Build Coastguard Workerpd_8: dd 8 72*c0909341SAndroid Build Coastguard Workerpd_m601: dd -601 73*c0909341SAndroid Build 
Coastguard Workerpd_m1189: dd -1189 74*c0909341SAndroid Build Coastguard Workerpd_m1380: dd -1380 75*c0909341SAndroid Build Coastguard Workerpd_m2106: dd -2106 76*c0909341SAndroid Build Coastguard Workerpd_m2598: dd -2598 77*c0909341SAndroid Build Coastguard Workerpd_m2751: dd -2751 78*c0909341SAndroid Build Coastguard Workerpd_m3344: dd -3344 79*c0909341SAndroid Build Coastguard Workerpd_1024: dd 1024 80*c0909341SAndroid Build Coastguard Workerpd_1321: dd 1321 81*c0909341SAndroid Build Coastguard Workerpd_1448: dd 1448 82*c0909341SAndroid Build Coastguard Workerpd_1697: dd 1697 83*c0909341SAndroid Build Coastguard Workerpd_2482: dd 2482 84*c0909341SAndroid Build Coastguard Workerpd_3072: dd 3072 ; 1024 + 2048 85*c0909341SAndroid Build Coastguard Workerpd_3803: dd 3803 86*c0909341SAndroid Build Coastguard Workerpd_5119: dd 5119 ; 1024 + 4096 - 1 87*c0909341SAndroid Build Coastguard Workerpd_5120: dd 5120 ; 1024 + 4096 88*c0909341SAndroid Build Coastguard Workerpd_5793: dd 5793 89*c0909341SAndroid Build Coastguard Workerpd_6144: dd 6144 ; 2048 + 4096 90*c0909341SAndroid Build Coastguard Workerpd_17408: dd 17408 ; 1024 + 16384 91*c0909341SAndroid Build Coastguard Worker 92*c0909341SAndroid Build Coastguard Workerpixel_10bpc_max: times 2 dw 0x03ff 93*c0909341SAndroid Build Coastguard Workerpixel_12bpc_max: times 2 dw 0x0fff 94*c0909341SAndroid Build Coastguard Workerdconly_10bpc: times 2 dw 0x7c00 95*c0909341SAndroid Build Coastguard Workerdconly_12bpc: times 2 dw 0x7000 96*c0909341SAndroid Build Coastguard Workerclip_18b_min: dd -0x20000 97*c0909341SAndroid Build Coastguard Workerclip_18b_max: dd 0x1ffff 98*c0909341SAndroid Build Coastguard Workerclip_20b_min: dd -0x80000 99*c0909341SAndroid Build Coastguard Workerclip_20b_max: dd 0x7ffff 100*c0909341SAndroid Build Coastguard Worker 101*c0909341SAndroid Build Coastguard Workerconst idct64_mul_16bpc 102*c0909341SAndroid Build Coastguard Workerdd 4095, 101, 2967, -2824, 3745, 1660, 3822, -1474, 401, 4076, 799, 4017 
103*c0909341SAndroid Build Coastguard Workerdd -700, 4036, 2359, 3349, -2191, 3461, 897, 3996, -2598, -3166, -4017, -799 104*c0909341SAndroid Build Coastguard Workerdd 4065, 501, 3229, -2520, 3564, 2019, 3948, -1092, 1931, 3612, 3406, 2276 105*c0909341SAndroid Build Coastguard Workerdd -301, 4085, 2675, 3102, -1842, 3659, 1285, 3889, -1189, -3920, -2276, -3406 106*c0909341SAndroid Build Coastguard Worker 107*c0909341SAndroid Build Coastguard Workercextern deint_shuf 108*c0909341SAndroid Build Coastguard Workercextern idct64_mul 109*c0909341SAndroid Build Coastguard Workercextern pw_1697x8 110*c0909341SAndroid Build Coastguard Workercextern pw_1697x16 111*c0909341SAndroid Build Coastguard Workercextern pw_1567_3784 112*c0909341SAndroid Build Coastguard Workercextern pw_m1567_m3784 113*c0909341SAndroid Build Coastguard Workercextern pw_m3784_1567 114*c0909341SAndroid Build Coastguard Workercextern pw_2896_2896 115*c0909341SAndroid Build Coastguard Workercextern pw_m2896_2896 116*c0909341SAndroid Build Coastguard Workercextern pw_5 117*c0909341SAndroid Build Coastguard Workercextern pw_2048 118*c0909341SAndroid Build Coastguard Workercextern pw_4096 119*c0909341SAndroid Build Coastguard Workercextern pw_8192 120*c0909341SAndroid Build Coastguard Workercextern pw_16384 121*c0909341SAndroid Build Coastguard Workercextern pw_2896x8 122*c0909341SAndroid Build Coastguard Workercextern pd_2048 123*c0909341SAndroid Build Coastguard Worker 124*c0909341SAndroid Build Coastguard Workercextern idct_4x8_internal_8bpc_avx2.main 125*c0909341SAndroid Build Coastguard Workercextern idct_4x16_internal_8bpc_avx2.main 126*c0909341SAndroid Build Coastguard Workercextern idct_8x8_internal_8bpc_avx2.main 127*c0909341SAndroid Build Coastguard Workercextern idct_8x16_internal_8bpc_avx2.main 128*c0909341SAndroid Build Coastguard Workercextern idct_16x4_internal_8bpc_avx2.main 129*c0909341SAndroid Build Coastguard Workercextern idct_16x8_internal_8bpc_avx2.main 130*c0909341SAndroid Build 
Coastguard Workercextern idct_16x16_internal_8bpc_avx2.main 131*c0909341SAndroid Build Coastguard Workercextern inv_txfm_add_dct_dct_8x32_8bpc_avx2.main 132*c0909341SAndroid Build Coastguard Workercextern inv_txfm_add_dct_dct_8x32_8bpc_avx2.main_fast 133*c0909341SAndroid Build Coastguard Workercextern inv_txfm_add_dct_dct_16x32_8bpc_avx2.main_oddhalf 134*c0909341SAndroid Build Coastguard Workercextern inv_txfm_add_dct_dct_16x32_8bpc_avx2.main_oddhalf_fast 135*c0909341SAndroid Build Coastguard Workercextern inv_txfm_add_dct_dct_16x64_8bpc_avx2.main_part1 136*c0909341SAndroid Build Coastguard Workercextern inv_txfm_add_dct_dct_16x64_8bpc_avx2.main_part2_internal 137*c0909341SAndroid Build Coastguard Worker 138*c0909341SAndroid Build Coastguard Workercextern iadst_4x4_internal_8bpc_avx2.main 139*c0909341SAndroid Build Coastguard Workercextern iadst_4x8_internal_8bpc_avx2.main_pass2 140*c0909341SAndroid Build Coastguard Workercextern iadst_4x16_internal_8bpc_avx2.main2 141*c0909341SAndroid Build Coastguard Workercextern iadst_8x4_internal_8bpc_avx2.main 142*c0909341SAndroid Build Coastguard Workercextern iadst_8x8_internal_8bpc_avx2.main_pass2 143*c0909341SAndroid Build Coastguard Workercextern iadst_8x16_internal_8bpc_avx2.main 144*c0909341SAndroid Build Coastguard Workercextern iadst_8x16_internal_8bpc_avx2.main_pass2_end 145*c0909341SAndroid Build Coastguard Workercextern iadst_16x4_internal_8bpc_avx2.main 146*c0909341SAndroid Build Coastguard Workercextern iadst_16x8_internal_8bpc_avx2.main 147*c0909341SAndroid Build Coastguard Workercextern iadst_16x8_internal_8bpc_avx2.main_pass2_end 148*c0909341SAndroid Build Coastguard Workercextern iadst_16x16_internal_8bpc_avx2.main 149*c0909341SAndroid Build Coastguard Workercextern iadst_16x16_internal_8bpc_avx2.main_pass2_end 150*c0909341SAndroid Build Coastguard Worker 151*c0909341SAndroid Build Coastguard WorkerSECTION .text 152*c0909341SAndroid Build Coastguard Worker 153*c0909341SAndroid Build Coastguard Worker%define 
m(x) mangle(private_prefix %+ _ %+ x %+ SUFFIX) 154*c0909341SAndroid Build Coastguard Worker 155*c0909341SAndroid Build Coastguard Worker%macro WRAP_XMM 1+ 156*c0909341SAndroid Build Coastguard Worker INIT_XMM cpuname 157*c0909341SAndroid Build Coastguard Worker %1 158*c0909341SAndroid Build Coastguard Worker INIT_YMM cpuname 159*c0909341SAndroid Build Coastguard Worker%endmacro 160*c0909341SAndroid Build Coastguard Worker 161*c0909341SAndroid Build Coastguard Worker%macro IWHT4_1D_PACKED 0 162*c0909341SAndroid Build Coastguard Worker ; m0 = in0 in2, m1 = in1 in3 163*c0909341SAndroid Build Coastguard Worker psubd m2, m0, m1 ; t2 164*c0909341SAndroid Build Coastguard Worker paddd xm0, xm1 ; t0 165*c0909341SAndroid Build Coastguard Worker vpermq m2, m2, q3322 166*c0909341SAndroid Build Coastguard Worker vpermq m0, m0, q1100 167*c0909341SAndroid Build Coastguard Worker vpermq m1, m1, q3120 168*c0909341SAndroid Build Coastguard Worker psubd m3, m0, m2 169*c0909341SAndroid Build Coastguard Worker psrad m3, 1 170*c0909341SAndroid Build Coastguard Worker psubd m3, m1 ; t1 t3 171*c0909341SAndroid Build Coastguard Worker psubd m0, m3 ; ____ out0 172*c0909341SAndroid Build Coastguard Worker paddd m2, m3 ; out3 ____ 173*c0909341SAndroid Build Coastguard Worker%endmacro 174*c0909341SAndroid Build Coastguard Worker 175*c0909341SAndroid Build Coastguard WorkerINIT_YMM avx2 176*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_wht_wht_4x4_16bpc, 3, 7, 6, dst, stride, c, eob, bdmax 177*c0909341SAndroid Build Coastguard Worker mova xm0, [cq+16*0] 178*c0909341SAndroid Build Coastguard Worker vinserti128 m0, [cq+16*2], 1 179*c0909341SAndroid Build Coastguard Worker mova xm1, [cq+16*1] 180*c0909341SAndroid Build Coastguard Worker vinserti128 m1, [cq+16*3], 1 181*c0909341SAndroid Build Coastguard Worker pxor m4, m4 182*c0909341SAndroid Build Coastguard Worker mova [cq+32*0], m4 183*c0909341SAndroid Build Coastguard Worker mova [cq+32*1], m4 184*c0909341SAndroid Build 
Coastguard Worker lea r6, [dstq+strideq*2] 185*c0909341SAndroid Build Coastguard Worker psrad m0, 2 186*c0909341SAndroid Build Coastguard Worker psrad m1, 2 187*c0909341SAndroid Build Coastguard Worker IWHT4_1D_PACKED 188*c0909341SAndroid Build Coastguard Worker punpckhdq m0, m3 189*c0909341SAndroid Build Coastguard Worker punpckldq m3, m2 190*c0909341SAndroid Build Coastguard Worker punpckhqdq m1, m0, m3 191*c0909341SAndroid Build Coastguard Worker punpcklqdq m0, m3 192*c0909341SAndroid Build Coastguard Worker IWHT4_1D_PACKED 193*c0909341SAndroid Build Coastguard Worker vpblendd m0, m2, 0x33 194*c0909341SAndroid Build Coastguard Worker packssdw m0, m3 195*c0909341SAndroid Build Coastguard Worker vextracti128 xm2, m0, 1 196*c0909341SAndroid Build Coastguard Worker punpckhdq xm1, xm0, xm2 ; out2 out1 197*c0909341SAndroid Build Coastguard Worker punpckldq xm0, xm2 ; out3 out0 198*c0909341SAndroid Build Coastguard Worker movq xm2, [r6 +strideq*1] 199*c0909341SAndroid Build Coastguard Worker movhps xm2, [dstq+strideq*0] 200*c0909341SAndroid Build Coastguard Worker movq xm3, [r6 +strideq*0] 201*c0909341SAndroid Build Coastguard Worker movhps xm3, [dstq+strideq*1] 202*c0909341SAndroid Build Coastguard Worker%ifidn bdmaxd, bdmaxm 203*c0909341SAndroid Build Coastguard Worker movd xm5, bdmaxd 204*c0909341SAndroid Build Coastguard Worker vpbroadcastw xm5, xm5 205*c0909341SAndroid Build Coastguard Worker%else ; win64: load from stack 206*c0909341SAndroid Build Coastguard Worker vpbroadcastw xm5, bdmaxm 207*c0909341SAndroid Build Coastguard Worker%endif 208*c0909341SAndroid Build Coastguard Worker paddsw xm0, xm2 209*c0909341SAndroid Build Coastguard Worker paddsw xm1, xm3 210*c0909341SAndroid Build Coastguard Worker pmaxsw xm0, xm4 211*c0909341SAndroid Build Coastguard Worker pmaxsw xm1, xm4 212*c0909341SAndroid Build Coastguard Worker pminsw xm0, xm5 213*c0909341SAndroid Build Coastguard Worker pminsw xm1, xm5 214*c0909341SAndroid Build Coastguard Worker movhps 
[dstq+strideq*0], xm0 215*c0909341SAndroid Build Coastguard Worker movhps [dstq+strideq*1], xm1 216*c0909341SAndroid Build Coastguard Worker movq [r6 +strideq*0], xm1 217*c0909341SAndroid Build Coastguard Worker movq [r6 +strideq*1], xm0 218*c0909341SAndroid Build Coastguard Worker RET 219*c0909341SAndroid Build Coastguard Worker 220*c0909341SAndroid Build Coastguard Worker; dst1 = (src1 * coef1 - src2 * coef2 + rnd) >> 12 221*c0909341SAndroid Build Coastguard Worker; dst2 = (src1 * coef2 + src2 * coef1 + rnd) >> 12 222*c0909341SAndroid Build Coastguard Worker; flags: 1 = packed, 2 = inv_dst2 223*c0909341SAndroid Build Coastguard Worker; skip round/shift if rnd is not a number 224*c0909341SAndroid Build Coastguard Worker%macro ITX_MULSUB_2D 8-9 0 ; dst/src[1-2], tmp[1-3], rnd, coef[1-2], flags 225*c0909341SAndroid Build Coastguard Worker%if %8 < 32 226*c0909341SAndroid Build Coastguard Worker pmulld m%4, m%1, m%8 227*c0909341SAndroid Build Coastguard Worker pmulld m%3, m%2, m%8 228*c0909341SAndroid Build Coastguard Worker%else 229*c0909341SAndroid Build Coastguard Worker%if %9 & 1 230*c0909341SAndroid Build Coastguard Worker vbroadcasti128 m%3, [pd_%8] 231*c0909341SAndroid Build Coastguard Worker%else 232*c0909341SAndroid Build Coastguard Worker vpbroadcastd m%3, [pd_%8] 233*c0909341SAndroid Build Coastguard Worker%endif 234*c0909341SAndroid Build Coastguard Worker pmulld m%4, m%1, m%3 235*c0909341SAndroid Build Coastguard Worker pmulld m%3, m%2 236*c0909341SAndroid Build Coastguard Worker%endif 237*c0909341SAndroid Build Coastguard Worker%if %7 < 32 238*c0909341SAndroid Build Coastguard Worker pmulld m%1, m%7 239*c0909341SAndroid Build Coastguard Worker pmulld m%2, m%7 240*c0909341SAndroid Build Coastguard Worker%else 241*c0909341SAndroid Build Coastguard Worker%if %9 & 1 242*c0909341SAndroid Build Coastguard Worker vbroadcasti128 m%5, [pd_%7] 243*c0909341SAndroid Build Coastguard Worker%else 244*c0909341SAndroid Build Coastguard Worker vpbroadcastd m%5, [pd_%7] 
245*c0909341SAndroid Build Coastguard Worker%endif 246*c0909341SAndroid Build Coastguard Worker pmulld m%1, m%5 247*c0909341SAndroid Build Coastguard Worker pmulld m%2, m%5 248*c0909341SAndroid Build Coastguard Worker%endif 249*c0909341SAndroid Build Coastguard Worker%if %9 & 2 250*c0909341SAndroid Build Coastguard Worker psubd m%4, m%6, m%4 251*c0909341SAndroid Build Coastguard Worker psubd m%2, m%4, m%2 252*c0909341SAndroid Build Coastguard Worker%else 253*c0909341SAndroid Build Coastguard Worker%ifnum %6 254*c0909341SAndroid Build Coastguard Worker paddd m%4, m%6 255*c0909341SAndroid Build Coastguard Worker%endif 256*c0909341SAndroid Build Coastguard Worker paddd m%2, m%4 257*c0909341SAndroid Build Coastguard Worker%endif 258*c0909341SAndroid Build Coastguard Worker%ifnum %6 259*c0909341SAndroid Build Coastguard Worker paddd m%1, m%6 260*c0909341SAndroid Build Coastguard Worker%endif 261*c0909341SAndroid Build Coastguard Worker psubd m%1, m%3 262*c0909341SAndroid Build Coastguard Worker%ifnum %6 263*c0909341SAndroid Build Coastguard Worker psrad m%2, 12 264*c0909341SAndroid Build Coastguard Worker psrad m%1, 12 265*c0909341SAndroid Build Coastguard Worker%endif 266*c0909341SAndroid Build Coastguard Worker%endmacro 267*c0909341SAndroid Build Coastguard Worker 268*c0909341SAndroid Build Coastguard Worker%macro INV_TXFM_FN 4-5 10 ; type1, type2, eob_offset, size, bitdepth 269*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_%1_%2_%4_%5bpc, 4, 5, 0, dst, stride, c, eob, tx2 270*c0909341SAndroid Build Coastguard Worker %define %%p1 m(i%1_%4_internal_%5bpc) 271*c0909341SAndroid Build Coastguard Worker ; Jump to the 1st txfm function if we're not taking the fast path, which 272*c0909341SAndroid Build Coastguard Worker ; in turn performs an indirect jump to the 2nd txfm function. 
273*c0909341SAndroid Build Coastguard Worker lea tx2q, [m(i%2_%4_internal_%5bpc).pass2] 274*c0909341SAndroid Build Coastguard Worker%ifidn %1_%2, dct_dct 275*c0909341SAndroid Build Coastguard Worker test eobd, eobd 276*c0909341SAndroid Build Coastguard Worker jnz %%p1 277*c0909341SAndroid Build Coastguard Worker%else 278*c0909341SAndroid Build Coastguard Worker%if %3 279*c0909341SAndroid Build Coastguard Worker add eobd, %3 280*c0909341SAndroid Build Coastguard Worker%endif 281*c0909341SAndroid Build Coastguard Worker ; jump to the 1st txfm function unless it's located directly after this 282*c0909341SAndroid Build Coastguard Worker times ((%%end - %%p1) >> 31) & 1 jmp %%p1 283*c0909341SAndroid Build Coastguard WorkerALIGN function_align 284*c0909341SAndroid Build Coastguard Worker%%end: 285*c0909341SAndroid Build Coastguard Worker%endif 286*c0909341SAndroid Build Coastguard Worker%endmacro 287*c0909341SAndroid Build Coastguard Worker 288*c0909341SAndroid Build Coastguard Worker%macro INV_TXFM_4X4_FN 2-3 10 ; type1, type2, bitdepth 289*c0909341SAndroid Build Coastguard Worker INV_TXFM_FN %1, %2, 0, 4x4, %3 290*c0909341SAndroid Build Coastguard Worker%ifidn %1_%2, dct_dct 291*c0909341SAndroid Build Coastguard Worker vpbroadcastd xm2, [dconly_%3bpc] 292*c0909341SAndroid Build Coastguard Worker%if %3 = 10 293*c0909341SAndroid Build Coastguard Worker.dconly: 294*c0909341SAndroid Build Coastguard Worker imul r6d, [cq], 181 295*c0909341SAndroid Build Coastguard Worker mov [cq], eobd ; 0 296*c0909341SAndroid Build Coastguard Worker or r3d, 4 297*c0909341SAndroid Build Coastguard Worker.dconly2: 298*c0909341SAndroid Build Coastguard Worker add r6d, 128 299*c0909341SAndroid Build Coastguard Worker sar r6d, 8 300*c0909341SAndroid Build Coastguard Worker.dconly3: 301*c0909341SAndroid Build Coastguard Worker imul r6d, 181 302*c0909341SAndroid Build Coastguard Worker add r6d, 2176 303*c0909341SAndroid Build Coastguard Worker sar r6d, 12 304*c0909341SAndroid Build Coastguard 
Worker movd xm0, r6d 305*c0909341SAndroid Build Coastguard Worker paddsw xm0, xm2 306*c0909341SAndroid Build Coastguard Worker vpbroadcastw xm0, xm0 307*c0909341SAndroid Build Coastguard Worker.dconly_loop: 308*c0909341SAndroid Build Coastguard Worker movq xm1, [dstq+strideq*0] 309*c0909341SAndroid Build Coastguard Worker movhps xm1, [dstq+strideq*1] 310*c0909341SAndroid Build Coastguard Worker paddsw xm1, xm0 311*c0909341SAndroid Build Coastguard Worker psubusw xm1, xm2 312*c0909341SAndroid Build Coastguard Worker movq [dstq+strideq*0], xm1 313*c0909341SAndroid Build Coastguard Worker movhps [dstq+strideq*1], xm1 314*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+strideq*2] 315*c0909341SAndroid Build Coastguard Worker sub r3d, 2 316*c0909341SAndroid Build Coastguard Worker jg .dconly_loop 317*c0909341SAndroid Build Coastguard Worker WRAP_XMM RET 318*c0909341SAndroid Build Coastguard Worker%else 319*c0909341SAndroid Build Coastguard Worker jmp m(inv_txfm_add_dct_dct_4x4_10bpc).dconly 320*c0909341SAndroid Build Coastguard Worker%endif 321*c0909341SAndroid Build Coastguard Worker%endif 322*c0909341SAndroid Build Coastguard Worker%endmacro 323*c0909341SAndroid Build Coastguard Worker 324*c0909341SAndroid Build Coastguard Worker%macro IDCT4_1D_PACKED 6 ; dst/src[1-2], tmp[1-3], rnd 325*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D %1, %2, %3, %4, %5, %6, 2896_1567, 2896_3784, 1 326*c0909341SAndroid Build Coastguard Worker punpckhqdq m%3, m%2, m%1 ; t3 t2 327*c0909341SAndroid Build Coastguard Worker punpcklqdq m%2, m%1 ; t0 t1 328*c0909341SAndroid Build Coastguard Worker paddd m%1, m%2, m%3 ; out0 out1 329*c0909341SAndroid Build Coastguard Worker psubd m%2, m%3 ; out3 out2 330*c0909341SAndroid Build Coastguard Worker%endmacro 331*c0909341SAndroid Build Coastguard Worker 332*c0909341SAndroid Build Coastguard Worker%macro IDCT4_1D_PACKED_WORD 6 ; dst/src[1-2], tmp[1-3], rnd 333*c0909341SAndroid Build Coastguard Worker vpbroadcastd m%5, [pw_m3784_1567] 
334*c0909341SAndroid Build Coastguard Worker punpckhwd m%3, m%2, m%1 335*c0909341SAndroid Build Coastguard Worker vpbroadcastd m%4, [pw_1567_3784] 336*c0909341SAndroid Build Coastguard Worker punpcklwd m%2, m%1 337*c0909341SAndroid Build Coastguard Worker vpbroadcastd m%1, [pw_m2896_2896] 338*c0909341SAndroid Build Coastguard Worker pmaddwd m%5, m%3 339*c0909341SAndroid Build Coastguard Worker pmaddwd m%3, m%4 340*c0909341SAndroid Build Coastguard Worker vpbroadcastd m%4, [pw_2896_2896] 341*c0909341SAndroid Build Coastguard Worker pmaddwd m%1, m%2 342*c0909341SAndroid Build Coastguard Worker pmaddwd m%2, m%4 343*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m%6}, m%5, m%3, m%1, m%2 344*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 12 }, m%5, m%3, m%1, m%2 345*c0909341SAndroid Build Coastguard Worker packssdw m%3, m%5 ; t3 t2 346*c0909341SAndroid Build Coastguard Worker packssdw m%2, m%1 ; t0 t1 347*c0909341SAndroid Build Coastguard Worker paddsw m%1, m%2, m%3 ; out0 out1 348*c0909341SAndroid Build Coastguard Worker psubsw m%2, m%3 ; out3 out2 349*c0909341SAndroid Build Coastguard Worker%endmacro 350*c0909341SAndroid Build Coastguard Worker 351*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X4_FN dct, dct 352*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X4_FN dct, identity 353*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X4_FN dct, adst 354*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X4_FN dct, flipadst 355*c0909341SAndroid Build Coastguard Worker 356*c0909341SAndroid Build Coastguard Workercglobal idct_4x4_internal_10bpc, 0, 7, 6, dst, stride, c, eob, tx2 357*c0909341SAndroid Build Coastguard Worker call .main 358*c0909341SAndroid Build Coastguard Worker vbroadcasti128 m2, [idct4_shuf] 359*c0909341SAndroid Build Coastguard Worker packssdw m0, m1 360*c0909341SAndroid Build Coastguard Worker pshufb m0, m2 361*c0909341SAndroid Build Coastguard Worker jmp tx2q 362*c0909341SAndroid Build Coastguard Worker.pass2: 
363*c0909341SAndroid Build Coastguard Worker vextracti128 xm1, m0, 1 364*c0909341SAndroid Build Coastguard Worker WRAP_XMM IDCT4_1D_PACKED_WORD 0, 1, 2, 3, 4, 5 365*c0909341SAndroid Build Coastguard Worker packssdw xm5, xm5 ; pw_2048 366*c0909341SAndroid Build Coastguard Worker pmulhrsw xm0, xm5 367*c0909341SAndroid Build Coastguard Worker pmulhrsw xm1, xm5 368*c0909341SAndroid Build Coastguard Worker movq xm2, [dstq+strideq*0] 369*c0909341SAndroid Build Coastguard Worker movhps xm2, [dstq+strideq*1] 370*c0909341SAndroid Build Coastguard Worker lea r6, [dstq+strideq*2] 371*c0909341SAndroid Build Coastguard Worker movq xm3, [r6 +strideq*1] 372*c0909341SAndroid Build Coastguard Worker movhps xm3, [r6 +strideq*0] 373*c0909341SAndroid Build Coastguard Worker vpbroadcastd xm5, [pixel_10bpc_max] 374*c0909341SAndroid Build Coastguard Worker pxor m4, m4 375*c0909341SAndroid Build Coastguard Worker mova [cq+32*0], m4 376*c0909341SAndroid Build Coastguard Worker mova [cq+32*1], m4 377*c0909341SAndroid Build Coastguard Worker paddw xm0, xm2 378*c0909341SAndroid Build Coastguard Worker paddw xm1, xm3 379*c0909341SAndroid Build Coastguard Worker pmaxsw xm0, xm4 380*c0909341SAndroid Build Coastguard Worker pmaxsw xm1, xm4 381*c0909341SAndroid Build Coastguard Worker pminsw xm0, xm5 382*c0909341SAndroid Build Coastguard Worker pminsw xm1, xm5 383*c0909341SAndroid Build Coastguard Worker movq [dstq+strideq*0], xm0 384*c0909341SAndroid Build Coastguard Worker movhps [dstq+strideq*1], xm0 385*c0909341SAndroid Build Coastguard Worker movhps [r6 +strideq*0], xm1 386*c0909341SAndroid Build Coastguard Worker movq [r6 +strideq*1], xm1 387*c0909341SAndroid Build Coastguard Worker RET 388*c0909341SAndroid Build Coastguard WorkerALIGN function_align 389*c0909341SAndroid Build Coastguard Worker.main: 390*c0909341SAndroid Build Coastguard Worker vpermq m0, [cq+32*0], q3120 391*c0909341SAndroid Build Coastguard Worker vpermq m1, [cq+32*1], q3120 392*c0909341SAndroid Build Coastguard Worker 
vpbroadcastd m5, [pd_2048] 393*c0909341SAndroid Build Coastguard Worker.main2: 394*c0909341SAndroid Build Coastguard Worker IDCT4_1D_PACKED 0, 1, 2, 3, 4, 5 395*c0909341SAndroid Build Coastguard Worker ret 396*c0909341SAndroid Build Coastguard Worker 397*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X4_FN adst, dct 398*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X4_FN adst, adst 399*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X4_FN adst, flipadst 400*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X4_FN adst, identity 401*c0909341SAndroid Build Coastguard Worker 402*c0909341SAndroid Build Coastguard Worker%macro IADST4_1D 0 403*c0909341SAndroid Build Coastguard Worker vpbroadcastd m5, [pd_1321] 404*c0909341SAndroid Build Coastguard Worker vpbroadcastd m7, [pd_2482] 405*c0909341SAndroid Build Coastguard Worker pmulld m4, m0, m5 ; 1321*in0 406*c0909341SAndroid Build Coastguard Worker pmulld m6, m3, m7 ; 2482*in3 407*c0909341SAndroid Build Coastguard Worker paddd m4, m6 ; 1321*in0 + 2482*in3 408*c0909341SAndroid Build Coastguard Worker pmulld m6, m0, m7 ; 2482*in0 409*c0909341SAndroid Build Coastguard Worker paddd m0, m3 ; in0 + in3 410*c0909341SAndroid Build Coastguard Worker paddd m7, m5 ; pd_3803 411*c0909341SAndroid Build Coastguard Worker pmulld m5, m2 ; 1321*in2 412*c0909341SAndroid Build Coastguard Worker pmulld m3, m7 ; 3803*in3 413*c0909341SAndroid Build Coastguard Worker pmulld m7, m2 ; 3803*in2 414*c0909341SAndroid Build Coastguard Worker psubd m2, m0 ; in2 - in0 - in3 415*c0909341SAndroid Build Coastguard Worker vpbroadcastd m0, [pd_m3344] 416*c0909341SAndroid Build Coastguard Worker pmulld m1, m0 ; -t3 417*c0909341SAndroid Build Coastguard Worker pmulld m2, m0 ; out2 (unrounded) 418*c0909341SAndroid Build Coastguard Worker psubd m6, m5 ; 2482*in0 - 1321*in2 419*c0909341SAndroid Build Coastguard Worker paddd m4, m7 ; t0 420*c0909341SAndroid Build Coastguard Worker psubd m6, m3 ; t1 421*c0909341SAndroid Build Coastguard Worker paddd m3, 
m4, m6 422*c0909341SAndroid Build Coastguard Worker psubd m4, m1 ; out0 (unrounded) 423*c0909341SAndroid Build Coastguard Worker psubd m6, m1 ; out1 (unrounded) 424*c0909341SAndroid Build Coastguard Worker paddd m3, m1 ; out3 (unrounded) 425*c0909341SAndroid Build Coastguard Worker%endmacro 426*c0909341SAndroid Build Coastguard Worker 427*c0909341SAndroid Build Coastguard Workercglobal iadst_4x4_internal_10bpc, 0, 7, 6, dst, stride, c, eob, tx2 428*c0909341SAndroid Build Coastguard Worker call .main 429*c0909341SAndroid Build Coastguard Worker vinserti128 m0, m4, xm6, 1 430*c0909341SAndroid Build Coastguard Worker vinserti128 m1, m2, xm3, 1 431*c0909341SAndroid Build Coastguard Worker.pass1_end: 432*c0909341SAndroid Build Coastguard Worker vpbroadcastd m5, [pd_2048] 433*c0909341SAndroid Build Coastguard Worker mova m2, [itx4_shuf] 434*c0909341SAndroid Build Coastguard Worker paddd m0, m5 435*c0909341SAndroid Build Coastguard Worker paddd m1, m5 436*c0909341SAndroid Build Coastguard Worker psrad m0, 12 437*c0909341SAndroid Build Coastguard Worker psrad m1, 12 438*c0909341SAndroid Build Coastguard Worker packssdw m0, m1 439*c0909341SAndroid Build Coastguard Worker vpermd m0, m2, m0 440*c0909341SAndroid Build Coastguard Worker psrld m2, 4 441*c0909341SAndroid Build Coastguard Worker pshufb m0, m2 442*c0909341SAndroid Build Coastguard Worker%if WIN64 443*c0909341SAndroid Build Coastguard Worker movaps xmm6, [rsp+ 8] 444*c0909341SAndroid Build Coastguard Worker movaps xmm7, [rsp+24] 445*c0909341SAndroid Build Coastguard Worker%endif 446*c0909341SAndroid Build Coastguard Worker jmp tx2q 447*c0909341SAndroid Build Coastguard Worker.pass2: 448*c0909341SAndroid Build Coastguard Worker lea r6, [deint_shuf+128] 449*c0909341SAndroid Build Coastguard Worker vextracti128 xm1, m0, 1 450*c0909341SAndroid Build Coastguard Worker call m(iadst_4x4_internal_8bpc).main 451*c0909341SAndroid Build Coastguard Worker.end: 452*c0909341SAndroid Build Coastguard Worker vpbroadcastd xm4, 
[pw_2048] 453*c0909341SAndroid Build Coastguard Worker movq xm2, [dstq+strideq*0] 454*c0909341SAndroid Build Coastguard Worker movhps xm2, [dstq+strideq*1] 455*c0909341SAndroid Build Coastguard Worker lea r6, [dstq+strideq*2] 456*c0909341SAndroid Build Coastguard Worker movq xm3, [r6 +strideq*0] 457*c0909341SAndroid Build Coastguard Worker movhps xm3, [r6 +strideq*1] 458*c0909341SAndroid Build Coastguard Worker vpbroadcastd xm5, [pixel_10bpc_max] 459*c0909341SAndroid Build Coastguard Worker pmulhrsw xm0, xm4 460*c0909341SAndroid Build Coastguard Worker pmulhrsw xm1, xm4 461*c0909341SAndroid Build Coastguard Worker pxor m4, m4 462*c0909341SAndroid Build Coastguard Worker mova [cq+32*0], m4 463*c0909341SAndroid Build Coastguard Worker mova [cq+32*1], m4 464*c0909341SAndroid Build Coastguard Worker paddw xm0, xm2 465*c0909341SAndroid Build Coastguard Worker paddw xm1, xm3 466*c0909341SAndroid Build Coastguard Worker pmaxsw xm0, xm4 467*c0909341SAndroid Build Coastguard Worker pmaxsw xm1, xm4 468*c0909341SAndroid Build Coastguard Worker pminsw xm0, xm5 469*c0909341SAndroid Build Coastguard Worker pminsw xm1, xm5 470*c0909341SAndroid Build Coastguard Worker movq [dstq+strideq*0], xm0 471*c0909341SAndroid Build Coastguard Worker movhps [dstq+strideq*1], xm0 472*c0909341SAndroid Build Coastguard Worker movq [r6 +strideq*0], xm1 473*c0909341SAndroid Build Coastguard Worker movhps [r6 +strideq*1], xm1 474*c0909341SAndroid Build Coastguard Worker RET 475*c0909341SAndroid Build Coastguard WorkerALIGN function_align 476*c0909341SAndroid Build Coastguard Worker.main: 477*c0909341SAndroid Build Coastguard Worker mova xm0, [cq+16*0] 478*c0909341SAndroid Build Coastguard Worker mova xm1, [cq+16*1] 479*c0909341SAndroid Build Coastguard Worker mova xm2, [cq+16*2] 480*c0909341SAndroid Build Coastguard Worker mova xm3, [cq+16*3] 481*c0909341SAndroid Build Coastguard Worker%if WIN64 482*c0909341SAndroid Build Coastguard Worker movaps [rsp+16], xmm6 483*c0909341SAndroid Build Coastguard 
; Remainder of the .main helper that is referenced elsewhere in this file as
; m(iadst_4x4_internal_10bpc).main. This store is the second of the two WIN64
; spills; the "%if WIN64" it belongs to is opened on the preceding lines.
    movaps          [rsp+32], xmm7
%endif
.main2:
    WRAP_XMM IADST4_1D
    ret

INV_TXFM_4X4_FN flipadst, dct
INV_TXFM_4X4_FN flipadst, adst
INV_TXFM_4X4_FN flipadst, flipadst
INV_TXFM_4X4_FN flipadst, identity

;------------------------------------------------------------------------------
; iflipadst_4x4_internal_10bpc
;
; Pass 1: calls m(iadst_4x4_internal_10bpc).main and gathers its results as
;         m0 = xm3 | xm2 << 128, m1 = xm6 | xm4 << 128, then continues at
;         m(iadst_4x4_internal_10bpc).pass1_end.
; Pass 2: calls the 8bpc 4x4 iadst .main on xm0/xm1, scales the result with
;         pmulhrsw by pw_2048, adds it to four 8-byte rows of dst, clamps the
;         words to [0, pixel_10bpc_max], clears 64 bytes at cq and stores.
;         The rows are loaded and stored in swapped order: xm1 carries
;         rows 1 (low half) / 0 (high half), xm0 carries rows 3 / 2.
;------------------------------------------------------------------------------
cglobal iflipadst_4x4_internal_10bpc, 0, 7, 6, dst, stride, c, eob, tx2
    call m(iadst_4x4_internal_10bpc).main
    vinserti128     m0, m3, xm2, 1
    vinserti128     m1, m6, xm4, 1
    jmp m(iadst_4x4_internal_10bpc).pass1_end
.pass2:
    ; NOTE(review): r6 is presumably the constant base pointer the 8bpc
    ; routine addresses its tables from - confirm in the 8bpc source.
    lea             r6, [deint_shuf+128]
    vextracti128    xm1, m0, 1
    call m(iadst_4x4_internal_8bpc).main
    vpbroadcastd    xm4, [pw_2048]
    movq            xm3, [dstq+strideq*1]       ; row 1 -> low half
    movhps          xm3, [dstq+strideq*0]       ; row 0 -> high half
    lea             r6, [dstq+strideq*2]        ; r6 = &row 2 (reuses r6)
    movq            xm2, [r6  +strideq*1]       ; row 3 -> low half
    movhps          xm2, [r6  +strideq*0]       ; row 2 -> high half
    vpbroadcastd    xm5, [pixel_10bpc_max]
    pmulhrsw        xm0, xm4
    pmulhrsw        xm1, xm4
    pxor            m4, m4
    mova            [cq+32*0], m4               ; zero the coefficient buffer
    mova            [cq+32*1], m4
    paddw           xm0, xm2
    paddw           xm1, xm3
    pmaxsw          xm0, xm4                    ; clamp below at 0
    pmaxsw          xm1, xm4
    pminsw          xm0, xm5                    ; clamp above at pixel max
    pminsw          xm1, xm5
    movhps          [dstq+strideq*0], xm1
    movq            [dstq+strideq*1], xm1
    movhps          [r6  +strideq*0], xm0
    movq            [r6  +strideq*1], xm0
    RET

INV_TXFM_4X4_FN identity, dct
INV_TXFM_4X4_FN identity, adst
INV_TXFM_4X4_FN identity, flipadst
INV_TXFM_4X4_FN identity, identity

;------------------------------------------------------------------------------
; iidentity_4x4_internal_10bpc
;
; Pass 1: multiplies the two 32-byte coefficient vectors at cq by pd_5793,
;         adds pd_2048, shifts right by 12, packs to signed words and
;         reorders them with itx4_shuf (dword permute, then a byte shuffle
;         using the same table shifted right by 4). Ends with a jump to the
;         pass-2 routine held in tx2q.
; Pass 2: m0 += pmulhrsw(m0, pw_1697x8), then pmulhrsw by 2048, add to the
;         four dst rows, clamp to [0, pixel_10bpc_max], clear cq, store.
;------------------------------------------------------------------------------
cglobal iidentity_4x4_internal_10bpc, 0, 7, 6, dst, stride, c, eob, tx2
    vpbroadcastd    m1, [pd_5793]
    pmulld          m0, m1, [cq+32*0]
    pmulld          m1, [cq+32*1]
    vpbroadcastd    m5, [pd_2048]
    mova            m3, [itx4_shuf]
    paddd           m0, m5
    paddd           m1, m5
    psrad           m0, 12
    psrad           m1, 12
    packssdw        m0, m1
    vpermd          m0, m3, m0
    psrld           m3, 4                       ; same table, next bit field
    pshufb          m0, m3
    jmp             tx2q
.pass2:
    vpbroadcastd    m1, [pw_1697x8]
    movq            xm2, [dstq+strideq*0]
    movhps          xm2, [dstq+strideq*1]
    lea             r6, [dstq+strideq*2]
    pmulhrsw        m1, m0
    paddsw          m0, m1
    movq            xm3, [r6  +strideq*0]
    movhps          xm3, [r6  +strideq*1]
    vpbroadcastd    xm4, [pixel_10bpc_max]
    ; NOTE(review): this relies on m5 still holding the pd_2048 dwords when
    ; .pass2 is entered - confirm every pass 1 that can chain here leaves it.
    packssdw        m5, m5 ; pw_2048
    pmulhrsw        m0, m5
    pxor            m5, m5
    mova            [cq+32*0], m5               ; zero the coefficient buffer
    mova            [cq+32*1], m5
    vextracti128    xm1, m0, 1
    paddw           xm0, xm2
    paddw           xm1, xm3
    pmaxsw          xm0, xm5
    pmaxsw          xm1, xm5
    pminsw          xm0, xm4
    pminsw          xm1, xm4
    movq            [dstq+strideq*0], xm0
    movhps          [dstq+strideq*1], xm0
    movq            [r6  +strideq*0], xm1
    movhps          [r6  +strideq*1], xm1
    RET

INV_TXFM_4X4_FN dct,      dct,      12
INV_TXFM_4X4_FN dct,      identity, 12
INV_TXFM_4X4_FN dct,      adst,     12
INV_TXFM_4X4_FN dct,      flipadst, 12

;------------------------------------------------------------------------------
; idct_4x4_internal_12bpc
;
; Pass 1: runs the 10bpc idct .main, reorders m0/m1 into m1/m2 with the
;         idct4_12_shuf / idct4_12_shuf2 dword permutes and joins
;         m(iadst_4x4_internal_12bpc).pass1_end2 (recombine + clamp).
; Pass 2: loads m5 = pd_2048, swaps the middle qwords of m0/m1, runs the 10bpc
;         idct .main2, permutes the results again and joins
;         m(iadst_4x4_internal_12bpc).end for the shared store sequence.
;------------------------------------------------------------------------------
cglobal idct_4x4_internal_12bpc, 0, 7, 8, dst, stride, c, eob, tx2
    call m(idct_4x4_internal_10bpc).main
    mova            m3, [idct4_12_shuf]
    mova            m4, [idct4_12_shuf2]
    vpermd          m2, m4, m1
    vpermd          m1, m3, m0
    jmp m(iadst_4x4_internal_12bpc).pass1_end2
.pass2:
    vpbroadcastd    m5, [pd_2048]
    vpermq          m0, m0, q3120
    vpermq          m1, m1, q3120
    call m(idct_4x4_internal_10bpc).main2
    vpermq          m0, m0, q3120
    vpermq          m1, m1, q2031
    jmp m(iadst_4x4_internal_12bpc).end

INV_TXFM_4X4_FN adst, dct,      12
INV_TXFM_4X4_FN adst, adst,     12
INV_TXFM_4X4_FN adst, flipadst, 12
INV_TXFM_4X4_FN adst, identity, 12

;------------------------------------------------------------------------------
; iadst_4x4_internal_12bpc
;
; Besides its own two passes this function hosts the tails shared by the
; other 4x4 12bpc transforms in this file:
;   .pass1_end   in: m1, m2  - >>1, itx4_shuf dword permute, +pd_1024, >>11
;   .pass1_end2  in: m1, m2  - m0 = low qwords, m1 = high qwords, both
;                              clamped to [clip_18b_min, clip_18b_max];
;                              jumps to tx2q
;   .pass2_end   in: m0, m1  - +pd_2048, >>12
;   .end         in: m0, m1  - WIN64 register restore, then .end2
;   .end2        in: m0, m1  - >>3, pack to words, pmulhrsw by pw_16384, add
;                              to dst, clamp to [0, pixel_12bpc_max], clear
;                              cq, store four rows
;   .main_pass2  in: m0, m1  - splits the two ymm inputs into xm0..xm3 and
;                              tail-jumps to the 10bpc iadst .main2
;------------------------------------------------------------------------------
cglobal iadst_4x4_internal_12bpc, 0, 7, 8, dst, stride, c, eob, tx2
    call m(iadst_4x4_internal_10bpc).main
    vinserti128     m1, m4, xm6, 1
    vinserti128     m2, m2, xm3, 1
.pass1_end:
    mova            m3, [itx4_shuf]
    vpbroadcastd    m5, [pd_1024]
    psrad           m1, 1
    psrad           m2, 1
    vpermd          m1, m3, m1
    vpermd          m2, m3, m2
    paddd           m1, m5
    paddd           m2, m5
    psrad           m1, 11
    psrad           m2, 11
.pass1_end2:
    vpbroadcastd    m3, [clip_18b_min]
    vpbroadcastd    m4, [clip_18b_max]
    punpcklqdq      m0, m1, m2
    punpckhqdq      m1, m2
    pmaxsd          m0, m3
    pmaxsd          m1, m3
    pminsd          m0, m4
    pminsd          m1, m4
    jmp             tx2q
.pass2:
    call .main_pass2
    vinserti128     m0, m4, xm6, 1
    vinserti128     m1, m2, xm3, 1
.pass2_end:
    vpbroadcastd    m5, [pd_2048]
    paddd           m0, m5
    paddd           m1, m5
    psrad           m0, 12
    psrad           m1, 12
.end:
%if WIN64
    ; NOTE(review): restores the saved XMM registers here and lowers
    ; xmm_regs_used, presumably so that the RET macro below does not restore
    ; them a second time - confirm against x86inc.asm.
    WIN64_RESTORE_XMM_INTERNAL
    %assign xmm_regs_used 6
%endif
.end2:
    vpbroadcastd    m4, [pw_16384]
    movq            xm2, [dstq+strideq*0]
    movq            xm3, [dstq+strideq*1]
    lea             r6, [dstq+strideq*2]
    movhps          xm2, [r6  +strideq*0]       ; dst0 dst2
    movhps          xm3, [r6  +strideq*1]       ; dst1 dst3
    vpbroadcastd    m5, [pixel_12bpc_max]
    vinserti128     m2, m2, xm3, 1
    psrad           m0, 3
    psrad           m1, 3
    packssdw        m0, m1                      ; t0 t2 t1 t3
    pmulhrsw        m0, m4
    pxor            m4, m4
    mova            [cq+32*0], m4               ; zero the coefficient buffer
    mova            [cq+32*1], m4
    paddw           m0, m2                      ; out0 out2 out1 out3
    pmaxsw          m0, m4
    pminsw          m0, m5
    vextracti128    xm1, m0, 1                  ; out1 out3
    movq            [dstq+strideq*0], xm0
    movq            [dstq+strideq*1], xm1
    movhps          [r6  +strideq*0], xm0
    movhps          [r6  +strideq*1], xm1
    RET
.main_pass2:
    vextracti128    xm3, m1, 1                  ; xm3 = high half of m1
    mova            xm2, xm1                    ; xm2 = low  half of m1
    vextracti128    xm1, m0, 1                  ; xm1 = high half of m0
    jmp m(iadst_4x4_internal_10bpc).main2

INV_TXFM_4X4_FN flipadst, dct,      12
INV_TXFM_4X4_FN flipadst, adst,     12
INV_TXFM_4X4_FN flipadst, flipadst, 12
INV_TXFM_4X4_FN flipadst, identity, 12

;------------------------------------------------------------------------------
; iflipadst_4x4_internal_12bpc
;
; Uses the same helpers as iadst_4x4_internal_12bpc but gathers their outputs
; in the opposite order (xm3|xm2 and xm6|xm4 instead of xm4|xm6 and xm2|xm3)
; before joining that function's .pass1_end / .pass2_end tails.
;------------------------------------------------------------------------------
cglobal iflipadst_4x4_internal_12bpc, 0, 7, 8, dst, stride, c, eob, tx2
    call m(iadst_4x4_internal_10bpc).main
    vinserti128     m1, m3, xm2, 1
    vinserti128     m2, m6, xm4, 1
    jmp m(iadst_4x4_internal_12bpc).pass1_end
.pass2:
    call m(iadst_4x4_internal_12bpc).main_pass2
    vinserti128     m0, m3, xm2, 1
    vinserti128     m1, m6, xm4, 1
    jmp m(iadst_4x4_internal_12bpc).pass2_end

INV_TXFM_4X4_FN identity, dct,      12
INV_TXFM_4X4_FN identity, adst,     12
INV_TXFM_4X4_FN identity, flipadst, 12
INV_TXFM_4X4_FN identity, identity, 12

;------------------------------------------------------------------------------
; iidentity_4x4_internal_12bpc
;
; Pass 1: loads both coefficient vectors through an itx4_shuf dword permute
;         and computes x + ((x * pd_1697 + pd_2048) >> 12) for each, leaving
;         the results in m1/m2 for m(iadst_4x4_internal_12bpc).pass1_end2.
; Pass 2: (x * pd_5793 + pd_2048) >> 12 on m0/m1, then the shared .end tail.
;------------------------------------------------------------------------------
cglobal iidentity_4x4_internal_12bpc, 0, 7, 8, dst, stride, c, eob, tx2
    mova            m2, [itx4_shuf]
    vpbroadcastd    m3, [pd_1697]
    vpermd          m0, m2, [cq+32*0]
    vpermd          m2, m2, [cq+32*1]
    vpbroadcastd    m5, [pd_2048]
    pmulld          m1, m3, m0
    pmulld          m3, m2
    paddd           m1, m5
    paddd           m3, m5
    psrad           m1, 12
    psrad           m3, 12
    paddd           m1, m0
    paddd           m2, m3
    jmp m(iadst_4x4_internal_12bpc).pass1_end2
.pass2:
    ; m0 = in0 in1
    ; m1 = in2 in3
    vpbroadcastd    m3, [pd_5793]
    vpbroadcastd    m5, [pd_2048]
    pmulld          m0, m3
    pmulld          m1, m3
    paddd           m0, m5                      ; 2048
    paddd           m1, m5
    psrad           m0, 12
    psrad           m1, 12
    jmp m(iadst_4x4_internal_12bpc).end
;------------------------------------------------------------------------------
; INV_TXFM_4X8_FN type1, type2 [, bitdepth = 10]
;
; Instantiates INV_TXFM_FN for the 4x8 size. For the dct_dct combination it
; additionally emits the DC-only code: xm2 is loaded from dconly_<bitdepth>bpc
; and, for 10bpc, a .dconly label is defined that computes
;     r6d = (([cq] * 181 + 128) >> 8) * 181
; overwrites [cq] with eobd, ORs 8 into r3d and continues at the 4x4 10bpc
; .dconly2. Any other bit depth jumps to that 10bpc .dconly label instead.
; NOTE(review): the value OR-ed into r3d is presumably the row count consumed
; by .dconly2 - confirm at that label.
;------------------------------------------------------------------------------
%macro INV_TXFM_4X8_FN 2-3 10 ; type1, type2, bitdepth
    INV_TXFM_FN     %1, %2, 0, 4x8, %3
%ifidn %1_%2, dct_dct
    vpbroadcastd    xm2, [dconly_%3bpc]
%if %3 = 10
.dconly:
    imul            r6d, [cq], 181
    mov             [cq], eobd ; 0
    or              r3d, 8
    add             r6d, 128
    sar             r6d, 8
    imul            r6d, 181
    jmp m(inv_txfm_add_dct_dct_4x4_10bpc).dconly2
%else
    jmp m(inv_txfm_add_dct_dct_4x8_10bpc).dconly
%endif
%endif
%endmacro

;------------------------------------------------------------------------------
; IDCT4_1D src1, src2, src3, src4, tmp1, tmp2, tmp3, rnd
;
; All arguments are vector register numbers. src2/src4 go through
; ITX_MULSUB_2D with the 1567/3784 constant pair; src1/src3 are multiplied by
; pd_2896, rounded with m<rnd> and shifted right by 12. The four results are
; left in m<src1>..m<src4>; m<tmp1> is overwritten here, and tmp2/tmp3 are
; handed to ITX_MULSUB_2D.
;------------------------------------------------------------------------------
%macro IDCT4_1D 8 ; src[1-4], tmp[1-3], rnd
    ITX_MULSUB_2D   %2, %4, %5, %6, %7, %8, 1567, 3784 ; t2, t3
    vpbroadcastd    m%5, [pd_2896]
    pmulld          m%1, m%5
    pmulld          m%3, m%5
    paddd           m%1, m%8
    paddd           m%5, m%1, m%3
    psubd           m%1, m%3
    psrad           m%5, 12                     ; t0
    psrad           m%1, 12                     ; t1
    psubd           m%3, m%1, m%2
    paddd           m%2, m%1
    paddd           m%1, m%5, m%4
    psubd           m%4, m%5, m%4
%endmacro

INV_TXFM_4X8_FN dct, dct
INV_TXFM_4X8_FN dct, identity
INV_TXFM_4X8_FN dct, adst
INV_TXFM_4X8_FN dct, flipadst

;------------------------------------------------------------------------------
; idct_4x8_internal_10bpc
;
; Pass 1: scales the four 32-byte coefficient vectors at cq by pd_2896 with
;         pd_2048 rounding and a 12-bit shift, runs IDCT4_1D on m0..m3 and
;         jumps to the pass-2 routine in tx2q.
; Pass 2: packs to words, transposes, calls the 8bpc 4x8 idct .main, scales
;         with pmulhrsw by pw_2048, adds the result to eight 8-byte rows of
;         dst, clamps to [0, pixel_10bpc_max], clears 128 bytes at cq and
;         stores. xm1 and xm3 hold their row pairs swapped (3 2 / 7 6), which
;         is why those rows are loaded and stored with movq/movhps exchanged.
;------------------------------------------------------------------------------
cglobal idct_4x8_internal_10bpc, 0, 7, 8, dst, stride, c, eob, tx2
.pass1:
    vpbroadcastd    m3, [pd_2896]
    pmulld          m0, m3, [cq+32*0]
    pmulld          m1, m3, [cq+32*1]
    pmulld          m2, m3, [cq+32*2]
    pmulld          m3, [cq+32*3]
    vpbroadcastd    m7, [pd_2048]
    REPX {paddd x, m7}, m0, m1, m2, m3
    REPX {psrad x, 12}, m0, m1, m2, m3
    IDCT4_1D        0, 1, 2, 3, 4, 5, 6, 7
    jmp             tx2q
.pass2:
    packssdw        m0, m2
    packssdw        m1, m3
    ; NOTE(review): r6 is presumably the constant base pointer the 8bpc
    ; routine addresses its tables from - confirm in the 8bpc source.
    lea             r6, [deint_shuf+128]
    punpckhwd       m2, m0, m1
    punpcklwd       m0, m1
    punpckhdq       m1, m0, m2                  ; 2 3
    punpckldq       m0, m2                      ; 0 1
    vextracti128    xm2, m0, 1                  ; 4 5
    vextracti128    xm3, m1, 1                  ; 6 7
    call m(idct_4x8_internal_8bpc).main
    vpbroadcastd    xm4, [pw_2048]
    REPX {pmulhrsw x, xm4}, xm0, xm1, xm2, xm3
    lea             r3, [strideq*3]
    lea             r6, [dstq+strideq*4]        ; r6 = &row 4 (reuses r6)
    movq            xm4, [dstq+strideq*0]
    movhps          xm4, [dstq+strideq*1]
    movq            xm5, [dstq+r3       ]
    movhps          xm5, [dstq+strideq*2]
    movq            xm6, [r6  +strideq*0]
    movhps          xm6, [r6  +strideq*1]
    movq            xm7, [r6  +r3       ]
    movhps          xm7, [r6  +strideq*2]
    paddw           xm0, xm4                    ; 0 1
    paddw           xm1, xm5                    ; 3 2
    paddw           xm2, xm6                    ; 4 5
    paddw           xm3, xm7                    ; 7 6
    vpbroadcastd    xm5, [pixel_10bpc_max]
    pxor            m4, m4
    REPX {mova [cq+32*x], m4}, 0, 1, 2, 3
    REPX {pmaxsw x, xm4}, xm0, xm1, xm2, xm3
    REPX {pminsw x, xm5}, xm0, xm1, xm2, xm3
    movq            [dstq+strideq*0], xm0
    movhps          [dstq+strideq*1], xm0
    movhps          [dstq+strideq*2], xm1
    movq            [dstq+r3       ], xm1
    movq            [r6  +strideq*0], xm2
    movhps          [r6  +strideq*1], xm2
    movhps          [r6  +strideq*2], xm3
    movq            [r6  +r3       ], xm3
    RET

INV_TXFM_4X8_FN adst, dct
INV_TXFM_4X8_FN adst, adst
INV_TXFM_4X8_FN adst, flipadst
INV_TXFM_4X8_FN adst, identity

;------------------------------------------------------------------------------
; iadst_4x8_internal_10bpc
;
; Pass 1: calls m(iadst_8x4_internal_10bpc).main, adds pd_2048 to its four
;         results (taken from m4, m6, m2, m3) and shifts right by 12 at
;         .pass1_end, which iflipadst_4x8_internal_10bpc also jumps to.
; Pass 2: .pass2_main packs/transposes and tail-jumps into the 8bpc routine;
;         the result is scaled with pmulhrsw by pw_2048_m2048 and written
;         back through .end (add to dst, clamp, clear cq, store rows 0-7).
;
; Local helpers:
;   .pass2_main  word pack + transpose of m0..m3 into xm2..xm5, then jumps to
;                m(iadst_4x8_internal_8bpc).main_pass2.
;   .main        loads m8/m9 with the clip_18b_min/max bounds and falls into
;   .main2       which loads eight 16-byte rows from cq, pairs them as
;                annotated and scales them by pd_2896 (round pd_2048, >>12);
;   .main3       the transform proper. It returns with m0 = out0 out7,
;                m4 = out6 out1, m2 = out4 -out5, m5 = -out3 out2.
; NOTE(review): .main* use m8/m9 although this cglobal only requests 8 vector
; registers; the callers are not visible here - confirm they reserve enough.
;------------------------------------------------------------------------------
cglobal iadst_4x8_internal_10bpc, 0, 7, 8, dst, stride, c, eob, tx2
    call m(iadst_8x4_internal_10bpc).main
    vpbroadcastd    m5, [pd_2048]
    paddd           m0, m5, m4
    paddd           m1, m5, m6
    paddd           m2, m5
    paddd           m3, m5
.pass1_end:
    REPX {psrad x, 12}, m0, m1, m2, m3
    jmp             tx2q
.pass2:
    call .pass2_main
    mova            xm4, [pw_2048_m2048]
    REPX {pmulhrsw x, xm4}, xm0, xm1, xm2, xm3
.end:
    lea             r3, [strideq*3]
    lea             r6, [dstq+strideq*4]
    movq            xm4, [dstq+strideq*0]
    movhps          xm4, [dstq+strideq*1]
    movq            xm5, [dstq+strideq*2]
    movhps          xm5, [dstq+r3       ]
    movq            xm6, [r6  +strideq*0]
    movhps          xm6, [r6  +strideq*1]
    movq            xm7, [r6  +strideq*2]
    movhps          xm7, [r6  +r3       ]
    paddw           xm0, xm4                    ; 0 1
    paddw           xm1, xm5                    ; 2 3
    paddw           xm2, xm6                    ; 4 5
    paddw           xm3, xm7                    ; 6 7
    vpbroadcastd    xm5, [pixel_10bpc_max]
    pxor            m4, m4
    REPX {mova [cq+32*x], m4}, 0, 1, 2, 3
    REPX {pmaxsw x, xm4}, xm0, xm1, xm2, xm3
    REPX {pminsw x, xm5}, xm0, xm1, xm2, xm3
    movq            [dstq+strideq*0], xm0
    movhps          [dstq+strideq*1], xm0
    movq            [dstq+strideq*2], xm1
    movhps          [dstq+r3       ], xm1
    movq            [r6  +strideq*0], xm2
    movhps          [r6  +strideq*1], xm2
    movq            [r6  +strideq*2], xm3
    movhps          [r6  +r3       ], xm3
    RET
ALIGN function_align
.pass2_main:
    packssdw        m0, m2
    packssdw        m1, m3
    lea             r6, [deint_shuf+128]
    punpcklwd       m4, m0, m1
    punpckhwd       m0, m1
    punpckhdq       m5, m4, m0
    punpckldq       m4, m0
    vextracti128    xm2, m4, 1                  ; 4 5
    vextracti128    xm3, m5, 1                  ; 6 7
    pshufd          xm4, xm4, q1032             ; 1 0
    pshufd          xm5, xm5, q1032             ; 3 2
    jmp m(iadst_4x8_internal_8bpc).main_pass2
ALIGN function_align
.main:
    vpbroadcastd    m8, [clip_18b_min]
    vpbroadcastd    m9, [clip_18b_max]
.main2:
    vbroadcasti128  m0, [cq+16*0]
    vbroadcasti128  m2, [cq+16*2]
    vbroadcasti128  m3, [cq+16*5]
    vbroadcasti128  m1, [cq+16*7]
    vpbroadcastd    m6, [pd_2896]
    shufpd          m0, m2, 0x0c                ; 0 2
    shufpd          m1, m3, 0x0c                ; 7 5
    vbroadcasti128  m2, [cq+16*4]
    vbroadcasti128  m4, [cq+16*6]
    vbroadcasti128  m5, [cq+16*1]
    vbroadcasti128  m3, [cq+16*3]
    vpbroadcastd    m7, [pd_2048]
    shufpd          m2, m4, 0x0c                ; 4 6
    shufpd          m3, m5, 0x0c                ; 3 1
    REPX {pmulld x, m6}, m0, m1, m2, m3
    REPX {paddd x, m7}, m0, m1, m2, m3
    REPX {psrad x, 12}, m0, m1, m2, m3
.main3:
    ITX_MULSUB_2D   1, 0, 4, 5, 6, 7, 401_1931, 4076_3612, 1
    ITX_MULSUB_2D   3, 2, 4, 5, 6, 7, 3166_3920, 2598_1189, 1
    psubd           m4, m0, m2                  ; t4 t6
    paddd           m0, m2                      ; t0 t2
    psubd           m2, m1, m3                  ; t5 t7
    paddd           m1, m3                      ; t1 t3
    REPX {pmaxsd x, m8}, m4, m2, m0, m1
    REPX {pminsd x, m9}, m4, m2, m0, m1
    pxor            m5, m5
    psubd           m5, m4                      ; m5 = -m4
    vpblendd        m4, m2, 0xcc                ; t4 t7
    vpblendd        m2, m5, 0xcc                ; t5 -t6
    ITX_MULSUB_2D   4, 2, 3, 5, 6, 7, 1567, 3784
    vpbroadcastd    m5, [pd_2896]
    vbroadcasti128  m6, [pw_2048_m2048]         ; + + - -
    punpckhqdq      m3, m0, m1
    punpcklqdq      m0, m1
    psubd           m1, m0, m3                  ; t2 t3
    paddd           m0, m3                      ; out0 -out7
    punpckhqdq      m3, m4, m2                  ; t7a t6a
    punpcklqdq      m4, m2                      ; t5a t4a
    psubd           m2, m4, m3                  ; t7 t6
    paddd           m4, m3                      ; out6 -out1
    REPX {pmaxsd x, m8}, m1, m2
    REPX {pminsd x, m9}, m1, m2
    vpblendd        m3, m1, m2, 0xcc
    shufpd          m1, m2, 0x05
    pmulld          m3, m5
    pmulld          m5, m1
    psignd          m0, m6                      ; out0 out7
    psignd          m4, m6                      ; out6 out1
    paddd           m3, m7
    psubd           m2, m3, m5
    paddd           m5, m3
    psrad           m2, 12                      ; out4 -out5
    psrad           m5, 12                      ; -out3 out2
    ret

INV_TXFM_4X8_FN flipadst, dct
INV_TXFM_4X8_FN flipadst, adst
INV_TXFM_4X8_FN flipadst, flipadst
INV_TXFM_4X8_FN flipadst, identity

;------------------------------------------------------------------------------
; iflipadst_4x8_internal_10bpc
;
; Pass 1: same helper as iadst_4x8_internal_10bpc, but its results are taken
;         in the order m3, m2, m6, m4 before joining that function's
;         .pass1_end.
; Pass 2: shares m(iadst_4x8_internal_10bpc).pass2_main, then writes the rows
;         out reversed: xm3 goes to rows 1/0, xm2 to rows 3/2, xm1 to rows
;         5/4 and xm0 to rows 7/6 (low half / high half).
;------------------------------------------------------------------------------
cglobal iflipadst_4x8_internal_10bpc, 0, 7, 8, dst, stride, c, eob, tx2
    call m(iadst_8x4_internal_10bpc).main
    vpbroadcastd    m5, [pd_2048]
    paddd           m0, m5, m3
    paddd           m1, m5, m2
    paddd           m2, m5, m6
    paddd           m3, m5, m4
    jmp m(iadst_4x8_internal_10bpc).pass1_end
.pass2:
    call m(iadst_4x8_internal_10bpc).pass2_main
    mova            xm4, [pw_2048_m2048]
    REPX {pmulhrsw x, xm4}, xm3, xm2, xm1, xm0
    lea             r3, [strideq*3]
    lea             r6, [dstq+strideq*4]
    movq            xm4, [dstq+strideq*1]
    movhps          xm4, [dstq+strideq*0]
    movq            xm5, [dstq+r3       ]
    movhps          xm5, [dstq+strideq*2]
    movq            xm6, [r6  +strideq*1]
    movhps          xm6, [r6  +strideq*0]
    movq            xm7, [r6  +r3       ]
    movhps          xm7, [r6  +strideq*2]
    paddw           xm3, xm4                    ; 1 0
    paddw           xm2, xm5                    ; 3 2
    paddw           xm1, xm6                    ; 5 4
    paddw           xm0, xm7                    ; 7 6
    vpbroadcastd    xm5, [pixel_10bpc_max]
    pxor            m4, m4
    REPX {mova [cq+32*x], m4}, 0, 1, 2, 3
    REPX {pmaxsw x, xm4}, xm3, xm2, xm1, xm0
    REPX {pminsw x, xm5}, xm3, xm2, xm1, xm0
    movhps          [dstq+strideq*0], xm3
    movq            [dstq+strideq*1], xm3
    movhps          [dstq+strideq*2], xm2
    movq            [dstq+r3       ], xm2
    movhps          [r6  +strideq*0], xm1
    movq            [r6  +strideq*1], xm1
    movhps          [r6  +strideq*2], xm0
    movq            [r6  +r3       ], xm0
    RET

INV_TXFM_4X8_FN identity, dct
INV_TXFM_4X8_FN identity, adst
INV_TXFM_4X8_FN identity, flipadst
INV_TXFM_4X8_FN identity, identity

; iidentity_4x8_internal_10bpc: pass 1 applies two scaling stages to the four
; coefficient vectors - first by pd_2896, then by pd_5793 - each followed by
; pd_2048 rounding and a 12-bit right shift.
cglobal iidentity_4x8_internal_10bpc, 0, 7, 8, dst, stride, c, eob, tx2
.pass1:
    vpbroadcastd    m3, [pd_2896]
    pmulld          m0, m3, [cq+32*0]
    pmulld          m1, m3, [cq+32*1]
    pmulld          m2, m3, [cq+32*2]
    pmulld          m3, [cq+32*3]
    vpbroadcastd    m5, [pd_2048]
    vpbroadcastd    m4, [pd_5793]
    REPX {paddd x, m5}, m0, m1, m2, m3
    REPX {psrad x, 12}, m0, m1, m2, m3
    REPX {pmulld x, m4}, m0, m1, m2, m3
    REPX {paddd x, m5}, m0, m1, m2, m3
    REPX {psrad x, 12}, m0, m1, m2, m3
    jmp             tx2q
.pass2:
    vpbroadcastd    m6, [pixel_10bpc_max]
1006*c0909341SAndroid Build Coastguard Worker call .pass2_end 1007*c0909341SAndroid Build Coastguard Worker RET 1008*c0909341SAndroid Build Coastguard WorkerALIGN function_align 1009*c0909341SAndroid Build Coastguard Worker.pass2_end: 1010*c0909341SAndroid Build Coastguard Worker vpbroadcastd m4, [pw_4096] 1011*c0909341SAndroid Build Coastguard Worker packssdw m0, m2 1012*c0909341SAndroid Build Coastguard Worker packssdw m1, m3 1013*c0909341SAndroid Build Coastguard Worker punpckhwd m2, m0, m1 1014*c0909341SAndroid Build Coastguard Worker punpcklwd m0, m1 1015*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m4 1016*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m4 1017*c0909341SAndroid Build Coastguard Worker punpckhdq m1, m0, m2 ; 2 3 6 7 1018*c0909341SAndroid Build Coastguard Worker punpckldq m0, m2 ; 0 1 4 5 1019*c0909341SAndroid Build Coastguard Worker lea r3, [strideq*3] 1020*c0909341SAndroid Build Coastguard Worker lea r6, [dstq+strideq*4] 1021*c0909341SAndroid Build Coastguard Worker movq xm2, [dstq+strideq*0] 1022*c0909341SAndroid Build Coastguard Worker movhps xm2, [dstq+strideq*1] 1023*c0909341SAndroid Build Coastguard Worker vpbroadcastq m4, [r6 +strideq*0] 1024*c0909341SAndroid Build Coastguard Worker vpbroadcastq m5, [r6 +strideq*1] 1025*c0909341SAndroid Build Coastguard Worker movq xm3, [dstq+strideq*2] 1026*c0909341SAndroid Build Coastguard Worker movhps xm3, [dstq+r3 ] 1027*c0909341SAndroid Build Coastguard Worker vpblendd m2, m4, 0x30 1028*c0909341SAndroid Build Coastguard Worker vpblendd m2, m5, 0xc0 1029*c0909341SAndroid Build Coastguard Worker vpbroadcastq m4, [r6 +strideq*2] 1030*c0909341SAndroid Build Coastguard Worker vpbroadcastq m5, [r6 +r3 ] 1031*c0909341SAndroid Build Coastguard Worker vpblendd m3, m4, 0x30 1032*c0909341SAndroid Build Coastguard Worker vpblendd m3, m5, 0xc0 1033*c0909341SAndroid Build Coastguard Worker pxor m4, m4 1034*c0909341SAndroid Build Coastguard Worker REPX {mova [cq+32*x], m4}, 0, 1, 2, 3 
1035*c0909341SAndroid Build Coastguard Worker paddw m0, m2 ; out0 out1 out4 out5 1036*c0909341SAndroid Build Coastguard Worker paddw m1, m3 ; out2 out3 out6 out7 1037*c0909341SAndroid Build Coastguard Worker pmaxsw m0, m4 1038*c0909341SAndroid Build Coastguard Worker pmaxsw m1, m4 1039*c0909341SAndroid Build Coastguard Worker pminsw m0, m6 1040*c0909341SAndroid Build Coastguard Worker pminsw m1, m6 1041*c0909341SAndroid Build Coastguard Worker vextracti128 xm2, m0, 1 ; out4 out5 1042*c0909341SAndroid Build Coastguard Worker vextracti128 xm3, m1, 1 ; out6 out7 1043*c0909341SAndroid Build Coastguard Worker movq [dstq+strideq*0], xm0 1044*c0909341SAndroid Build Coastguard Worker movhps [dstq+strideq*1], xm0 1045*c0909341SAndroid Build Coastguard Worker movq [dstq+strideq*2], xm1 1046*c0909341SAndroid Build Coastguard Worker movhps [dstq+r3 ], xm1 1047*c0909341SAndroid Build Coastguard Worker movq [r6 +strideq*0], xm2 1048*c0909341SAndroid Build Coastguard Worker movhps [r6 +strideq*1], xm2 1049*c0909341SAndroid Build Coastguard Worker movq [r6 +strideq*2], xm3 1050*c0909341SAndroid Build Coastguard Worker movhps [r6 +r3 ], xm3 1051*c0909341SAndroid Build Coastguard Worker ret 1052*c0909341SAndroid Build Coastguard Worker 1053*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN dct, dct, 12 1054*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN dct, identity, 12 1055*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN dct, adst, 12 1056*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X8_FN dct, flipadst, 12 1057*c0909341SAndroid Build Coastguard Worker 1058*c0909341SAndroid Build Coastguard Workercglobal idct_4x8_internal_12bpc, 0, 7, 10, dst, stride, c, eob, tx2 1059*c0909341SAndroid Build Coastguard Worker jmp m(idct_4x8_internal_10bpc).pass1 1060*c0909341SAndroid Build Coastguard Worker.pass2: 1061*c0909341SAndroid Build Coastguard Worker vpbroadcastd m8, [clip_18b_min] 1062*c0909341SAndroid Build Coastguard Worker vpbroadcastd m9, [clip_18b_max] 
1063*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m8}, m0, m1, m2, m3 1064*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m9}, m0, m1, m2, m3 1065*c0909341SAndroid Build Coastguard Worker ; transpose & interleave 1066*c0909341SAndroid Build Coastguard Worker pshufd m0, m0, q1320 1067*c0909341SAndroid Build Coastguard Worker pshufd m1, m1, q1320 1068*c0909341SAndroid Build Coastguard Worker pshufd m2, m2, q1320 1069*c0909341SAndroid Build Coastguard Worker pshufd m3, m3, q1320 1070*c0909341SAndroid Build Coastguard Worker punpckldq m4, m0, m1 1071*c0909341SAndroid Build Coastguard Worker punpckhdq m0, m1 1072*c0909341SAndroid Build Coastguard Worker punpckldq m5, m2, m3 1073*c0909341SAndroid Build Coastguard Worker punpckhdq m2, m3 1074*c0909341SAndroid Build Coastguard Worker vpermq m0, m0, q3102 1075*c0909341SAndroid Build Coastguard Worker vpermq m2, m2, q3102 1076*c0909341SAndroid Build Coastguard Worker vperm2i128 m1, m0, m2, 0x31 ; 1 5 (interleaved) 1077*c0909341SAndroid Build Coastguard Worker vperm2i128 m3, m0, m2, 0x20 ; 7 3 (interleaved) 1078*c0909341SAndroid Build Coastguard Worker vperm2i128 m0, m4, m5, 0x20 ; 0 2 (interleaved) 1079*c0909341SAndroid Build Coastguard Worker vperm2i128 m2, m4, m5, 0x31 ; 4 6 (interleaved) 1080*c0909341SAndroid Build Coastguard Worker vpbroadcastd m7, [pd_2048] 1081*c0909341SAndroid Build Coastguard Worker call m(idct_8x4_internal_10bpc).main 1082*c0909341SAndroid Build Coastguard Worker psubd m3, m0, m4 ; out7 out6 1083*c0909341SAndroid Build Coastguard Worker paddd m0, m4 ; out0 out1 1084*c0909341SAndroid Build Coastguard Worker paddd m1, m2, m5 ; out3 out2 1085*c0909341SAndroid Build Coastguard Worker psubd m2, m5 ; out4 out5 1086*c0909341SAndroid Build Coastguard Worker pshufd m1, m1, q1032 1087*c0909341SAndroid Build Coastguard Worker pshufd m3, m3, q1032 1088*c0909341SAndroid Build Coastguard Worker jmp m(iadst_4x8_internal_12bpc).end 1089*c0909341SAndroid Build Coastguard Worker 
INV_TXFM_4X8_FN adst, dct,      12
INV_TXFM_4X8_FN adst, adst,     12
INV_TXFM_4X8_FN adst, flipadst, 12
INV_TXFM_4X8_FN adst, identity, 12

; iadst_4x8_internal_12bpc
; Pass 1:      runs the iadst_8x4 .main core and pre-shifts its results
;              (m4, m6, m2, m3 -> m0..m3) right by 1.
; .pass1_end:  adds pd_1024, shifts right by 11 and jumps to tx2q.
; .pass2:      clamps m0..m3 to [clip_18b_min, clip_18b_max], calls
;              .pass2_main and arranges the outputs for .end.
; .end:        shifts m0..m3 right by 3, packs to words, reorders with
;              iadst8_12_shuf, scales by pw_16384 (pmulhrsw), adds the eight
;              destination rows, clamps to [0, pixel_12bpc_max], clears
;              4*32 bytes at cq and stores the rows.
; .pass2_main: transposes and interleaves m0..m3, loads pd_2048 into m7 and
;              tail-jumps into the 10 bpc iadst_4x8 .main3.
cglobal iadst_4x8_internal_12bpc, 0, 7, 10, dst, stride, c, eob, tx2
    call m(iadst_8x4_internal_10bpc).main
    psrad           m0, m4, 1
    psrad           m1, m6, 1
    psrad           m2, 1
    psrad           m3, 1
.pass1_end:
    vpbroadcastd    m5, [pd_1024]
    REPX {paddd x, m5}, m0, m1, m2, m3
    REPX {psrad x, 11}, m0, m1, m2, m3
    jmp             tx2q
.pass2:
    vpbroadcastd    m8, [clip_18b_min]
    vpbroadcastd    m9, [clip_18b_max]
    REPX {pmaxsd x, m8}, m0, m1, m2, m3
    REPX {pminsd x, m9}, m0, m1, m2, m3
    call .pass2_main
    vpblendd        m3, m0, m4, 0x33 ; out6 out7
    vpblendd        m0, m4, 0xcc     ; out0 out1
    pshufd          m1, m5, q1032
    psignd          m2, m6           ; out4 out5
    psignd          m1, m6           ; out2 out3
.end:
    vpbroadcastd    m4, [pw_16384]
    REPX {psrad x, 3}, m0, m1, m2, m3
    packssdw        m0, m2 ; 0 1 4 5 (interleaved)
    packssdw        m1, m3 ; 2 3 6 7 (interleaved)
    mova            m2, [iadst8_12_shuf]
    vpermd          m0, m2, m0 ; 0 1 4 5
    vpermd          m1, m2, m1 ; 2 3 6 7
    pmulhrsw        m0, m4
    pmulhrsw        m1, m4
    lea             r3, [strideq*3]       ; r3 = 3*stride
    lea             r6, [dstq+strideq*4]  ; r6 = address of row 4
    movq            xm4, [dstq+strideq*0]
    movhps          xm4, [dstq+strideq*1]
    movq            xm5, [dstq+strideq*2]
    movhps          xm5, [dstq+r3]
    movq            xm6, [r6+strideq*0]
    movhps          xm6, [r6+strideq*1]
    vinserti128     m4, xm6, 1
    movq            xm7, [r6+strideq*2]
    movhps          xm7, [r6+r3]
    vinserti128     m5, xm7, 1
    paddw           m0, m4 ; 0 1 4 5
    paddw           m1, m5 ; 2 3 6 7
    vpbroadcastd    m5, [pixel_12bpc_max]
    pxor            m4, m4
    REPX {mova [cq+32*x], m4}, 0, 1, 2, 3 ; zero the coefficient buffer
    REPX {pmaxsw x, m4}, m0, m1
    REPX {pminsw x, m5}, m0, m1
    vextracti128    xm2, m0, 1 ; out4 out5
    vextracti128    xm3, m1, 1 ; out6 out7
    movq            [dstq+strideq*0], xm0
    movhps          [dstq+strideq*1], xm0
    movq            [dstq+strideq*2], xm1
    movhps          [dstq+r3], xm1
    movq            [r6+strideq*0], xm2
    movhps          [r6+strideq*1], xm2
    movq            [r6+strideq*2], xm3
    movhps          [r6+r3], xm3
    RET
ALIGN function_align
.pass2_main:
    ; transpose & interleave
    pshufd          m0, m0, q1320
    pshufd          m1, m1, q1320
    pshufd          m2, m2, q1320
    pshufd          m3, m3, q1320
    punpckldq       m4, m0, m1
    punpckhdq       m0, m1
    punpckldq       m5, m2, m3
    punpckhdq       m2, m3
    vperm2i128      m1, m0, m2, 0x31 ; 7 5 (interleaved)
    vperm2i128      m3, m0, m2, 0x20 ; 3 1 (interleaved)
    vperm2i128      m0, m4, m5, 0x20 ; 0 2 (interleaved)
    vperm2i128      m2, m4, m5, 0x31 ; 4 6 (interleaved)
    vpbroadcastd    m7, [pd_2048]
    jmp m(iadst_4x8_internal_10bpc).main3

INV_TXFM_4X8_FN flipadst, dct,      12
INV_TXFM_4X8_FN flipadst, adst,     12
INV_TXFM_4X8_FN flipadst, flipadst, 12
INV_TXFM_4X8_FN flipadst, identity, 12

; iflipadst_4x8_internal_12bpc
; Pass 1: same core as the 12 bpc iadst, but the results are picked up in
;         the order m3, m2, m6, m4 (into m0..m3) while being shifted right
;         by 1; continues in the iadst .pass1_end tail.
; Pass 2: clamps m0..m3 to [clip_18b_min, clip_18b_max], calls the iadst
;         .pass2_main, swaps/signs the results into the order annotated in
;         the lane comments and finishes in the iadst .end tail.
cglobal iflipadst_4x8_internal_12bpc, 0, 7, 10, dst, stride, c, eob, tx2
    call m(iadst_8x4_internal_10bpc).main
    psrad           m0, m3, 1
    psrad           m1, m2, 1
    psrad           m2, m6, 1
    psrad           m3, m4, 1
    jmp m(iadst_4x8_internal_12bpc).pass1_end
.pass2:
    vpbroadcastd    m8, [clip_18b_min]
    vpbroadcastd    m9, [clip_18b_max]
    REPX {pmaxsd x, m8}, m0, m1, m2, m3
    REPX {pminsd x, m9}, m0, m1, m2, m3
    call m(iadst_4x8_internal_12bpc).pass2_main
    shufpd          m3, m4, m0, 0x05 ; out1 out0
    shufpd          m0, m4, 0x05     ; out7 out6
    psignd          m2, m6
    pshufd          m6, m6, q1032
    pshufd          m1, m2, q1032    ; out5 out4
    psignd          m2, m5, m6       ; out3 out2
    jmp m(iadst_4x8_internal_12bpc).end

INV_TXFM_4X8_FN identity, dct,      12
INV_TXFM_4X8_FN identity, adst,     12
INV_TXFM_4X8_FN identity, flipadst, 12
INV_TXFM_4X8_FN identity, identity, 12

; iidentity_4x8_internal_12bpc
; Pass 1 jumps to the 10 bpc .pass1; pass 2 loads pixel_12bpc_max into m6
; and calls the 10 bpc .pass2_end.
cglobal iidentity_4x8_internal_12bpc, 0, 7, 10, dst, stride, c, eob, tx2
    jmp m(iidentity_4x8_internal_10bpc).pass1
.pass2:
    ; m0 = in0 in1
    ; m1 = in2 in3
    ; m2 = in4 in5
    ; m3 = in6 in7
    vpbroadcastd    m6, [pixel_12bpc_max]
    call m(iidentity_4x8_internal_10bpc).pass2_end
    RET

; INV_TXFM_4X16_FN type1, type2[, bitdepth = 10]
; Expands INV_TXFM_FN for the 4x16 size. For the dct_dct pair it additionally
; emits a shortcut: r6d = ([cq] * 181 + 384) >> 9, the dconly_<bitdepth>bpc
; constant is broadcast into xm2, [cq] is overwritten with eobd (annotated
; as 0 in the original), bit 4 of r3d is set (or 16) and control is
; transferred to the 4x4 .dconly3 tail.
%macro INV_TXFM_4X16_FN 2-3 10 ; type1, type2, bitdepth
    INV_TXFM_FN     %1, %2, 0, 4x16, %3
%ifidn %1_%2, dct_dct
    imul            r6d, [cq], 181
    vpbroadcastd    xm2, [dconly_%3bpc]
    mov             [cq], eobd ; 0
    or              r3d, 16
    add             r6d, 384
    sar             r6d, 9
    jmp m(inv_txfm_add_dct_dct_4x4_10bpc).dconly3
%endif
%endmacro

INV_TXFM_4X16_FN dct, dct
INV_TXFM_4X16_FN dct, identity
INV_TXFM_4X16_FN dct, adst
INV_TXFM_4X16_FN dct, flipadst

; idct_4x16_internal_10bpc
; .pass1:       loads rows cq+32*{2,6,3,7}, calls .pass1_main, multiplies
;               rows cq+32*{0,4,1,5} by m6 (pd_1448, set by .pass1_main),
;               calls .pass1_main2 and shifts m0..m7 right by 1.
; .pass2:       packs to words, transposes, calls the 8 bpc idct_4x16 .main,
;               then writes the result through .pass2_end with pw_2048 in m9
;               and pixel_10bpc_max in m8.
; .pass1_main:  two ITX_MULSUB_2D rotations using pd_1567/pd_3784 with the
;               pd_2048 rounding constant.
; .pass1_main2: adds the m10 bias (pd_3072, loaded in .pass1), shifts the
;               sums/differences right by 11 and forms the final butterflies.
; .pass2_end:   r6 = 3*stride, m7 = 0; scales each of m0..m3 by m9 and calls
;               .write_4x4 for it.
; .write_4x4:   adds m0 to four destination rows, clamps to [0, m8], clears
;               2*32 bytes at cq, and advances cq by 64 and dstq by 4 rows.
cglobal idct_4x16_internal_10bpc, 0, 7, 11, dst, stride, c, eob, tx2
.pass1:
    vpbroadcastd    m10, [pd_3072]
    mova            m1, [cq+32*2]
    mova            m3, [cq+32*6]
    mova            m5, [cq+32*3]
    mova            m7, [cq+32*7]
    call .pass1_main
    pmulld          m0, m6, [cq+32*0]
    pmulld          m2, m6, [cq+32*4]
    pmulld          m4, m6, [cq+32*1]
    pmulld          m6, [cq+32*5]
    call .pass1_main2
    REPX {psrad x, 1}, m0, m1, m2, m3, m4, m5, m6, m7
    jmp             tx2q
.pass2:
    packssdw        m0, m4
    packssdw        m1, m5
    packssdw        m2, m6
    packssdw        m3, m7
    lea             r6, [deint_shuf+128]
    punpcklwd       m4, m2, m3
    punpckhwd       m2, m3
    punpckhwd       m5, m0, m1
    punpcklwd       m0, m1
    punpckhdq       m1, m0, m4 ; 2 3
    punpckldq       m0, m4     ; 0 1
    punpckldq       m4, m5, m2 ; 8 9
    punpckhdq       m5, m2     ; a b
    vextracti128    xm2, m0, 1 ; 4 5
    vextracti128    xm3, m1, 1 ; 6 7
    vextracti128    xm6, m4, 1 ; c d
    vextracti128    xm7, m5, 1 ; e f
    call m(idct_4x16_internal_8bpc).main
    vpbroadcastd    m9, [pw_2048]
    vinserti128     m0, m0, xm1, 1 ; 0 1 3 2
    vinserti128     m1, m2, xm3, 1 ; 4 5 7 6
    vinserti128     m2, m4, xm5, 1 ; 8 9 b a
    vinserti128     m3, m6, xm7, 1 ; c d f e
    vpbroadcastd    m8, [pixel_10bpc_max]
    call .pass2_end
    RET
ALIGN function_align
.pass1_main:
    vpbroadcastd    m4, [pd_3784]
    vpbroadcastd    m8, [pd_1567]
    vpbroadcastd    m9, [pd_2048]
    vpbroadcastd    m6, [pd_1448]
    ITX_MULSUB_2D   1, 3, 0, 2, _, 9, 8, 4 ; t2l, t3l
    ITX_MULSUB_2D   5, 7, 4, 2, _, 9, 8, 4 ; t2h, t3h
    ret
ALIGN function_align
.pass1_main2:
    paddd           m0, m10
    paddd           m4, m10
    paddd           m8, m0, m2
    psubd           m0, m2
    paddd           m9, m4, m6
    psubd           m4, m6
    REPX {psrad x, 11}, m8, m0, m9, m4 ; t0l, t1l, t0h, t1h
    psubd           m2, m0, m1
    paddd           m1, m0
    psubd           m6, m4, m5
    paddd           m5, m4
    paddd           m0, m8, m3
    psubd           m3, m8, m3
    paddd           m4, m9, m7
    psubd           m7, m9, m7
    ret
ALIGN function_align
.pass2_end:
    lea             r6, [strideq*3]
    pxor            m7, m7
    pmulhrsw        m0, m9
    call .write_4x4
    pmulhrsw        m0, m1, m9
    call .write_4x4
    pmulhrsw        m0, m2, m9
    call .write_4x4
    pmulhrsw        m0, m3, m9
    call .write_4x4
    ret
ALIGN function_align
.write_4x4:
    movq            xm4, [dstq+strideq*0]
    movhps          xm4, [dstq+strideq*1]
    vpbroadcastq    m5, [dstq+strideq*2]
    vpbroadcastq    m6, [dstq+r6]
    mova            [cq+32*0], m7
    mova            [cq+32*1], m7
    add             cq, 32*2
    vpblendd        m4, m5, 0xc0
    vpblendd        m4, m6, 0x30
    paddw           m4, m0
    pmaxsw          m4, m7
    pminsw          m4, m8
    vextracti128    xm5, m4, 1
    movq            [dstq+strideq*0], xm4
    movhps          [dstq+strideq*1], xm4
    movhps          [dstq+strideq*2], xm5
    movq            [dstq+r6], xm5
    lea             dstq, [dstq+strideq*4]
    ret

INV_TXFM_4X16_FN adst, dct
INV_TXFM_4X16_FN adst, adst
INV_TXFM_4X16_FN adst, flipadst
INV_TXFM_4X16_FN adst, identity

; iadst_4x16_internal_10bpc
; Pass 1:      runs the iadst_16x4 .main / .main_end helpers with pd_6144 in
;              m6, then shifts the eight results right by 13 into m0..m7.
; .pass2:      calls .pass2_main, builds a per-lane +/-2048 multiplier in m9
;              from pw_2048 and writes four 4x4 groups through .write_4x4,
;              clamping to [0, pixel_10bpc_max].
; .write_4x4:  like the idct variant, but with the row order used by the
;              sign/lane layout annotated in .pass2.
; .pass2_main: packs/transposes to words, calls the 8 bpc iadst_4x16 .main2
;              and scales two of the results by pw_2896x8.
; .main/.main2: 32-bit core; per the original comment it expects
;              m12 = clip_min and m13 = clip_max to be set by the caller.
; NOTE(review): the listing of .main2 continues past the end of this chunk;
; the visible part is reproduced unchanged.
cglobal iadst_4x16_internal_10bpc, 0, 7, 11, dst, stride, c, eob, tx2
    call m(iadst_16x4_internal_10bpc).main
    vpbroadcastd    m6, [pd_6144]
    call m(iadst_16x4_internal_10bpc).main_end
    psrad           m0, m4, 13
    psrad           m1, m5, 13
    psrad           m2, 13
    psrad           m3, 13
    psrad           m4, m8, 13
    psrad           m5, m9, 13
    psrad           m6, 13
    psrad           m7, 13
    jmp             tx2q
.pass2:
    call .pass2_main
    vpbroadcastd    m5, [pw_2048]
    vpbroadcastd    m8, [pixel_10bpc_max]
    lea             r6, [strideq*3]
    vpblendd        m4, m3, m0, 0xcc ; -out3   out0   out2  -out1
    pshufd          m2, m2, q1032    ; -out11  out8   out10 -out9
    vpblendd        m3, m0, 0x33     ; -out15  out12  out14 -out13
    pxor            m7, m7
    psubw           m9, m7, m5
    vpblendd        m9, m5, 0x3c     ; -2048   2048   2048  -2048
    pmulhrsw        m0, m4, m9
    call .write_4x4
    pmulhrsw        m0, m1, m9
    call .write_4x4
    pmulhrsw        m0, m2, m9
    call .write_4x4
    pmulhrsw        m0, m3, m9
    call .write_4x4
    RET
ALIGN function_align
.write_4x4:
    movq            xm4, [dstq+r6]
    movhps          xm4, [dstq+strideq*0]
    vpbroadcastq    m5, [dstq+strideq*1]
    vpbroadcastq    m6, [dstq+strideq*2]
    mova            [cq+32*0], m7
    mova            [cq+32*1], m7
    add             cq, 32*2
    vpblendd        m4, m5, 0xc0
    vpblendd        m4, m6, 0x30
    paddw           m4, m0
    pmaxsw          m4, m7
    pminsw          m4, m8
    vextracti128    xm5, m4, 1
    movhps          [dstq+strideq*0], xm4
    movhps          [dstq+strideq*1], xm5
    movq            [dstq+strideq*2], xm5
    movq            [dstq+r6], xm4
    lea             dstq, [dstq+strideq*4]
    ret
ALIGN function_align
.pass2_main:
    packssdw        m0, m4
    packssdw        m1, m5
    packssdw        m2, m6
    packssdw        m3, m7
    lea             r6, [deint_shuf+128]
    punpcklwd       m4, m2, m3
    punpckhwd       m2, m3
    punpckhwd       m5, m0, m1
    punpcklwd       m0, m1
    punpckhdq       m1, m0, m4
    punpckldq       m0, m4
    punpckldq       m4, m5, m2
    punpckhdq       m5, m2
    vpblendd        m3, m0, m1, 0x33
    vpblendd        m0, m1, 0xcc
    shufpd          m2, m5, m4, 0x05
    shufpd          m4, m5, 0x05
    vperm2i128      m1, m0, m3, 0x31 ; 4 7 6 5
    vinserti128     m0, xm3, 1       ; 0 3 2 1
    vperm2i128      m3, m2, m4, 0x31 ; c f e d ; ????
    vinserti128     m2, xm4, 1       ; b 8 9 a
    call m(iadst_4x16_internal_8bpc).main2
    vpbroadcastd    m5, [pw_2896x8]
    paddsw          m1, m2, m4
    psubsw          m2, m4
    pmulhrsw        m1, m5 ; -out7   out4   out6  -out5
    pmulhrsw        m2, m5 ;  out8  -out11 -out9   out10
    ret
ALIGN function_align
.main:
    vbroadcasti128  m0, [cq+16* 0]
    vbroadcasti128  m4, [cq+16* 2]
    vbroadcasti128  m1, [cq+16*15]
    vbroadcasti128  m5, [cq+16*13]
    vbroadcasti128  m2, [cq+16* 4]
    vbroadcasti128  m6, [cq+16* 6]
    vbroadcasti128  m3, [cq+16*11]
    vbroadcasti128  m7, [cq+16* 9]
    shufpd          m0, m4, 0x0c ;  0  2
    shufpd          m1, m5, 0x0c ; 15 13
    shufpd          m2, m6, 0x0c ;  4  6
    shufpd          m3, m7, 0x0c ; 11  9
    vbroadcasti128  m4, [cq+16* 8]
    vbroadcasti128  m6, [cq+16*10]
    vbroadcasti128  m5, [cq+16* 7]
    vbroadcasti128  m7, [cq+16* 5]
    shufpd          m4, m6, 0x0c ;  8 10
    shufpd          m5, m7, 0x0c ;  7  5
    vbroadcasti128  m6, [cq+16*12]
    vbroadcasti128  m7, [cq+16*14]
    shufpd          m6, m7, 0x0c ; 12 14
    vbroadcasti128  m7, [cq+16* 3]
    vbroadcasti128  m8, [cq+16* 1]
    shufpd          m7, m8, 0x0c ;  3  1
.main2:
    ; expects: m12 = clip_min   m13 = clip_max
    vpbroadcastd    m11, [pd_2048]
    ITX_MULSUB_2D   1, 0, 8, 9, 10, 11,  201_995,  4091_3973, 1
    ITX_MULSUB_2D   3, 2, 8, 9, 10, 11, 1751_2440, 3703_3290, 1
    ITX_MULSUB_2D   5, 4, 8, 9, 10, 11, 3035_3513, 2751_2106, 1
    ITX_MULSUB_2D   7, 6, 8, 9, 10, 11, 3857_4052, 1380_601,  1
    psubd           m8, m0, m4 ; t8a  t10a
    paddd           m0, m4     ; t0a  t2a
    psubd           m4, m1, m5 ; t9a  t11a
    paddd           m1, m5     ; t1a  t3a
    psubd           m5, m2, m6 ; t12a t14a
    paddd           m2, m6     ; t4a  t6a
    psubd           m6, m3, m7 ; t13a t15a
    paddd           m3, m7     ; t5a  t7a
    REPX {pmaxsd x, m12}, m0, m1, m2, m3, m4, m5, m6, m8
    REPX {pminsd x, m13}, m0, m1, m2, m3, m4, m5, m6, m8
    ITX_MULSUB_2D   8, 4, 7, 9, 10, 11,  799_3406, 4017_2276, 1
    ITX_MULSUB_2D   6, 5, 7, 9, 10, 11, 4017_2276, 10, 1
    psubd           m7, m0, m2 ; t4   t6
    paddd           m0, m2     ; t0   t2
    psubd           m2, m1, m3 ; t5   t7
    paddd           m1, m3     ; t1   t3
    psubd           m3, m4, m6 ; t12a t14a
    paddd           m4, m6     ; t8a  t10a
    psubd           m6, m8, m5 ; t13a t15a
    paddd           m8, m5     ; t9a  t11a
    REPX {pmaxsd x, m12}, m0, m1, m2, m3, m4, m6, m7, m8
    REPX {pminsd x, m13}, m0, m1, m2, m3, m4, m6, m7, m8
    punpcklqdq      m5, m3, m7 ; t12a t4
    punpckhqdq      m3, m7     ; t14a t6
    punpckhqdq      m7, m6, m2 ; t15a t7
    punpcklqdq      m6, m2     ; t13a t5
    ITX_MULSUB_2D   7, 3, 2, 9, 10, 11, 3784, 1567
    ITX_MULSUB_2D   5, 6, 2, 9, 10, 11, 1567, 10
    vpbroadcastd    m10, [pd_2896]
    vbroadcasti128  m9, [pw_2048_m2048] ; + + - -
    punpckhqdq      m2, m4, m0 ; t10a t2
    punpcklqdq      m4, m0     ; t8a  t0
    punpckhqdq      m0, m8, m1 ; t11a t3
    punpcklqdq      m8, m1     ; t9a  t1
    paddd           m1, m6, m7 ; out2 -out3
    psubd           m6, m7     ; t14a t6
1497*c0909341SAndroid Build Coastguard Worker paddd m7, m5, m3 ; -out13 out12 1498*c0909341SAndroid Build Coastguard Worker psubd m5, m3 ; t15a t7 1499*c0909341SAndroid Build Coastguard Worker psubd m3, m8, m0 ; t11 t3a 1500*c0909341SAndroid Build Coastguard Worker paddd m8, m0 ; out14 -out15 1501*c0909341SAndroid Build Coastguard Worker paddd m0, m4, m2 ; -out1 out0 1502*c0909341SAndroid Build Coastguard Worker psubd m4, m2 ; t10 t2a 1503*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m12}, m6, m5, m3, m4 1504*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m13}, m6, m5, m3, m4 1505*c0909341SAndroid Build Coastguard Worker REPX {pmulld x, m10}, m6, m5, m3, m4 1506*c0909341SAndroid Build Coastguard Worker paddd m6, m11 1507*c0909341SAndroid Build Coastguard Worker paddd m4, m11 1508*c0909341SAndroid Build Coastguard Worker paddd m2, m6, m5 ; -out5 out4 1509*c0909341SAndroid Build Coastguard Worker psubd m6, m5 ; out10 -out11 1510*c0909341SAndroid Build Coastguard Worker psubd m5, m4, m3 ; -out9 out8 1511*c0909341SAndroid Build Coastguard Worker paddd m3, m4 ; out6 -out7 1512*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 12}, m2, m3, m5, m6 1513*c0909341SAndroid Build Coastguard Worker REPX {psignd x, m9}, m1, m8, m3, m6 1514*c0909341SAndroid Build Coastguard Worker pshufd m9, m9, q1032 1515*c0909341SAndroid Build Coastguard Worker REPX {psignd x, m9}, m0, m7, m2, m5 1516*c0909341SAndroid Build Coastguard Worker ret 1517*c0909341SAndroid Build Coastguard Worker 1518*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN flipadst, dct 1519*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN flipadst, adst 1520*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN flipadst, flipadst 1521*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN flipadst, identity 1522*c0909341SAndroid Build Coastguard Worker 1523*c0909341SAndroid Build Coastguard Workercglobal iflipadst_4x16_internal_10bpc, 0, 7, 11, dst, stride, c, eob, tx2 
1524*c0909341SAndroid Build Coastguard Worker.pass1: 1525*c0909341SAndroid Build Coastguard Worker call m(iadst_16x4_internal_10bpc).main 1526*c0909341SAndroid Build Coastguard Worker vpbroadcastd m6, [pd_6144] 1527*c0909341SAndroid Build Coastguard Worker call m(iadst_16x4_internal_10bpc).main_end 1528*c0909341SAndroid Build Coastguard Worker psrad m0, m3, 13 1529*c0909341SAndroid Build Coastguard Worker psrad m1, m2, 13 1530*c0909341SAndroid Build Coastguard Worker psrad m2, m5, 13 1531*c0909341SAndroid Build Coastguard Worker psrad m3, m4, 13 1532*c0909341SAndroid Build Coastguard Worker psrad m4, m7, 13 1533*c0909341SAndroid Build Coastguard Worker psrad m5, m6, 13 1534*c0909341SAndroid Build Coastguard Worker psrad m6, m9, 13 1535*c0909341SAndroid Build Coastguard Worker psrad m7, m8, 13 1536*c0909341SAndroid Build Coastguard Worker jmp tx2q 1537*c0909341SAndroid Build Coastguard Worker.pass2: 1538*c0909341SAndroid Build Coastguard Worker call m(iadst_4x16_internal_10bpc).pass2_main 1539*c0909341SAndroid Build Coastguard Worker vpbroadcastd m5, [pw_2048] 1540*c0909341SAndroid Build Coastguard Worker vpbroadcastd m8, [pixel_10bpc_max] 1541*c0909341SAndroid Build Coastguard Worker lea r6, [strideq*3] 1542*c0909341SAndroid Build Coastguard Worker vpblendd m4, m3, m0, 0x33 ; -out0 out3 out1 -out2 1543*c0909341SAndroid Build Coastguard Worker pshufd m2, m2, q1032 ; -out11 out8 out10 -out9 1544*c0909341SAndroid Build Coastguard Worker vpblendd m3, m0, 0xcc ; -out12 out15 out13 -out14 1545*c0909341SAndroid Build Coastguard Worker pxor m7, m7 1546*c0909341SAndroid Build Coastguard Worker psubw m9, m7, m5 1547*c0909341SAndroid Build Coastguard Worker vpblendd m9, m5, 0x3c ; -2048 2048 2048 -2048 1548*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m4, m9 1549*c0909341SAndroid Build Coastguard Worker call .write_4x4 1550*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m2, m9 1551*c0909341SAndroid Build Coastguard Worker call .write_4x4 1552*c0909341SAndroid 
Build Coastguard Worker pmulhrsw m0, m1, m9 1553*c0909341SAndroid Build Coastguard Worker call .write_4x4 1554*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m3, m9 1555*c0909341SAndroid Build Coastguard Worker call .write_4x4 1556*c0909341SAndroid Build Coastguard Worker RET 1557*c0909341SAndroid Build Coastguard WorkerALIGN function_align 1558*c0909341SAndroid Build Coastguard Worker.write_4x4: 1559*c0909341SAndroid Build Coastguard Worker movq xm4, [dstq+strideq*0] 1560*c0909341SAndroid Build Coastguard Worker movhps xm4, [dstq+r6 ] 1561*c0909341SAndroid Build Coastguard Worker vpbroadcastq m5, [dstq+strideq*1] 1562*c0909341SAndroid Build Coastguard Worker vpbroadcastq m6, [dstq+strideq*2] 1563*c0909341SAndroid Build Coastguard Worker mova [cq+32*0], m7 1564*c0909341SAndroid Build Coastguard Worker mova [cq+32*1], m7 1565*c0909341SAndroid Build Coastguard Worker add cq, 32*2 1566*c0909341SAndroid Build Coastguard Worker vpblendd m4, m5, 0x30 1567*c0909341SAndroid Build Coastguard Worker vpblendd m4, m6, 0xc0 1568*c0909341SAndroid Build Coastguard Worker paddw m4, m0 1569*c0909341SAndroid Build Coastguard Worker pmaxsw m4, m7 1570*c0909341SAndroid Build Coastguard Worker pminsw m4, m8 1571*c0909341SAndroid Build Coastguard Worker vextracti128 xm5, m4, 1 1572*c0909341SAndroid Build Coastguard Worker movq [dstq+strideq*0], xm4 1573*c0909341SAndroid Build Coastguard Worker movq [dstq+strideq*1], xm5 1574*c0909341SAndroid Build Coastguard Worker movhps [dstq+strideq*2], xm5 1575*c0909341SAndroid Build Coastguard Worker movhps [dstq+r6 ], xm4 1576*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+strideq*4] 1577*c0909341SAndroid Build Coastguard Worker ret 1578*c0909341SAndroid Build Coastguard Worker 1579*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN identity, dct 1580*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN identity, adst 1581*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN identity, flipadst 1582*c0909341SAndroid 
Build Coastguard WorkerINV_TXFM_4X16_FN identity, identity 1583*c0909341SAndroid Build Coastguard Worker 1584*c0909341SAndroid Build Coastguard Workercglobal iidentity_4x16_internal_10bpc, 0, 7, 11, dst, stride, c, eob, tx2 1585*c0909341SAndroid Build Coastguard Worker vpbroadcastd m7, [pd_5793] 1586*c0909341SAndroid Build Coastguard Worker pmulld m0, m7, [cq+32*0] 1587*c0909341SAndroid Build Coastguard Worker pmulld m4, m7, [cq+32*1] 1588*c0909341SAndroid Build Coastguard Worker pmulld m1, m7, [cq+32*2] 1589*c0909341SAndroid Build Coastguard Worker pmulld m5, m7, [cq+32*3] 1590*c0909341SAndroid Build Coastguard Worker pmulld m2, m7, [cq+32*4] 1591*c0909341SAndroid Build Coastguard Worker pmulld m6, m7, [cq+32*5] 1592*c0909341SAndroid Build Coastguard Worker pmulld m3, m7, [cq+32*6] 1593*c0909341SAndroid Build Coastguard Worker pmulld m7, [cq+32*7] 1594*c0909341SAndroid Build Coastguard Worker vpbroadcastd m8, [pd_6144] 1595*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m8}, m0, m4, m1, m5, m2, m6, m3, m7 1596*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 13}, m0, m4, m1, m5, m2, m6, m3, m7 1597*c0909341SAndroid Build Coastguard Worker jmp tx2q 1598*c0909341SAndroid Build Coastguard Worker.pass2: 1599*c0909341SAndroid Build Coastguard Worker packssdw m0, m4 1600*c0909341SAndroid Build Coastguard Worker packssdw m1, m5 1601*c0909341SAndroid Build Coastguard Worker packssdw m2, m6 1602*c0909341SAndroid Build Coastguard Worker packssdw m3, m7 1603*c0909341SAndroid Build Coastguard Worker vpbroadcastd m7, [pw_1697x16] 1604*c0909341SAndroid Build Coastguard Worker vpbroadcastd m8, [pw_2048] 1605*c0909341SAndroid Build Coastguard Worker pmulhrsw m4, m7, m0 1606*c0909341SAndroid Build Coastguard Worker pmulhrsw m5, m7, m1 1607*c0909341SAndroid Build Coastguard Worker pmulhrsw m6, m7, m2 1608*c0909341SAndroid Build Coastguard Worker pmulhrsw m7, m3 1609*c0909341SAndroid Build Coastguard Worker REPX {paddsw x, x}, m0, m1, m2, m3 1610*c0909341SAndroid 
Build Coastguard Worker paddsw m0, m4 1611*c0909341SAndroid Build Coastguard Worker paddsw m1, m5 1612*c0909341SAndroid Build Coastguard Worker paddsw m2, m6 1613*c0909341SAndroid Build Coastguard Worker paddsw m3, m7 1614*c0909341SAndroid Build Coastguard Worker vpbroadcastd m4, [pixel_10bpc_max] 1615*c0909341SAndroid Build Coastguard Worker call .pass2_end 1616*c0909341SAndroid Build Coastguard Worker RET 1617*c0909341SAndroid Build Coastguard WorkerALIGN function_align 1618*c0909341SAndroid Build Coastguard Worker.pass2_end: 1619*c0909341SAndroid Build Coastguard Worker punpckhwd m7, m0, m1 1620*c0909341SAndroid Build Coastguard Worker punpcklwd m0, m1 1621*c0909341SAndroid Build Coastguard Worker punpckhwd m1, m2, m3 1622*c0909341SAndroid Build Coastguard Worker punpcklwd m2, m3 1623*c0909341SAndroid Build Coastguard Worker lea r6, [strideq*5] 1624*c0909341SAndroid Build Coastguard Worker pxor m3, m3 1625*c0909341SAndroid Build Coastguard Worker punpckhdq m5, m0, m2 ; 2 3 6 7 1626*c0909341SAndroid Build Coastguard Worker punpckldq m0, m2 ; 0 1 4 5 1627*c0909341SAndroid Build Coastguard Worker punpckldq m6, m7, m1 ; 8 9 c d 1628*c0909341SAndroid Build Coastguard Worker punpckhdq m7, m1 ; a b e f 1629*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m8 1630*c0909341SAndroid Build Coastguard Worker call .write_2x4x2 1631*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m5, m8 1632*c0909341SAndroid Build Coastguard Worker call .write_2x4x2 1633*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m6, m8 1634*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+strideq*4] 1635*c0909341SAndroid Build Coastguard Worker call .write_2x4x2 1636*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m7, m8 1637*c0909341SAndroid Build Coastguard Worker call .write_2x4x2 1638*c0909341SAndroid Build Coastguard Worker ret 1639*c0909341SAndroid Build Coastguard WorkerALIGN function_align 1640*c0909341SAndroid Build Coastguard Worker.write_2x4x2: 
1641*c0909341SAndroid Build Coastguard Worker movq xm1, [dstq+strideq*0] 1642*c0909341SAndroid Build Coastguard Worker movhps xm1, [dstq+strideq*1] 1643*c0909341SAndroid Build Coastguard Worker vpbroadcastq m2, [dstq+strideq*4] 1644*c0909341SAndroid Build Coastguard Worker vpblendd m1, m2, 0x30 1645*c0909341SAndroid Build Coastguard Worker vpbroadcastq m2, [dstq+r6 ] 1646*c0909341SAndroid Build Coastguard Worker vpblendd m1, m2, 0xc0 1647*c0909341SAndroid Build Coastguard Worker mova [cq+32*0], m3 1648*c0909341SAndroid Build Coastguard Worker mova [cq+32*1], m3 1649*c0909341SAndroid Build Coastguard Worker add cq, 32*2 1650*c0909341SAndroid Build Coastguard Worker paddw m1, m0 1651*c0909341SAndroid Build Coastguard Worker pmaxsw m1, m3 1652*c0909341SAndroid Build Coastguard Worker pminsw m1, m4 1653*c0909341SAndroid Build Coastguard Worker vextracti128 xm2, m1, 1 1654*c0909341SAndroid Build Coastguard Worker movq [dstq+strideq*0], xm1 1655*c0909341SAndroid Build Coastguard Worker movhps [dstq+strideq*1], xm1 1656*c0909341SAndroid Build Coastguard Worker movq [dstq+strideq*4], xm2 1657*c0909341SAndroid Build Coastguard Worker movhps [dstq+r6 ], xm2 1658*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+strideq*2] 1659*c0909341SAndroid Build Coastguard Worker ret 1660*c0909341SAndroid Build Coastguard Worker 1661*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN dct, dct, 12 1662*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN dct, identity, 12 1663*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN dct, adst, 12 1664*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN dct, flipadst, 12 1665*c0909341SAndroid Build Coastguard Worker 1666*c0909341SAndroid Build Coastguard Workercglobal idct_4x16_internal_12bpc, 0, 7, 14, dst, stride, c, eob, tx2 1667*c0909341SAndroid Build Coastguard Worker jmp m(idct_4x16_internal_10bpc).pass1 1668*c0909341SAndroid Build Coastguard Worker.pass2: 1669*c0909341SAndroid Build Coastguard Worker punpckldq 
m8, m0, m1 1670*c0909341SAndroid Build Coastguard Worker punpckhdq m0, m1 1671*c0909341SAndroid Build Coastguard Worker punpckldq m9, m2, m3 1672*c0909341SAndroid Build Coastguard Worker punpckhdq m2, m3 1673*c0909341SAndroid Build Coastguard Worker punpckldq m1, m4, m5 1674*c0909341SAndroid Build Coastguard Worker punpckhdq m4, m5 1675*c0909341SAndroid Build Coastguard Worker punpckldq m3, m6, m7 1676*c0909341SAndroid Build Coastguard Worker punpckhdq m6, m7 1677*c0909341SAndroid Build Coastguard Worker punpcklqdq m5, m0, m2 ; 2 6 1678*c0909341SAndroid Build Coastguard Worker punpckhqdq m12, m0, m2 ; 3 7 1679*c0909341SAndroid Build Coastguard Worker punpcklqdq m0, m8, m9 ; 0 4 1680*c0909341SAndroid Build Coastguard Worker punpckhqdq m10, m8, m9 ; 1 5 1681*c0909341SAndroid Build Coastguard Worker punpcklqdq m2, m1, m3 ; 8 12 1682*c0909341SAndroid Build Coastguard Worker punpckhqdq m13, m1, m3 ; 9 13 1683*c0909341SAndroid Build Coastguard Worker punpcklqdq m9, m4, m6 ; 10 14 1684*c0909341SAndroid Build Coastguard Worker punpckhqdq m4, m6 ; 11 15 1685*c0909341SAndroid Build Coastguard Worker vperm2i128 m1, m5, m9, 0x20 ; 2 10 1686*c0909341SAndroid Build Coastguard Worker vperm2i128 m3, m9, m5, 0x31 ; 14 6 1687*c0909341SAndroid Build Coastguard Worker vpermq m11, m4, q1302 ; 15 11 1688*c0909341SAndroid Build Coastguard Worker ; interleave 1689*c0909341SAndroid Build Coastguard Worker REPX {vpermq x, x, q3120}, m0, m1, m2, m3, m10 1690*c0909341SAndroid Build Coastguard Worker vpbroadcastd m8, [clip_18b_min] 1691*c0909341SAndroid Build Coastguard Worker vpbroadcastd m9, [clip_18b_max] 1692*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m8}, m0, m1, m2, m3, m10, m11, m12, m13 1693*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m9}, m0, m1, m2, m3, m10, m11, m12, m13 1694*c0909341SAndroid Build Coastguard Worker call m(idct_16x4_internal_10bpc).pass1_main 1695*c0909341SAndroid Build Coastguard Worker vpermq m6, m12, q1302 ; 7 3 1696*c0909341SAndroid 
Build Coastguard Worker vpermq m5, m13, q3120 ; 9 13 1697*c0909341SAndroid Build Coastguard Worker call m(idct_16x4_internal_10bpc).pass1_main2 1698*c0909341SAndroid Build Coastguard Worker call m(idct_16x4_internal_10bpc).pass1_main3 1699*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 3}, m0, m1, m2, m3, m4, m5, m6, m7 1700*c0909341SAndroid Build Coastguard Worker packssdw m0, m1 1701*c0909341SAndroid Build Coastguard Worker packssdw m1, m2, m3 1702*c0909341SAndroid Build Coastguard Worker packssdw m2, m4, m5 1703*c0909341SAndroid Build Coastguard Worker packssdw m3, m6, m7 1704*c0909341SAndroid Build Coastguard Worker mova m4, [idct16_12_shuf] 1705*c0909341SAndroid Build Coastguard Worker REPX {vpermd x, m4, x}, m0, m1, m2, m3 1706*c0909341SAndroid Build Coastguard Worker vpbroadcastd m9, [pw_16384] 1707*c0909341SAndroid Build Coastguard Worker vpbroadcastd m8, [pixel_12bpc_max] 1708*c0909341SAndroid Build Coastguard Worker call m(idct_4x16_internal_10bpc).pass2_end 1709*c0909341SAndroid Build Coastguard Worker RET 1710*c0909341SAndroid Build Coastguard Worker 1711*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN adst, dct, 12 1712*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN adst, adst, 12 1713*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN adst, flipadst, 12 1714*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN adst, identity, 12 1715*c0909341SAndroid Build Coastguard Worker 1716*c0909341SAndroid Build Coastguard Workercglobal iadst_4x16_internal_12bpc, 0, 7, 14, dst, stride, c, eob, tx2 1717*c0909341SAndroid Build Coastguard Worker call .main_pass1 1718*c0909341SAndroid Build Coastguard Worker psrad m0, m4, 12 1719*c0909341SAndroid Build Coastguard Worker psrad m1, m5, 12 1720*c0909341SAndroid Build Coastguard Worker psrad m2, 12 1721*c0909341SAndroid Build Coastguard Worker psrad m3, 12 1722*c0909341SAndroid Build Coastguard Worker psrad m4, m8, 12 1723*c0909341SAndroid Build Coastguard Worker psrad m5, m9, 12 
1724*c0909341SAndroid Build Coastguard Worker psrad m6, 12 1725*c0909341SAndroid Build Coastguard Worker psrad m7, 12 1726*c0909341SAndroid Build Coastguard Worker jmp tx2q 1727*c0909341SAndroid Build Coastguard Worker.pass2: 1728*c0909341SAndroid Build Coastguard Worker vpbroadcastd m12, [clip_18b_min] 1729*c0909341SAndroid Build Coastguard Worker vpbroadcastd m13, [clip_18b_max] 1730*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m12}, m0, m1, m2, m3, m4, m5, m6, m7 1731*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m13}, m0, m1, m2, m3, m4, m5, m6, m7 1732*c0909341SAndroid Build Coastguard Worker call .transpose_16x4 1733*c0909341SAndroid Build Coastguard Worker call m(iadst_4x16_internal_10bpc).main2 1734*c0909341SAndroid Build Coastguard Worker pshufd m4, m5, q1032 1735*c0909341SAndroid Build Coastguard Worker psrad m5, m6, 3 1736*c0909341SAndroid Build Coastguard Worker pshufd m6, m7, q1032 1737*c0909341SAndroid Build Coastguard Worker psrad m7, m8, 3 1738*c0909341SAndroid Build Coastguard Worker REPX {pshufd x, x, q1032}, m0, m2 1739*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 3}, m0, m1, m2, m3, m4, m6 1740*c0909341SAndroid Build Coastguard Worker.pass2_end: 1741*c0909341SAndroid Build Coastguard Worker packssdw m0, m1 1742*c0909341SAndroid Build Coastguard Worker packssdw m1, m2, m3 1743*c0909341SAndroid Build Coastguard Worker packssdw m2, m4, m5 1744*c0909341SAndroid Build Coastguard Worker packssdw m3, m6, m7 1745*c0909341SAndroid Build Coastguard Worker mova m4, [iadst16_12_shuf] 1746*c0909341SAndroid Build Coastguard Worker REPX {vpermd x, m4, x}, m0, m1, m2, m3 1747*c0909341SAndroid Build Coastguard Worker vpbroadcastd m9, [pw_16384] 1748*c0909341SAndroid Build Coastguard Worker vpbroadcastd m8, [pixel_12bpc_max] 1749*c0909341SAndroid Build Coastguard Worker lea r6, [strideq*3] 1750*c0909341SAndroid Build Coastguard Worker pxor m7, m7 1751*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m9 1752*c0909341SAndroid 
Build Coastguard Worker call m(iadst_4x16_internal_10bpc).write_4x4 1753*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m9, m1 1754*c0909341SAndroid Build Coastguard Worker call m(iadst_4x16_internal_10bpc).write_4x4 1755*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m9, m2 1756*c0909341SAndroid Build Coastguard Worker call m(iadst_4x16_internal_10bpc).write_4x4 1757*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m9, m3 1758*c0909341SAndroid Build Coastguard Worker call m(iadst_4x16_internal_10bpc).write_4x4 1759*c0909341SAndroid Build Coastguard Worker RET 1760*c0909341SAndroid Build Coastguard WorkerALIGN function_align 1761*c0909341SAndroid Build Coastguard Worker.transpose_16x4: 1762*c0909341SAndroid Build Coastguard Worker ; transpose & interleave 1763*c0909341SAndroid Build Coastguard Worker punpckldq m8, m0, m1 1764*c0909341SAndroid Build Coastguard Worker punpckhdq m0, m1 1765*c0909341SAndroid Build Coastguard Worker punpckldq m9, m2, m3 1766*c0909341SAndroid Build Coastguard Worker punpckhdq m2, m3 1767*c0909341SAndroid Build Coastguard Worker punpckldq m1, m4, m5 1768*c0909341SAndroid Build Coastguard Worker punpckhdq m4, m5 1769*c0909341SAndroid Build Coastguard Worker punpckldq m3, m6, m7 1770*c0909341SAndroid Build Coastguard Worker punpckhdq m6, m7 1771*c0909341SAndroid Build Coastguard Worker punpcklqdq m10, m8, m0 1772*c0909341SAndroid Build Coastguard Worker punpckhqdq m0, m8 1773*c0909341SAndroid Build Coastguard Worker punpcklqdq m11, m9, m2 1774*c0909341SAndroid Build Coastguard Worker punpckhqdq m2, m9 1775*c0909341SAndroid Build Coastguard Worker punpcklqdq m8, m1, m4 1776*c0909341SAndroid Build Coastguard Worker punpckhqdq m4, m1 1777*c0909341SAndroid Build Coastguard Worker punpcklqdq m9, m3, m6 1778*c0909341SAndroid Build Coastguard Worker punpckhqdq m6, m3 1779*c0909341SAndroid Build Coastguard Worker vperm2i128 m5, m0, m2, 0x31 ; 7 5 1780*c0909341SAndroid Build Coastguard Worker vperm2i128 m7, m0, m2, 0x20 ; 3 1 
1781*c0909341SAndroid Build Coastguard Worker vperm2i128 m0, m10, m11, 0x20 ; 0 2 1782*c0909341SAndroid Build Coastguard Worker vperm2i128 m2, m10, m11, 0x31 ; 4 6 1783*c0909341SAndroid Build Coastguard Worker vperm2i128 m1, m4, m6, 0x31 ; 15 13 1784*c0909341SAndroid Build Coastguard Worker vperm2i128 m3, m4, m6, 0x20 ; 11 9 1785*c0909341SAndroid Build Coastguard Worker vperm2i128 m4, m8, m9, 0x20 ; 8 10 1786*c0909341SAndroid Build Coastguard Worker vperm2i128 m6, m8, m9, 0x31 ; 12 14 1787*c0909341SAndroid Build Coastguard Worker ret 1788*c0909341SAndroid Build Coastguard WorkerALIGN function_align 1789*c0909341SAndroid Build Coastguard Worker.main_pass1: 1790*c0909341SAndroid Build Coastguard Worker call m(iadst_16x4_internal_10bpc).main 1791*c0909341SAndroid Build Coastguard Worker vpbroadcastd m6, [pd_3072] 1792*c0909341SAndroid Build Coastguard Worker paddd m10, m4, m5 1793*c0909341SAndroid Build Coastguard Worker psubd m4, m3 1794*c0909341SAndroid Build Coastguard Worker psubd m5, m3 1795*c0909341SAndroid Build Coastguard Worker paddd m3, m10 1796*c0909341SAndroid Build Coastguard Worker psubd m8, m7, m1 1797*c0909341SAndroid Build Coastguard Worker paddd m7, m9 1798*c0909341SAndroid Build Coastguard Worker psubd m9, m1 1799*c0909341SAndroid Build Coastguard Worker paddd m7, m1 1800*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 1 }, m4, m5, m2, m3, m8, m9, m0, m7 1801*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m6}, m4, m5, m2, m3, m8, m9, m7 1802*c0909341SAndroid Build Coastguard Worker paddd m6, m0 1803*c0909341SAndroid Build Coastguard Worker ret 1804*c0909341SAndroid Build Coastguard Worker 1805*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN flipadst, dct, 12 1806*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN flipadst, adst, 12 1807*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN flipadst, flipadst, 12 1808*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN flipadst, identity, 12 
1809*c0909341SAndroid Build Coastguard Worker 1810*c0909341SAndroid Build Coastguard Workercglobal iflipadst_4x16_internal_12bpc, 0, 7, 14, dst, stride, c, eob, tx2 1811*c0909341SAndroid Build Coastguard Worker call m(iadst_4x16_internal_12bpc).main_pass1 1812*c0909341SAndroid Build Coastguard Worker psrad m0, m3, 12 1813*c0909341SAndroid Build Coastguard Worker psrad m1, m2, 12 1814*c0909341SAndroid Build Coastguard Worker psrad m2, m5, 12 1815*c0909341SAndroid Build Coastguard Worker psrad m3, m4, 12 1816*c0909341SAndroid Build Coastguard Worker psrad m4, m7, 12 1817*c0909341SAndroid Build Coastguard Worker psrad m5, m6, 12 1818*c0909341SAndroid Build Coastguard Worker psrad m6, m9, 12 1819*c0909341SAndroid Build Coastguard Worker psrad m7, m8, 12 1820*c0909341SAndroid Build Coastguard Worker jmp tx2q 1821*c0909341SAndroid Build Coastguard Worker.pass2: 1822*c0909341SAndroid Build Coastguard Worker vpbroadcastd m12, [clip_18b_min] 1823*c0909341SAndroid Build Coastguard Worker vpbroadcastd m13, [clip_18b_max] 1824*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m12}, m0, m1, m2, m3, m4, m5, m6, m7 1825*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m13}, m0, m1, m2, m3, m4, m5, m6, m7 1826*c0909341SAndroid Build Coastguard Worker call m(iadst_4x16_internal_12bpc).transpose_16x4 1827*c0909341SAndroid Build Coastguard Worker call m(iadst_4x16_internal_10bpc).main2 1828*c0909341SAndroid Build Coastguard Worker pshufd m4, m3, q1032 1829*c0909341SAndroid Build Coastguard Worker psrad m3, m5, 3 1830*c0909341SAndroid Build Coastguard Worker psrad m5, m2, 3 1831*c0909341SAndroid Build Coastguard Worker pshufd m2, m6, q1032 1832*c0909341SAndroid Build Coastguard Worker pshufd m6, m1, q1032 1833*c0909341SAndroid Build Coastguard Worker psrad m1, m7, 3 1834*c0909341SAndroid Build Coastguard Worker psrad m7, m0, 3 1835*c0909341SAndroid Build Coastguard Worker pshufd m0, m8, q1032 1836*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 3}, m0, m2, m4, 
m6 1837*c0909341SAndroid Build Coastguard Worker jmp m(iadst_4x16_internal_12bpc).pass2_end 1838*c0909341SAndroid Build Coastguard Worker 1839*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN identity, dct, 12 1840*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN identity, adst, 12 1841*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN identity, flipadst, 12 1842*c0909341SAndroid Build Coastguard WorkerINV_TXFM_4X16_FN identity, identity, 12 1843*c0909341SAndroid Build Coastguard Worker 1844*c0909341SAndroid Build Coastguard Workercglobal iidentity_4x16_internal_12bpc, 0, 7, 14, dst, stride, c, eob, tx2 1845*c0909341SAndroid Build Coastguard Worker vpbroadcastd m8, [pd_1697] 1846*c0909341SAndroid Build Coastguard Worker mova m0, [cq+32*0] 1847*c0909341SAndroid Build Coastguard Worker mova m4, [cq+32*1] 1848*c0909341SAndroid Build Coastguard Worker mova m1, [cq+32*2] 1849*c0909341SAndroid Build Coastguard Worker mova m5, [cq+32*3] 1850*c0909341SAndroid Build Coastguard Worker vpbroadcastd m9, [pd_6144] 1851*c0909341SAndroid Build Coastguard Worker pmulld m2, m8, m0 1852*c0909341SAndroid Build Coastguard Worker pmulld m6, m8, m4 1853*c0909341SAndroid Build Coastguard Worker pmulld m3, m8, m1 1854*c0909341SAndroid Build Coastguard Worker pmulld m7, m8, m5 1855*c0909341SAndroid Build Coastguard Worker mova m10, [cq+32*4] 1856*c0909341SAndroid Build Coastguard Worker mova m11, [cq+32*5] 1857*c0909341SAndroid Build Coastguard Worker mova m12, [cq+32*6] 1858*c0909341SAndroid Build Coastguard Worker mova m13, [cq+32*7] 1859*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m9}, m2, m6, m3, m7 1860*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 12}, m2, m6, m3, m7 1861*c0909341SAndroid Build Coastguard Worker paddd m0, m2 1862*c0909341SAndroid Build Coastguard Worker pmulld m2, m8, m10 1863*c0909341SAndroid Build Coastguard Worker paddd m4, m6 1864*c0909341SAndroid Build Coastguard Worker pmulld m6, m8, m11 1865*c0909341SAndroid Build 
Coastguard Worker paddd m1, m3 1866*c0909341SAndroid Build Coastguard Worker pmulld m3, m8, m12 1867*c0909341SAndroid Build Coastguard Worker paddd m5, m7 1868*c0909341SAndroid Build Coastguard Worker pmulld m7, m8, m13 1869*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 1 }, m0, m4, m1, m5 1870*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m9}, m2, m6, m3, m7 1871*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 12}, m2, m6, m3, m7 1872*c0909341SAndroid Build Coastguard Worker paddd m2, m10 1873*c0909341SAndroid Build Coastguard Worker paddd m6, m11 1874*c0909341SAndroid Build Coastguard Worker paddd m3, m12 1875*c0909341SAndroid Build Coastguard Worker paddd m7, m13 1876*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 1 }, m2, m6, m3, m7 1877*c0909341SAndroid Build Coastguard Worker jmp tx2q 1878*c0909341SAndroid Build Coastguard Worker.pass2: 1879*c0909341SAndroid Build Coastguard Worker vpbroadcastd m12, [clip_18b_min] 1880*c0909341SAndroid Build Coastguard Worker vpbroadcastd m13, [clip_18b_max] 1881*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m12}, m0, m1, m2, m3, m4, m5, m6, m7 1882*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m13}, m0, m1, m2, m3, m4, m5, m6, m7 1883*c0909341SAndroid Build Coastguard Worker vpbroadcastd m8, [pd_5793] 1884*c0909341SAndroid Build Coastguard Worker vpbroadcastd m9, [pd_1024] 1885*c0909341SAndroid Build Coastguard Worker REPX {pmulld x, m8}, m0, m1, m2, m3, m4, m5, m6, m7 1886*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m9}, m0, m1, m2, m3, m4, m5, m6, m7 1887*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 14}, m0, m1, m2, m3, m4, m5, m6, m7 1888*c0909341SAndroid Build Coastguard Worker packssdw m0, m4 1889*c0909341SAndroid Build Coastguard Worker packssdw m1, m5 1890*c0909341SAndroid Build Coastguard Worker packssdw m2, m6 1891*c0909341SAndroid Build Coastguard Worker packssdw m3, m7 1892*c0909341SAndroid Build Coastguard Worker vpbroadcastd m8, 
[pw_16384] 1893*c0909341SAndroid Build Coastguard Worker vpbroadcastd m4, [pixel_12bpc_max] 1894*c0909341SAndroid Build Coastguard Worker call m(iidentity_4x16_internal_10bpc).pass2_end 1895*c0909341SAndroid Build Coastguard Worker RET 1896*c0909341SAndroid Build Coastguard Worker 1897*c0909341SAndroid Build Coastguard Worker%macro INV_TXFM_8X4_FN 2-3 10 ; type1, type2, bitdepth 1898*c0909341SAndroid Build Coastguard Worker INV_TXFM_FN %1, %2, 0, 8x4, %3 1899*c0909341SAndroid Build Coastguard Worker%ifidn %1_%2, dct_dct 1900*c0909341SAndroid Build Coastguard Worker vpbroadcastd m2, [dconly_%3bpc] 1901*c0909341SAndroid Build Coastguard Worker%if %3 = 10 1902*c0909341SAndroid Build Coastguard Worker.dconly: 1903*c0909341SAndroid Build Coastguard Worker imul r6d, [cq], 181 1904*c0909341SAndroid Build Coastguard Worker mov [cq], eobd ; 0 1905*c0909341SAndroid Build Coastguard Worker or r3d, 4 1906*c0909341SAndroid Build Coastguard Worker add r6d, 128 1907*c0909341SAndroid Build Coastguard Worker sar r6d, 8 1908*c0909341SAndroid Build Coastguard Worker imul r6d, 181 1909*c0909341SAndroid Build Coastguard Worker add r6d, 128 1910*c0909341SAndroid Build Coastguard Worker sar r6d, 8 1911*c0909341SAndroid Build Coastguard Worker jmp m(inv_txfm_add_dct_dct_8x8_10bpc).dconly3 1912*c0909341SAndroid Build Coastguard Worker%else 1913*c0909341SAndroid Build Coastguard Worker jmp m(inv_txfm_add_dct_dct_8x4_10bpc).dconly 1914*c0909341SAndroid Build Coastguard Worker%endif 1915*c0909341SAndroid Build Coastguard Worker%endif 1916*c0909341SAndroid Build Coastguard Worker%endmacro 1917*c0909341SAndroid Build Coastguard Worker 1918*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN dct, dct 1919*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN dct, identity 1920*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN dct, adst 1921*c0909341SAndroid Build Coastguard WorkerINV_TXFM_8X4_FN dct, flipadst 1922*c0909341SAndroid Build Coastguard Worker 1923*c0909341SAndroid Build 
; idct_8x4_internal_10bpc
;
; Entry:  loads the 18-bit clip bounds into m8/m9, then falls into .pass1.
; .pass1: loads eight 16-byte coefficient rows from cq, pairs them per
;         register, scales each by 2896/4096 with rounding, runs .main and
;         the final butterfly, then continues at the address in tx2q
;         (presumably the pass-2 routine selected by the wrapper).
; .pass2: packs the dwords in m0-m3 to words, runs the word-domain 4-point
;         DCT and finishes in iadst_8x4_internal_10bpc.end.
; .main:  dword-domain 8-point DCT core; expects m6 = pd_2896, m7 = pd_2048
;         and the clip bounds in m8/m9 (all loaded by the callers above).
cglobal idct_8x4_internal_10bpc, 0, 7, 10, dst, stride, c, eob, tx2
    vpbroadcastd    m8, [clip_18b_min]
    vpbroadcastd    m9, [clip_18b_max]
.pass1:
    vbroadcasti128  m1, [cq+16*1]
    vbroadcasti128  m0, [cq+16*5]
    vbroadcasti128  m2, [cq+16*3]
    vbroadcasti128  m3, [cq+16*7]
    vpbroadcastd    m6, [pd_2896]
    shufpd          m1, m0, 0x0c ; 1 5
    shufpd          m3, m2, 0x0c ; 7 3
    vbroadcasti128  m0, [cq+16*0]
    vbroadcasti128  m4, [cq+16*2]
    vbroadcasti128  m2, [cq+16*4]
    vbroadcasti128  m5, [cq+16*6]
    vpbroadcastd    m7, [pd_2048]
    shufpd          m0, m4, 0x0c ; 0 2
    shufpd          m2, m5, 0x0c ; 4 6
    ; x = (x * 2896 + 2048) >> 12
    REPX {pmulld x, m6}, m1, m3, m0, m2
    REPX {paddd x, m7}, m1, m3, m0, m2
    REPX {psrad x, 12}, m1, m3, m0, m2
    call .main
    psubd           m3, m0, m4 ; out7 out6 (interleaved)
    paddd           m0, m4 ; out0 out1 (interleaved)
    paddd           m1, m2, m5 ; out3 out2 (interleaved)
    psubd           m2, m5 ; out4 out5 (interleaved)
    ; swap the 64-bit halves of each lane so m1/m3 are in ascending order
    pshufd          m1, m1, q1032
    pshufd          m3, m3, q1032
    jmp             tx2q
.pass2:
    vbroadcasti128  m4, [deint_shuf]
    packssdw        m0, m1
    packssdw        m2, m3
    vperm2i128      m1, m0, m2, 0x31
    vinserti128     m0, xm2, 1
    pshufb          m0, m4
    pshufb          m1, m4
    IDCT4_1D_PACKED_WORD 0, 1, 2, 3, 4, 7
    vpermq          m0, m0, q3120 ; out0 out1
    vpermq          m2, m1, q2031 ; out2 out3
    jmp m(iadst_8x4_internal_10bpc).end
ALIGN function_align
.main:
    ITX_MULSUB_2D   1, 3, 4, 5, 6, 7, 799_3406, 4017_2276, 1
    IDCT4_1D_PACKED 0, 2, 4, 5, 6, 7
    vpbroadcastd    m6, [pd_2896]
    punpcklqdq      m4, m1, m3 ; t4a t7a
    punpckhqdq      m1, m3 ; t5a t6a
    psubd           m3, m4, m1 ; t5a t6a
    paddd           m4, m1 ; t4 t7
    REPX {pmaxsd x, m8}, m3, m4, m0, m2
    REPX {pminsd x, m9}, m3, m4, m0, m2
    pmulld          m3, m6
    pshufd          m1, m3, q1032
    paddd           m3, m7
    psubd           m5, m3, m1
    paddd           m1, m3
    psrad           m5, 12
    psrad           m1, 12
    vpblendd        m5, m4, 0x33 ; t4 t5
    punpckhqdq      m4, m1 ; t7 t6
    ret

INV_TXFM_8X4_FN adst, dct
INV_TXFM_8X4_FN adst, adst
INV_TXFM_8X4_FN adst, flipadst
INV_TXFM_8X4_FN adst, identity

; iadst_8x4_internal_10bpc
;
; Entry:       pass 1; reuses the 4x8 ADST core, reorders and sign-corrects
;              its outputs (m6 supplies the signs for psignd), then continues
;              at tx2q.
; .pass2:      packs to words via .pass2_main, which tail-jumps into the
;              8 bpc ADST core, then orders the outputs and falls into .end.
; .end:        rounds both output registers with pmulhrsw by pw_2048 (result
;              in m0/m1) and loads the 10 bpc pixel maximum into m5.
; .end2:       adds m0/m1 to four 16-byte destination rows, clamps to
;              [0, m5], stores them and zeroes 4*32 bytes at cq. Shared with
;              the 12 bpc functions, which enter with their own m5.
; .pass2_main: dword -> word packing plus deint_shuf shuffle ahead of the
;              8 bpc core; leaves deint_shuf+128 in r6.
; .main:       loads the four 32-byte coefficient rows scaled by 2896/4096
;              with rounding and falls into .main2 (IADST4_1D).
cglobal iadst_8x4_internal_10bpc, 0, 7, 10, dst, stride, c, eob, tx2
    call m(iadst_4x8_internal_10bpc).main
    vpblendd        m3, m0, m4, 0x33 ; out6 out7
    vpblendd        m0, m4, 0xcc ; out0 out1
    pshufd          m1, m5, q1032
    psignd          m2, m6 ; out4 out5
    psignd          m1, m6 ; out2 out3
    jmp             tx2q
.pass2:
    call .pass2_main
    vpermq          m0, m0, q3120 ; out0 out1
    vpermq          m2, m1, q3120 ; out2 out3
.end:
    vpbroadcastd    m1, [pw_2048]
    pmulhrsw        m0, m1
    pmulhrsw        m1, m2
    vpbroadcastd    m5, [pixel_10bpc_max]
.end2:
    ; m2 = dst rows 0 and 1, m3 = dst rows 2 and 3
    mova            xm2, [dstq+strideq*0]
    vinserti128     m2, [dstq+strideq*1], 1
    lea             r6, [dstq+strideq*2]
    mova            xm3, [r6 +strideq*0]
    vinserti128     m3, [r6 +strideq*1], 1
    pxor            m4, m4
    ; clear the coefficient buffer
    REPX {mova [cq+32*x], m4}, 0, 1, 2, 3
    paddw           m0, m2
    paddw           m1, m3
    ; clamp to [0, m5]
    pmaxsw          m0, m4
    pmaxsw          m1, m4
    pminsw          m0, m5
    pminsw          m1, m5
    mova            [dstq+strideq*0], xm0
    vextracti128    [dstq+strideq*1], m0, 1
    mova            [r6 +strideq*0], xm1
    vextracti128    [r6 +strideq*1], m1, 1
    RET
ALIGN function_align
.pass2_main:
    vbroadcasti128  m4, [deint_shuf]
    packssdw        m0, m1
    packssdw        m2, m3
    ; NOTE(review): r6 is presumably the constant base expected by the
    ; 8 bpc routine jumped to below - confirm against that file.
    lea             r6, [deint_shuf+128]
    vperm2i128      m1, m0, m2, 0x31
    vinserti128     m0, xm2, 1
    pshufb          m0, m4
    pshufb          m1, m4
    jmp m(iadst_8x4_internal_8bpc).main
ALIGN function_align
.main:
    vpbroadcastd    m1, [pd_2896]
    pmulld          m0, m1, [cq+32*0]
    pmulld          m3, m1, [cq+32*3]
    pmulld          m2, m1, [cq+32*2]
    pmulld          m1, [cq+32*1]
    vpbroadcastd    m4, [pd_2048]
    REPX {paddd x, m4}, m0, m3, m2, m1
    REPX {psrad x, 12}, m0, m3, m2, m1
.main2:
    IADST4_1D
    ret

INV_TXFM_8X4_FN flipadst, dct
INV_TXFM_8X4_FN flipadst, adst
INV_TXFM_8X4_FN flipadst, flipadst
INV_TXFM_8X4_FN flipadst, identity

; iflipadst_8x4_internal_10bpc
;
; Same cores as iadst_8x4_internal_10bpc with the outputs arranged in the
; opposite order. Pass 2 reuses .pass2_main and .end of the ADST function.
cglobal iflipadst_8x4_internal_10bpc, 0, 5, 10, dst, stride, c, eob, tx2
    call m(iadst_4x8_internal_10bpc).main
    shufpd          m3, m4, m0, 0x05
    shufpd          m0, m4, 0x05
    psignd          m2, m6
    pshufd          m6, m6, q1032
    pshufd          m1, m2, q1032
    psignd          m2, m5, m6
    jmp             tx2q
.pass2:
    call m(iadst_8x4_internal_10bpc).pass2_main
    vpermq          m2, m0, q2031
    vpermq          m0, m1, q2031
    jmp m(iadst_8x4_internal_10bpc).end

INV_TXFM_8X4_FN identity, dct
INV_TXFM_8X4_FN identity, adst
INV_TXFM_8X4_FN identity, flipadst
INV_TXFM_8X4_FN identity, identity

; iidentity_8x4_internal_10bpc
;
; .pass1:     loads four 32-byte coefficient rows (qwords permuted q3120),
;             scales by 2896/4096 with rounding, doubles the result and
;             continues at tx2q. Leaves m7 = pd_2048.
; .pass2:     packs to words, adds pmulhrsw(x, pw_1697x8) to x with
;             saturation and narrows m7 to words so it can be used as a
;             word multiplier (the source notes it as pw_2048).
; .pass2_end: interleaves/transposes the words, rounds with pmulhrsw by m7,
;             adds to four destination rows (m0 = rows 0 and 2, m1 = rows
;             1 and 3), clamps to [0, m5], stores, and zeroes 4*32 bytes at
;             cq. Also entered from the 12 bpc identity function with its
;             own m5/m7.
cglobal iidentity_8x4_internal_10bpc, 0, 7, 10, dst, stride, c, eob, tx2
.pass1:
    vpbroadcastd    m4, [pd_2896]
    vpermq          m0, [cq+32*0], q3120
    vpermq          m1, [cq+32*1], q3120
    vpermq          m2, [cq+32*2], q3120
    vpermq          m3, [cq+32*3], q3120
    vpbroadcastd    m7, [pd_2048]
    REPX {pmulld x, m4}, m0, m1, m2, m3
    REPX {paddd x, m7}, m0, m1, m2, m3
    REPX {psrad x, 12}, m0, m1, m2, m3
    REPX {paddd x, x }, m0, m1, m2, m3
    jmp             tx2q
.pass2:
    vpbroadcastd    m5, [pixel_10bpc_max]
    vpbroadcastd    m4, [pw_1697x8]
    packssdw        m0, m1
    packssdw        m2, m3
    pmulhrsw        m1, m4, m0
    pmulhrsw        m4, m2
    paddsw          m0, m1
    paddsw          m2, m4
    ; NOTE(review): relies on m7 still holding the pd_2048 broadcast from
    ; pass 1; confirm for pass-1 routines not visible in this chunk.
    packssdw        m7, m7 ; pw_2048
.pass2_end:
    punpckhwd       m1, m0, m2
    punpcklwd       m0, m2
    lea             r6, [dstq+strideq*2]
    punpckhwd       m2, m0, m1
    punpcklwd       m0, m1
    pmulhrsw        m2, m7
    pmulhrsw        m0, m7
    punpckhwd       m1, m0, m2
    punpcklwd       m0, m2
    mova            xm2, [dstq+strideq*0]
    vinserti128     m2, [r6 +strideq*0], 1
    mova            xm3, [dstq+strideq*1]
    vinserti128     m3, [r6 +strideq*1], 1
    pxor            m4, m4
    ; clear the coefficient buffer
    REPX {mova [cq+32*x], m4}, 0, 1, 2, 3
    paddw           m0, m2
    paddw           m1, m3
    ; clamp to [0, m5]
    pmaxsw          m0, m4
    pmaxsw          m1, m4
    pminsw          m0, m5
    pminsw          m1, m5
    mova            [dstq+strideq*0], xm0
    mova            [dstq+strideq*1], xm1
    vextracti128    [r6 +strideq*0], m0, 1
    vextracti128    [r6 +strideq*1], m1, 1
    RET

INV_TXFM_8X4_FN dct, dct, 12
INV_TXFM_8X4_FN dct, identity, 12
INV_TXFM_8X4_FN dct, adst, 12
INV_TXFM_8X4_FN dct, flipadst, 12

; idct_8x4_internal_12bpc
;
; Pass 1 is the 10 bpc pass 1 run with the 20-bit clip bounds in m8/m9.
; Pass 2 stays in the dword domain: clamps m0-m3 to the 18-bit bounds,
; transposes, runs IDCT4_1D and finishes in iadst_8x4_internal_12bpc.end.
cglobal idct_8x4_internal_12bpc, 0, 7, 10, dst, stride, c, eob, tx2
    vpbroadcastd    m8, [clip_20b_min]
    vpbroadcastd    m9, [clip_20b_max]
    jmp m(idct_8x4_internal_10bpc).pass1
.pass2:
    vpbroadcastd    m8, [clip_18b_min]
    vpbroadcastd    m9, [clip_18b_max]
    REPX {pmaxsd x, m8}, m0, m1, m2, m3
    REPX {pminsd x, m9}, m0, m1, m2, m3
    call m(iadst_8x4_internal_12bpc).transpose_4x8
    IDCT4_1D        0, 1, 2, 3, 4, 5, 6, 7
    jmp m(iadst_8x4_internal_12bpc).end

INV_TXFM_8X4_FN adst, dct, 12
INV_TXFM_8X4_FN adst, adst, 12
INV_TXFM_8X4_FN adst, flipadst, 12
INV_TXFM_8X4_FN adst, identity, 12

; iadst_8x4_internal_12bpc
;
; Entry:          pass 1; loads the 20-bit clip bounds into m8/m9, enters
;                 the 4x8 ADST core at .main2, then reorders and
;                 sign-corrects the outputs exactly as the 10 bpc function
;                 does and continues at tx2q.
; .pass2:         clamps m0-m3 to the 18-bit bounds, runs .pass2_main and
;                 adds the pd_2048 rounding term to the four results.
; .pass2_end:     arithmetic shift right by 12 (entered from the flipadst
;                 12 bpc function as well).
; .end:           further shift right by 3, pack to words, pmulhrsw by
;                 pw_16384, fix the qword order, load the 12 bpc pixel
;                 maximum into m5 and finish in the shared 10 bpc .end2
;                 store routine.
; .pass2_main:    .transpose_4x8 followed by a tail-jump into the dword
;                 IADST4_1D wrapper (.main2 of the 10 bpc function).
; .transpose_4x8: deinterleaves the dwords in m0-m3 and regroups them so
;                 m0-m3 hold out0-out3 (per the source comments); clobbers
;                 m4 and m5.
cglobal iadst_8x4_internal_12bpc, 0, 7, 10, dst, stride, c, eob, tx2
    vpbroadcastd    m8, [clip_20b_min]
    vpbroadcastd    m9, [clip_20b_max]
    call m(iadst_4x8_internal_10bpc).main2
    vpblendd        m3, m0, m4, 0x33 ; out6 out7
    vpblendd        m0, m4, 0xcc ; out0 out1
    pshufd          m1, m5, q1032
    psignd          m2, m6 ; out4 out5
    psignd          m1, m6 ; out2 out3
    jmp             tx2q
.pass2:
    vpbroadcastd    m8, [clip_18b_min]
    vpbroadcastd    m9, [clip_18b_max]
    REPX {pmaxsd x, m8}, m0, m1, m2, m3
    REPX {pminsd x, m9}, m0, m1, m2, m3
    call .pass2_main
    ; add the rounding term; the results for m0/m1 arrive in m4/m6
    vpbroadcastd    m5, [pd_2048]
    paddd           m0, m5, m4
    paddd           m1, m5, m6
    paddd           m2, m5
    paddd           m3, m5
.pass2_end:
    REPX {psrad x, 12}, m0, m1, m2, m3
.end:
    vpbroadcastd    m4, [pw_16384]
    REPX {psrad x, 3}, m0, m1, m2, m3
    packssdw        m0, m1
    packssdw        m2, m3
    pmulhrsw        m0, m4
    pmulhrsw        m1, m2, m4
    vpermq          m0, m0, q3120 ; out0 out1
    vpermq          m1, m1, q3120 ; out2 out3
    vpbroadcastd    m5, [pixel_12bpc_max]
    jmp m(iadst_8x4_internal_10bpc).end2
ALIGN function_align
.pass2_main:
    call .transpose_4x8
    jmp m(iadst_8x4_internal_10bpc).main2
ALIGN function_align
.transpose_4x8:
    ; deinterleave
    pshufd          m0, m0, q3120
    pshufd          m1, m1, q3120
    pshufd          m2, m2, q3120
    pshufd          m3, m3, q3120
    ; transpose
    punpcklqdq      m4, m0, m1
    punpckhqdq      m0, m1
    punpcklqdq      m5, m2, m3
    punpckhqdq      m2, m3
    vperm2i128      m1, m0, m2, 0x20 ; out1
    vperm2i128      m3, m0, m2, 0x31 ; out3
    vperm2i128      m2, m4, m5, 0x31 ; out2
    vperm2i128      m0, m4, m5, 0x20 ; out0
    ret
INV_TXFM_8X4_FN flipadst, dct, 12
INV_TXFM_8X4_FN flipadst, adst, 12
INV_TXFM_8X4_FN flipadst, flipadst, 12
INV_TXFM_8X4_FN flipadst, identity, 12

; iflipadst_8x4_internal_12bpc
;
; Pass 1 mirrors the 10 bpc flipadst pass 1 but loads the 20-bit clip
; bounds and enters the 4x8 ADST core at .main2. Pass 2 clamps to the
; 18-bit bounds, reuses .pass2_main of the 12 bpc ADST function, adds the
; pd_2048 rounding term while assigning the results in the opposite order,
; and finishes in that function's .pass2_end.
cglobal iflipadst_8x4_internal_12bpc, 0, 5, 10, dst, stride, c, eob, tx2
    vpbroadcastd    m8, [clip_20b_min]
    vpbroadcastd    m9, [clip_20b_max]
    call m(iadst_4x8_internal_10bpc).main2
    shufpd          m3, m4, m0, 0x05
    shufpd          m0, m4, 0x05
    psignd          m2, m6
    pshufd          m6, m6, q1032
    pshufd          m1, m2, q1032
    psignd          m2, m5, m6
    jmp             tx2q
.pass2:
    vpbroadcastd    m8, [clip_18b_min]
    vpbroadcastd    m9, [clip_18b_max]
    REPX {pmaxsd x, m8}, m0, m1, m2, m3
    REPX {pminsd x, m9}, m0, m1, m2, m3
    call m(iadst_8x4_internal_12bpc).pass2_main
    vpbroadcastd    m5, [pd_2048]
    paddd           m0, m5, m3
    paddd           m1, m5, m2
    paddd           m3, m5, m4
    paddd           m2, m5, m6
    jmp m(iadst_8x4_internal_12bpc).pass2_end

INV_TXFM_8X4_FN identity, dct, 12
INV_TXFM_8X4_FN identity, adst, 12
INV_TXFM_8X4_FN identity, flipadst, 12
INV_TXFM_8X4_FN identity, identity, 12

; iidentity_8x4_internal_12bpc
;
; Pass 1 is the 10 bpc identity pass 1. Pass 2 clamps m0-m3 to the 18-bit
; bounds, computes (x * 5793 + m7) >> 15, packs to words and finishes in
; the 10 bpc .pass2_end with m5 = 12 bpc pixel maximum and m7 = pw_16384.
cglobal iidentity_8x4_internal_12bpc, 0, 7, 10, dst, stride, c, eob, tx2
    jmp m(iidentity_8x4_internal_10bpc).pass1
.pass2:
    ; m0 = in0 in1 (interleaved)
    ; m1 = in2 in3 (interleaved)
    ; m2 = in4 in5 (interleaved)
    ; m3 = in6 in7 (interleaved)
    vpbroadcastd    m8, [clip_18b_min]
    vpbroadcastd    m9, [clip_18b_max]
    REPX {pmaxsd x, m8}, m0, m1, m2, m3
    REPX {pminsd x, m9}, m0, m1, m2, m3
    vpbroadcastd    m4, [pd_5793]
    REPX {pmulld x, m4}, m0, m1, m2, m3
    ; NOTE(review): m7 is not loaded in this routine; it is presumably the
    ; rounding constant left behind by the pass-1 routine - confirm.
    REPX {paddd x, m7}, m0, m1, m2, m3
    REPX {psrad x, 15}, m0, m1, m2, m3
    vpbroadcastd    m5, [pixel_12bpc_max]
    vpbroadcastd    m7, [pw_16384]
    packssdw        m0, m1
    packssdw        m2, m3
    jmp m(iidentity_8x4_internal_10bpc).pass2_end

; INV_TXFM_8X8_FN type1, type2 [, bitdepth = 10]
;
; Instantiates the 8x8 entry point for one (type1, type2) transform pair by
; forwarding to INV_TXFM_FN (defined elsewhere in this file) with size 8x8.
; For the dct/dct pair it additionally emits the DC-only path:
;   .dconly       scale the first coefficient by 181, store eobd to the
;                 first coefficient slot, OR 8 into r3d (used below as the
;                 remaining-row counter)
;   .dconly2      (x + 384) >> 9
;   .dconly3      (x * 181 + 2176) >> 12, then broadcast the value plus the
;                 bias in m2 to every word of m0
;   .dconly_loop  two destination rows per iteration: saturating add of m0
;                 followed by an unsigned-saturating subtract of m2, until
;                 r3d is exhausted
; The labels are also jump targets for other block sizes (the 8x4 macro
; jumps to .dconly3). Non-10 bpc instantiations jump to the 10 bpc .dconly
; with their own bias constant already in m2.
%macro INV_TXFM_8X8_FN 2-3 10 ; type1, type2, bitdepth
    INV_TXFM_FN     %1, %2, 0, 8x8, %3
%ifidn %1_%2, dct_dct
    vpbroadcastd    m2, [dconly_%3bpc]
%if %3 = 10
.dconly:
    imul            r6d, [cq], 181
    mov             [cq], eobd ; 0
    or              r3d, 8
.dconly2:
    add             r6d, 384
    sar             r6d, 9
.dconly3:
    imul            r6d, 181
    add             r6d, 2176
    sar             r6d, 12
    movd            xm0, r6d
    paddsw          xm0, xm2
    vpbroadcastw    m0, xm0
.dconly_loop:
    mova            xm1, [dstq+strideq*0]
    vinserti128     m1, [dstq+strideq*1], 1
    paddsw          m1, m0
    psubusw         m1, m2
    mova            [dstq+strideq*0], xm1
    vextracti128    [dstq+strideq*1], m1, 1
    lea             dstq, [dstq+strideq*2]
    sub             r3d, 2
    jg .dconly_loop
    RET
%else
    jmp m(inv_txfm_add_dct_dct_8x8_10bpc).dconly
%endif
%endif
%endmacro

; IADST8_1D: 8-point inverse ADST on dword vectors.
; Arguments are register numbers: eight sources, three temporaries, the
; register holding pd_2048, and the registers holding the lower and upper
; clip bounds. Intermediate values are clamped to the clip bounds after
; each butterfly stage. Per the trailing comments, the results carry the
; names out0, -out1, out6 and -out7 plus four sums/differences that are
; still multiplied by 1448; no rounding or shift is applied to those here.
%macro IADST8_1D 14 ; src[1-8], tmp[1-3], pd_2048, clip[1-2]
    ITX_MULSUB_2D   %8, %1, %9, %10, %11, %12,  401, 4076 ; t1a, t0a
    ITX_MULSUB_2D   %2, %7, %9, %10, %11, %12, 3920, 1189 ; t7a, t6a
    ITX_MULSUB_2D   %6, %3, %9, %10, %11, %12, 1931, 3612 ; t3a, t2a
    ITX_MULSUB_2D   %4, %5, %9, %10, %11, %12, 3166, 2598 ; t5a, t4a
    psubd           m%9, m%3, m%7 ; t6
    paddd           m%3, m%7 ; t2
    psubd           m%7, m%1, m%5 ; t4
    paddd           m%1, m%5 ; t0
    psubd           m%5, m%6, m%2 ; t7
    paddd           m%6, m%2 ; t3
    psubd           m%2, m%8, m%4 ; t5
    paddd           m%8, m%4 ; t1
    REPX {pmaxsd x, m%13}, m%7, m%2, m%9, m%5, m%3, m%1, m%6, m%8
    REPX {pminsd x, m%14}, m%7, m%2, m%9, m%5, m%3, m%1, m%6, m%8
    ITX_MULSUB_2D   %7, %2, %4, %10, %11, %12, 1567, 3784 ; t5a, t4a
    ITX_MULSUB_2D   %5, %9, %4, %10, %11, %12, 3784, %11 ; t6a, t7a
    psubd           m%10, m%7, m%9 ; t7
    paddd           m%7, m%9 ; out6
    vpbroadcastd    m%9, [pd_1448]
    psubd           m%4, m%8, m%6 ; t3
    paddd           m%8, m%6 ; -out7
    psubd           m%6, m%1, m%3 ; t2
    paddd           m%1, m%3 ; out0
    psubd           m%3, m%2, m%5 ; t6
    paddd           m%2, m%5 ; -out1
    REPX {pmaxsd x, m%13}, m%6, m%4, m%3, m%10
    REPX {pminsd x, m%14}, m%6, m%4, m%3, m%10
    REPX {pmulld x, m%9 }, m%6, m%4, m%3, m%10
    psubd           m%5, m%6, m%4 ; (t2 - t3) * 1448
    paddd           m%4, m%6 ; (t2 + t3) * 1448
    psubd           m%6, m%3, m%10 ; (t6 - t7) * 1448
    paddd           m%3, m%10 ; (t6 + t7) * 1448
%endmacro

INV_TXFM_8X8_FN dct, dct
INV_TXFM_8X8_FN dct, identity
INV_TXFM_8X8_FN dct, adst
INV_TXFM_8X8_FN dct, flipadst

; idct_8x8_internal_10bpc
;
; Entry:                 loads the 18-bit clip bounds into m12/m13 and falls
;                        into .pass1.
; .pass1:                loads eight 32-byte coefficient rows into m0-m7,
;                        m11 = pd_2048, runs .main and .round_shift1, then
;                        continues at tx2q.
; .pass2:                packs/transposes to words, runs the 8 bpc 8-point
;                        DCT core, rounds with pmulhrsw by pw_2048 and
;                        writes the block as two 8x4 halves.
; .write_8x4_start:      sets m11 = 10 bpc pixel maximum, r6 = stride*3 and
;                        m10 = 0, then falls into .write_8x4.
; .write_8x4:            adds m0/m1 to four destination rows, clamps to
;                        [m10, m11], stores, zeroes 4*32 bytes at cq, and
;                        advances cq by 32*4 and dstq by four rows.
; .transpose_8x8_packed: packs m0-m7 (dwords) into m0-m3 (words) and
;                        transposes; also leaves deint_shuf+128 in r6.
; .main_rect2:           x = (x + m11) >> 12 on m0-m7, then falls into .main.
; .main:                 dword-domain 8-point DCT core; expects m11 =
;                        pd_2048 and the clip bounds in m12/m13.
; .round_shift1:         adds 1 to four of the .main results, forms
;                        out0-out7 and shifts each right by 1.
; NOTE(review): .round_shift1 runs past the end of this chunk; only its
; visible instructions are reproduced here.
cglobal idct_8x8_internal_10bpc, 0, 7, 14, dst, stride, c, eob, tx2
    vpbroadcastd    m12, [clip_18b_min]
    vpbroadcastd    m13, [clip_18b_max]
.pass1:
    mova            m0, [cq+32*0]
    mova            m1, [cq+32*1]
    mova            m2, [cq+32*2]
    mova            m3, [cq+32*3]
    mova            m4, [cq+32*4]
    mova            m5, [cq+32*5]
    mova            m6, [cq+32*6]
    mova            m7, [cq+32*7]
    vpbroadcastd    m11, [pd_2048]
    call .main
    call .round_shift1
    jmp             tx2q
.pass2:
    call .transpose_8x8_packed
    call m(idct_8x8_internal_8bpc).main
    vpbroadcastd    m12, [pw_2048]
    vpermq          m0, m0, q3120
    vpermq          m1, m1, q2031
    vpermq          m2, m2, q3120
    vpermq          m3, m3, q2031
    pmulhrsw        m0, m12
    pmulhrsw        m1, m12
    call .write_8x4_start
    pmulhrsw        m0, m2, m12
    pmulhrsw        m1, m3, m12
    call .write_8x4
    RET
ALIGN function_align
.write_8x4_start:
    vpbroadcastd    m11, [pixel_10bpc_max]
    lea             r6, [strideq*3]
    pxor            m10, m10
.write_8x4:
    mova            xm8, [dstq+strideq*0]
    vinserti128     m8, [dstq+strideq*1], 1
    mova            xm9, [dstq+strideq*2]
    vinserti128     m9, [dstq+r6 ], 1
    ; clear the coefficients consumed by this half
    mova            [cq+32*0], m10
    mova            [cq+32*1], m10
    mova            [cq+32*2], m10
    mova            [cq+32*3], m10
    add             cq, 32*4
    paddw           m0, m8
    paddw           m1, m9
    ; clamp to [m10, m11]
    pmaxsw          m0, m10
    pmaxsw          m1, m10
    pminsw          m0, m11
    pminsw          m1, m11
    mova            [dstq+strideq*0], xm0
    vextracti128    [dstq+strideq*1], m0, 1
    mova            [dstq+strideq*2], xm1
    vextracti128    [dstq+r6 ], m1, 1
    lea             dstq, [dstq+strideq*4]
    ret
ALIGN function_align
.transpose_8x8_packed:
    packssdw        m0, m4
    packssdw        m1, m5
    packssdw        m2, m6
    packssdw        m3, m7
    ; NOTE(review): r6 is presumably the constant base expected by the
    ; 8 bpc core called right after this helper - confirm.
    lea             r6, [deint_shuf+128]
    punpckhwd       m4, m0, m1
    punpcklwd       m0, m1
    punpckhwd       m1, m2, m3
    punpcklwd       m2, m3
    punpckhdq       m3, m0, m2
    punpckldq       m0, m2
    punpckhdq       m2, m4, m1
    punpckldq       m4, m1
    vinserti128     m1, m3, xm2, 1
    vperm2i128      m3, m2, 0x31
    vperm2i128      m2, m0, m4, 0x31
    vinserti128     m0, xm4, 1
    ret
ALIGN function_align
.main_rect2:
    REPX {paddd x, m11}, m0, m1, m2, m3, m4, m5, m6, m7
    REPX {psrad x, 12 }, m0, m1, m2, m3, m4, m5, m6, m7
.main:
    ITX_MULSUB_2D   5, 3, 8, 9, 10, 11, 3406, 2276 ; t5a t6a
    ITX_MULSUB_2D   1, 7, 8, 9, 10, 11,  799, 4017 ; t4a t7a
    ITX_MULSUB_2D   2, 6, 8, 9, 10, 11, 1567, 3784 ; t2 t3
    paddd           m8, m1, m5 ; t4
    psubd           m1, m5 ; t5a
    paddd           m9, m7, m3 ; t7
    psubd           m7, m3 ; t6a
    vpbroadcastd    m3, [pd_2896]
    REPX {pmaxsd x, m12}, m1, m8, m7, m9
    REPX {pminsd x, m13}, m1, m8, m7, m9
    REPX {pmulld x, m3 }, m0, m4, m7, m1
    ; the rounding term is added once per butterfly pair, before the
    ; sum/difference, so both results of each pair are rounded
    paddd           m0, m11
    paddd           m7, m11
    psubd           m5, m0, m4
    paddd           m0, m4
    psubd           m4, m7, m1
    paddd           m7, m1
    REPX {psrad x, 12 }, m5, m0, m4, m7
    psubd           m3, m0, m6 ; dct4 out3
    paddd           m0, m6 ; dct4 out0
    paddd           m6, m5, m2 ; dct4 out1
    psubd           m5, m2 ; dct4 out2
    REPX {pmaxsd x, m12}, m0, m6, m5, m3
    REPX {pminsd x, m13}, m0, m6, m5, m3
    ret
ALIGN function_align
.round_shift1:
    ; m1 = all ones (-1 per dword); subtracting it adds the +1 rounding term
    pcmpeqd         m1, m1
    REPX {psubd x, m1}, m0, m6, m5, m3
    paddd           m1, m6, m7 ; out1
    psubd           m6, m7 ; out6
    psubd           m7, m0, m9 ; out7
    paddd           m0, m9 ; out0
    paddd           m2, m5, m4 ; out2
    psubd           m5, m4 ; out5
    psubd           m4, m3, m8 ; out4
    paddd           m3, m8 ; out3
    REPX {psrad x, 1 }, m0, m1, m2, m3, m4, m5, m6, m7
; end of idct_8x8_internal_10bpc.round_shift1
    ret

; Entry points for 8x8 transforms whose first pass is ADST.
; NOTE(review): INV_TXFM_8X8_FN is defined outside this span; presumably it
; emits the public (type1, type2) wrapper that dispatches to the *_internal
; routines below -- confirm against the macro definition.
INV_TXFM_8X8_FN adst, dct
INV_TXFM_8X8_FN adst, adst
INV_TXFM_8X8_FN adst, flipadst
INV_TXFM_8X8_FN adst, identity

; iadst_8x8_internal_10bpc: ADST passes for 8x8 blocks, 10 bpc.
; cglobal (x86inc): no auto-loaded args, 7 GPRs, 14 vector registers;
; register names dst, stride, c, eob, tx2.
;   .pass1     .main + .main_end on the eight dword rows at cq, then an
;              indirect jump to the second-pass routine held in tx2q
;   .pass2     second pass on packed word data, result added to dst
;   .main      loads m0-m7 from cq, sets m11 = pd_2048, falls into .main2
;   .main2     IADST8_1D on m0-m7; returns with m8 = pd_1, m9 = pd_3072
;   .main_end  final rounding and sign handling of the pass-1 outputs
; m12/m13 carry clip_18b_min/clip_18b_max through pass 1.
cglobal iadst_8x8_internal_10bpc, 0, 7, 14, dst, stride, c, eob, tx2
    vpbroadcastd        m12, [clip_18b_min]
    vpbroadcastd        m13, [clip_18b_max]
.pass1:
    call .main
    call .main_end
    jmp                tx2q
.pass2:
    ; pack the dword rows to words and transpose them
    call m(idct_8x8_internal_10bpc).transpose_8x8_packed
    ; NOTE(review): m4/m5 are qword-swapped copies of m0/m1, presumably the
    ; input layout the 8 bpc main_pass2 expects -- confirm in the 8 bpc file.
    pshufd               m4, m0, q1032
    pshufd               m5, m1, q1032
    call m(iadst_8x8_internal_8bpc).main_pass2
    vpbroadcastd         m5, [pw_2048]
    vpbroadcastd       xm12, [pw_4096]  ; xmm write clears the upper lane
    psubw               m12, m5         ; low lane +2048, high lane -2048
    REPX {vpermq x, x, q3120}, m0, m1, m2, m3
    ; pmulhrsw by +-2048 is a rounded shift right by 4; the high lane is
    ; negated at the same time
    pmulhrsw             m0, m12
    pmulhrsw             m1, m12
    call m(idct_8x8_internal_10bpc).write_8x4_start ; first four dst rows
    pmulhrsw             m0, m2, m12
    pmulhrsw             m1, m3, m12
    call m(idct_8x8_internal_10bpc).write_8x4       ; next four dst rows
    RET
ALIGN function_align
.main:
    mova                 m0, [cq+32*0]
    mova                 m7, [cq+32*7]
    mova                 m1, [cq+32*1]
    mova                 m6, [cq+32*6]
    mova                 m2, [cq+32*2]
    mova                 m5, [cq+32*5]
    mova                 m3, [cq+32*3]
    mova                 m4, [cq+32*4]
    vpbroadcastd        m11, [pd_2048]
.main2:
    ; entry for callers that already hold the rows in m0-m7 and pd_2048 in m11
    IADST8_1D             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13
    ; NOTE(review): relies on the value IADST8_1D leaves in m8
    psrld                m8, 10 ; pd_1
    vpbroadcastd         m9, [pd_3072]
    ret
ALIGN function_align
.main_end:
    ; in: m8 = pd_1, m9 = pd_3072 (from .main2)
    ; m0/m6 = (x + 1) >> 1, m1/m7 = (1 - x) >> 1
    paddd                m0, m8
    psubd                m1, m8, m1
    paddd                m6, m8
    psubd                m7, m8, m7
    REPX     {psrad x, 1 }, m0, m1, m6, m7
    ; (1 + ((x + 1024) >> 11)) >> 1 = (3072 + x) >> 12
    ; (1 - ((x + 1024) >> 11)) >> 1 = (3071 - x) >> 12
    psubd                m8, m9, m8 ; pd_3071
    paddd                m2, m9
    psubd                m3, m8, m3
    paddd                m4, m9
    psubd                m5, m8, m5
    REPX     {psrad x, 12}, m2, m3, m4, m5
    ret

; Entry points for 8x8 transforms whose first pass is flipped ADST
; (same caveat as above regarding INV_TXFM_8X8_FN).
INV_TXFM_8X8_FN flipadst, dct
INV_TXFM_8X8_FN flipadst, adst
INV_TXFM_8X8_FN flipadst, flipadst
INV_TXFM_8X8_FN flipadst, identity

; iflipadst_8x8_internal_10bpc: same arithmetic as the iadst routine above,
; with the outputs delivered in reverse register order.
;   .pass1     reuses iadst .main, then the local (reversing) .main_end
;   .pass2     same as iadst .pass2 but consumes m3, m2, m1, m0 in that order
;              and uses the opposite sign pattern in m12
;   .main_end  rounding as in iadst .main_end, results stored mirrored
cglobal iflipadst_8x8_internal_10bpc, 0, 7, 14, dst, stride, c, eob, tx2
    vpbroadcastd        m12, [clip_18b_min]
    vpbroadcastd        m13, [clip_18b_max]
.pass1:
    call m(iadst_8x8_internal_10bpc).main
    call .main_end
    jmp                tx2q
.pass2:
    call m(idct_8x8_internal_10bpc).transpose_8x8_packed
    pshufd               m4, m0, q1032
    pshufd               m5, m1, q1032
    call m(iadst_8x8_internal_8bpc).main_pass2
    vpbroadcastd        m12, [pw_2048]
    vpbroadcastd        xm5, [pw_4096]  ; upper lane of m5 becomes zero
    psubw               m12, m5         ; low lane -2048, high lane +2048
    vpermq               m8, m3, q2031
    vpermq               m9, m2, q2031
    vpermq               m2, m1, q2031
    vpermq               m3, m0, q2031
    pmulhrsw             m0, m8, m12
    pmulhrsw             m1, m9, m12
    call m(idct_8x8_internal_10bpc).write_8x4_start
    pmulhrsw             m0, m2, m12
    pmulhrsw             m1, m3, m12
    call m(idct_8x8_internal_10bpc).write_8x4
    RET
ALIGN function_align
.main_end:
    ; in: m8 = pd_1, m9 = pd_3072 (from iadst .main2)
    ; out: m0 = (1 - in7) >> 1, m1 = (1 + in6) >> 1,
    ;      m6 = (1 - in1) >> 1, m7 = (1 + in0) >> 1
    paddd               m10, m8, m0
    psubd                m0, m8, m7
    psubd                m7, m8, m1
    paddd                m1, m8, m6
    psrad                m0, 1
    psrad                m1, 1
    psrad                m6, m7, 1
    psrad                m7, m10, 1
    ; out: m2 = (3071 - in5) >> 12, m3 = (3072 + in4) >> 12,
    ;      m4 = (3071 - in3) >> 12, m5 = (3072 + in2) >> 12
    psubd                m8, m9, m8 ; pd_3071 (pd_3072 - pd_1)
    psubd               m10, m8, m5
    paddd                m5, m9, m2
    psubd                m2, m8, m3
    paddd                m3, m9, m4
    psrad                m4, m2, 12
    psrad                m2, m10, 12
    psrad                m3, 12
    psrad                m5, 12
    ret

INV_TXFM_8X8_FN identity, dct
INV_TXFM_8X8_FN identity, adst
INV_TXFM_8X8_FN identity, flipadst
INV_TXFM_8X8_FN identity, identity

; iidentity_8x8_internal_10bpc: identity passes for 8x8 blocks, 10 bpc.
; cglobal (x86inc): no auto-loaded args, 7 GPRs, 14 vector registers.
;   .pass1          loads the eight 32-byte rows at cq into m0-m7 unchanged
;                   and jumps to the second-pass routine held in tx2q
;   .pass2          packs rows 3/7, loads pixel_10bpc_max into m7, falls
;                   into .pass2_main
;   .pass2_main     shared tail (the 12 bpc variant jumps here with its own
;                   pixel maximum in m7): pack, transpose, round, store
;   .write_2x8x2*   store helpers, see below
cglobal iidentity_8x8_internal_10bpc, 0, 7, 14, dst, stride, c, eob, tx2
.pass1:
    mova                 m0, [cq+32*0]
    mova                 m1, [cq+32*1]
    mova                 m2, [cq+32*2]
    mova                 m3, [cq+32*3]
    mova                 m4, [cq+32*4]
    mova                 m5, [cq+32*5]
    mova                 m6, [cq+32*6]
    mova                 m7, [cq+32*7]
    jmp                tx2q
.pass2:
    packssdw             m3, m7     ; consume m7 before it is reused below
    vpbroadcastd         m7, [pixel_10bpc_max]
.pass2_main:
    ; in: m0-m2, m4-m6 = dword rows, m3 = rows 3/7 already packed to words,
    ;     m7 = upper clamp for the output pixels
    packssdw             m0, m4
    packssdw             m1, m5
    packssdw             m2, m6
    vpbroadcastd        m12, [pw_4096]
    punpckhwd            m4, m0, m1
    punpcklwd            m0, m1
    punpckhwd            m1, m2, m3
    punpcklwd            m2, m3
    punpckhdq            m3, m0, m2
    punpckldq            m0, m2
    punpckldq            m2, m4, m1
    punpckhdq            m4, m1
    punpckhqdq           m1, m0, m2 ; 1 5
    punpcklqdq           m0, m2     ; 0 4
    punpcklqdq           m2, m3, m4 ; 2 6
    punpckhqdq           m3, m4     ; 3 7
    ; pmulhrsw by 4096 is a rounded shift right by 3
    pmulhrsw             m0, m12
    pmulhrsw             m1, m12
    call .write_2x8x2_start         ; dst rows 0, 1, 4, 5
    pmulhrsw             m0, m2, m12
    pmulhrsw             m1, m3, m12
    call .write_2x8x2_zero          ; dst rows 2, 3, 6, 7
    RET
.write_2x8x2_start:
    ; one-time setup: r6 = 5*stride, m6 = 0 (lower clamp and zero source)
    lea                  r6, [strideq*5]
    pxor                 m6, m6
.write_2x8x2_zero:
    ; clear 4*32 bytes of coefficients and advance cq past them; the two
    ; calls made by .pass2_main together clear all eight rows
    mova          [cq+32*0], m6
    mova          [cq+32*1], m6
    mova          [cq+32*2], m6
    mova          [cq+32*3], m6
    add                  cq, 32*4
.write_2x8x2:
    ; m0 = residual for rows +0 (low lane) and +4 (high lane),
    ; m1 = residual for rows +1 (low lane) and +5 (high lane, via r6).
    ; Adds to dst, clamps to [m6, m7], stores, advances dstq by two rows.
    mova                xm4, [dstq+strideq*0]
    vinserti128          m4, [dstq+strideq*4], 1
    mova                xm5, [dstq+strideq*1]
    vinserti128          m5, [dstq+r6       ], 1
    paddw                m0, m4
    paddw                m1, m5
    pmaxsw               m0, m6
    pmaxsw               m1, m6
    pminsw               m0, m7
    pminsw               m1, m7
    mova         [dstq+strideq*0], xm0
    mova         [dstq+strideq*1], xm1
    vextracti128 [dstq+strideq*4], m0, 1
    vextracti128 [dstq+r6       ], m1, 1
    lea                dstq, [dstq+strideq*2]
    ret

; TRANSPOSE_8X8_DWORD: transpose an 8x8 matrix of dwords held in eight
; vector registers.
;   %1-%8   register numbers of the rows; they receive out0-out7 in order
;   %9-%12  register numbers of four temporaries (clobbered)
; The trailing letter comments are the original element maps for each step.
%macro TRANSPOSE_8X8_DWORD 12 ; src/dst[1-8], tmp[1-4]
    punpckldq           m%9, m%1, m%2   ; aibj emfn
    punpckhdq           m%1, m%2        ; ckdl gohp
    punpckldq          m%10, m%3, m%4   ; qyrz uCvD
    punpckhdq           m%3, m%4        ; sAtB wExF
    punpckldq          m%11, m%5, m%6   ; GOHP KSLT
    punpckhdq           m%5, m%6        ; IQJR MUNV
    punpckldq          m%12, m%7, m%8   ; WeXf aibj
    punpckhdq           m%7, m%8        ; YgZh ckdl
    punpcklqdq          m%2, m%9, m%10  ; aiqy emuC
    punpckhqdq          m%9, m%10       ; bjrz fnvD
    punpcklqdq          m%4, m%1, m%3   ; cksA gowE
    punpckhqdq         m%10, m%1, m%3   ; dltB hpxF
    punpcklqdq          m%6, m%11, m%12 ; GOWe KSai
    punpckhqdq         m%11, m%12       ; HPXf LTbj
    punpcklqdq          m%8, m%5, m%7   ; IQYg MUck
    punpckhqdq         m%12, m%5, m%7   ; JRZh NVdl
    ; 0x20 joins the two low lanes, 0x31 joins the two high lanes
    vperm2i128          m%1, m%2, m%6, 0x20   ; out0
    vperm2i128          m%5, m%2, m%6, 0x31   ; out4
    vperm2i128          m%2, m%9, m%11, 0x20  ; out1
    vperm2i128          m%6, m%9, m%11, 0x31  ; out5
    vperm2i128          m%3, m%4, m%8, 0x20   ; out2
    vperm2i128          m%7, m%4, m%8, 0x31   ; out6
    vperm2i128          m%4, m%10, m%12, 0x20 ; out3
    vperm2i128          m%8, m%10, m%12, 0x31 ; out7
%endmacro

; 12 bpc entry points with a DCT first pass (third macro argument = 12).
INV_TXFM_8X8_FN dct, dct,      12
INV_TXFM_8X8_FN dct, identity, 12
INV_TXFM_8X8_FN dct, adst,     12
INV_TXFM_8X8_FN dct, flipadst, 12

; idct_8x8_internal_12bpc: DCT passes for 8x8 blocks, 12 bpc.
;   entry             sets m12/m13 = clip_20b_min/max and reuses the 10 bpc
;                     .pass1
;   .pass2            clamps m0-m7 to [clip_18b_min, clip_18b_max],
;                     transposes them as dwords, runs the 10 bpc .main with
;                     m11 = pd_2048, applies .round_shift4 and jumps to the
;                     iadst 12 bpc .pass2_end to pack and store
;   .write_8x4_start  12 bpc store setup; unlike the 10 bpc label of the same
;                     name it returns instead of falling into .write_8x4
;   .transpose_8x8    TRANSPOSE_8X8_DWORD on m0-m7, m8-m11 as temporaries
;   .round_shift4     final butterflies with (x + 8) >> 4 rounding
cglobal idct_8x8_internal_12bpc, 0, 7, 14, dst, stride, c, eob, tx2
    vpbroadcastd        m12, [clip_20b_min]
    vpbroadcastd        m13, [clip_20b_max]
    jmp m(idct_8x8_internal_10bpc).pass1
.pass2:
    vpbroadcastd        m12, [clip_18b_min]
    vpbroadcastd        m13, [clip_18b_max]
    REPX    {pmaxsd x, m12}, m0, m1, m2, m3, m4, m5, m6, m7
    REPX    {pminsd x, m13}, m0, m1, m2, m3, m4, m5, m6, m7
    call .transpose_8x8
    vpbroadcastd        m11, [pd_2048]
    call m(idct_8x8_internal_10bpc).main
    call .round_shift4
    jmp m(iadst_8x8_internal_12bpc).pass2_end
ALIGN function_align
.write_8x4_start:
    ; out: m11 = pixel_12bpc_max, r6 = 3*stride, m10 = 0
    vpbroadcastd        m11, [pixel_12bpc_max]
    lea                  r6, [strideq*3]
    pxor                m10, m10
    ret
ALIGN function_align
.transpose_8x8:
    TRANSPOSE_8X8_DWORD   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
    ret
ALIGN function_align
.round_shift4:
    ; The rounding constant is added to one operand of each butterfly so that
    ; both the sum and the difference carry it into the final shift.
    vpbroadcastd         m1, [pd_8]
    REPX      {paddd x, m1}, m0, m6, m5, m3
    paddd                m1, m6, m7 ; out1
    psubd                m6, m7     ; out6
    psubd                m7, m0, m9 ; out7
    paddd                m0, m9     ; out0
    paddd                m2, m5, m4 ; out2
    psubd                m5, m4     ; out5
    psubd                m4, m3, m8 ; out4
    paddd                m3, m8     ; out3
    REPX       {psrad x, 4}, m0, m1, m2, m3, m4, m5, m6, m7
    ret

INV_TXFM_8X8_FN adst, dct,      12
INV_TXFM_8X8_FN adst, adst,     12
INV_TXFM_8X8_FN adst, flipadst, 12
INV_TXFM_8X8_FN adst, identity, 12

; iadst_8x8_internal_12bpc: ADST passes for 8x8 blocks, 12 bpc.
; cglobal (x86inc): no auto-loaded args, 7 GPRs, 14 vector registers.
;   entry          sets m12/m13 = clip_20b_min/max and reuses the 10 bpc
;                  .pass1
;   .pass2         .pass2_main, then falls into .pass2_end
;   .pass2_end     shared store tail (the 12 bpc idct jumps here): packs the
;                  dword rows m0-m7 to words and writes eight dst rows
;   .pass2_main    clamp + dword transpose, then .pass2_main2
;   .pass2_main2   10 bpc .main2 followed by the pass-2 rounding
cglobal iadst_8x8_internal_12bpc, 0, 7, 14, dst, stride, c, eob, tx2
    vpbroadcastd        m12, [clip_20b_min]
    vpbroadcastd        m13, [clip_20b_max]
    jmp m(iadst_8x8_internal_10bpc).pass1
.pass2:
    call .pass2_main
.pass2_end:
    ; packssdw interleaves the two sources per 128-bit lane; vpermq q3120
    ; regroups the qwords so the low lane holds the whole first row and the
    ; high lane the whole second row
    packssdw             m0, m1
    packssdw             m1, m2, m3
    REPX {vpermq x, x, q3120}, m0, m1
    call m(idct_8x8_internal_12bpc).write_8x4_start ; setup only, returns
    call m(idct_8x8_internal_10bpc).write_8x4       ; first four dst rows
    packssdw             m0, m4, m5
    packssdw             m1, m6, m7
    REPX {vpermq x, x, q3120}, m0, m1
    call m(idct_8x8_internal_10bpc).write_8x4       ; next four dst rows
    RET
ALIGN function_align
.pass2_main:
    vpbroadcastd        m12, [clip_18b_min]
    vpbroadcastd        m13, [clip_18b_max]
    REPX    {pmaxsd x, m12}, m0, m1, m2, m3, m4, m5, m6, m7
    REPX    {pminsd x, m13}, m0, m1, m2, m3, m4, m5, m6, m7
    call m(idct_8x8_internal_12bpc).transpose_8x8
    vpbroadcastd        m11, [pd_2048]
.pass2_main2:
    ; in: rows in m0-m7, m11 = pd_2048; .main2 returns with m8 = pd_1
    call m(iadst_8x8_internal_10bpc).main2
    pslld                m9, m8, 3  ; pd_8
    ; m0/m6 = (x + 8) >> 4, m1/m7 = (8 - x) >> 4
    paddd                m0, m9
    psubd                m1, m9, m1 ; 8-x
    paddd                m6, m9
    psubd                m7, m9, m7
    REPX       {psrad x, 4}, m0, m1, m6, m7
    ; m2/m4 = (x + 17408) >> 15, m3/m5 = (17407 - x) >> 15
    ; (17408 = 1024 + 8*2048, i.e. (((x + 1024) >> 11) + 8) >> 4 in one shift)
    vpbroadcastd         m9, [pd_17408]
    psubd                m8, m9, m8 ; 17407
    paddd                m2, m9
    psubd                m3, m8, m3
    paddd                m4, m9
    psubd                m5, m8, m5
    REPX      {psrad x, 15}, m2, m3, m4, m5
    ret

INV_TXFM_8X8_FN flipadst, dct,      12
INV_TXFM_8X8_FN flipadst, adst,     12
INV_TXFM_8X8_FN flipadst, flipadst, 12
INV_TXFM_8X8_FN flipadst, identity, 12

; iflipadst_8x8_internal_12bpc: flipped-ADST passes for 8x8 blocks, 12 bpc.
;   entry    sets m12/m13 = clip_20b_min/max and reuses the 10 bpc
;            iflipadst .pass1
;   .pass2   runs the iadst 12 bpc .pass2_main, then packs and stores the
;            rows in reverse register order (m7, m6, ..., m0)
cglobal iflipadst_8x8_internal_12bpc, 0, 7, 14, dst, stride, c, eob, tx2
    vpbroadcastd        m12, [clip_20b_min]
    vpbroadcastd        m13, [clip_20b_max]
    jmp m(iflipadst_8x8_internal_10bpc).pass1
.pass2:
    call m(iadst_8x8_internal_12bpc).pass2_main
    packssdw             m7, m7, m6 ; rows from m7, m6
    packssdw             m6, m1, m0 ; rows from m1, m0, kept for the last store
    packssdw             m1, m5, m4 ; rows from m5, m4
    vpermq               m0, m7, q3120
    vpermq               m1, m1, q3120
    call m(idct_8x8_internal_12bpc).write_8x4_start
    call m(idct_8x8_internal_10bpc).write_8x4
    packssdw             m0, m3, m2 ; rows from m3, m2
    vpermq               m0, m0, q3120
    vpermq               m1, m6, q3120
    call m(idct_8x8_internal_10bpc).write_8x4
    RET

INV_TXFM_8X8_FN identity, dct,      12
INV_TXFM_8X8_FN identity, adst,     12
INV_TXFM_8X8_FN identity, flipadst, 12
INV_TXFM_8X8_FN identity, identity, 12

; iidentity_8x8_internal_12bpc: identity passes for 8x8 blocks, 12 bpc.
; Both passes reuse the 10 bpc code; only the output clamp differs
; (m7 = pixel_12bpc_max).
cglobal iidentity_8x8_internal_12bpc, 0, 7, 14, dst, stride, c, eob, tx2
    jmp m(iidentity_8x8_internal_10bpc).pass1
.pass2:
    packssdw             m3, m7     ; consume m7 before it is reused below
    vpbroadcastd         m7, [pixel_12bpc_max]
    jmp m(iidentity_8x8_internal_10bpc).pass2_main

; INV_TXFM_8X16_FN type1, type2 [, eob_offset [, bitdepth]]
; Takes 2-4 arguments; eob_offset defaults to 0 and bitdepth to 10.
; Forwards to INV_TXFM_FN for the 8x16 block size. For the dct/dct pair only,
; the instructions below are emitted directly after it:
;   r6d = ((([cq] * 181) + 128) >> 8) * 181, m2 = dconly_<bitdepth>bpc,
;   [cq] is overwritten with eobd, r3d |= 16, and control continues at the
;   8x8 10 bpc .dconly2 label.
; NOTE(review): this looks like the DC-only shortcut, with 181/256 as the
; rectangular-block scale and r3d as the row count consumed by .dconly2 --
; confirm against INV_TXFM_FN and the 8x8 dconly code.
%macro INV_TXFM_8X16_FN 2-4 0,10 ; type1, type2, eob_offset, bitdepth
    INV_TXFM_FN          %1, %2, %3, 8x16, %4
%ifidn %1_%2, dct_dct
    imul                r6d, [cq], 181
    vpbroadcastd         m2, [dconly_%4bpc]
    mov                [cq], eobd ; 0
    or                  r3d, 16
    add                 r6d, 128
    sar                 r6d, 8
    imul                r6d, 181
    jmp m(inv_txfm_add_dct_dct_8x8_10bpc).dconly2
%endif
%endmacro

INV_TXFM_8X16_FN dct, dct
INV_TXFM_8X16_FN dct, identity, 35
INV_TXFM_8X16_FN dct, adst
INV_TXFM_8X16_FN dct, flipadst

; idct_8x16_internal_10bpc: cglobal (x86inc) with no auto-loaded args,
; 7 GPRs and 16 vector registers.
cglobal idct_8x16_internal_10bpc, 0, 7, 16, dst, stride, c, eob, tx2
%undef cmp
    vpbroadcastd        m12, [clip_18b_min]
    vpbroadcastd        m13, [clip_18b_max]
.pass1:
    vpbroadcastd        m14, [pd_2896]
    vpbroadcastd        m11, [pd_2048]
    cmp                eobd, 43
    jl .fast                        ; eob below 43 takes the .fast path
    add                  cq, 32
2827*c0909341SAndroid Build Coastguard Worker call .pass1_main 2828*c0909341SAndroid Build Coastguard Worker sub cq, 32 2829*c0909341SAndroid Build Coastguard Worker mova [cq+32* 1], m0 2830*c0909341SAndroid Build Coastguard Worker mova [cq+32* 3], m1 2831*c0909341SAndroid Build Coastguard Worker mova [cq+32* 5], m2 2832*c0909341SAndroid Build Coastguard Worker mova [cq+32* 7], m3 2833*c0909341SAndroid Build Coastguard Worker mova [cq+32* 9], m4 2834*c0909341SAndroid Build Coastguard Worker mova [cq+32*11], m5 2835*c0909341SAndroid Build Coastguard Worker mova [cq+32*13], m6 2836*c0909341SAndroid Build Coastguard Worker mova m15, m7 2837*c0909341SAndroid Build Coastguard Worker call .pass1_main 2838*c0909341SAndroid Build Coastguard Worker mova m8, [cq+32* 1] 2839*c0909341SAndroid Build Coastguard Worker mova m9, [cq+32* 3] 2840*c0909341SAndroid Build Coastguard Worker mova m10, [cq+32* 5] 2841*c0909341SAndroid Build Coastguard Worker mova m11, [cq+32* 7] 2842*c0909341SAndroid Build Coastguard Worker mova m12, [cq+32* 9] 2843*c0909341SAndroid Build Coastguard Worker mova m13, [cq+32*11] 2844*c0909341SAndroid Build Coastguard Worker mova m14, [cq+32*13] 2845*c0909341SAndroid Build Coastguard Worker jmp tx2q 2846*c0909341SAndroid Build Coastguard Worker.fast: 2847*c0909341SAndroid Build Coastguard Worker call .pass1_main 2848*c0909341SAndroid Build Coastguard Worker pxor m8, m8 2849*c0909341SAndroid Build Coastguard Worker REPX {mova x, m8}, m9, m10, m11, m12, m13, m14, m15 2850*c0909341SAndroid Build Coastguard Worker jmp tx2q 2851*c0909341SAndroid Build Coastguard Worker.pass2: 2852*c0909341SAndroid Build Coastguard Worker call .transpose 2853*c0909341SAndroid Build Coastguard Worker call m(idct_8x16_internal_8bpc).main 2854*c0909341SAndroid Build Coastguard Worker vpbroadcastd m12, [pw_2048] 2855*c0909341SAndroid Build Coastguard Worker REPX {vpermq x, x, q3120}, m0, m2, m4, m6 2856*c0909341SAndroid Build Coastguard Worker REPX {vpermq x, x, q2031}, m1, m3, m5, m7 
.end:
    ; Shared output tail: each register pair is scaled by m12 (pmulhrsw) and
    ; handed to the idct_8x8 write_8x4 helpers, four pairs in total.
    ; The 10bpc adst/flipadst .pass2 paths below also jump here with their own
    ; m12 contents.
    pmulhrsw     m0, m12
    pmulhrsw     m1, m12
    call m(idct_8x8_internal_10bpc).write_8x4_start
    pmulhrsw     m0, m2, m12
    pmulhrsw     m1, m3, m12
    call m(idct_8x8_internal_10bpc).write_8x4
    pmulhrsw     m0, m4, m12
    pmulhrsw     m1, m5, m12
    call m(idct_8x8_internal_10bpc).write_8x4
    pmulhrsw     m0, m6, m12
    pmulhrsw     m1, m7, m12
    call m(idct_8x8_internal_10bpc).write_8x4
    RET
ALIGN function_align
.transpose:
    ; Narrow m0-m15 from dwords to words (mN paired with mN+8, signed
    ; saturation), then interleave words/dwords and regroup the 128-bit lanes.
    ; Result is left in m0-m7; m8 is used as a temporary.
    packssdw     m0, m8
    packssdw     m1, m9
    packssdw     m2, m10
    packssdw     m3, m11
    packssdw     m4, m12
    packssdw     m5, m13
    packssdw     m6, m14
    packssdw     m7, m15
    ; r6 = deint_shuf+128 (presumably the constant base expected by the 8bpc
    ; helpers that the callers invoke next -- confirm against those helpers)
    lea          r6, [deint_shuf+128]
    punpckhwd    m8, m0, m1
    punpcklwd    m0, m1
    punpckhwd    m1, m2, m3
    punpcklwd    m2, m3
    punpcklwd    m3, m4, m5
    punpckhwd    m4, m5
    punpckhwd    m5, m6, m7
    punpcklwd    m6, m7
    punpckhdq    m7, m3, m6
    punpckldq    m3, m6
    punpckhdq    m6, m4, m5
    punpckldq    m4, m5
    punpckhdq    m5, m8, m1
    punpckldq    m8, m1
    punpckhdq    m1, m0, m2
    punpckldq    m0, m2
    vperm2i128   m2, m0, m3, 0x31
    vinserti128  m0, xm3, 1
    vperm2i128   m3, m1, m7, 0x31
    vinserti128  m1, xm7, 1
    vperm2i128   m7, m5, m6, 0x31
    vinserti128  m5, xm6, 1
    vperm2i128   m6, m8, m4, 0x31
    vinserti128  m4, m8, xm4, 1
    ret
ALIGN function_align
.pass1_main:
    ; Load the even-numbered 32-byte rows of cq, each multiplied by m14, run the
    ; idct_8x8 rect2 core and tail-jump into its round_shift1.
    pmulld       m0, m14, [cq+32* 0]
    pmulld       m1, m14, [cq+32* 2]
    pmulld       m2, m14, [cq+32* 4]
    pmulld       m3, m14, [cq+32* 6]
    pmulld       m4, m14, [cq+32* 8]
    pmulld       m5, m14, [cq+32*10]
    pmulld       m6, m14, [cq+32*12]
    pmulld       m7, m14, [cq+32*14]
    call m(idct_8x8_internal_10bpc).main_rect2
    jmp m(idct_8x8_internal_10bpc).round_shift1
ALIGN function_align
.main_evenhalf:
    ; Last butterfly stage of the even half: m0-m9 in, idct8 out0-out7 in
    ; m0-m7, clamped to [m12, m13].
    paddd        m1, m6, m7 ; idct8 out1
    psubd        m6, m7     ; idct8 out6
    psubd        m7, m0, m9 ; idct8 out7
    paddd        m0, m9     ; idct8 out0
    paddd        m2, m5, m4 ; idct8 out2
    psubd        m5, m4     ; idct8 out5
    psubd        m4, m3, m8 ; idct8 out4
    paddd        m3, m8     ; idct8 out3
    REPX {pmaxsd x, m12}, m0, m1, m2, m3, m4, m5, m6, m7
    REPX {pminsd x, m13}, m0, m1, m2, m3, m4, m5, m6, m7
    ret
.main_oddhalf_fast_rect2:
    ; rect2 entry: round (m11) and shift the four inputs right by 12, then fall
    ; through into the fast odd half.
    REPX {paddd x, m11}, m0, m1, m2, m3
    REPX {psrad x, 12 }, m0, m1, m2, m3
.main_oddhalf_fast: ; lower half zero
    ; Only m0-m3 carry inputs, so each rotation reduces to two plain multiplies.
    vpbroadcastd m7, [pd_4076]
    vpbroadcastd m8, [pd_401]
    vpbroadcastd m6, [pd_m1189]
    vpbroadcastd m9, [pd_3920]
    vpbroadcastd m5, [pd_3612]
    vpbroadcastd m10, [pd_1931]
    vpbroadcastd m4, [pd_m2598]
    vpbroadcastd m15, [pd_3166]
    pmulld       m7, m0
    pmulld       m0, m8
    pmulld       m6, m1
    pmulld       m1, m9
    pmulld       m5, m2
    pmulld       m2, m10
    pmulld       m4, m3
    pmulld       m3, m15
    jmp .main_oddhalf_fast2
.main_oddhalf_rect2:
    ; rect2 entry: round (m11) and shift all eight inputs right by 12, then fall
    ; through into the full odd half.
    REPX {paddd x, m11}, m0, m1, m2, m3, m4, m5, m6, m7
    REPX {psrad x, 12 }, m0, m1, m2, m3, m4, m5, m6, m7
.main_oddhalf:
    ITX_MULSUB_2D 0, 7, 8, 9, 10, _, 401, 4076 ; t8a, t15a
    ITX_MULSUB_2D 6, 1, 8, 9, 10, _, 3920, 1189 ; t11a, t12a
    ITX_MULSUB_2D 2, 5, 8, 9, 10, _, 1931, 3612 ; t10a, t13a
    ITX_MULSUB_2D 4, 3, 8, 9, 10, _, 3166, 2598 ; t9a, t14a
.main_oddhalf_fast2:
    ; Common tail of the odd half. Rounds with m11, clamps to [m12, m13] after
    ; each butterfly stage, and stores the eight results to the 32-byte slots
    ; [r6-32*4] .. [r6+32*3].
    REPX {paddd x, m11}, m0, m7, m6, m1, m2, m5, m4, m3
    REPX {psrad x, 12 }, m0, m4, m6, m2, m1, m5, m7, m3
    psubd        m8, m0, m4 ; t9
    paddd        m0, m4     ; t8
    psubd        m4, m6, m2 ; t10
    paddd        m2, m6     ; t11
    psubd        m6, m1, m5 ; t13
    paddd        m5, m1     ; t12
    psubd        m1, m7, m3 ; t14
    paddd        m7, m3     ; t15
    REPX {pmaxsd x, m12}, m8, m1, m4, m6, m0, m2, m5, m7
    REPX {pminsd x, m13}, m8, m1, m4, m6, m0, m2, m5, m7
    vpbroadcastd m15, [pd_3784]
    vpbroadcastd m10, [pd_1567]
    ITX_MULSUB_2D 1, 8, 3, 9, _, 11, 10, 15
    ITX_MULSUB_2D 6, 4, 3, 9, _, 11, 10, 15, 2
    psubd        m3, m1, m4 ; t10
    paddd        m1, m4     ; t9
    psubd        m4, m0, m2 ; t11a
    paddd        m0, m2     ; t8a
    psubd        m2, m8, m6 ; t13
    paddd        m6, m8     ; t14
    psubd        m8, m7, m5 ; t12a
    paddd        m7, m5     ; t15a
    REPX {pmaxsd x, m12}, m2, m8, m3, m4, m0, m1, m6, m7
    REPX {pminsd x, m13}, m2, m8, m3, m4, m0, m1, m6, m7
    REPX {pmulld x, m14}, m2, m8, m3, m4
    paddd        m2, m11
    paddd        m8, m11
    paddd        m5, m2, m3 ; t13a
    psubd        m2, m3     ; t10a
    psubd        m3, m8, m4 ; t11
    paddd        m4, m8     ; t12
    REPX {psrad x, 12}, m5, m2, m3, m4
    mova         [r6-32*4], m7
    mova         [r6-32*3], m6
    mova         [r6-32*2], m5
    mova         [r6-32*1], m4
    mova         [r6+32*0], m3
    mova         [r6+32*1], m2
    mova         [r6+32*2], m1
    mova         [r6+32*3], m0
    ret

INV_TXFM_8X16_FN adst, dct
INV_TXFM_8X16_FN adst, adst
INV_TXFM_8X16_FN adst, flipadst
INV_TXFM_8X16_FN adst, identity, 35

; iadst_8x16_internal_10bpc
; Entry sets the clamp range m12/m13 to clip_18b_min/max; the 12bpc variant
; loads its own range and jumps straight to .pass1.
; .pass1: eob < 43 takes .fast, which transforms only the even 32-byte rows of
;         cq and zeroes m8-m15. Otherwise the odd rows (cq+32) are transformed
;         first and parked in cq/m15 while the even rows are transformed, so
;         that m0-m7 hold the even-row result and m8-m15 the odd-row result.
;         Control then continues at tx2q.
; .pass2: transposes/packs to words, runs the 8bpc ADST helpers and leaves
;         through the shared idct_8x16 .end tail.
cglobal iadst_8x16_internal_10bpc, 0, 7, 16, dst, stride, c, eob, tx2
; The argument named `c` presumably makes x86inc define a `cmp` alias
; (c + "mp" suffix); undefine it so that `cmp` is the instruction again.
%undef cmp
    vpbroadcastd m12, [clip_18b_min]
    vpbroadcastd m13, [clip_18b_max]
.pass1:
    vpbroadcastd m14, [pd_2896]
    vpbroadcastd m11, [pd_2048]
    cmp          eobd, 43
    jl .fast
    add          cq, 32             ; address the odd rows
    call .pass1_main
    call m(iadst_8x8_internal_10bpc).main_end
    sub          cq, 32
    ; park the odd-row result: m0-m6 in the odd cq rows, m7 in m15
    mova         [cq+32* 1], m0
    mova         [cq+32* 3], m1
    mova         [cq+32* 5], m2
    mova         [cq+32* 7], m3
    mova         [cq+32* 9], m4
    mova         [cq+32*11], m5
    mova         [cq+32*13], m6
    mova         m15, m7
    call .pass1_main
    call m(iadst_8x8_internal_10bpc).main_end
    mova         m8, [cq+32* 1]
    mova         m9, [cq+32* 3]
    mova         m10, [cq+32* 5]
    mova         m11, [cq+32* 7]
    mova         m12, [cq+32* 9]
    mova         m13, [cq+32*11]
    mova         m14, [cq+32*13]
    jmp tx2q
.fast:
    call .pass1_main
    call m(iadst_8x8_internal_10bpc).main_end
    pxor         m8, m8
    REPX {mova x, m8}, m9, m10, m11, m12, m13, m14, m15
    jmp tx2q
.pass2:
    call m(idct_8x16_internal_10bpc).transpose
    call m(iadst_8x16_internal_8bpc).main
    call m(iadst_8x16_internal_8bpc).main_pass2_end
    vpbroadcastd m8, [pw_2048]
    ; The xmm-sized (VEX) broadcast clears the upper 128 bits of m12, so after
    ; the psubw below m12 = +2048 in the low lane and -2048 in the high lane.
    vpbroadcastd xm12, [pw_4096]
    REPX {vpermq x, x, q2031}, m0, m1, m2, m3
    REPX {vpermq x, x, q3120}, m4, m5, m6, m7
    psubw        m12, m8
    jmp m(idct_8x16_internal_10bpc).end
ALIGN function_align
.pass1_main:
    ; Load the even-numbered rows relative to cq, scale by m14, round with m11
    ; and shift right by 12, then continue in the 8x8 ADST core (main2).
    pmulld       m0, m14, [cq+32* 0]
    pmulld       m7, m14, [cq+32*14]
    pmulld       m1, m14, [cq+32* 2]
    pmulld       m6, m14, [cq+32*12]
    pmulld       m2, m14, [cq+32* 4]
    pmulld       m5, m14, [cq+32*10]
    pmulld       m3, m14, [cq+32* 6]
    pmulld       m4, m14, [cq+32* 8]
    REPX {paddd x, m11}, m0, m1, m2, m3, m4, m5, m6, m7
    REPX {psrad x, 12 }, m0, m1, m2, m3, m4, m5, m6, m7
    jmp m(iadst_8x8_internal_10bpc).main2

INV_TXFM_8X16_FN flipadst, dct
INV_TXFM_8X16_FN flipadst, adst
INV_TXFM_8X16_FN flipadst, flipadst
INV_TXFM_8X16_FN flipadst, identity, 35

; iflipadst_8x16_internal_10bpc
; Same structure as iadst_8x16_internal_10bpc: it reuses that function's
; .pass1_main but finishes each half with the iflipadst_8x8 main_end, and
; .pass2 feeds the registers to the shared .end tail in reversed order.
cglobal iflipadst_8x16_internal_10bpc, 0, 7, 16, dst, stride, c, eob, tx2
; see iadst_8x16_internal_10bpc: restores the `cmp` instruction
%undef cmp
    vpbroadcastd m12, [clip_18b_min]
    vpbroadcastd m13, [clip_18b_max]
.pass1:
    vpbroadcastd m14, [pd_2896]
    vpbroadcastd m11, [pd_2048]
    cmp          eobd, 43
    jl .fast
    add          cq, 32             ; address the odd rows
    call m(iadst_8x16_internal_10bpc).pass1_main
    call m(iflipadst_8x8_internal_10bpc).main_end
    sub          cq, 32
    ; park the odd-row result: m0-m6 in the odd cq rows, m7 in m15
    mova         [cq+32* 1], m0
    mova         [cq+32* 3], m1
    mova         [cq+32* 5], m2
    mova         [cq+32* 7], m3
    mova         [cq+32* 9], m4
    mova         [cq+32*11], m5
    mova         [cq+32*13], m6
    mova         m15, m7
    call m(iadst_8x16_internal_10bpc).pass1_main
    call m(iflipadst_8x8_internal_10bpc).main_end
    mova         m8, [cq+32* 1]
    mova         m9, [cq+32* 3]
    mova         m10, [cq+32* 5]
    mova         m11, [cq+32* 7]
    mova         m12, [cq+32* 9]
    mova         m13, [cq+32*11]
    mova         m14, [cq+32*13]
    jmp tx2q
.fast:
    call m(iadst_8x16_internal_10bpc).pass1_main
    call m(iflipadst_8x8_internal_10bpc).main_end
    pxor         m8, m8
    REPX {mova x, m8}, m9, m10, m11, m12, m13, m14, m15
    jmp tx2q
.pass2:
    call m(idct_8x16_internal_10bpc).transpose
    call m(iadst_8x16_internal_8bpc).main
    call m(iadst_8x16_internal_8bpc).main_pass2_end
    vpbroadcastd m12, [pw_2048]
    ; xm13 = pw_4096 in the low lane only (upper lane cleared), so the psubw
    ; below yields m12 = -2048 in the low lane and +2048 in the high lane.
    vpbroadcastd xm13, [pw_4096]
    ; Reverse the register order (new m0..m7 <- old m7..m0) while applying the
    ; qword permutes; m8-m11 hold the old m3..m0 in the meantime.
    mova         m11, m0
    vpermq       m0, m7, q2031
    mova         m10, m1
    vpermq       m1, m6, q2031
    mova         m9, m2
    vpermq       m2, m5, q2031
    mova         m8, m3
    vpermq       m3, m4, q2031
    vpermq       m4, m8, q3120
    vpermq       m5, m9, q3120
    vpermq       m6, m10, q3120
    vpermq       m7, m11, q3120
    psubw        m12, m13
    jmp m(idct_8x16_internal_10bpc).end

INV_TXFM_8X16_FN identity, dct
INV_TXFM_8X16_FN identity, adst
INV_TXFM_8X16_FN identity, flipadst
INV_TXFM_8X16_FN identity, identity

; IDTX16 src/dst, tmp, coef[, half]
;   tmp = pmulhrsw(coef, src/dst)
;   3 arguments:         src/dst = 2*src/dst + tmp           (saturating adds)
;   4 arguments:         tmp is halved first, src/dst = src/dst + tmp
;     half is a number:  halving via pmulhrsw with register m<half>
;     otherwise:         halving via psraw by 1 (no rounding)
; All register arguments are bare numbers (the macro prepends `m`).
%macro IDTX16 3-4 ; src/dst, tmp, pw_1697x16, [pw_16384]
    pmulhrsw     m%2, m%3, m%1
%if %0 == 4 ; if downshifting by 1
%ifnum %4
    pmulhrsw     m%2, m%4
%else ; without rounding
    psraw        m%2, 1
%endif
%else
    paddsw       m%1, m%1
%endif
    paddsw       m%1, m%2
%endmacro

; iidentity_8x16_internal_10bpc
; .pass1: every one of the sixteen 32-byte rows of cq is multiplied by pd_2896,
;         rounded with pd_2048 and shifted right by 12 (results in m0-m15).
; .pass2: packs to words, applies IDTX16 and stores through .pass2_end.
cglobal iidentity_8x16_internal_10bpc, 0, 7, 16, dst, stride, c, eob, tx2
.pass1:
    vpbroadcastd m15, [pd_2896]
    pmulld       m0, m15, [cq+32* 0]
    pmulld       m8, m15, [cq+32* 1]
    pmulld       m1, m15, [cq+32* 2]
    pmulld       m9, m15, [cq+32* 3]
    pmulld       m2, m15, [cq+32* 4]
    pmulld       m10, m15, [cq+32* 5]
    pmulld       m3, m15, [cq+32* 6]
    pmulld       m11, m15, [cq+32* 7]
    pmulld       m4, m15, [cq+32* 8]
    pmulld       m12, m15, [cq+32* 9]
    pmulld       m5, m15, [cq+32*10]
    pmulld       m13, m15, [cq+32*11]
    pmulld       m6, m15, [cq+32*12]
    pmulld       m14, m15, [cq+32*13]
    pmulld       m7, m15, [cq+32*14]
    pmulld       m15, [cq+32*15]
    ; All sixteen registers are live, so m7 is spilled to [cq] to free a
    ; register for the rounding constant and is added back from memory.
    mova         [cq], m7
    vpbroadcastd m7, [pd_2048]
    REPX {paddd x, m7}, m0, m1, m2, m3, m4, m5, m6, \
                        m8, m9, m10, m11, m12, m13, m14, m15
    paddd        m7, [cq]
    REPX {psrad x, 12}, m0, m1, m2, m3, m4, m5, m6, m7, \
                        m8, m9, m10, m11, m12, m13, m14, m15
    jmp tx2q
.pass2:
    packssdw     m0, m8
    packssdw     m1, m9
    packssdw     m2, m10
    packssdw     m3, m11
    packssdw     m4, m12
    packssdw     m5, m13
    packssdw     m6, m14
    packssdw     m13, m7, m15       ; eighth pair goes to m13; m7 is reloaded below
    vpbroadcastd m8, [pw_1697x16]
    REPX {IDTX16 x, 9, 8}, 0, 1, 2, 3, 4, 5, 6, 13
    vpbroadcastd m7, [pixel_10bpc_max]
    vpbroadcastd m12, [pw_2048]
    call .pass2_end
    RET
ALIGN function_align
.pass2_end:
    ; Interleave the eight word registers (m0-m6, m13) at word, dword and qword
    ; granularity, scale each resulting pair by m12 and pass it to the
    ; iidentity_8x8 write_2x8x2 helpers.
    punpckhwd    m9, m0, m1
    punpcklwd    m0, m1
    punpckhwd    m1, m6, m13
    punpcklwd    m6, m13
    punpckhwd    m13, m4, m5
    punpcklwd    m4, m5
    punpcklwd    m5, m2, m3
    punpckhwd    m2, m3
    punpckhdq    m3, m0, m5
    punpckldq    m0, m5
    punpckhdq    m11, m9, m2
    punpckldq    m9, m2
    punpckldq    m2, m4, m6
    punpckhdq    m4, m6
    punpckldq    m6, m13, m1
    punpckhdq    m13, m1
    punpckhqdq   m1, m0, m2
    punpcklqdq   m0, m2
    punpcklqdq   m2, m3, m4
    punpckhqdq   m3, m4
    punpcklqdq   m8, m9, m6
    punpckhqdq   m9, m6
    punpcklqdq   m10, m11, m13
    punpckhqdq   m11, m13
    pmulhrsw     m0, m12
    pmulhrsw     m1, m12
    call m(iidentity_8x8_internal_10bpc).write_2x8x2_start
    pmulhrsw     m0, m12, m2
    pmulhrsw     m1, m12, m3
    call m(iidentity_8x8_internal_10bpc).write_2x8x2_zero
    pmulhrsw     m0, m12, m8
    pmulhrsw     m1, m12, m9
    lea          dstq, [dstq+strideq*4]
    call m(iidentity_8x8_internal_10bpc).write_2x8x2_zero
    pmulhrsw     m0, m12, m10
    pmulhrsw     m1, m12, m11
    call m(iidentity_8x8_internal_10bpc).write_2x8x2_zero
    ret

INV_TXFM_8X16_FN dct, dct, 0, 12
INV_TXFM_8X16_FN dct, identity, 35, 12
INV_TXFM_8X16_FN dct, adst, 0, 12
INV_TXFM_8X16_FN dct, flipadst, 0, 12

; idct_8x16_internal_12bpc
; Pass 1 is the 10bpc .pass1 run with the clip_20b clamp range in m12/m13.
; .pass2 works on dwords with the clip_18b range. 32*8 bytes of stack are
; reserved; r6 = rsp+32*4 is the middle of that area, which is exactly the
; [r6-32*4] .. [r6+32*3] span that the 10bpc .main_oddhalf stores to.
cglobal idct_8x16_internal_12bpc, 0, 7, 16, 32*8, dst, stride, c, eob, tx2
    vpbroadcastd m12, [clip_20b_min]
    vpbroadcastd m13, [clip_20b_max]
    jmp m(idct_8x16_internal_10bpc).pass1
.pass2:
    lea          r6, [rsp+32*4]
    call .transpose                 ; cq rows 0-7 + m0-m7 (see .transpose)
    vpbroadcastd m12, [clip_18b_min]
    vpbroadcastd m13, [clip_18b_max]
    ; save the even registers of the second half to cq rows 8/10/12/14 ...
    mova         [cq+32* 8], m0
    mova         [cq+32*10], m2
    mova         [cq+32*12], m4
    mova         [cq+32*14], m6
    ; ... and gather the odd rows 1,3,...,15 into m0-m7, clamped from below.
    ; The statement order matters: m1, m5 and m3 are read before being reused.
    pmaxsd       m0, m12, [cq+32* 1]
    pmaxsd       m4, m12, m1
    pmaxsd       m1, m12, [cq+32* 3]
    pmaxsd       m2, m12, [cq+32* 5]
    pmaxsd       m6, m12, m5
    pmaxsd       m5, m12, m3
    pmaxsd       m3, m12, [cq+32* 7]
    pmaxsd       m7, m12
    REPX {pminsd x, m13}, m0, m1, m2, m3, m4, m5, m6, m7
    vpbroadcastd m11, [pd_2048]
    vpbroadcastd m14, [pd_2896]
    call m(idct_8x16_internal_10bpc).main_oddhalf
    ; even rows 0,2,...,14, clamped to the same range
    pmaxsd       m0, m12, [cq+32* 0]
    pmaxsd       m1, m12, [cq+32* 2]
    pmaxsd       m2, m12, [cq+32* 4]
    pmaxsd       m3, m12, [cq+32* 6]
    pmaxsd       m4, m12, [cq+32* 8]
    pmaxsd       m5, m12, [cq+32*10]
    pmaxsd       m6, m12, [cq+32*12]
    pmaxsd       m7, m12, [cq+32*14]
    REPX {pminsd x, m13}, m0, m1, m2, m3, m4, m5, m6, m7
    call m(idct_8x8_internal_10bpc).main
    call m(idct_8x16_internal_10bpc).main_evenhalf
    vpbroadcastd m11, [pd_8]        ; rounding term for the >>4 below
    REPX {paddd x, m11}, m0, m1, m2, m3, m4, m5, m6, m7
    ; presumably merges m0-m7 with the odd half stored at r6 into m0-m15
    ; (helper defined elsewhere) -- all sixteen registers are shifted next
    call m(idct_16x8_internal_10bpc).pass1_rotations
    REPX {psrad x, 4}, m0, m1, m2, m3, m4, m5, m6, m7, \
                       m8, m9, m10, m11, m12, m13, m14, m15
.end:
    ; Pack the sixteen dword registers pairwise to words (m0-m7), fix the lane
    ; order of each pair with vpermq q3120 and pass it to the write helpers.
    packssdw     m0, m1
    packssdw     m1, m2, m3
    packssdw     m2, m4, m5
    packssdw     m3, m6, m7
    packssdw     m4, m8, m9
    packssdw     m5, m10, m11
    packssdw     m6, m12, m13
    packssdw     m7, m14, m15
    vpermq       m0, m0, q3120
    vpermq       m1, m1, q3120
    call m(idct_8x8_internal_12bpc).write_8x4_start
    call m(idct_8x8_internal_10bpc).write_8x4
    vpermq       m0, m2, q3120
    vpermq       m1, m3, q3120
    call m(idct_8x8_internal_10bpc).write_8x4
    vpermq       m0, m4, q3120
    vpermq       m1, m5, q3120
    call m(idct_8x8_internal_10bpc).write_8x4
    vpermq       m0, m6, q3120
    vpermq       m1, m7, q3120
    call m(idct_8x8_internal_10bpc).write_8x4
    RET
ALIGN function_align
.transpose:
    ; Runs the 12bpc 8x8 transpose helper twice on dword data:
    ;   1. on m0-m7, with m8-m11 parked in cq rows 8-11 meanwhile; the result is
    ;      stored to cq rows 0-7
    ;   2. on the former m8-m15, moved into m0-m7; the result stays in m0-m7
    ;      (second call is a tail jump)
    mova         [cq+32* 8], m8
    mova         [cq+32* 9], m9
    mova         [cq+32*10], m10
    mova         [cq+32*11], m11
    call m(idct_8x8_internal_12bpc).transpose_8x8
    mova         [cq+32* 0], m0
    mova         [cq+32* 1], m1
    mova         [cq+32* 2], m2
    mova         [cq+32* 3], m3
    mova         [cq+32* 4], m4
    mova         [cq+32* 5], m5
    mova         [cq+32* 6], m6
    mova         [cq+32* 7], m7
    mova         m0, [cq+32* 8]
    mova         m1, [cq+32* 9]
    mova         m2, [cq+32*10]
    mova         m3, [cq+32*11]
    mova         m4, m12
    mova         m5, m13
    mova         m6, m14
    mova         m7, m15
    jmp m(idct_8x8_internal_12bpc).transpose_8x8

INV_TXFM_8X16_FN adst, dct, 0, 12
INV_TXFM_8X16_FN adst, adst, 0, 12
INV_TXFM_8X16_FN adst, flipadst, 0, 12
INV_TXFM_8X16_FN adst, identity, 35, 12

; -----------------------------------------------------------------------------
; iadst_8x16_internal_12bpc
;
; Entry:  loads the 20-bit clip bounds into m12/m13 and continues in the 10bpc
;         implementation's .pass1.
; .pass2: runs .pass2_main, then the 16x8 10bpc .pass1_rotations helper, shifts
;         the results and finishes in idct_8x16_internal_12bpc.end.
; .pass2_end is also entered from iflipadst_8x16_internal_12bpc.pass2.
; .pass2_main is also called from iflipadst_8x16_internal_12bpc.pass2.
;
; Stack: 32*8 bytes are requested in the cglobal line; r6 is pointed at
; rsp+32*4 before .pass2_main is called.
; -----------------------------------------------------------------------------
cglobal iadst_8x16_internal_12bpc, 0, 7, 16, 32*8, dst, stride, c, eob, tx2
    vpbroadcastd    m12, [clip_20b_min]
    vpbroadcastd    m13, [clip_20b_max]
    jmp m(iadst_8x16_internal_10bpc).pass1
.pass2:
    lea             r6, [rsp+32*4]
    call .pass2_main
    call m(iadst_16x8_internal_10bpc).pass1_rotations
.pass2_end:
    ; Two different shift counts are applied to the two register groups.
    ; NOTE(review): presumably m4-m11 still carry an extra 2^11 scale from a
    ; multiply done in pass1_rotations (4 + 11 = 15) - confirm in that helper.
    REPX            {psrad x, 4 }, m0, m1, m2, m3, m12, m13, m14, m15
    REPX            {psrad x, 15}, m4, m5, m6, m7, m8, m9, m10, m11
    jmp m(idct_8x16_internal_12bpc).end
ALIGN function_align
.pass2_main:
    ; Transposes via the idct helper, clamps every input to the 18-bit range
    ; (m13 = min, m14 = max) and feeds the two halves to main_part1/main_part2
    ; of the 16x8 10bpc iadst. The trailing numeric comments are the original
    ; author's input indices.
    call m(idct_8x16_internal_12bpc).transpose
    vpbroadcastd    m13, [clip_18b_min]
    vpbroadcastd    m14, [clip_18b_max]
    ; Park the registers that are needed only for the second half in cq so
    ; that m0-m7 can be reused for the first half.
    mova            [cq+32* 8], m0
    mova            [cq+32*11], m3
    mova            [cq+32*12], m4
    mova            [cq+32*15], m7
    pmaxsd          m0, m13, [cq+32* 2] ;  2
    pmaxsd          m3, m13, m1         ;  9
    pmaxsd          m1, m13, m5         ; 13
    pmaxsd          m4, m13, m2         ; 10
    pmaxsd          m2, m13, [cq+32* 6] ;  6
    pmaxsd          m5, m13, [cq+32* 5] ;  5
    pmaxsd          m6, m13, m6         ; 14
    pmaxsd          m7, m13, [cq+32* 1] ;  1
    REPX            {pminsd x, m14}, m0, m1, m2, m3, m4, m5, m6, m7
    vpbroadcastd    m12, [pd_2048]
    vpbroadcastd    m15, [pd_2896]
    call m(iadst_16x8_internal_10bpc).main_part1
    pmaxsd          m0, m13, [cq+32* 0] ;  0
    pmaxsd          m1, m13, [cq+32*15] ; 15
    pmaxsd          m2, m13, [cq+32* 4] ;  4
    pmaxsd          m3, m13, [cq+32*11] ; 11
    pmaxsd          m4, m13, [cq+32* 8] ;  8
    pmaxsd          m5, m13, [cq+32* 7] ;  7
    pmaxsd          m6, m13, [cq+32*12] ; 12
    pmaxsd          m7, m13, [cq+32* 3] ;  3
    REPX            {pminsd x, m14}, m0, m1, m2, m3, m4, m5, m6, m7
    call m(iadst_16x8_internal_10bpc).main_part2
    ; Build the constants handed back to the caller in m13/m14/m15 from
    ; pd_17408 and the value left in m15 (pd_2896 >> 11 = 1, assuming the
    ; helpers above leave m15 untouched, as the original comments imply).
    vpbroadcastd    m14, [pd_17408]
    psrld           m15, 11 ; pd_1
    psubd           m13, m14, m15 ; pd_17407
    pslld           m15, 3 ; pd_8
    ret

; Public entry points for the flipadst row transform; INV_TXFM_8X16_FN is
; defined outside this part of the file.
INV_TXFM_8X16_FN flipadst, dct,      0, 12
INV_TXFM_8X16_FN flipadst, adst,     0, 12
INV_TXFM_8X16_FN flipadst, flipadst, 0, 12
INV_TXFM_8X16_FN flipadst, identity, 35, 12

; -----------------------------------------------------------------------------
; iflipadst_8x16_internal_12bpc
;
; Entry:  loads the 20-bit clip bounds into m12/m13 and continues in the 10bpc
;         implementation's .pass1.
; .pass2: shares .pass2_main and .pass2_end with iadst_8x16_internal_12bpc and
;         differs only in calling the iflipadst 16x8 .pass1_rotations helper.
; -----------------------------------------------------------------------------
cglobal iflipadst_8x16_internal_12bpc, 0, 7, 16, 32*8, dst, stride, c, eob, tx2
    vpbroadcastd    m12, [clip_20b_min]
    vpbroadcastd    m13, [clip_20b_max]
    jmp m(iflipadst_8x16_internal_10bpc).pass1
.pass2:
    lea             r6, [rsp+32*4]
    call m(iadst_8x16_internal_12bpc).pass2_main
    call m(iflipadst_16x8_internal_10bpc).pass1_rotations
    jmp m(iadst_8x16_internal_12bpc).pass2_end

INV_TXFM_8X16_FN identity, dct,      0, 12
INV_TXFM_8X16_FN identity, adst,     0, 12
INV_TXFM_8X16_FN identity, flipadst, 0, 12
INV_TXFM_8X16_FN identity, identity, 0, 12

; -----------------------------------------------------------------------------
; iidentity_8x16_internal_12bpc
;
; Entry:  continues directly in the 10bpc implementation's .pass1.
; .pass2: scales all sixteen dword registers in .pass2_main, packs them to
;         words with signed saturation (m0-m6 with m8-m14, m7 with m15 into
;         m13), loads pixel_12bpc_max into m7 and pw_16384 into m12, and lets
;         the 10bpc .pass2_end helper finish.
; -----------------------------------------------------------------------------
cglobal iidentity_8x16_internal_12bpc, 0, 7, 16, 32*8, dst, stride, c, eob, tx2
    jmp m(iidentity_8x16_internal_10bpc).pass1
.pass2:
    call .pass2_main
    packssdw        m0, m8
    packssdw        m1, m9
    packssdw        m2, m10
    packssdw        m3, m11
    packssdw        m4, m12
    packssdw        m5, m13
    packssdw        m6, m14
    packssdw        m13, m7, m15
    vpbroadcastd    m7, [pixel_12bpc_max]
    vpbroadcastd    m12, [pw_16384]
    call m(iidentity_8x16_internal_10bpc).pass2_end
    RET
ALIGN function_align
.pass2_main:
    ; For every register m0-m15:
    ;   x = (clamp(x, clip_18b_min, clip_18b_max) * 5793 + 1024) >> 14
    ; All sixteen vector registers hold data, so each constant is loaded into
    ; a register whose value has first been spilled to [cq]; the spilled value
    ; is then processed with a memory operand. m7 and m15 alternate as the
    ; constant register.
    mova            [cq], m7
    vpbroadcastd    m7, [clip_18b_min]
    REPX            {pmaxsd x, m7}, m0,  m1,  m2,  m3,  m4,  m5,  m6, \
                                    m8,  m9,  m10, m11, m12, m13, m14, m15
    pmaxsd          m7, [cq]
    mova            [cq], m15
    vpbroadcastd    m15, [clip_18b_max]
    REPX            {pminsd x, m15}, m0,  m1,  m2,  m3,  m4,  m5,  m6,  m7, \
                                     m8,  m9,  m10, m11, m12, m13, m14
    pminsd          m15, [cq]
    mova            [cq], m7
    vpbroadcastd    m7, [pd_5793]
    REPX            {pmulld x, m7}, m0,  m1,  m2,  m3,  m4,  m5,  m6, \
                                    m8,  m9,  m10, m11, m12, m13, m14, m15
    pmulld          m7, [cq]
    mova            [cq], m15
    vpbroadcastd    m15, [pd_1024]
    REPX            {paddd x, m15}, m0,  m1,  m2,  m3,  m4,  m5,  m6,  m7, \
                                    m8,  m9,  m10, m11, m12, m13, m14
    paddd           m15, [cq]
    REPX            {psrad x, 14}, m0,  m1,  m2,  m3,  m4,  m5,  m6,  m7, \
                                   m8,  m9,  m10, m11, m12, m13, m14, m15
    ret

; -----------------------------------------------------------------------------
; INV_TXFM_16X4_FN type1, type2 [, bitdepth = 10]
;
; Emits the public 16x4 entry point through INV_TXFM_FN (defined outside this
; part of the file). For the dct_dct combination it additionally emits the
; DC-only path:
;   - 10bpc: the .dconly/.dconly2/.dconly3/.dconly_loop code itself
;   - other bitdepths: m3 is loaded from dconly_<bitdepth>bpc and control jumps
;     to the 10bpc .dconly label
; -----------------------------------------------------------------------------
%macro INV_TXFM_16X4_FN 2-3 10 ; type1, type2, bitdepth
    INV_TXFM_FN     %1, %2, 0, 16x4, %3
%ifidn %1_%2, dct_dct
    vpbroadcastd    m3, [dconly_%3bpc]
%if %3 = 10
.dconly:
    imul            r6d, [cq], 181
    mov             [cq], eobd ; 0
    ; r3d is the row counter of .dconly_loop (two rows per iteration).
    ; NOTE(review): presumably r3d is eobd and is zero here, so this sets the
    ; count to 4 - confirm against INV_TXFM_FN.
    or              r3d, 4
.dconly2:
    add             r6d, 384
    sar             r6d, 9
.dconly3:
    imul            r6d, 181
    add             r6d, 2176
    sar             r6d, 12
    movd            xm0, r6d
    paddsw          xm0, xm3
    vpbroadcastw    m0, xm0
.dconly_loop:
    ; dst += dc for two rows: the value in m0 already has m3 added, so the
    ; unsigned-saturating subtract of m3 removes it again and floors at zero.
    ; NOTE(review): presumably dconly_%3bpc is chosen so that the signed
    ; saturation in paddsw lands on the pixel maximum - confirm against the
    ; constant's definition.
    paddsw          m1, m0, [dstq+strideq*0]
    paddsw          m2, m0, [dstq+strideq*1]
    psubusw         m1, m3
    psubusw         m2, m3
    mova            [dstq+strideq*0], m1
    mova            [dstq+strideq*1], m2
    lea             dstq, [dstq+strideq*2]
    sub             r3d, 2
    jg .dconly_loop
    RET
%else
    jmp m(inv_txfm_add_dct_dct_16x4_10bpc).dconly
%endif
%endif
%endmacro

INV_TXFM_16X4_FN dct, dct
INV_TXFM_16X4_FN dct, identity
INV_TXFM_16X4_FN dct, adst
INV_TXFM_16X4_FN dct, flipadst

; -----------------------------------------------------------------------------
; idct_16x4_internal_10bpc
;
; Entry:  m8/m9 = 18-bit clip bounds, then falls into .pass1. The 12bpc variant
;         jumps to .pass1 with its own bounds already in m8/m9.
; .pass1: loads the coefficients as 128-bit halves, pairs them with shufpd,
;         runs .pass1_main (even half), .pass1_main2 (odd half) and
;         .pass1_main3 (final butterflies), shifts right by 1 and jumps to the
;         second transform through tx2q.
; .pass2: packs/transposes, runs the 8bpc 16x4 idct on words, then .end.
; .end/.end2/.end3: shared store tails (multiply by pw_2048, add to dst, zero
;         the coefficient buffer, clamp to [0, m5], store four rows). These
;         labels are entered from other transforms in this file.
; .transpose_4x16_packed: shared by the other 16x4 transforms in this file.
; -----------------------------------------------------------------------------
cglobal idct_16x4_internal_10bpc, 0, 7, 14, dst, stride, c, eob, tx2
    vpbroadcastd    m8, [clip_18b_min]
    vpbroadcastd    m9, [clip_18b_max]
.pass1:
    vbroadcasti128  m0, [cq+16* 0]
    vbroadcasti128  m4, [cq+16* 4]
    vbroadcasti128  m1, [cq+16* 2]
    vbroadcasti128  m7, [cq+16* 6]
    vbroadcasti128  m5, [cq+16*10]
    vbroadcasti128  m2, [cq+16* 8]
    vbroadcasti128  m6, [cq+16*12]
    vbroadcasti128  m3, [cq+16*14]
    shufpd          m0, m4, 0x0c        ;  0  4
    shufpd          m1, m5, 0x0c        ;  2 10
    shufpd          m2, m6, 0x0c        ;  8 12
    shufpd          m3, m7, 0x0c        ; 14  6
    call .pass1_main
    vbroadcasti128  m10, [cq+16* 1]
    vbroadcasti128  m4, [cq+16* 5]
    vbroadcasti128  m11, [cq+16*15]
    vbroadcasti128  m5, [cq+16*11]
    shufpd          m10, m4, 0x0c       ;  1  5
    shufpd          m11, m5, 0x0c       ; 15 11
    vbroadcasti128  m5, [cq+16* 9]
    vbroadcasti128  m4, [cq+16*13]
    shufpd          m5, m4, 0x0c        ;  9 13
    vbroadcasti128  m6, [cq+16* 7]
    vbroadcasti128  m4, [cq+16* 3]
    shufpd          m6, m4, 0x0c        ;  7  3
    call .pass1_main2
    ; m4 = all ones (-1); subtracting it adds 1 to m0-m3. Every output of
    ; .pass1_main3 is a sum or difference involving one of m0-m3, so all
    ; sixteen results carry the +1 before the shift by 1 below.
    pcmpeqd         m4, m4
    REPX            {psubd x, m4}, m0, m1, m2, m3
    call .pass1_main3
    REPX            {psrad x, 1 }, m0, m1, m2, m3, m4, m5, m6, m7
    jmp             tx2q
.pass2:
    call .transpose_4x16_packed
    lea             r6, [deint_shuf+128]
    call m(idct_16x4_internal_8bpc).main
.end:
    vpbroadcastd    m4, [pw_2048]
    REPX            {pmulhrsw x, m4}, m0, m1, m2, m3
    vpbroadcastd    m5, [pixel_10bpc_max]
.end2:
    paddw           m0, [dstq+strideq*0]
    paddw           m1, [dstq+strideq*1]
.end3:
    lea             r6, [dstq+strideq*2]
    paddw           m2, [r6  +strideq*0]
    paddw           m3, [r6  +strideq*1]
    pxor            m4, m4
    ; clear the coefficient buffer
    REPX            {mova [cq+32*x], m4}, 0, 1, 2, 3, 4, 5, 6, 7
    ; clamp to [0, pixel max in m5]
    REPX            {pmaxsw x, m4}, m0, m1, m2, m3
    REPX            {pminsw x, m5}, m0, m1, m2, m3
    mova            [dstq+strideq*0], m0
    mova            [dstq+strideq*1], m1
    mova            [r6  +strideq*0], m2
    mova            [r6  +strideq*1], m3
    RET
ALIGN function_align
.pass1_main:
    ; Even half: m7 = pd_2048, the 8x4 idct core, then the idct8 output
    ; butterflies (pairs as labelled by the original comments).
    vpbroadcastd    m7, [pd_2048]
    call m(idct_8x4_internal_10bpc).main
    psubd           m3, m0, m4          ; idct8 out7 out6
    paddd           m0, m4              ; idct8 out0 out1
    paddd           m1, m2, m5          ; idct8 out3 out2
    psubd           m2, m5              ; idct8 out4 out5
    ret
ALIGN function_align
.pass1_main2:
    ; Odd half. In: m10, m11, m5, m6 = paired odd coefficients, m7 = rounding
    ; constant from .pass1_main, m8/m9 = clip bounds. Intermediate values are
    ; clamped to [m8, m9] before each multiply stage. Leaves the unrounded
    ; 2896-scaled products in m5/m6 for .pass1_main3.
    ITX_MULSUB_2D   10, 11, 4, 12, 13, 7,  401_1931, 4076_3612, 1
    ITX_MULSUB_2D    5,  6, 4, 12, 13, 7, 3166_3920, 2598_1189, 1
    vbroadcasti128  m12, [pd_3784_m3784]
    psubd           m4, m10, m5
    paddd           m10, m5             ; t8  t11
    psignd          m4, m12             ; t9  t10
    psubd           m5, m11, m6
    paddd           m11, m6             ; t15 t12
    psignd          m5, m12             ; t14 t13
    vpbroadcastd    m6, [pd_1567]
    vpbroadcastd    m13, [pd_3784]
    REPX            {pmaxsd x, m8}, m5, m4
    REPX            {pminsd x, m9}, m5, m4
    pmulld          m12, m5
    pmulld          m5, m6
    vbroadcasti128  m6, [pd_1567_m1567]
    pmulld          m13, m4
    pmulld          m4, m6
    REPX            {pmaxsd x, m8}, m10, m11, m0, m1
    REPX            {pminsd x, m9}, m10, m11, m0, m1
    paddd           m12, m7
    paddd           m5, m7
    paddd           m4, m12
    psubd           m5, m13
    psrad           m4, 12              ; t14a t10a
    psrad           m5, 12              ; t9a  t13a
    vpbroadcastd    m12, [pd_2896]
    punpckhqdq      m6, m11, m5
    punpcklqdq      m11, m4
    punpckhqdq      m4, m10, m4
    punpcklqdq      m10, m5
    psubd           m5, m11, m6         ; t12a t13
    paddd           m11, m6             ; t15a t14
    psubd           m6, m10, m4         ; t11a t10
    paddd           m10, m4             ; t8a  t9
    REPX            {pmaxsd x, m8}, m5, m6
    REPX            {pminsd x, m9}, m5, m6
    pmulld          m5, m12
    pmulld          m6, m12
    REPX            {pmaxsd x, m8}, m2, m3, m11, m10
    REPX            {pminsd x, m9}, m2, m3, m11, m10
    ret
ALIGN function_align
.pass1_main3:
    ; Rounds and shifts the products left in m5/m6 by .pass1_main2 (m7 is the
    ; rounding constant), then forms the sixteen outputs as sums/differences
    ; of the even half (m0-m3) and the odd half (m10, m11, m4, m5).
    paddd           m5, m7
    psubd           m4, m5, m6
    paddd           m5, m6
    psrad           m4, 12              ; t11 t10a
    psrad           m5, 12              ; t12 t13a
    psubd           m7, m0, m11         ; out15 out14
    paddd           m0, m11             ; out0  out1
    psubd           m6, m1, m5          ; out12 out13
    paddd           m1, m5              ; out3  out2
    psubd           m5, m2, m4          ; out11 out10
    paddd           m2, m4              ; out4  out5
    psubd           m4, m3, m10         ; out8  out9
    paddd           m3, m10             ; out7  out6
    ; swap the qword halves of the registers whose pairs are in reversed order
    REPX            {pshufd x, x, q1032}, m1, m3, m5, m7
    ret
ALIGN function_align
.transpose_4x16_packed:
    ; Packs m0-m7 (dwords) into four word registers with signed saturation,
    ; reorders them with deint_shuf and interleaves the 64/128-bit pieces.
    ; Results are returned in m0-m3; m4, m6 and m8 are clobbered.
    vbroadcasti128  m8, [deint_shuf]
    packssdw        m0, m1
    packssdw        m2, m3
    packssdw        m4, m5
    packssdw        m6, m7
    REPX            {pshufb x, m8}, m0, m2, m4, m6
    punpckhqdq      m1, m0, m2
    punpcklqdq      m0, m2
    punpckhqdq      m2, m4, m6
    punpcklqdq      m4, m6
    vperm2i128      m3, m1, m2, 0x31
    vinserti128     m1, xm2, 1
    vperm2i128      m2, m0, m4, 0x31
    vinserti128     m0, xm4, 1
    ret

INV_TXFM_16X4_FN adst, dct
INV_TXFM_16X4_FN adst, adst
INV_TXFM_16X4_FN adst, flipadst
INV_TXFM_16X4_FN adst, identity

; -----------------------------------------------------------------------------
; iadst_16x4_internal_10bpc
;
; Entry:  m12/m13 = 18-bit clip bounds, then falls into .pass1. The 12bpc
;         variant jumps to .pass1 with its own bounds in m12/m13.
; .pass1: runs the 4x16 10bpc iadst core, adds 1 (m11 >> 11, per the original
;         "pd_1" comment) to every output while moving m5-m8 down into m4-m7,
;         then .pass1_end swaps qword halves of the even registers, shifts
;         right by 1 and jumps to the second transform through tx2q.
; .pass2: packs/transposes, runs the 8bpc 16x4 iadst on words and finishes in
;         idct_16x4_internal_10bpc.end.
; .main / .main_end: helper routines that are not called from within this
;         function; their callers are outside this part of the file.
; -----------------------------------------------------------------------------
cglobal iadst_16x4_internal_10bpc, 0, 7, 14, dst, stride, c, eob, tx2
    vpbroadcastd    m12, [clip_18b_min]
    vpbroadcastd    m13, [clip_18b_max]
.pass1:
    call m(iadst_4x16_internal_10bpc).main
    psrad           m11, 11             ; pd_1
    REPX            {paddd x, m11}, m0, m1, m2, m3
    paddd           m4, m5, m11
    paddd           m5, m6, m11
    paddd           m6, m7, m11
    paddd           m7, m8, m11
.pass1_end:
    REPX            {pshufd x, x, q1032}, m0, m2, m4, m6
    REPX            {psrad x, 1}, m0, m1, m2, m3, m4, m5, m6, m7
    jmp             tx2q
.pass2:
    call m(idct_16x4_internal_10bpc).transpose_4x16_packed
    lea             r6, [deint_shuf+128]
    call m(iadst_16x4_internal_8bpc).main
    jmp m(idct_16x4_internal_10bpc).end
ALIGN function_align
.main:
    ; Two-register-wide 4-point ADST multiply stage on the rows stored at
    ; cq+32*0..7 (in0 = rows 0/1, in1 = rows 2/3, in2 = rows 4/5,
    ; in3 = rows 6/7). Constants: 1321, 2482, their sum 3803, and -3344.
    ; Results are left unrounded for .main_end.
    vpbroadcastd    m6, [pd_1321]
    mova            m0, [cq+32*0]
    mova            m1, [cq+32*1]
    vpbroadcastd    m7, [pd_2482]
    mova            m2, [cq+32*6]
    mova            m3, [cq+32*7]
    pmulld          m4, m0, m6
    pmulld          m5, m1, m6          ; 1321*in0
    pmulld          m9, m2, m7
    pmulld          m8, m3, m7          ; 2482*in3
    paddd           m4, m9
    paddd           m8, m5              ; 1321*in0 + 2482*in3
    pmulld          m5, m0, m7
    pmulld          m9, m1, m7          ; 2482*in0
    paddd           m0, m2
    paddd           m1, m3              ; in0 + in3
    paddd           m7, m6              ; pd_3803
    pmulld          m2, m7
    pmulld          m3, m7              ; 3803*in3
    psubd           m5, m2
    psubd           m9, m3              ; 2482*in0 - 3803*in3
    mova            m2, [cq+32*4]
    pmulld          m10, m7, m2
    pmulld          m3, m6, m2
    psubd           m2, m0
    mova            m0, [cq+32*5]
    pmulld          m7, m0              ; 3803*in2
    pmulld          m6, m0              ; 1321*in2
    psubd           m0, m1              ; in2 - in0 - in3
    vpbroadcastd    m1, [pd_m3344]
    paddd           m4, m10
    paddd           m7, m8              ; t0
    psubd           m5, m3
    psubd           m9, m6              ; t1
    pmulld          m2, m1
    pmulld          m0, m1              ; t2
    pmulld          m3, m1, [cq+32*2]
    pmulld          m1, [cq+32*3]       ; -t3
    ret
ALIGN function_align
.main_end:
    ; expects: m6 = rnd
    ; Combines the terms produced by .main into the eight unshifted outputs
    ; (m4, m5, m2, m3, m8, m9, m6, m7 = out0..out7), folding in the rounding
    ; constant. The caller performs the final shift.
    paddd           m5, m6
    paddd           m9, m6
    paddd           m10, m4, m5
    paddd           m4, m6
    paddd           m8, m7, m6
    paddd           m7, m9
    psubd           m4, m3              ; out0 (unshifted)
    psubd           m5, m3              ; out1 (unshifted)
    paddd           m2, m6              ; out2 (unshifted)
    paddd           m3, m10             ; out3 (unshifted)
    psubd           m8, m1              ; out4 (unshifted)
    psubd           m9, m1              ; out5 (unshifted)
    paddd           m6, m0              ; out6 (unshifted)
    paddd           m7, m1              ; out7 (unshifted)
    ret

INV_TXFM_16X4_FN flipadst, dct
INV_TXFM_16X4_FN flipadst, adst
INV_TXFM_16X4_FN flipadst, flipadst
INV_TXFM_16X4_FN flipadst, identity

; -----------------------------------------------------------------------------
; iflipadst_16x4_internal_10bpc
;
; Same core as iadst_16x4_internal_10bpc, with the outputs written back in
; reversed register order.
; .pass1: the rounding add doubles as the register permutation (each paddd
;         reads a register that a later paddd overwrites, so the order of the
;         eight adds matters), then joins iadst_16x4_internal_10bpc.pass1_end.
; .pass2: applies pw_2048 while reversing m0-m3, adds the first two dst rows
;         and joins idct_16x4_internal_10bpc.end3 for the remaining rows.
; -----------------------------------------------------------------------------
cglobal iflipadst_16x4_internal_10bpc, 0, 7, 14, dst, stride, c, eob, tx2
    vpbroadcastd    m12, [clip_18b_min]
    vpbroadcastd    m13, [clip_18b_max]
.pass1:
    call m(iadst_4x16_internal_10bpc).main
    psrad           m11, 11             ; pd_1
    paddd           m4, m3, m11
    paddd           m3, m5, m11
    paddd           m5, m2, m11
    paddd           m2, m6, m11
    paddd           m6, m1, m11
    paddd           m1, m7, m11
    paddd           m7, m0, m11
    paddd           m0, m8, m11
    jmp m(iadst_16x4_internal_10bpc).pass1_end
.pass2:
    call m(idct_16x4_internal_10bpc).transpose_4x16_packed
    lea             r6, [deint_shuf+128]
    call m(iadst_16x4_internal_8bpc).main
    vpbroadcastd    m4, [pw_2048]
    pmulhrsw        m5, m3, m4
    pmulhrsw        m6, m2, m4
    pmulhrsw        m2, m1, m4
    pmulhrsw        m3, m0, m4
    paddw           m0, m5, [dstq+strideq*0]
    paddw           m1, m6, [dstq+strideq*1]
    vpbroadcastd    m5, [pixel_10bpc_max]
    jmp m(idct_16x4_internal_10bpc).end3

INV_TXFM_16X4_FN identity, dct
INV_TXFM_16X4_FN identity, adst
INV_TXFM_16X4_FN identity, flipadst
INV_TXFM_16X4_FN identity, identity

; -----------------------------------------------------------------------------
; iidentity_16x4_internal_10bpc
;
; Pass 1: loads the eight coefficient rows with their middle qwords swapped
;         (vpermq q3120) and scales each dword: x = (x*5793 + 3072) >> 12,
;         then jumps to the second transform through tx2q.
; .pass2: packs/transposes to words, computes x += pmulhrsw(x, pw_1697x8) with
;         a saturating add, and finishes in idct_16x4_internal_10bpc.end.
; -----------------------------------------------------------------------------
cglobal iidentity_16x4_internal_10bpc, 0, 7, 14, dst, stride, c, eob, tx2
    vpbroadcastd    m8, [pd_5793]
    vpermq          m0, [cq+32*0], q3120 ; 0 1
    vpermq          m1, [cq+32*1], q3120 ; 2 3
    vpermq          m2, [cq+32*2], q3120 ; 4 5
    vpermq          m3, [cq+32*3], q3120 ; 6 7
    vpermq          m4, [cq+32*4], q3120 ; 8 9
    vpermq          m5, [cq+32*5], q3120 ; a b
    vpermq          m6, [cq+32*6], q3120 ; c d
    vpermq          m7, [cq+32*7], q3120 ; e f
    vpbroadcastd    m9, [pd_3072]
    REPX            {pmulld x, m8}, m0, m1, m2, m3, m4, m5, m6, m7
    REPX            {paddd x, m9}, m0, m1, m2, m3, m4, m5, m6, m7
    REPX            {psrad x, 12}, m0, m1, m2, m3, m4, m5, m6, m7
    jmp             tx2q
.pass2:
    call m(idct_16x4_internal_10bpc).transpose_4x16_packed
    vpbroadcastd    m7, [pw_1697x8]
    pmulhrsw        m4, m7, m0
    pmulhrsw        m5, m7, m1
    pmulhrsw        m6, m7, m2
    pmulhrsw        m7, m3
    paddsw          m0, m4
    paddsw          m1, m5
    paddsw          m2, m6
    paddsw          m3, m7
    jmp m(idct_16x4_internal_10bpc).end

INV_TXFM_16X4_FN dct, dct,      12
INV_TXFM_16X4_FN dct, identity, 12
INV_TXFM_16X4_FN dct, adst,     12
INV_TXFM_16X4_FN dct, flipadst, 12

; -----------------------------------------------------------------------------
; idct_16x4_internal_12bpc
;
; Entry:  m8/m9 = 20-bit clip bounds, then reuses the 10bpc .pass1.
; .pass2: stays in 32-bit precision instead of using the 8bpc word kernel:
;         clamps the pass-1 output to 18 bits, deinterleaves and transposes it,
;         runs the 4x16 10bpc idct .pass1_main/.pass1_main2 helpers, shifts
;         right by 4, packs to words and joins idct_16x4_internal_10bpc.end2
;         with m5 = pixel_12bpc_max. Entering at .end2 skips the pw_2048
;         multiply that .end performs.
; -----------------------------------------------------------------------------
cglobal idct_16x4_internal_12bpc, 0, 7, 14, dst, stride, c, eob, tx2
    vpbroadcastd    m8, [clip_20b_min]
    vpbroadcastd    m9, [clip_20b_max]
    jmp m(idct_16x4_internal_10bpc).pass1
.pass2:
    vpbroadcastd    m12, [clip_18b_min]
    vpbroadcastd    m13, [clip_18b_max]
    REPX            {pmaxsd x, m12}, m0, m1, m2, m3, m4, m5, m6, m7
    REPX            {pminsd x, m13}, m0, m1, m2, m3, m4, m5, m6, m7
    ; deinterleave
    REPX            {pshufd x, x, q3120}, m0, m1, m2, m3, m4, m5, m6, m7
    ; transpose
    punpcklqdq      m8, m0, m1
    punpckhqdq      m0, m1
    punpcklqdq      m9, m2, m3
    punpckhqdq      m2, m3
    punpcklqdq      m10, m4, m5
    punpckhqdq      m4, m5
    punpcklqdq      m11, m6, m7
    punpckhqdq      m6, m7
    vperm2i128      m3, m0, m2, 0x31    ; out6
    vperm2i128      m1, m0, m2, 0x20    ; out2
    vperm2i128      m7, m4, m6, 0x31    ; out7
    vperm2i128      m5, m4, m6, 0x20    ; out3
    vperm2i128      m13, m10, m11, 0x31 ; out5
    vperm2i128      m12, m10, m11, 0x20 ; out1
    vperm2i128      m11, m8, m9, 0x31   ; out4
    vperm2i128      m10, m8, m9, 0x20   ; out0
    call m(idct_4x16_internal_10bpc).pass1_main
    ; Scale m10-m13 by the multiplier the helper leaves in m6.
    ; NOTE(review): the value of m6 is set inside pass1_main, which is outside
    ; this part of the file - confirm which constant it holds.
    pmulld          m0, m6, m10
    pmulld          m2, m6, m11
    pmulld          m4, m6, m12
    pmulld          m6, m13
    vpbroadcastd    m10, [pd_17408]
    call m(idct_4x16_internal_10bpc).pass1_main2
    REPX            {psrad x, 4}, m0, m1, m2, m3, m4, m5, m6, m7
    packssdw        m0, m4
    packssdw        m1, m5
    packssdw        m2, m6
    packssdw        m3, m7
    vpbroadcastd    m5, [pixel_12bpc_max]
    REPX            {vpermq x, x, q3120}, m0, m1, m2, m3
    jmp m(idct_16x4_internal_10bpc).end2

; Instantiate INV_TXFM_16X4_FN (defined elsewhere in this file) for an adst
; first pass combined with each second-pass type, 12 bpc. Order is unchanged.
INV_TXFM_16X4_FN adst, dct, 12
INV_TXFM_16X4_FN adst, adst, 12
INV_TXFM_16X4_FN adst, flipadst, 12
INV_TXFM_16X4_FN adst, identity, 12

; 16x4 inverse ADST, 12 bpc.
; Entry: loads the 20-bit clip limits into m12/m13 and continues in the
; 10 bpc first pass.
cglobal iadst_16x4_internal_12bpc, 0, 7, 14, dst, stride, c, eob, tx2
    vpbroadcastd m12, [clip_20b_min]
    vpbroadcastd m13, [clip_20b_max]
    jmp          m(iadst_16x4_internal_10bpc).pass1

; Second pass: run the shared body, reorder the qwords of each output
; register, apply the final scale and hand over to the 10 bpc 16x4 tail.
.pass2:
    call         .pass2_main
    REPX {vpermq x, x, q3120}, m0, m1, m2, m3
    ; m4 = pw_16384 (loaded at the end of .pass2_main); pmulhrsw by 16384
    ; evaluates to (x + 1) >> 1 per word.
    pmulhrsw     m0, m4
    pmulhrsw     m1, m4
    pmulhrsw     m2, m4
    pmulhrsw     m3, m4
    jmp          m(idct_16x4_internal_10bpc).end2

ALIGN function_align
; Shared second-pass body (also called by iflipadst_16x4_internal_12bpc).
; In:  m0-m7 = dword data from the first pass
; Out: m0-m3 = results packed to words, m4 = pw_16384, m5 = pixel_12bpc_max
; Overwrites m12/m13 with the 18-bit clip limits and stores the transposed
; input into the eight 32-byte slots at cq.
.pass2_main:
    vpbroadcastd m12, [clip_18b_min]
    vpbroadcastd m13, [clip_18b_max]
    ; lower clamp for all eight inputs; the clamped m4/m5 are parked in m8/m9
    REPX {pmaxsd x, m12}, m0, m1, m2, m3, m6, m7
    pmaxsd       m8, m4, m12
    pmaxsd       m9, m5, m12
    ; upper clamp + transpose of the first four registers -> even slots
    REPX {pminsd x, m13}, m0, m1, m2, m3
    call         m(iadst_8x4_internal_12bpc).transpose_4x8
    mova         [cq+32*0], m0
    mova         [cq+32*2], m1
    mova         [cq+32*4], m2
    mova         [cq+32*6], m3
    ; upper clamp + transpose of the other four registers -> odd slots
    pminsd       m0, m8, m13
    pminsd       m1, m9, m13
    pminsd       m2, m6, m13
    pminsd       m3, m7, m13
    call         m(iadst_8x4_internal_12bpc).transpose_4x8
    mova         [cq+32*1], m0
    mova         [cq+32*3], m1
    mova         [cq+32*5], m2
    mova         [cq+32*7], m3
    ; the transform itself is the 10 bpc code operating on the data in cq
    call         m(iadst_16x4_internal_10bpc).main
    vpbroadcastd m6, [pd_2048]
    call         m(iadst_16x4_internal_10bpc).main_end
    ; shift everything down by 15 bits, gathering the results into m0-m7
    psrad        m0, m4, 15
    psrad        m1, m5, 15
    REPX {psrad x, 15}, m2, m3
    psrad        m4, m8, 15
    psrad        m5, m9, 15
    REPX {psrad x, 15}, m6, m7
    ; dwords -> words with signed saturation
    packssdw     m0, m4
    packssdw     m1, m5
    packssdw     m2, m6
    packssdw     m3, m7
    ; constants expected by the callers' tail code
    vpbroadcastd m4, [pw_16384]
    vpbroadcastd m5, [pixel_12bpc_max]
    ret

INV_TXFM_16X4_FN flipadst, dct, 12
; Remaining INV_TXFM_16X4_FN instantiations with a flipadst first pass, 12 bpc.
INV_TXFM_16X4_FN flipadst, adst, 12
INV_TXFM_16X4_FN flipadst, flipadst, 12
INV_TXFM_16X4_FN flipadst, identity, 12

; 16x4 inverse flipped ADST, 12 bpc.
; Entry: loads the 20-bit clip limits into m12/m13 and continues in the
; 10 bpc first pass.
cglobal iflipadst_16x4_internal_12bpc, 0, 7, 14, dst, stride, c, eob, tx2
    vpbroadcastd m12, [clip_20b_min]
    vpbroadcastd m13, [clip_20b_max]
    jmp          m(iflipadst_16x4_internal_10bpc).pass1

; Second pass: same body as the plain ADST, but the four result registers are
; emitted in reverse order (m0 <- m3, m1 <- m2, m2 <- m1, m3 <- m0).
.pass2:
    call         m(iadst_16x4_internal_12bpc).pass2_main
    vpermq       m7, m0, q3120
    vpermq       m6, m1, q3120
    vpermq       m1, m2, q3120
    vpermq       m0, m3, q3120
    ; m4 = pw_16384 on return from pass2_main: pmulhrsw gives (x + 1) >> 1
    REPX {pmulhrsw x, m4}, m0, m1
    pmulhrsw     m2, m6, m4
    pmulhrsw     m3, m7, m4
    jmp          m(idct_16x4_internal_10bpc).end2

; INV_TXFM_16X4_FN instantiations with an identity first pass, 12 bpc.
INV_TXFM_16X4_FN identity, dct, 12
INV_TXFM_16X4_FN identity, adst, 12
INV_TXFM_16X4_FN identity, flipadst, 12
INV_TXFM_16X4_FN identity, identity, 12

; 16x4 inverse identity transform, 12 bpc.
; First pass: every dword x loaded from cq becomes
;     x + ((x * 1697 + 3072) >> 12)
; Results are left in m0-m7 before the indirect jump to the second pass (tx2q).
cglobal iidentity_16x4_internal_12bpc, 0, 7, 14, dst, stride, c, eob, tx2
    vpbroadcastd m8, [pd_1697]
    vpermq       m0, [cq+32*0], q3120       ; 0 1
    vpermq       m1, [cq+32*1], q3120       ; 2 3
    vpermq       m2, [cq+32*2], q3120       ; 4 5
    vpermq       m3, [cq+32*3], q3120       ; 6 7
    vpbroadcastd m9, [pd_3072]
    pmulld       m4, m8, m0
    pmulld       m5, m8, m1
    pmulld       m6, m8, m2
    pmulld       m7, m8, m3
    vpermq       m10, [cq+32*4], q3120      ; 8 9
    vpermq       m11, [cq+32*5], q3120      ; a b
    vpermq       m12, [cq+32*6], q3120      ; c d
    vpermq       m13, [cq+32*7], q3120      ; e f
    REPX {paddd x, m9}, m4, m5, m6, m7
    REPX {psrad x, 12}, m4, m5, m6, m7
    ; finish the first half while starting the products for the second half
    paddd        m0, m4
    pmulld       m4, m8, m10
    paddd        m1, m5
    pmulld       m5, m8, m11
    paddd        m2, m6
    pmulld       m6, m8, m12
    paddd        m3, m7
    pmulld       m7, m8, m13
    REPX {paddd x, m9}, m4, m5, m6, m7
    REPX {psrad x, 12}, m4, m5, m6, m7
    paddd        m4, m10
    paddd        m5, m11
    paddd        m6, m12
    paddd        m7, m13
    jmp          tx2q

; Second pass: clamp to the 18-bit range, then (x * 5793 + 2048) >> 15,
; transpose/pack, apply the pw_16384 rounding multiply and finish in the
; 10 bpc 16x4 tail with m5 = pixel_12bpc_max.
.pass2:
    vpbroadcastd m12, [clip_18b_min]
    vpbroadcastd m13, [clip_18b_max]
    REPX {pmaxsd x, m12}, m0, m1, m2, m3, m4, m5, m6, m7
    REPX {pminsd x, m13}, m0, m1, m2, m3, m4, m5, m6, m7
    vpbroadcastd m8, [pd_5793]
    vpbroadcastd m9, [pd_2048]
    REPX {pmulld x, m8}, m0, m1, m2, m3, m4, m5, m6, m7
    REPX {paddd x, m9}, m0, m1, m2, m3, m4, m5, m6, m7
    REPX {psrad x, 15}, m0, m1, m2, m3, m4, m5, m6, m7
    call         m(idct_16x4_internal_10bpc).transpose_4x16_packed
    vpbroadcastd m4, [pw_16384]
    REPX {pmulhrsw x, m4}, m0, m1, m2, m3
    vpbroadcastd m5, [pixel_12bpc_max]
    jmp          m(idct_16x4_internal_10bpc).end2

; INV_TXFM_16X8_FN type1, type2 [, bitdepth = 10]
; Wraps INV_TXFM_FN for the 16x8 size. For dct_dct it additionally emits a
; shortcut that scales the first coefficient and jumps into the 16x4 dct_dct
; .dconly2 code.
%macro INV_TXFM_16X8_FN 2-3 10 ; type1, type2, bitdepth
    INV_TXFM_FN  %1, %2, 0, 16x8, %3
%ifidn %1_%2, dct_dct
    imul         r6d, [cq], 181             ; 181/256 ~= 0.707
    vpbroadcastd m3, [dconly_%3bpc]
    mov          [cq], eobd                 ; 0
    or           r3d, 8                     ; presumably the row count used by .dconly2 - confirm there
    add          r6d, 128
    sar          r6d, 8                     ; r6d = (dc * 181 + 128) >> 8
    imul         r6d, 181
    jmp          m(inv_txfm_add_dct_dct_16x4_10bpc).dconly2
%endif
%endmacro

INV_TXFM_16X8_FN dct, dct
INV_TXFM_16X8_FN dct, identity
INV_TXFM_16X8_FN dct, adst
INV_TXFM_16X8_FN dct, flipadst

; 16x8 inverse DCT, 10 bpc.
; Reserves 32*8 bytes of stack; r6 is pointed at its middle so that the
; offsets r6-32*4 .. r6+32*3 address all eight 32-byte slots.
; m12/m13 hold the 18-bit clip limits on entry to .pass1.
cglobal idct_16x8_internal_10bpc, 0, 7, 16, 32*8, dst, stride, c, eob, tx2
    vpbroadcastd m12, [clip_18b_min]
    vpbroadcastd m13, [clip_18b_max]
.pass1:
    ; odd-indexed 32-byte coefficient vectors, pre-multiplied by 2896
    vpbroadcastd m14, [pd_2896]
    pmulld       m0, m14, [cq+32*1]
    pmulld       m1, m14, [cq+32*3]
    pmulld       m2, m14, [cq+32*5]
    pmulld       m3, m14, [cq+32*7]
    pmulld       m4, m14, [cq+32*9]
    pmulld       m5, m14, [cq+32*11]
    pmulld       m6, m14, [cq+32*13]
    pmulld       m7, m14, [cq+32*15]
    vpbroadcastd m11, [pd_2048]
    lea          r6, [rsp+32*4]
    call         m(idct_8x16_internal_10bpc).main_oddhalf_rect2
    ; even-indexed vectors, same pre-multiplication
    pmulld       m0, m14, [cq+32*0]
    pmulld       m1, m14, [cq+32*2]
    pmulld       m2, m14, [cq+32*4]
    pmulld       m3, m14, [cq+32*6]
    pmulld       m4, m14, [cq+32*8]
    pmulld       m5, m14, [cq+32*10]
    pmulld       m6, m14, [cq+32*12]
    pmulld       m7, m14, [cq+32*14]
    call         m(idct_8x8_internal_10bpc).main_rect2
    call         m(idct_8x16_internal_10bpc).main_evenhalf
    psrld        m11, 11                    ; pd_1
    ; add the rounding bias once, before the +/- butterflies
    REPX {paddd x, m11}, m0, m1, m2, m3, m4, m5, m6, m7
    call         .pass1_rotations
    REPX {psrad x, 1}, m0, m1, m2, m3, m4, m5, m6, m7
    REPX {psrad x, 1}, m8, m9, m10, m11, m12, m13, m14, m15
    jmp          tx2q

; Second pass: pack/transpose to words, run the 8 bpc 16x8 DCT core, then
; scale and write two groups of four destination rows.
.pass2:
    call         .transpose
    call         m(idct_16x8_internal_8bpc).main
    vpbroadcastd m10, [pw_2048]             ; pmulhrsw by 2048 = (x + 8) >> 4
.end:
    REPX {pmulhrsw x, m10}, m0, m1, m2, m3
    call         .write_16x4_start
.end2:
    pmulhrsw     m0, m4, m10
    pmulhrsw     m1, m5, m10
    pmulhrsw     m2, m6, m10
    pmulhrsw     m3, m7, m10
    call         .write_16x4_zero
    RET

ALIGN function_align
; Final butterflies of the first pass.
; In:  m0-m7 plus eight 32-byte values stored at r6-32*4 .. r6+32*3
; Out: m(i) = m(i) + slot(i), m(15-i) = m(i) - slot(i), for i = 0..7
.pass1_rotations:
    mova         m14, [r6-32*4]
    mova         m13, [r6-32*3]
    mova         m12, [r6-32*2]
    mova         m11, [r6-32*1]
    mova         m10, [r6+32*0]
    mova         m9, [r6+32*1]
    mova         m8, [r6+32*2]
    psubd        m15, m0, m14               ; out15
    paddd        m0, m14                    ; out0
    psubd        m14, m1, m13               ; out14
    paddd        m1, m13                    ; out1
    psubd        m13, m2, m12               ; out13
    paddd        m2, m12                    ; out2
    psubd        m12, m3, m11               ; out12
    paddd        m3, m11                    ; out3
    psubd        m11, m4, m10               ; out11
    paddd        m4, m10                    ; out4
    psubd        m10, m5, m9                ; out10
    paddd        m5, m9                     ; out5
    psubd        m9, m6, m8                 ; out9
    paddd        m6, m8                     ; out6
    psubd        m8, m7, [r6+32*3]          ; out8
    paddd        m7, [r6+32*3]              ; out7
    ret

ALIGN function_align
; Transpose helpers with three entry points:
;   .transpose   sets r6 = deint_shuf+128 first (not used by the code below;
;                presumably consumed by the routine called afterwards)
;   .transpose2  packs the dword pairs (m0,m8) .. (m7,m15) to words
;   .transpose3  interleaves words, dwords, qwords and finally 128-bit lanes
; Result is in m0-m7; m8 is used as scratch.
.transpose:
    lea          r6, [deint_shuf+128]
.transpose2:
    packssdw     m0, m8
    packssdw     m1, m9
    packssdw     m2, m10
    packssdw     m3, m11
    packssdw     m4, m12
    packssdw     m5, m13
    packssdw     m6, m14
    packssdw     m7, m15
.transpose3:
    ; 16-bit interleave
    punpckhwd    m8, m0, m1
    punpcklwd    m0, m1
    punpcklwd    m1, m2, m3
    punpckhwd    m2, m3
    punpckhwd    m3, m4, m5
    punpcklwd    m4, m5
    punpckhwd    m5, m6, m7
    punpcklwd    m6, m7
    ; 32-bit interleave
    punpckhdq    m7, m4, m6
    punpckldq    m4, m6
    punpckldq    m6, m8, m2
    punpckhdq    m8, m2
    punpckhdq    m2, m0, m1
    punpckldq    m0, m1
    punpckhdq    m1, m3, m5
    punpckldq    m3, m5
    ; 64-bit interleave
    punpcklqdq   m5, m6, m3
    punpckhqdq   m6, m3
    punpckhqdq   m3, m2, m7
    punpcklqdq   m2, m7
    punpcklqdq   m7, m8, m1
    punpckhqdq   m8, m1
    punpckhqdq   m1, m0, m4
    punpcklqdq   m0, m4
    ; 128-bit lane exchange
    vperm2i128   m4, m0, m5, 0x31
    vinserti128  m0, xm5, 1
    vperm2i128   m5, m1, m6, 0x31
    vinserti128  m1, xm6, 1
    vperm2i128   m6, m2, m7, 0x31
    vinserti128  m2, xm7, 1
    vperm2i128   m7, m3, m8, 0x31
    vinserti128  m3, xm8, 1
    ret

ALIGN function_align
; Output helpers with three entry points:
;   .write_16x4_start  sets m9 = pixel_10bpc_max, r3 = stride*3, m8 = 0
;   .write_16x4_zero   stores m8 to eight 32-byte slots at cq, advances cq
;                      by 32*8
;   .write_16x4        adds m0-m3 to four destination rows, clamps the sums
;                      to [m8, m9], stores them and advances dstq by 4 rows
.write_16x4_start:
    vpbroadcastd m9, [pixel_10bpc_max]
    lea          r3, [strideq*3]
    pxor         m8, m8
.write_16x4_zero:
    REPX {mova [cq+32*x], m8}, 0, 1, 2, 3, 4, 5, 6, 7
    add          cq, 32*8
.write_16x4:
    paddw        m0, [dstq+strideq*0]
    paddw        m1, [dstq+strideq*1]
    paddw        m2, [dstq+strideq*2]
    paddw        m3, [dstq+r3]
    REPX {pmaxsw x, m8}, m0, m1, m2, m3
    REPX {pminsw x, m9}, m0, m1, m2, m3
    mova         [dstq+strideq*0], m0
    mova         [dstq+strideq*1], m1
    mova         [dstq+strideq*2], m2
    mova         [dstq+r3], m3
    lea          dstq, [dstq+strideq*4]
    ret

INV_TXFM_16X8_FN adst, dct
INV_TXFM_16X8_FN adst, adst
INV_TXFM_16X8_FN adst, flipadst
INV_TXFM_16X8_FN adst, identity

; 16x8 inverse ADST, 10 bpc: entry and first pass. The body of .pass2 and the
; helper routines follow this point in the file.
cglobal iadst_16x8_internal_10bpc, 0, 7, 16, 32*8, dst, stride, c, eob, tx2
    vpbroadcastd m13, [clip_18b_min]
    vpbroadcastd m14, [clip_18b_max]
.pass1:
    lea          r6, [rsp+32*4]
    call         .main
    vpbroadcastd m14, [pd_3072]
    psrld        m15, 11                    ; pd_1
    psubd        m13, m14, m15              ; pd_3071
    call         .pass1_rotations
.pass1_end:
    REPX {psrad x, 1}, m0, m1, m2, m3, m12, m13, m14, m15
    REPX {psrad x, 12}, m4, m5, m6, m7, m8, m9, m10, m11
    jmp          tx2q
.pass2:
4147*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_10bpc).transpose 4148*c0909341SAndroid Build Coastguard Worker call m(iadst_16x8_internal_8bpc).main 4149*c0909341SAndroid Build Coastguard Worker call m(iadst_16x8_internal_8bpc).main_pass2_end 4150*c0909341SAndroid Build Coastguard Worker vpbroadcastd m10, [pw_2048] 4151*c0909341SAndroid Build Coastguard Worker pxor m11, m11 4152*c0909341SAndroid Build Coastguard Worker psubw m11, m10 4153*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m10 4154*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m11 4155*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m10 4156*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m11 4157*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_10bpc).write_16x4_start 4158*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m4, m10 4159*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m5, m11 4160*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m6, m10 4161*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m7, m11 4162*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_10bpc).write_16x4_zero 4163*c0909341SAndroid Build Coastguard Worker RET 4164*c0909341SAndroid Build Coastguard WorkerALIGN function_align 4165*c0909341SAndroid Build Coastguard Worker.pass1_rotations: 4166*c0909341SAndroid Build Coastguard Worker paddd m0, m15 4167*c0909341SAndroid Build Coastguard Worker psubd m1, m15, m1 4168*c0909341SAndroid Build Coastguard Worker paddd m2, m15 4169*c0909341SAndroid Build Coastguard Worker psubd m3, m15, m3 4170*c0909341SAndroid Build Coastguard Worker paddd m4, m14 4171*c0909341SAndroid Build Coastguard Worker psubd m5, m13, m5 4172*c0909341SAndroid Build Coastguard Worker paddd m6, m14 4173*c0909341SAndroid Build Coastguard Worker psubd m7, m13, m7 4174*c0909341SAndroid Build Coastguard Worker paddd m8, m14, m9 4175*c0909341SAndroid Build Coastguard Worker psubd m9, m13, m10 4176*c0909341SAndroid 
Build Coastguard Worker paddd m10, m14, m11 4177*c0909341SAndroid Build Coastguard Worker psubd m11, m13, m12 4178*c0909341SAndroid Build Coastguard Worker paddd m12, m15, [r6-32*1] 4179*c0909341SAndroid Build Coastguard Worker psubd m13, m15, [r6-32*2] 4180*c0909341SAndroid Build Coastguard Worker paddd m14, m15, [r6-32*3] 4181*c0909341SAndroid Build Coastguard Worker psubd m15, [r6-32*4] 4182*c0909341SAndroid Build Coastguard Worker ret 4183*c0909341SAndroid Build Coastguard WorkerALIGN function_align 4184*c0909341SAndroid Build Coastguard Worker.main: 4185*c0909341SAndroid Build Coastguard Worker ; expects: m13 = clip_min m14 = clip_max 4186*c0909341SAndroid Build Coastguard Worker vpbroadcastd m15, [pd_2896] 4187*c0909341SAndroid Build Coastguard Worker pmulld m0, m15, [cq+32* 2] 4188*c0909341SAndroid Build Coastguard Worker pmulld m1, m15, [cq+32*13] 4189*c0909341SAndroid Build Coastguard Worker pmulld m2, m15, [cq+32* 6] 4190*c0909341SAndroid Build Coastguard Worker pmulld m3, m15, [cq+32* 9] 4191*c0909341SAndroid Build Coastguard Worker pmulld m4, m15, [cq+32*10] 4192*c0909341SAndroid Build Coastguard Worker pmulld m5, m15, [cq+32* 5] 4193*c0909341SAndroid Build Coastguard Worker pmulld m6, m15, [cq+32*14] 4194*c0909341SAndroid Build Coastguard Worker pmulld m7, m15, [cq+32* 1] 4195*c0909341SAndroid Build Coastguard Worker vpbroadcastd m12, [pd_2048] 4196*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m12}, m0, m1, m2, m3, m4, m5, m6, m7 4197*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 12 }, m0, m1, m2, m3, m4, m5, m6, m7 4198*c0909341SAndroid Build Coastguard Worker call .main_part1 4199*c0909341SAndroid Build Coastguard Worker pmulld m0, m15, [cq+32* 0] 4200*c0909341SAndroid Build Coastguard Worker pmulld m1, m15, [cq+32*15] 4201*c0909341SAndroid Build Coastguard Worker pmulld m2, m15, [cq+32* 4] 4202*c0909341SAndroid Build Coastguard Worker pmulld m3, m15, [cq+32*11] 4203*c0909341SAndroid Build Coastguard Worker pmulld m4, m15, 
[cq+32* 8] 4204*c0909341SAndroid Build Coastguard Worker pmulld m5, m15, [cq+32* 7] 4205*c0909341SAndroid Build Coastguard Worker pmulld m6, m15, [cq+32*12] 4206*c0909341SAndroid Build Coastguard Worker pmulld m7, m15, [cq+32* 3] 4207*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m12}, m0, m1, m2, m3, m4, m5, m6, m7 4208*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 12 }, m0, m1, m2, m3, m4, m5, m6, m7 4209*c0909341SAndroid Build Coastguard Worker.main_part2: 4210*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 1, 0, 8, 9, 10, 12, 201, 4091 4211*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 3, 2, 8, 9, 10, 12, 1751, 3703 4212*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 5, 4, 8, 9, 10, 12, 3035, 2751 4213*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 7, 6, 8, 9, 10, 12, 3857, 1380 4214*c0909341SAndroid Build Coastguard Worker psubd m8, m0, m4 ; t8a 4215*c0909341SAndroid Build Coastguard Worker paddd m0, m4 ; t0a 4216*c0909341SAndroid Build Coastguard Worker psubd m4, m1, m5 ; t9a 4217*c0909341SAndroid Build Coastguard Worker paddd m1, m5 ; t1a 4218*c0909341SAndroid Build Coastguard Worker psubd m5, m2, m6 ; t12a 4219*c0909341SAndroid Build Coastguard Worker paddd m2, m6 ; t4a 4220*c0909341SAndroid Build Coastguard Worker psubd m6, m3, m7 ; t13a 4221*c0909341SAndroid Build Coastguard Worker paddd m7, m3 ; t5a 4222*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m13}, m8, m4, m5, m6, m0, m1, m2, m7 4223*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m14}, m8, m4, m5, m6, m0, m1, m2, m7 4224*c0909341SAndroid Build Coastguard Worker vpbroadcastd m11, [pd_4017] 4225*c0909341SAndroid Build Coastguard Worker vpbroadcastd m10, [pd_799] 4226*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 8, 4, 3, 9, _, 12, 10, 11 4227*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 6, 5, 3, 9, _, 12, 11, 10 4228*c0909341SAndroid Build Coastguard Worker psubd m3, m0, m2 ; t4 4229*c0909341SAndroid Build 
Coastguard Worker paddd m0, m2 ; t0 4230*c0909341SAndroid Build Coastguard Worker psubd m2, m1, m7 ; t5 4231*c0909341SAndroid Build Coastguard Worker paddd m1, m7 ; t1 4232*c0909341SAndroid Build Coastguard Worker psubd m7, m4, m6 ; t12a 4233*c0909341SAndroid Build Coastguard Worker paddd m4, m6 ; t8a 4234*c0909341SAndroid Build Coastguard Worker psubd m6, m8, m5 ; t13a 4235*c0909341SAndroid Build Coastguard Worker paddd m5, m8 ; t9a 4236*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m13}, m3, m2, m7, m6, m0, m1, m4, m5 4237*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m14}, m3, m2, m7, m6, m0, m1, m4, m5 4238*c0909341SAndroid Build Coastguard Worker vpbroadcastd m11, [pd_3784] 4239*c0909341SAndroid Build Coastguard Worker vpbroadcastd m10, [pd_1567] 4240*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 3, 2, 8, 9, _, 12, 10, 11 4241*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 7, 6, 8, 9, _, 12, 10, 11 4242*c0909341SAndroid Build Coastguard Worker pminsd m10, m14, [r6-32*4] ; t2 4243*c0909341SAndroid Build Coastguard Worker pminsd m8, m14, [r6-32*3] ; t3 4244*c0909341SAndroid Build Coastguard Worker psubd m9, m0, m10 ; t2a 4245*c0909341SAndroid Build Coastguard Worker paddd m0, m10 ; out0 4246*c0909341SAndroid Build Coastguard Worker psubd m10, m1, m8 ; t3a 4247*c0909341SAndroid Build Coastguard Worker paddd m1, m8 ; -out15 4248*c0909341SAndroid Build Coastguard Worker pmaxsd m9, m13 4249*c0909341SAndroid Build Coastguard Worker pmaxsd m10, m13 4250*c0909341SAndroid Build Coastguard Worker pminsd m9, m14 4251*c0909341SAndroid Build Coastguard Worker pminsd m10, m14 4252*c0909341SAndroid Build Coastguard Worker mova [r6-32*4], m1 4253*c0909341SAndroid Build Coastguard Worker mova m11, [r6-32*1] ; t7a 4254*c0909341SAndroid Build Coastguard Worker mova m1, [r6-32*2] ; t6a 4255*c0909341SAndroid Build Coastguard Worker psubd m8, m3, m11 ; t7 4256*c0909341SAndroid Build Coastguard Worker paddd m11, m3 ; out12 4257*c0909341SAndroid 
Build Coastguard Worker paddd m3, m2, m1 ; -out3 4258*c0909341SAndroid Build Coastguard Worker psubd m2, m1 ; t6 4259*c0909341SAndroid Build Coastguard Worker pmaxsd m8, m13 4260*c0909341SAndroid Build Coastguard Worker pmaxsd m2, m13 4261*c0909341SAndroid Build Coastguard Worker pminsd m8, m14 4262*c0909341SAndroid Build Coastguard Worker pminsd m2, m14 4263*c0909341SAndroid Build Coastguard Worker mova [r6-32*1], m11 4264*c0909341SAndroid Build Coastguard Worker mova [r6-32*3], m2 4265*c0909341SAndroid Build Coastguard Worker mova m1, [r6+32*3] ; t15 4266*c0909341SAndroid Build Coastguard Worker mova m2, [r6+32*2] ; t14 4267*c0909341SAndroid Build Coastguard Worker paddd m12, m7, m1 ; -out13 4268*c0909341SAndroid Build Coastguard Worker psubd m7, m1 ; t15a 4269*c0909341SAndroid Build Coastguard Worker psubd m11, m6, m2 ; t14a 4270*c0909341SAndroid Build Coastguard Worker paddd m2, m6 ; out2 4271*c0909341SAndroid Build Coastguard Worker pmaxsd m7, m13 4272*c0909341SAndroid Build Coastguard Worker pmaxsd m11, m13 4273*c0909341SAndroid Build Coastguard Worker pminsd m7, m14 4274*c0909341SAndroid Build Coastguard Worker pminsd m11, m14 4275*c0909341SAndroid Build Coastguard Worker mova [r6-32*2], m12 4276*c0909341SAndroid Build Coastguard Worker pminsd m1, m14, [r6+32*0] ; t10a 4277*c0909341SAndroid Build Coastguard Worker pminsd m12, m14, [r6+32*1] ; t11a 4278*c0909341SAndroid Build Coastguard Worker psubd m6, m4, m1 ; t10 4279*c0909341SAndroid Build Coastguard Worker paddd m1, m4 ; -out1 4280*c0909341SAndroid Build Coastguard Worker psubd m4, m5, m12 ; t11 4281*c0909341SAndroid Build Coastguard Worker paddd m5, m12 ; out14 4282*c0909341SAndroid Build Coastguard Worker vpbroadcastd m12, [pd_1448] 4283*c0909341SAndroid Build Coastguard Worker pmaxsd m6, m13 4284*c0909341SAndroid Build Coastguard Worker pmaxsd m4, m13 4285*c0909341SAndroid Build Coastguard Worker pminsd m6, m14 4286*c0909341SAndroid Build Coastguard Worker pminsd m4, m14 4287*c0909341SAndroid Build 
Coastguard Worker REPX {pmulld x, m12}, m9, m10, m8, m7, m11, m6, m4 4288*c0909341SAndroid Build Coastguard Worker pmulld m12, [r6-32*3] ; t6 4289*c0909341SAndroid Build Coastguard Worker mova [r6-32*3], m5 4290*c0909341SAndroid Build Coastguard Worker paddd m5, m11, m7 ; -out5 (unshifted) 4291*c0909341SAndroid Build Coastguard Worker psubd m11, m7 ; out10 (unshifted) 4292*c0909341SAndroid Build Coastguard Worker paddd m7, m9, m10 ; -out7 (unshifted) 4293*c0909341SAndroid Build Coastguard Worker psubd m9, m10 ; out8 (unshifted) 4294*c0909341SAndroid Build Coastguard Worker psubd m10, m6, m4 ; -out9 (unshifted) 4295*c0909341SAndroid Build Coastguard Worker paddd m6, m4 ; out6 (unshifted) 4296*c0909341SAndroid Build Coastguard Worker paddd m4, m12, m8 ; out4 (unshifted) 4297*c0909341SAndroid Build Coastguard Worker psubd m12, m8 ; -out11 (unshifted) 4298*c0909341SAndroid Build Coastguard Worker ret 4299*c0909341SAndroid Build Coastguard Worker.main_part1: 4300*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 1, 0, 8, 9, 10, 12, 995, 3973 4301*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 3, 2, 8, 9, 10, 12, 2440, 3290 4302*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 5, 4, 8, 9, 10, 12, 3513, 2106 4303*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 7, 6, 8, 9, 10, 12, 4052, 601 4304*c0909341SAndroid Build Coastguard Worker psubd m8, m0, m4 ; t10a 4305*c0909341SAndroid Build Coastguard Worker paddd m0, m4 ; t2a 4306*c0909341SAndroid Build Coastguard Worker psubd m4, m1, m5 ; t11a 4307*c0909341SAndroid Build Coastguard Worker paddd m1, m5 ; t3a 4308*c0909341SAndroid Build Coastguard Worker psubd m5, m2, m6 ; t14a 4309*c0909341SAndroid Build Coastguard Worker paddd m2, m6 ; t6a 4310*c0909341SAndroid Build Coastguard Worker psubd m6, m3, m7 ; t15a 4311*c0909341SAndroid Build Coastguard Worker paddd m7, m3 ; t7a 4312*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m13}, m8, m4, m5, m6, m0, m1, m2, m7 4313*c0909341SAndroid Build 
Coastguard Worker REPX {pminsd x, m14}, m8, m4, m5, m6, m0, m1, m2, m7 4314*c0909341SAndroid Build Coastguard Worker vpbroadcastd m11, [pd_2276] 4315*c0909341SAndroid Build Coastguard Worker vpbroadcastd m10, [pd_3406] 4316*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 8, 4, 3, 9, _, 12, 10, 11 4317*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 6, 5, 3, 9, _, 12, 11, 10 4318*c0909341SAndroid Build Coastguard Worker psubd m3, m0, m2 ; t6 4319*c0909341SAndroid Build Coastguard Worker paddd m0, m2 ; t2 4320*c0909341SAndroid Build Coastguard Worker psubd m2, m1, m7 ; t7 4321*c0909341SAndroid Build Coastguard Worker paddd m1, m7 ; t3 4322*c0909341SAndroid Build Coastguard Worker psubd m7, m4, m6 ; t14a 4323*c0909341SAndroid Build Coastguard Worker paddd m4, m6 ; t10a 4324*c0909341SAndroid Build Coastguard Worker psubd m6, m8, m5 ; t15a 4325*c0909341SAndroid Build Coastguard Worker paddd m5, m8 ; t11a 4326*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m13}, m3, m2, m7, m6, m0, m1, m4, m5 4327*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m14}, m3, m2, m7, m6 ; clip the rest later 4328*c0909341SAndroid Build Coastguard Worker vpbroadcastd m11, [pd_1567] 4329*c0909341SAndroid Build Coastguard Worker vpbroadcastd m10, [pd_3784] 4330*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 2, 3, 8, 9, _, 12, 10, 11 4331*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 6, 7, 8, 9, _, 12, 10, 11 4332*c0909341SAndroid Build Coastguard Worker mova [r6-32*4], m0 4333*c0909341SAndroid Build Coastguard Worker mova [r6-32*3], m1 4334*c0909341SAndroid Build Coastguard Worker mova [r6+32*0], m4 4335*c0909341SAndroid Build Coastguard Worker mova [r6+32*1], m5 4336*c0909341SAndroid Build Coastguard Worker mova [r6-32*2], m2 4337*c0909341SAndroid Build Coastguard Worker mova [r6-32*1], m3 4338*c0909341SAndroid Build Coastguard Worker mova [r6+32*2], m6 4339*c0909341SAndroid Build Coastguard Worker mova [r6+32*3], m7 4340*c0909341SAndroid Build 
; Final return of the helper routine whose body precedes this point.
    ret

; Instantiate the 16x8 entry points whose first-pass transform is flipadst.
INV_TXFM_16X8_FN flipadst, dct
INV_TXFM_16X8_FN flipadst, adst
INV_TXFM_16X8_FN flipadst, flipadst
INV_TXFM_16X8_FN flipadst, identity

;-----------------------------------------------------------------------------
; iflipadst_16x8_internal_10bpc
;   Entry:  m13/m14 are loaded with the 18-bit clip bounds, then .pass1 runs.
;   .pass1: shares iadst_16x8_internal_10bpc.main, applies .pass1_rotations
;           and continues in iadst_16x8_internal_10bpc.pass1_end.
;   .pass2: shares the 8bpc iadst 16x8 kernel, then writes the eight result
;           registers in reverse order with alternating sign.
;   r6 points at rsp+32*4 during pass 1 and addresses the spill slots.
;-----------------------------------------------------------------------------
cglobal iflipadst_16x8_internal_10bpc, 0, 7, 16, 32*8, dst, stride, c, eob, tx2
    vpbroadcastd    m13, [clip_18b_min]
    vpbroadcastd    m14, [clip_18b_max]
.pass1:
    lea             r6, [rsp+32*4]
    call            m(iadst_16x8_internal_10bpc).main
    ; m15 is left by .main (not visible here); build the two rounding terms:
    ;   m14 = 3072, m15 = m15 >> 11, m13 = 3072 - m15
    vpbroadcastd    m14, [pd_3072]
    psrld           m15, 11
    psubd           m13, m14, m15
    call            .pass1_rotations
    jmp             m(iadst_16x8_internal_10bpc).pass1_end
.pass2:
    call            m(idct_16x8_internal_10bpc).transpose
    call            m(iadst_16x8_internal_8bpc).main
    call            m(iadst_16x8_internal_8bpc).main_pass2_end
    ; m10 = +2048, m11 = -2048 (words); pmulhrsw by +-2048 is a rounded >>4
    ; with optional negation.
    vpbroadcastd    m10, [pw_2048]
    pxor            m11, m11
    psubw           m11, m10
    ; First four output rows come from m7, m6, m5, m4; the old m0..m3 are
    ; parked in m12, m7, m6, m5 for the second group.
    mova            m12, m0
    pmulhrsw        m0, m7, m11
    mova            m7, m1
    pmulhrsw        m1, m6, m10
    mova            m6, m2
    pmulhrsw        m2, m5, m11
    mova            m5, m3
    pmulhrsw        m3, m4, m10
    call            m(idct_16x8_internal_10bpc).write_16x4_start
    ; Second four rows: the parked copies of the old m3, m2, m1, m0.
    pmulhrsw        m0, m5, m11
    pmulhrsw        m1, m6, m10
    pmulhrsw        m2, m7, m11
    pmulhrsw        m3, m12, m10
    call            m(idct_16x8_internal_10bpc).write_16x4_zero
    RET
ALIGN function_align
; Combines the sixteen pass-1 values (m0-m12 plus the slots at r6-32*1..4)
; with the terms in m13/m14/m15 and permutes them across m0-m15.
; Each source register is consumed before it is overwritten, so the
; statement order below must not change.
.pass1_rotations:
    psubd           m8, m13, m7
    paddd           m7, m14, m9
    paddd           m9, m14, m6
    psubd           m6, m13, m10
    psubd           m10, m13, m5
    paddd           m5, m14, m11
    paddd           m11, m14, m4
    psubd           m4, m13, m12
    psubd           m12, m15, m3
    paddd           m3, m15, [r6-32*1]
    paddd           m13, m15, m2
    psubd           m2, m15, [r6-32*2]
    psubd           m14, m15, m1
    mova            m1, m15                 ; keep a copy of the m15 term
    paddd           m15, m0
    psubd           m0, m1, [r6-32*4]
    paddd           m1, [r6-32*3]
    ret

; Instantiate the 16x8 entry points whose first-pass transform is identity.
INV_TXFM_16X8_FN identity, dct
INV_TXFM_16X8_FN identity, adst
INV_TXFM_16X8_FN identity, flipadst
INV_TXFM_16X8_FN identity, identity

;-----------------------------------------------------------------------------
; iidentity_16x8_internal_10bpc
;   .pass1: loads sixteen 32-byte coefficient vectors from cq and scales each
;           one twice: x = (x*2896 + 2048) >> 12, then x = (x*5793 + 3072) >> 12.
;           All sixteen vector registers are live, so one register at a time
;           is spilled to [rsp] to make room for the broadcast constant.
;   .pass2: transposes, then finishes in idct_16x8_internal_10bpc.end with
;           m10 = pw_4096.
;-----------------------------------------------------------------------------
cglobal iidentity_16x8_internal_10bpc, 0, 7, 16, 32*8, dst, stride, c, eob, tx2
.pass1:
    vpbroadcastd    m15, [pd_2896]
    pmulld          m0, m15, [cq+32*0]
    pmulld          m1, m15, [cq+32*1]
    pmulld          m2, m15, [cq+32*2]
    pmulld          m3, m15, [cq+32*3]
    pmulld          m4, m15, [cq+32*4]
    pmulld          m5, m15, [cq+32*5]
    pmulld          m6, m15, [cq+32*6]
    pmulld          m7, m15, [cq+32*7]
    pmulld          m8, m15, [cq+32*8]
    pmulld          m9, m15, [cq+32*9]
    pmulld          m10, m15, [cq+32*10]
    pmulld          m11, m15, [cq+32*11]
    pmulld          m12, m15, [cq+32*12]
    pmulld          m13, m15, [cq+32*13]
    pmulld          m14, m15, [cq+32*14]
    pmulld          m15, [cq+32*15]         ; last use of the 2896 constant
    ; + 2048, >> 12   (m7 is spilled while it holds the rounding constant)
    mova            [rsp], m7
    vpbroadcastd    m7, [pd_2048]
    REPX {paddd x, m7}, m0, m1, m2, m3, m4, m5, m6, m8, m9, m10, m11, m12, m13, m14, m15
    paddd           m7, [rsp]
    REPX {psrad x, 12}, m0, m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11, m12, m13, m14, m15
    ; * 5793          (m15 is spilled while it holds the multiplier)
    mova            [rsp], m15
    vpbroadcastd    m15, [pd_5793]
    REPX {pmulld x, m15}, m0, m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11, m12, m13, m14
    pmulld          m15, [rsp]
    ; + 3072, >> 12   (m7 is spilled again for the rounding constant)
    mova            [rsp], m7
    vpbroadcastd    m7, [pd_3072]
    REPX {paddd x, m7}, m0, m1, m2, m3, m4, m5, m6, m8, m9, m10, m11, m12, m13, m14, m15
    paddd           m7, [rsp]
    REPX {psrad x, 12}, m0, m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11, m12, m13, m14, m15
    jmp             tx2q
.pass2:
    call            m(idct_16x8_internal_10bpc).transpose
    vpbroadcastd    m10, [pw_4096]
    jmp             m(idct_16x8_internal_10bpc).end

; 12bpc 16x8 entry points whose first-pass transform is dct (first two of four).
INV_TXFM_16X8_FN dct, dct, 12
INV_TXFM_16X8_FN dct, identity, 12
; 12bpc 16x8 entry points whose first-pass transform is dct (last two of four).
INV_TXFM_16X8_FN dct, adst, 12
INV_TXFM_16X8_FN dct, flipadst, 12

;-----------------------------------------------------------------------------
; idct_16x8_internal_12bpc
;   Entry:       loads the 20-bit clip bounds into m12/m13 and reuses the
;                10bpc pass-1 code.
;   .pass2:      calls .pass2_main, which falls through into .end and returns
;                via the tail jump there.
;   .pass2_main: processes the data as two groups of eight dword vectors.
;                Group one is whatever .transpose leaves in m0-m7; its results
;                are parked at cq+32*8..15. Group two is read from cq+32*0..7.
;                Both are clipped to the 18-bit range before the 8x8 kernel.
;   .end:        packs both groups to words and writes the rows; also called
;                from the adst variant.
;   .write_16x4_start: only sets up m9 (pixel max), r3 (3*stride), m8 (zero).
;-----------------------------------------------------------------------------
cglobal idct_16x8_internal_12bpc, 0, 7, 16, 32*8, dst, stride, c, eob, tx2
    vpbroadcastd    m12, [clip_20b_min]
    vpbroadcastd    m13, [clip_20b_max]
    jmp             m(idct_16x8_internal_10bpc).pass1
.pass2:
    call            .pass2_main
    RET
ALIGN function_align
.pass2_main:
    call            m(idct_8x16_internal_12bpc).transpose
    vpbroadcastd    m12, [clip_18b_min]
    vpbroadcastd    m13, [clip_18b_max]
    vpbroadcastd    m11, [pd_2048]
    ; group one: clamp, transform, round
    REPX {pmaxsd x, m12}, m0, m1, m2, m3, m4, m5, m6, m7
    REPX {pminsd x, m13}, m0, m1, m2, m3, m4, m5, m6, m7
    call            m(idct_8x8_internal_10bpc).main
    call            m(idct_8x8_internal_12bpc).round_shift4
    ; park group-one results in the upper half of the coefficient buffer
    mova            [cq+32*8], m0
    mova            [cq+32*9], m1
    mova            [cq+32*10], m2
    mova            [cq+32*11], m3
    mova            [cq+32*12], m4
    mova            [cq+32*13], m5
    mova            [cq+32*14], m6
    mova            [cq+32*15], m7
    ; group two: load and apply the lower clamp in a single instruction
    pmaxsd          m0, m12, [cq+32*0]
    pmaxsd          m1, m12, [cq+32*1]
    pmaxsd          m2, m12, [cq+32*2]
    pmaxsd          m3, m12, [cq+32*3]
    pmaxsd          m4, m12, [cq+32*4]
    pmaxsd          m5, m12, [cq+32*5]
    pmaxsd          m6, m12, [cq+32*6]
    pmaxsd          m7, m12, [cq+32*7]
    REPX {pminsd x, m13}, m0, m1, m2, m3, m4, m5, m6, m7
    call            m(idct_8x8_internal_10bpc).main
    call            m(idct_8x8_internal_12bpc).round_shift4
.end:
    ; dwords -> saturated words, pairing m0-m7 with the parked group
    packssdw        m0, [cq+32*8]
    packssdw        m1, [cq+32*9]
    packssdw        m2, [cq+32*10]
    packssdw        m3, [cq+32*11]
    packssdw        m4, [cq+32*12]
    packssdw        m5, [cq+32*13]
    packssdw        m6, [cq+32*14]
    packssdw        m7, [cq+32*15]
    ; packssdw works per 128-bit lane; q3120 restores linear qword order
    vpermq          m0, m0, q3120
    vpermq          m1, m1, q3120
    vpermq          m2, m2, q3120
    vpermq          m3, m3, q3120
    call            .write_16x4_start
    call            m(idct_16x8_internal_10bpc).write_16x4_zero
    vpermq          m0, m4, q3120
    vpermq          m1, m5, q3120
    vpermq          m2, m6, q3120
    vpermq          m3, m7, q3120
    jmp             m(idct_16x8_internal_10bpc).write_16x4_zero ; tail call
ALIGN function_align
.write_16x4_start:
    vpbroadcastd    m9, [pixel_12bpc_max]
    lea             r3, [strideq*3]
    pxor            m8, m8
    ret

; 12bpc 16x8 entry points whose first-pass transform is adst.
INV_TXFM_16X8_FN adst, dct, 12
INV_TXFM_16X8_FN adst, adst, 12
INV_TXFM_16X8_FN adst, flipadst, 12
INV_TXFM_16X8_FN adst, identity, 12

;-----------------------------------------------------------------------------
; iadst_16x8_internal_12bpc
;   Entry:       loads the 20-bit clip bounds into m13/m14 and reuses the
;                10bpc adst pass-1 code.
;   .pass2:      .pass2_main followed by the shared idct 12bpc .end writer.
;   .pass2_main: same two-group scheme as the idct variant, with
;                iadst_8x8_internal_12bpc.pass2_main2 as the kernel. Group-one
;                results are left at cq+32*8..15, group two in m0-m7.
;                Also called from the flipadst variant.
;-----------------------------------------------------------------------------
cglobal iadst_16x8_internal_12bpc, 0, 7, 16, 32*8, dst, stride, c, eob, tx2
    vpbroadcastd    m13, [clip_20b_min]
    vpbroadcastd    m14, [clip_20b_max]
    jmp             m(iadst_16x8_internal_10bpc).pass1
.pass2:
    call            .pass2_main
    call            m(idct_16x8_internal_12bpc).end
    RET
ALIGN function_align
.pass2_main:
    call            m(idct_8x16_internal_12bpc).transpose
    vpbroadcastd    m12, [clip_18b_min]
    vpbroadcastd    m13, [clip_18b_max]
    vpbroadcastd    m11, [pd_2048]
    ; group one
    REPX {pmaxsd x, m12}, m0, m1, m2, m3, m4, m5, m6, m7
    REPX {pminsd x, m13}, m0, m1, m2, m3, m4, m5, m6, m7
    call            m(iadst_8x8_internal_12bpc).pass2_main2
    mova            [cq+32*8], m0
    mova            [cq+32*9], m1
    mova            [cq+32*10], m2
    mova            [cq+32*11], m3
    mova            [cq+32*12], m4
    mova            [cq+32*13], m5
    mova            [cq+32*14], m6
    mova            [cq+32*15], m7
    ; group two
    pmaxsd          m0, m12, [cq+32*0]
    pmaxsd          m1, m12, [cq+32*1]
    pmaxsd          m2, m12, [cq+32*2]
    pmaxsd          m3, m12, [cq+32*3]
    pmaxsd          m4, m12, [cq+32*4]
    pmaxsd          m5, m12, [cq+32*5]
    pmaxsd          m6, m12, [cq+32*6]
    pmaxsd          m7, m12, [cq+32*7]
    REPX {pminsd x, m13}, m0, m1, m2, m3, m4, m5, m6, m7
    call            m(iadst_8x8_internal_12bpc).pass2_main2
    ret

; 12bpc 16x8 entry points whose first-pass transform is flipadst
; (first three of four).
INV_TXFM_16X8_FN flipadst, dct, 12
INV_TXFM_16X8_FN flipadst, adst, 12
INV_TXFM_16X8_FN flipadst, flipadst, 12
; 12bpc 16x8 entry point: flipadst first pass, identity second pass.
INV_TXFM_16X8_FN flipadst, identity, 12

;-----------------------------------------------------------------------------
; iflipadst_16x8_internal_12bpc
;   Entry:  loads the 20-bit clip bounds into m13/m14 and reuses the 10bpc
;           flipadst pass-1 code.
;   .pass2: runs the adst 12bpc second pass, then packs and writes the eight
;           result rows in reverse register order.
;-----------------------------------------------------------------------------
cglobal iflipadst_16x8_internal_12bpc, 0, 7, 16, 32*8, dst, stride, c, eob, tx2
    vpbroadcastd    m13, [clip_20b_min]
    vpbroadcastd    m14, [clip_20b_max]
    jmp             m(iflipadst_16x8_internal_10bpc).pass1
.pass2:
    call            m(iadst_16x8_internal_12bpc).pass2_main
    ; Pack dwords to saturated words, reversing the row order as we go:
    ; m0..m3 + cq[8..11] land in m13..m10, m4..m7 + cq[12..15] land in m3..m0.
    ; Every destination is written before the same-numbered source is
    ; needed again, so the order of these eight lines is significant.
    packssdw        m13, m0, [cq+32*8]
    packssdw        m12, m1, [cq+32*9]
    packssdw        m11, m2, [cq+32*10]
    packssdw        m10, m3, [cq+32*11]
    packssdw        m3, m4, [cq+32*12]
    packssdw        m2, m5, [cq+32*13]
    packssdw        m1, m6, [cq+32*14]
    packssdw        m0, m7, [cq+32*15]
    ; packssdw works per 128-bit lane; q3120 restores linear qword order
    vpermq          m0, m0, q3120
    vpermq          m1, m1, q3120
    vpermq          m2, m2, q3120
    vpermq          m3, m3, q3120
    call            m(idct_16x8_internal_12bpc).write_16x4_start
    call            m(idct_16x8_internal_10bpc).write_16x4_zero
    ; second group of four rows
    vpermq          m0, m10, q3120
    vpermq          m1, m11, q3120
    vpermq          m2, m12, q3120
    vpermq          m3, m13, q3120
    call            m(idct_16x8_internal_10bpc).write_16x4_zero
    RET
; 12bpc 16x8 entry points whose first-pass transform is identity.
INV_TXFM_16X8_FN identity, dct, 12
INV_TXFM_16X8_FN identity, adst, 12
INV_TXFM_16X8_FN identity, flipadst, 12
INV_TXFM_16X8_FN identity, identity, 12

;-----------------------------------------------------------------------------
; iidentity_16x8_internal_12bpc
;   Entry:  pass 1 is shared with the 10bpc identity implementation.
;   .pass2: transposes, scales the first four rows by pw_4096 (pmulhrsw by
;           4096 is a rounded >>3), writes them, and leaves the remaining
;           rows to idct_16x8_internal_10bpc.end2.
;-----------------------------------------------------------------------------
cglobal iidentity_16x8_internal_12bpc, 0, 7, 16, 32*8, dst, stride, c, eob, tx2
    jmp             m(iidentity_16x8_internal_10bpc).pass1
.pass2:
    call            m(idct_16x8_internal_10bpc).transpose2
    vpbroadcastd    m10, [pw_4096]
    REPX {pmulhrsw x, m10}, m0, m1, m2, m3
    call            m(idct_16x8_internal_12bpc).write_16x4_start
    call            m(idct_16x8_internal_10bpc).write_16x4_zero
    jmp             m(idct_16x8_internal_10bpc).end2

;-----------------------------------------------------------------------------
; INV_TXFM_16X16_FN type1, type2 [, eob_offset = 0 [, bitdepth = 10]]
;   Emits the 16x16 entry point through INV_TXFM_FN. For the dct_dct pair it
;   also emits the DC-only path:
;     r6d  = ([cq] * 181 + 640) >> 10
;     m3   = broadcast of dconly_<bitdepth>bpc
;     [cq] = eobd
;     r3d |= 16
;   and then jumps to the shared 16x4 .dconly3 code.
;   NOTE(review): r3d is presumably the row count consumed by .dconly3, and
;   eobd is presumably zero on this path (the store clears the coefficient);
;   neither is visible here - confirm against INV_TXFM_FN and .dconly3.
;-----------------------------------------------------------------------------
%macro INV_TXFM_16X16_FN 2-4 0,10 ; type1, type2, eob_offset, bitdepth
    INV_TXFM_FN     %1, %2, %3, 16x16, %4
%ifidn %1_%2, dct_dct
    imul            r6d, [cq], 181
    vpbroadcastd    m3, [dconly_%4bpc]
    mov             [cq], eobd              ; 0
    or              r3d, 16
    add             r6d, 640
    sar             r6d, 10
    jmp             m(inv_txfm_add_dct_dct_16x4_10bpc).dconly3
%endif
%endmacro

; 10bpc 16x16 entry points whose first-pass transform is dct.
INV_TXFM_16X16_FN dct, dct
INV_TXFM_16X16_FN dct, identity, 28
INV_TXFM_16X16_FN dct, adst
INV_TXFM_16X16_FN dct, flipadst

; idct_16x16_internal_10bpc: only the opening instructions fall inside this
; span; the routine continues on the following lines.
cglobal idct_16x16_internal_10bpc, 0, 7, 16, 32*24, dst, stride, c, eob, tx2
    vpbroadcastd    m12, [clip_18b_min]
    vpbroadcastd    m13, [clip_18b_max]
.pass1:
    vpbroadcastd    m11, [pd_2048]
    vpbroadcastd    m14, [pd_2896]
    lea             r6, [rsp+32*4]
    sub             eobd, 36
    jl              .fast
    add             cq, 32
    call            .main
    sub             cq, 32
    mova            m10, [r6-32*4]
    mova            m9, [r6-32*3]
    mova            m8, [r6-32*2]
    psubd           m15, m0, m10            ; out15
    paddd           m0, m10                 ; out0
    psubd           m10, m1, m9             ; out14
    paddd           m1, m9                  ; out1
    psubd           m9, m2, m8              ; out13
Coastguard Worker paddd m2, m8 ; out2 4643*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 2}, m0, m1, m2 4644*c0909341SAndroid Build Coastguard Worker mova [r6-32*4], m0 4645*c0909341SAndroid Build Coastguard Worker mova [r6-32*3], m1 4646*c0909341SAndroid Build Coastguard Worker mova [r6-32*2], m2 4647*c0909341SAndroid Build Coastguard Worker mova m2, [r6-32*1] 4648*c0909341SAndroid Build Coastguard Worker mova m1, [r6+32*0] 4649*c0909341SAndroid Build Coastguard Worker mova m0, [r6+32*1] 4650*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 2}, m9, m10, m15 4651*c0909341SAndroid Build Coastguard Worker psubd m8, m3, m2 ; out12 4652*c0909341SAndroid Build Coastguard Worker paddd m3, m2 ; out3 4653*c0909341SAndroid Build Coastguard Worker psubd m2, m4, m1 ; out11 4654*c0909341SAndroid Build Coastguard Worker paddd m4, m1 ; out4 4655*c0909341SAndroid Build Coastguard Worker psubd m1, m5, m0 ; out10 4656*c0909341SAndroid Build Coastguard Worker paddd m5, m0 ; out5 4657*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 2}, m3, m4, m5 4658*c0909341SAndroid Build Coastguard Worker mova [r6-32*1], m3 4659*c0909341SAndroid Build Coastguard Worker mova [r6+32*0], m4 4660*c0909341SAndroid Build Coastguard Worker mova [r6+32*1], m5 4661*c0909341SAndroid Build Coastguard Worker mova m4, [r6+32*2] 4662*c0909341SAndroid Build Coastguard Worker mova m3, [r6+32*3] 4663*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 2}, m1, m2, m8 4664*c0909341SAndroid Build Coastguard Worker psubd m5, m6, m4 ; out9 4665*c0909341SAndroid Build Coastguard Worker paddd m6, m4 ; out6 4666*c0909341SAndroid Build Coastguard Worker psubd m4, m7, m3 ; out8 4667*c0909341SAndroid Build Coastguard Worker paddd m7, m3 ; out7 4668*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 2}, m6, m7, m4, m5 4669*c0909341SAndroid Build Coastguard Worker mova [r6+32*2], m6 4670*c0909341SAndroid Build Coastguard Worker mova [r6+32*3], m7 4671*c0909341SAndroid Build Coastguard Worker 
add r6, 32*8 4672*c0909341SAndroid Build Coastguard Worker mova [r6-32*4], m4 4673*c0909341SAndroid Build Coastguard Worker mova [r6-32*3], m5 4674*c0909341SAndroid Build Coastguard Worker mova [r6-32*2], m1 4675*c0909341SAndroid Build Coastguard Worker mova [r6-32*1], m2 4676*c0909341SAndroid Build Coastguard Worker mova [r6+32*0], m8 4677*c0909341SAndroid Build Coastguard Worker mova [r6+32*1], m9 4678*c0909341SAndroid Build Coastguard Worker mova [r6+32*2], m10 4679*c0909341SAndroid Build Coastguard Worker mova [r6+32*3], m15 4680*c0909341SAndroid Build Coastguard Worker.fast: 4681*c0909341SAndroid Build Coastguard Worker add r6, 32*8 4682*c0909341SAndroid Build Coastguard Worker call .main 4683*c0909341SAndroid Build Coastguard Worker mova m14, [r6-32*4] 4684*c0909341SAndroid Build Coastguard Worker mova m13, [r6-32*3] 4685*c0909341SAndroid Build Coastguard Worker mova m12, [r6-32*2] 4686*c0909341SAndroid Build Coastguard Worker mova m11, [r6-32*1] 4687*c0909341SAndroid Build Coastguard Worker mova m10, [r6+32*0] 4688*c0909341SAndroid Build Coastguard Worker mova m9, [r6+32*1] 4689*c0909341SAndroid Build Coastguard Worker mova m8, [r6+32*2] 4690*c0909341SAndroid Build Coastguard Worker psubd m15, m0, m14 ; out15 4691*c0909341SAndroid Build Coastguard Worker paddd m0, m14 ; out0 4692*c0909341SAndroid Build Coastguard Worker psubd m14, m1, m13 ; out14 4693*c0909341SAndroid Build Coastguard Worker paddd m1, m13 ; out1 4694*c0909341SAndroid Build Coastguard Worker psubd m13, m2, m12 ; out13 4695*c0909341SAndroid Build Coastguard Worker paddd m2, m12 ; out2 4696*c0909341SAndroid Build Coastguard Worker psubd m12, m3, m11 ; out12 4697*c0909341SAndroid Build Coastguard Worker paddd m3, m11 ; out3 4698*c0909341SAndroid Build Coastguard Worker psubd m11, m4, m10 ; out11 4699*c0909341SAndroid Build Coastguard Worker paddd m4, m10 ; out4 4700*c0909341SAndroid Build Coastguard Worker psubd m10, m5, m9 ; out10 4701*c0909341SAndroid Build Coastguard Worker paddd m5, m9 ; 
out5 4702*c0909341SAndroid Build Coastguard Worker psubd m9, m6, m8 ; out9 4703*c0909341SAndroid Build Coastguard Worker paddd m6, m8 ; out6 4704*c0909341SAndroid Build Coastguard Worker psubd m8, m7, [r6+32*3] ; out8 4705*c0909341SAndroid Build Coastguard Worker paddd m7, [r6+32*3] ; out7 4706*c0909341SAndroid Build Coastguard Worker sub r6, 32*8 4707*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 2}, m0, m1, m2, m3, m4, m5, m6, m7, \ 4708*c0909341SAndroid Build Coastguard Worker m8, m9, m10, m11, m12, m13, m14, m15 4709*c0909341SAndroid Build Coastguard Worker jmp tx2q 4710*c0909341SAndroid Build Coastguard Worker.pass2: 4711*c0909341SAndroid Build Coastguard Worker call .transpose 4712*c0909341SAndroid Build Coastguard Worker lea r6, [pw_5+128] 4713*c0909341SAndroid Build Coastguard Worker mova [rsp], m15 4714*c0909341SAndroid Build Coastguard Worker call m(idct_16x16_internal_8bpc).main 4715*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+32*1] 4716*c0909341SAndroid Build Coastguard Worker.end: 4717*c0909341SAndroid Build Coastguard Worker call .write_16x16 4718*c0909341SAndroid Build Coastguard Worker RET 4719*c0909341SAndroid Build Coastguard WorkerALIGN function_align 4720*c0909341SAndroid Build Coastguard Worker.write_16x16: 4721*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+32*0], m8 4722*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+32*1], m9 4723*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize+32*2], m12 4724*c0909341SAndroid Build Coastguard Worker vpbroadcastd m12, [pw_2048] 4725*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m12 4726*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m12 4727*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m12 4728*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m12 4729*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_10bpc).write_16x4_start 4730*c0909341SAndroid Build Coastguard Worker.write_16x16_2: 4731*c0909341SAndroid 
Build Coastguard Worker pmulhrsw m0, m12, m4 4732*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m12, m5 4733*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m12, m6 4734*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m12, m7 4735*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_10bpc).write_16x4_zero 4736*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m12, [rsp+gprsize+32*0] 4737*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m12, [rsp+gprsize+32*1] 4738*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m12, m10 4739*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m12, m11 4740*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_10bpc).write_16x4_zero 4741*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m12, [rsp+gprsize+32*2] 4742*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m12, m13 4743*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m12, m14 4744*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m12, m15 4745*c0909341SAndroid Build Coastguard Worker jmp m(idct_16x8_internal_10bpc).write_16x4_zero 4746*c0909341SAndroid Build Coastguard WorkerALIGN function_align 4747*c0909341SAndroid Build Coastguard Worker.transpose: 4748*c0909341SAndroid Build Coastguard Worker test eobd, eobd 4749*c0909341SAndroid Build Coastguard Worker jl .transpose_fast 4750*c0909341SAndroid Build Coastguard Worker packssdw m8, [r6-32*4] 4751*c0909341SAndroid Build Coastguard Worker packssdw m9, [r6-32*3] 4752*c0909341SAndroid Build Coastguard Worker packssdw m10, [r6-32*2] 4753*c0909341SAndroid Build Coastguard Worker packssdw m11, [r6-32*1] 4754*c0909341SAndroid Build Coastguard Worker packssdw m12, [r6+32*0] 4755*c0909341SAndroid Build Coastguard Worker packssdw m13, [r6+32*1] 4756*c0909341SAndroid Build Coastguard Worker packssdw m14, [r6+32*2] 4757*c0909341SAndroid Build Coastguard Worker packssdw m15, [r6+32*3] 4758*c0909341SAndroid Build Coastguard Worker sub r6, 32*8 
4759*c0909341SAndroid Build Coastguard Worker packssdw m0, [r6-32*4] 4760*c0909341SAndroid Build Coastguard Worker packssdw m1, [r6-32*3] 4761*c0909341SAndroid Build Coastguard Worker packssdw m2, [r6-32*2] 4762*c0909341SAndroid Build Coastguard Worker packssdw m3, [r6-32*1] 4763*c0909341SAndroid Build Coastguard Worker packssdw m4, [r6+32*0] 4764*c0909341SAndroid Build Coastguard Worker packssdw m5, [r6+32*1] 4765*c0909341SAndroid Build Coastguard Worker packssdw m6, [r6+32*2] 4766*c0909341SAndroid Build Coastguard Worker packssdw m7, [r6+32*3] 4767*c0909341SAndroid Build Coastguard Worker mova [r6], m8 4768*c0909341SAndroid Build Coastguard Worker punpckhwd m8, m0, m1 4769*c0909341SAndroid Build Coastguard Worker punpcklwd m0, m1 4770*c0909341SAndroid Build Coastguard Worker punpcklwd m1, m2, m3 4771*c0909341SAndroid Build Coastguard Worker punpckhwd m2, m3 4772*c0909341SAndroid Build Coastguard Worker punpckhwd m3, m6, m7 4773*c0909341SAndroid Build Coastguard Worker punpcklwd m6, m7 4774*c0909341SAndroid Build Coastguard Worker punpcklwd m7, m4, m5 4775*c0909341SAndroid Build Coastguard Worker punpckhwd m4, m5 4776*c0909341SAndroid Build Coastguard Worker punpckldq m5, m8, m2 4777*c0909341SAndroid Build Coastguard Worker punpckhdq m8, m2 4778*c0909341SAndroid Build Coastguard Worker punpckhdq m2, m0, m1 4779*c0909341SAndroid Build Coastguard Worker punpckldq m0, m1 4780*c0909341SAndroid Build Coastguard Worker punpckhdq m1, m7, m6 4781*c0909341SAndroid Build Coastguard Worker punpckldq m7, m6 4782*c0909341SAndroid Build Coastguard Worker punpckhdq m6, m4, m3 4783*c0909341SAndroid Build Coastguard Worker punpckldq m4, m3 4784*c0909341SAndroid Build Coastguard Worker punpckhqdq m3, m2, m1 4785*c0909341SAndroid Build Coastguard Worker punpcklqdq m2, m1 4786*c0909341SAndroid Build Coastguard Worker punpckhqdq m1, m0, m7 4787*c0909341SAndroid Build Coastguard Worker punpcklqdq m0, m7 4788*c0909341SAndroid Build Coastguard Worker punpcklqdq m7, m8, m6 
4789*c0909341SAndroid Build Coastguard Worker punpckhqdq m8, m6 4790*c0909341SAndroid Build Coastguard Worker punpckhqdq m6, m5, m4 4791*c0909341SAndroid Build Coastguard Worker punpcklqdq m5, m4 4792*c0909341SAndroid Build Coastguard Worker mova m4, [r6] 4793*c0909341SAndroid Build Coastguard Worker mova [r6], m8 4794*c0909341SAndroid Build Coastguard Worker punpcklwd m8, m4, m9 4795*c0909341SAndroid Build Coastguard Worker punpckhwd m4, m9 4796*c0909341SAndroid Build Coastguard Worker punpcklwd m9, m10, m11 4797*c0909341SAndroid Build Coastguard Worker punpckhwd m10, m11 4798*c0909341SAndroid Build Coastguard Worker punpckhwd m11, m14, m15 4799*c0909341SAndroid Build Coastguard Worker punpcklwd m14, m15 4800*c0909341SAndroid Build Coastguard Worker punpckhwd m15, m12, m13 4801*c0909341SAndroid Build Coastguard Worker punpcklwd m12, m13 4802*c0909341SAndroid Build Coastguard Worker punpckldq m13, m4, m10 4803*c0909341SAndroid Build Coastguard Worker punpckhdq m4, m10 4804*c0909341SAndroid Build Coastguard Worker punpckhdq m10, m8, m9 4805*c0909341SAndroid Build Coastguard Worker punpckldq m8, m9 4806*c0909341SAndroid Build Coastguard Worker punpckhdq m9, m12, m14 4807*c0909341SAndroid Build Coastguard Worker punpckldq m12, m14 4808*c0909341SAndroid Build Coastguard Worker punpckhdq m14, m15, m11 4809*c0909341SAndroid Build Coastguard Worker punpckldq m15, m11 4810*c0909341SAndroid Build Coastguard Worker punpckhqdq m11, m10, m9 4811*c0909341SAndroid Build Coastguard Worker punpcklqdq m10, m9 4812*c0909341SAndroid Build Coastguard Worker punpckhqdq m9, m8, m12 4813*c0909341SAndroid Build Coastguard Worker punpcklqdq m8, m12 4814*c0909341SAndroid Build Coastguard Worker punpcklqdq m12, m13, m15 4815*c0909341SAndroid Build Coastguard Worker punpckhqdq m13, m15 4816*c0909341SAndroid Build Coastguard Worker punpckhqdq m15, m4, m14 4817*c0909341SAndroid Build Coastguard Worker punpcklqdq m14, m4, m14 4818*c0909341SAndroid Build Coastguard Worker vperm2i128 m4, m0, m8, 
0x31 4819*c0909341SAndroid Build Coastguard Worker vinserti128 m0, xm8, 1 4820*c0909341SAndroid Build Coastguard Worker vinserti128 m8, m5, xm12, 1 4821*c0909341SAndroid Build Coastguard Worker vperm2i128 m12, m5, 0x13 4822*c0909341SAndroid Build Coastguard Worker vperm2i128 m5, m1, m9, 0x31 4823*c0909341SAndroid Build Coastguard Worker vinserti128 m1, xm9, 1 4824*c0909341SAndroid Build Coastguard Worker vinserti128 m9, m6, xm13, 1 4825*c0909341SAndroid Build Coastguard Worker vperm2i128 m13, m6, 0x13 4826*c0909341SAndroid Build Coastguard Worker vperm2i128 m6, m2, m10, 0x31 4827*c0909341SAndroid Build Coastguard Worker vinserti128 m2, xm10, 1 4828*c0909341SAndroid Build Coastguard Worker vinserti128 m10, m7, xm14, 1 4829*c0909341SAndroid Build Coastguard Worker vperm2i128 m14, m7, 0x13 4830*c0909341SAndroid Build Coastguard Worker vperm2i128 m7, m3, m11, 0x31 4831*c0909341SAndroid Build Coastguard Worker vinserti128 m3, xm11, 1 4832*c0909341SAndroid Build Coastguard Worker mova xm11, [r6] 4833*c0909341SAndroid Build Coastguard Worker vinserti128 m11, xm15, 1 4834*c0909341SAndroid Build Coastguard Worker vinserti128 m15, [r6+16], 0 4835*c0909341SAndroid Build Coastguard Worker ret 4836*c0909341SAndroid Build Coastguard Worker.transpose_fast: 4837*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_10bpc).transpose2 4838*c0909341SAndroid Build Coastguard Worker pxor m8, m8 4839*c0909341SAndroid Build Coastguard Worker REPX {mova x, m8}, m9, m10, m11, m12, m13, m14, m15 4840*c0909341SAndroid Build Coastguard Worker ret 4841*c0909341SAndroid Build Coastguard WorkerALIGN function_align 4842*c0909341SAndroid Build Coastguard Worker.main: 4843*c0909341SAndroid Build Coastguard Worker mova m0, [cq+64* 1] 4844*c0909341SAndroid Build Coastguard Worker mova m1, [cq+64* 3] 4845*c0909341SAndroid Build Coastguard Worker mova m2, [cq+64* 5] 4846*c0909341SAndroid Build Coastguard Worker mova m3, [cq+64* 7] 4847*c0909341SAndroid Build Coastguard Worker mova m4, 
[cq+64* 9] ; operand of the instruction that begins on the previous source line
    ; NOTE(review): the instructions up to the first `ret` are the tail of a
    ; routine whose label lies before this span; they are reproduced unchanged.
    mova            m5, [cq+64*11]
    mova            m6, [cq+64*13]
    mova            m7, [cq+64*15]
    call            m(idct_8x16_internal_10bpc).main_oddhalf
    mova            m0, [cq+64* 0]
    mova            m1, [cq+64* 2]
    mova            m2, [cq+64* 4]
    mova            m3, [cq+64* 6]
    mova            m4, [cq+64* 8]
    mova            m5, [cq+64*10]
    mova            m6, [cq+64*12]
    mova            m7, [cq+64*14]
    call            m(idct_8x8_internal_10bpc).main
    call            m(idct_8x16_internal_10bpc).main_evenhalf
    psrld           m10, m11, 10 ; pd_2
    REPX            {paddd x, m10}, m0, m1, m2, m3, m4, m5, m6, m7
    ret

INV_TXFM_16X16_FN adst, dct
INV_TXFM_16X16_FN adst, adst
INV_TXFM_16X16_FN adst, flipadst

;-----------------------------------------------------------------------
; iadst_16x16_internal_10bpc
;   Entry:  loads clip_18b_min/max into m13/m14, then falls into .pass1.
;           iadst_16x16_internal_12bpc jumps straight to .pass1 with
;           clip_20b_* in the same two registers.
;   .pass1: first pass over the 32-bit coefficients at cq; leaves sixteen
;           rounded/shifted vectors in m0-m15 and jumps through tx2q.
;   .pass2: second pass on the transposed data, reusing the 8bpc
;           iadst_16x16 kernels, then scales and writes the block out.
;   .main:  shared helper; also called by iflipadst_16x16_internal_10bpc.
;   Scratch: r6 walks a stack buffer starting at rsp+32*4.
;-----------------------------------------------------------------------
cglobal iadst_16x16_internal_10bpc, 0, 7, 16, 32*24, dst, stride, c, eob, tx2
    vpbroadcastd    m13, [clip_18b_min]
    vpbroadcastd    m14, [clip_18b_max]
.pass1:
    vpbroadcastd    m15, [pd_2896]
    lea             r6, [rsp+32*4]
    sub             eobd, 36
    jl              .fast                   ; eob < 36: skip the extra pass on cq+32
    ; Run .main on the data 32 bytes into each 64-byte row, round it and
    ; park the sixteen results in the stack buffer addressed through r6.
    add             cq, 32
    call            .main
    sub             cq, 32
    vpbroadcastd    m8, [pd_5120]
    paddd           m4, m8
    paddd           m6, m8
    paddd           m9, m8
    paddd           m11, m8
    vpbroadcastd    m8, [pd_5119]
    psubd           m5, m8, m5              ; pd_5119 - x: negate with rounding
    psubd           m7, m8, m7
    psubd           m10, m8, m10
    psubd           m12, m8, m12
    REPX            {psrad x, 13}, m4, m5, m6, m7, m9, m10, m11, m12
    mova            [r6+32*0], m4
    mova            [r6+32*1], m5
    mova            [r6+32*2], m6
    mova            [r6+32*3], m7
    psrld           m4, m15, 10 ; pd_2
    paddd           m0, m4
    psubd           m1, m4, m1
    paddd           m2, m4
    psubd           m3, m4, m3
    psubd           m7, m4, [r6-32*4]
    paddd           m6, m4, [r6-32*3]
    psubd           m5, m4, [r6-32*2]
    paddd           m4, [r6-32*1]
    REPX            {psrad x, 2 }, m0, m1, m2, m3, m4, m5, m6, m7
    mova            [r6-32*4], m0
    mova            [r6-32*3], m1
    mova            [r6-32*2], m2
    mova            [r6-32*1], m3
    add             r6, 32*8
    mova            [r6-32*4], m9
    mova            [r6-32*3], m10
    mova            [r6-32*2], m11
    mova            [r6-32*1], m12
    mova            [r6+32*0], m4
    mova            [r6+32*1], m5
    mova            [r6+32*2], m6
    mova            [r6+32*3], m7
.fast:
    ; Same computation on the data at cq itself; results stay in registers.
    add             r6, 32*8
    call            .main
    vpbroadcastd    m14, [pd_5120]
    vpbroadcastd    m13, [pd_5119]
    psrld           m15, 10 ; pd_2
    paddd           m0, m15
    psubd           m1, m15, m1
    paddd           m2, m15
    psubd           m3, m15, m3
    paddd           m4, m14
    psubd           m5, m13, m5
    paddd           m6, m14
    psubd           m7, m13, m7
    paddd           m8, m14, m9
    psubd           m9, m13, m10
    paddd           m10, m14, m11
    psubd           m11, m13, m12
    paddd           m12, m15, [r6-32*1]
    psubd           m13, m15, [r6-32*2]
    paddd           m14, m15, [r6-32*3]
    psubd           m15, [r6-32*4]
.pass1_end:
    ; Also the join point for iflipadst_16x16_internal_10bpc.fast.
    REPX            {psrad x, 2 }, m0, m1, m2, m3, m12, m13, m14, m15
    REPX            {psrad x, 13}, m4, m5, m6, m7, m8, m9, m10, m11
    sub             r6, 32*8
    jmp             tx2q                    ; target is set up outside this span
.pass2:
    call            m(idct_16x16_internal_10bpc).transpose
    lea             r6, [pw_5+128]
    mova            [rsp], m15
    call            m(iadst_16x16_internal_8bpc).main
    call            m(iadst_16x16_internal_8bpc).main_pass2_end
    ; m12/m13 are needed for the scale constants, so m8/m12/m13 are spilled.
    mova            [rsp+32*0], m8
    mova            [rsp+32*2], m12
    mova            [rsp+32*3], m13
    vpbroadcastd    m12, [pw_2048]
    pxor            m13, m13
    psubw           m13, m12                ; m13 = -pw_2048
    ; Rows are scaled with alternating +pw_2048 / -pw_2048 and written out
    ; four at a time.
    pmulhrsw        m0, m12
    ; NOTE(review): [rsp+32*1] is read before this function writes it;
    ; presumably it was filled by the 8bpc helper above - confirm.
    pmulhrsw        m1, m13, [rsp+32*1]
    mova            [rsp+32*1], m9
    pmulhrsw        m2, m12
    pmulhrsw        m3, m13
    call            m(idct_16x8_internal_10bpc).write_16x4_start
    pmulhrsw        m0, m12, m4
    pmulhrsw        m1, m13, m5
    pmulhrsw        m2, m12, m6
    pmulhrsw        m3, m13, m7
    call            m(idct_16x8_internal_10bpc).write_16x4_zero
    pmulhrsw        m0, m12, [rsp+32*0]
    pmulhrsw        m1, m13, [rsp+32*1]
    pmulhrsw        m2, m12, m10
    pmulhrsw        m3, m13, m11
    call            m(idct_16x8_internal_10bpc).write_16x4_zero
    pmulhrsw        m0, m12, [rsp+32*2]
    pmulhrsw        m1, m13, [rsp+32*3]
    pmulhrsw        m2, m12, m14
    pmulhrsw        m3, m13, m15
    call            m(idct_16x8_internal_10bpc).write_16x4_zero
    RET
ALIGN function_align
.main:
    ; Loads eight rows (64-byte stride from cq) for main_part1, then the
    ; other eight for main_part2, which is entered by tail jump and so
    ; returns directly to the caller of .main.
    mova            m0, [cq+64* 2]
    mova            m1, [cq+64*13]
    mova            m2, [cq+64* 6]
    mova            m3, [cq+64* 9]
    mova            m4, [cq+64*10]
    mova            m5, [cq+64* 5]
    mova            m6, [cq+64*14]
    mova            m7, [cq+64* 1]
    vpbroadcastd    m12, [pd_2048]
    call            m(iadst_16x8_internal_10bpc).main_part1
    mova            m0, [cq+64* 0]
    mova            m1, [cq+64*15]
    mova            m2, [cq+64* 4]
    mova            m3, [cq+64*11]
    mova            m4, [cq+64* 8]
    mova            m5, [cq+64* 7]
    mova            m6, [cq+64*12]
    mova            m7, [cq+64* 3]
    jmp             m(iadst_16x8_internal_10bpc).main_part2

INV_TXFM_16X16_FN flipadst, dct
INV_TXFM_16X16_FN flipadst, adst
INV_TXFM_16X16_FN flipadst, flipadst

;-----------------------------------------------------------------------
; iflipadst_16x16_internal_10bpc
;   Same structure as iadst_16x16_internal_10bpc and built on its .main
;   helper; the results are rounded and stored in the reverse register
;   order. .fast joins iadst_16x16_internal_10bpc.pass1_end for the final
;   shifts and the jump through tx2q.
;-----------------------------------------------------------------------
cglobal iflipadst_16x16_internal_10bpc, 0, 7, 16, 32*24, dst, stride, c, eob, tx2
    vpbroadcastd    m13, [clip_18b_min]
    vpbroadcastd    m14, [clip_18b_max]
.pass1:
    vpbroadcastd    m15, [pd_2896]
    lea             r6, [rsp+32*4]
    sub             eobd, 36
    jl              .fast                   ; eob < 36: skip the extra pass on cq+32
    add             cq, 32
    call            m(iadst_16x16_internal_10bpc).main
    sub             cq, 32
    vpbroadcastd    m8, [pd_5120]
    paddd           m11, m8
    paddd           m9, m8
    paddd           m6, m8
    paddd           m4, m8
    vpbroadcastd    m8, [pd_5119]
    psubd           m12, m8, m12            ; pd_5119 - x: negate with rounding
    psubd           m10, m8, m10
    psubd           m7, m8, m7
    psubd           m5, m8, m5
    REPX            {psrad x, 13}, m12, m11, m10, m9, m7, m6, m5, m4
    mova            [r6+32*0], m12
    mova            [r6+32*1], m11
    mova            [r6+32*2], m10
    mova            [r6+32*3], m9
    psrld           m9, m15, 10 ; pd_2
    psubd           m3, m9, m3
    paddd           m2, m9
    psubd           m1, m9, m1
    paddd           m0, m9
    psubd           m12, m9, [r6-32*4]
    paddd           m11, m9, [r6-32*3]
    psubd           m10, m9, [r6-32*2]
    paddd           m9, [r6-32*1]
    REPX            {psrad x, 2 }, m12, m11, m10, m9, m3, m2, m1, m0
    mova            [r6-32*4], m12
    mova            [r6-32*3], m11
    mova            [r6-32*2], m10
    mova            [r6-32*1], m9
    add             r6, 32*8
    mova            [r6-32*4], m7
    mova            [r6-32*3], m6
    mova            [r6-32*2], m5
    mova            [r6-32*1], m4
    mova            [r6+32*0], m3
    mova            [r6+32*1], m2
    mova            [r6+32*2], m1
    mova            [r6+32*3], m0
.fast:
    add             r6, 32*8
    call            m(iadst_16x16_internal_10bpc).main
    vpbroadcastd    m14, [pd_5120]
    vpbroadcastd    m13, [pd_5119]
    psrld           m15, 10 ; pd_2
    ; Round and reverse in one go: each destination below is written only
    ; after its previous value has been consumed, so the order matters.
    psubd           m8, m13, m7
    paddd           m7, m14, m9
    paddd           m9, m14, m6
    psubd           m6, m13, m10
    psubd           m10, m13, m5
    paddd           m5, m14, m11
    paddd           m11, m14, m4
    psubd           m4, m13, m12
    psubd           m12, m15, m3
    paddd           m3, m15, [r6-32*1]
    paddd           m13, m15, m2
    psubd           m2, m15, [r6-32*2]
    psubd           m14, m15, m1
    mova            m1, m15                 ; keep pd_2 while m15 becomes an output
    paddd           m15, m0
    psubd           m0, m1, [r6-32*4]
    paddd           m1, [r6-32*3]
    jmp             m(iadst_16x16_internal_10bpc).pass1_end
.pass2:
    call            m(idct_16x16_internal_10bpc).transpose
    lea             r6, [pw_5+128]
    mova            [rsp], m15
    call            m(iadst_16x16_internal_8bpc).main
    call            m(iadst_16x16_internal_8bpc).main_pass2_end
    mova            [rsp+32*3], m3
    mova            [rsp+32*2], m2
    mova            [rsp+32*0], m0
    mova            m2, m13
    mova            m3, m12
    vpbroadcastd    m12, [pw_2048]
    pxor            m13, m13
    psubw           m13, m12                ; m13 = -pw_2048
    ; Outputs are consumed from the last register backwards, scaled by
    ; alternating -pw_2048 / +pw_2048.
    pmulhrsw        m0, m13, m15
    pmulhrsw        m1, m12, m14
    pmulhrsw        m2, m13
    pmulhrsw        m3, m12
    mova            m14, m8
    mova            m15, m9
    call            m(idct_16x8_internal_10bpc).write_16x4_start
    pmulhrsw        m0, m13, m11
    pmulhrsw        m1, m12, m10
    pmulhrsw        m2, m13, m15
    pmulhrsw        m3, m12, m14
    call            m(idct_16x8_internal_10bpc).write_16x4_zero
    pmulhrsw        m0, m13, m7
    pmulhrsw        m1, m12, m6
    pmulhrsw        m2, m13, m5
    pmulhrsw        m3, m12, m4
    call            m(idct_16x8_internal_10bpc).write_16x4_zero
    pmulhrsw        m0, m13, [rsp+32*3]
    pmulhrsw        m1, m12, [rsp+32*2]
    ; NOTE(review): [rsp+32*1] is not written in this function; presumably
    ; it was filled by the 8bpc helper above - confirm.
    pmulhrsw        m2, m13, [rsp+32*1]
    pmulhrsw        m3, m12, [rsp+32*0]
    call            m(idct_16x8_internal_10bpc).write_16x4_zero
    RET

INV_TXFM_16X16_FN identity, dct, -92
INV_TXFM_16X16_FN identity, identity

;-----------------------------------------------------------------------
; iidentity_16x16_internal_10bpc
;   Pass 1: every coefficient becomes (x * pd_5793 + pd_5120) >> 13.
;           m15 = pd_5793, m7 = pd_5120.
;   Pass 2: transposes, applies IDTX16 with pw_1697x16 to the sixteen
;           packed rows, and finishes in idct_16x16_internal_10bpc.end.
;-----------------------------------------------------------------------
cglobal iidentity_16x16_internal_10bpc, 0, 7, 16, 32*24, dst, stride, c, eob, tx2
    vpbroadcastd    m15, [pd_5793]
    vpbroadcastd    m7, [pd_5120]
    lea             r6, [rsp+32*4]
    sub             eobd, 36
    jl              .fast                   ; eob < 36: skip the .righthalf loop
    mov             r3, -32*8*4
.righthalf:
    ; Four iterations (r3 = -32*32 .. -32*8, step 32*8). Each one scales the
    ; 32 bytes at offset 32 of four consecutive 64-byte rows and stores the
    ; results to the stack buffer, which advances by 32*4 per iteration.
    pmulld          m0, m15, [cq+r3+32*33]
    pmulld          m1, m15, [cq+r3+32*35]
    pmulld          m2, m15, [cq+r3+32*37]
    pmulld          m3, m15, [cq+r3+32*39]
    add             r6, 32*4
    REPX            {paddd x, m7}, m0, m1, m2, m3
    REPX            {psrad x, 13}, m0, m1, m2, m3
    mova            [r6+32*0], m0
    mova            [r6+32*1], m1
    mova            [r6+32*2], m2
    mova            [r6+32*3], m3
    add             r3, 32*8
    jl              .righthalf
.fast:
    pmulld          m0, m15, [cq+64* 0]
    pmulld          m1, m15, [cq+64* 1]
    pmulld          m2, m15, [cq+64* 2]
    pmulld          m3, m15, [cq+64* 3]
    pmulld          m4, m15, [cq+64* 4]
    pmulld          m5, m15, [cq+64* 5]
    pmulld          m6, m15, [cq+64* 6]
    ; m7 still holds the rounding constant, so the row-7 product is parked
    ; in [cq] (row 0 is already in m0) and folded into m7 further down.
    pmulld          m8, m15, [cq+64* 7]
    mova            [cq], m8
    pmulld          m8, m15, [cq+64* 8]
    pmulld          m9, m15, [cq+64* 9]
    pmulld          m10, m15, [cq+64*10]
    pmulld          m11, m15, [cq+64*11]
    pmulld          m12, m15, [cq+64*12]
    pmulld          m13, m15, [cq+64*13]
    pmulld          m14, m15, [cq+64*14]
    pmulld          m15, [cq+64*15]
    REPX            {paddd x, m7}, m0, m1, m2, m3, m4, m5, m6, \
                    m8, m9, m10, m11, m12, m13, m14, m15
    paddd           m7, [cq]                ; m7 = row-7 product + pd_5120
    REPX            {psrad x, 13}, m0, m1, m2, m3, m4, m5, m6, m7, \
                    m8, m9, m10, m11, m12, m13, m14, m15
    jmp             tx2q                    ; target is set up outside this span
.pass2:
    call            m(idct_16x16_internal_10bpc).transpose

    ; m15 is needed for the coefficient and m0 is passed as IDTX16's second
    ; argument (presumably a temporary - the macro is defined elsewhere), so
    ; both are spilled to the coefficient buffer first.
    mova            [cq+32*0], m15
    mova            [cq+32*1], m0
    vpbroadcastd    m15, [pw_1697x16]

    REPX            {IDTX16 x, 0, 15},  1,  2,  3,  4,  5,  6,  7, \
                    8,  9, 10, 11, 12, 13, 14
    mova            m0, [cq+32*1]
    mova            [cq+32*1], m1
    IDTX16          0, 1, 15
    ; Row 15 by hand, since m15 holds the coefficient:
    ; m15 = pmulhrsw(pw_1697x16, x) + 2*x with saturating adds.
    mova            m1, [cq+32*0]
    pmulhrsw        m15, m1
    paddsw          m1, m1
    paddsw          m15, m1
    mova            m1, [cq+32*1]
    jmp             m(idct_16x16_internal_10bpc).end

INV_TXFM_16X16_FN dct, dct, 0, 12
INV_TXFM_16X16_FN dct, identity, 28, 12
INV_TXFM_16X16_FN dct, adst, 0, 12
INV_TXFM_16X16_FN dct, flipadst, 0, 12

;-----------------------------------------------------------------------
; idct_16x16_internal_12bpc
;   Entry:        loads clip_20b_min/max into m12/m13 and reuses the 10bpc
;                 first pass via idct_16x16_internal_10bpc.pass1.
;   .pass2:       runs .pass2_main twice (m0-m7 as delivered, then the
;                 spilled m8-m15) and finishes in
;                 iadst_16x16_internal_12bpc.end.
;   .write_16x16: scales m0-m3 by pw_16384 and chains into the shared
;                 write helpers.
;   .pass2_main:  32-bit second-pass kernel; returns sixteen vectors in
;                 m0-m15, each shifted right by 4.
;-----------------------------------------------------------------------
cglobal idct_16x16_internal_12bpc, 0, 7, 16, 32*24, dst, stride, c, eob, tx2
    vpbroadcastd    m12, [clip_20b_min]
    vpbroadcastd    m13, [clip_20b_max]
    jmp             m(idct_16x16_internal_10bpc).pass1
.pass2:
    ; Spill m8-m15 to the coefficient buffer; they feed the second call.
    mova            [cq+32* 8], m8
    mova            [cq+32* 9], m9
    mova            [cq+32*10], m10
    mova            [cq+32*11], m11
    mova            [cq+32*12], m12
    mova            [cq+32*13], m13
    mova            [cq+32*14], m14
    mova            [cq+32*15], m15
    call            .pass2_main
    ; Pack the sixteen dword results to eight word vectors and store them
    ; around r6.
    packssdw        m0, m1
    packssdw        m1, m2, m3
    packssdw        m2, m4, m5
    packssdw        m3, m6, m7
    packssdw        m4, m8, m9
    packssdw        m5, m10, m11
    packssdw        m6, m12, m13
    packssdw        m7, m14, m15
    mova            [r6-32*4], m0
    mova            [r6-32*3], m1
    mova            [r6-32*2], m2
    mova            [r6-32*1], m3
    mova            [r6+32*0], m4
    mova            [r6+32*1], m5
    mova            [r6+32*2], m6
    mova            [r6+32*3], m7
    mova            m0, [cq+32* 8]
    mova            m1, [cq+32* 9]
    mova            m2, [cq+32*10]
    mova            m3, [cq+32*11]
    mova            m4, [cq+32*12]
    mova            m5, [cq+32*13]
    mova            m6, [cq+32*14]
    mova            m7, [cq+32*15]
    mov             r5, r6                  ; r5 keeps the address of the packed first set
    add             r6, 32*16
    call            .pass2_main
    jmp             m(iadst_16x16_internal_12bpc).end
ALIGN function_align
.write_16x16:
    ; Reached via call, hence the gprsize skip over the return address.
    mova            [rsp+gprsize+32*0], m8
    mova            [rsp+gprsize+32*1], m9
    mova            [rsp+gprsize+32*2], m12
    vpbroadcastd    m12, [pw_16384]
    pmulhrsw        m0, m12
    pmulhrsw        m1, m12
    pmulhrsw        m2, m12
    pmulhrsw        m3, m12
    call            m(idct_16x8_internal_12bpc).write_16x4_start
    call            m(idct_16x8_internal_10bpc).write_16x4_zero
    jmp             m(idct_16x16_internal_10bpc).write_16x16_2
ALIGN function_align
.pass2_main:
    call            m(idct_8x8_internal_12bpc).transpose_8x8
    ; Park m0/m2/m4/m6 in the coefficient buffer; m1/m3/m5/m7 are clamped
    ; to the 18-bit range and become the first four main_oddhalf inputs.
    mova            [cq+32* 0], m0
    mova            [cq+32* 1], m2
    mova            [cq+32* 2], m4
    mova            [cq+32* 3], m6
    vpbroadcastd    m12, [clip_18b_min]
    vpbroadcastd    m13, [clip_18b_max]
    pmaxsd          m0, m12, m1
    pmaxsd          m1, m12, m3
    pmaxsd          m2, m12, m5
    pmaxsd          m3, m12, m7
    REPX            {pminsd x, m13}, m0, m1, m2, m3
    test            eobd, eobd
    jge             .pass2_slow
    ; eobd negative: the remaining four inputs are zero.
    pxor            m4, m4
    REPX            {mova x, m4}, m5, m6, m7
    jmp             .pass2_fast
.pass2_slow:
    ; Load eight more vectors from the stack buffer (r6 moves back by 32*8),
    ; transpose them, park m8/m10/m12/m14 and clamp m4-m7.
    sub             r6, 32*8
    mova            m8, [r6-32*4]
    mova            m4, [r6-32*3]
    mova            m10, [r6-32*2]
    mova            m5, [r6-32*1]
    mova            m12, [r6+32*0]
    mova            m6, [r6+32*1]
    mova            m14, [r6+32*2]
    mova            m7, [r6+32*3]
    TRANSPOSE_8X8_DWORD 8, 4, 10, 5, 12, 6, 14, 7, 9, 11, 13, 15
    mova            [cq+32* 4], m8
    mova            [cq+32* 5], m10
    mova            [cq+32* 6], m12
    mova            [cq+32* 7], m14
    vpbroadcastd    m12, [clip_18b_min]
    vpbroadcastd    m13, [clip_18b_max]
    REPX            {pmaxsd x, m12}, m4, m5, m6, m7
    REPX            {pminsd x, m13}, m4, m5, m6, m7
.pass2_fast:
    vpbroadcastd    m11, [pd_2048]
    vpbroadcastd    m14, [pd_2896]
    call            m(idct_8x16_internal_10bpc).main_oddhalf
    ; Reload and clamp the parked vectors for the 8-point stage.
    ; NOTE(review): relies on m12/m13 still holding the clip bounds after
    ; main_oddhalf - confirm against that routine.
    pmaxsd          m0, m12, [cq+32* 0]
    pmaxsd          m1, m12, [cq+32* 1]
    pmaxsd          m2, m12, [cq+32* 2]
    pmaxsd          m3, m12, [cq+32* 3]
    REPX            {pminsd x, m13}, m0, m1, m2, m3
    test            eobd, eobd
    jge             .pass2_slow2
    pxor            m4, m4
    REPX            {mova x, m4}, m5, m6, m7
    jmp             .pass2_fast2
.pass2_slow2:
    pmaxsd          m4, m12, [cq+32* 4]
    pmaxsd          m5, m12, [cq+32* 5]
    pmaxsd          m6, m12, [cq+32* 6]
    pmaxsd          m7, m12, [cq+32* 7]
    REPX            {pminsd x, m13}, m4, m5, m6, m7
.pass2_fast2:
    call            m(idct_8x8_internal_10bpc).main
    call            m(idct_8x16_internal_10bpc).main_evenhalf
    psrad           m11, 8 ; pd_8
    REPX            {paddd x, m11}, m0, m1, m2, m3, m4, m5, m6, m7
    call            m(idct_16x8_internal_10bpc).pass1_rotations
    REPX            {psrad x, 4}, m0, m1, m2, m3, m4, m5, m6, m7, \
                    m8, m9, m10, m11, m12, m13, m14, m15
    ret

INV_TXFM_16X16_FN adst, dct, 0, 12
INV_TXFM_16X16_FN adst, adst, 0, 12
INV_TXFM_16X16_FN adst, flipadst, 0, 12

; NOTE(review): this definition continues past the end of this span; only
; the visible head is reproduced here, unchanged.
cglobal iadst_16x16_internal_12bpc, 0, 7, 16, 32*24, dst, stride, c, eob, tx2
    vpbroadcastd    m13, [clip_20b_min]
    vpbroadcastd    m14, [clip_20b_max]
    jmp             m(iadst_16x16_internal_10bpc).pass1
.pass2:
    call            .pass2_part1
    call            m(iadst_16x8_internal_10bpc).pass1_rotations
    call            .pass2_part2
    call            m(iadst_16x8_internal_10bpc).pass1_rotations
.pass2_part3:
    REPX            {psrad x, 4 }, m0, m1, m2, m3, m12, m13, m14, m15
    REPX            {psrad x, 15}, m4, m5, m6, m7, m8, m9, m10, m11
.end:
    packssdw        m15, m14
    packssdw        m14, m13, m12
    packssdw        m13, m11, m10
    packssdw        m12, m9, m8
    packssdw        m11, m7, m6
5334*c0909341SAndroid Build Coastguard Worker packssdw m10, m5, m4 5335*c0909341SAndroid Build Coastguard Worker packssdw m7, m3, m2 5336*c0909341SAndroid Build Coastguard Worker packssdw m6, m1, m0 5337*c0909341SAndroid Build Coastguard Worker vpblendd m0, m6, [r5-32*4], 0x33 5338*c0909341SAndroid Build Coastguard Worker vpblendd m1, m6, [r5-32*4], 0xcc 5339*c0909341SAndroid Build Coastguard Worker vpblendd m2, m7, [r5-32*3], 0x33 5340*c0909341SAndroid Build Coastguard Worker vpblendd m3, m7, [r5-32*3], 0xcc 5341*c0909341SAndroid Build Coastguard Worker vpermq m0, m0, q3120 5342*c0909341SAndroid Build Coastguard Worker vpermq m1, m1, q2031 5343*c0909341SAndroid Build Coastguard Worker vpermq m2, m2, q3120 5344*c0909341SAndroid Build Coastguard Worker vpermq m3, m3, q2031 5345*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_12bpc).write_16x4_start 5346*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_10bpc).write_16x4_zero 5347*c0909341SAndroid Build Coastguard Worker vpblendd m0, m10, [r5-32*2], 0x33 5348*c0909341SAndroid Build Coastguard Worker vpblendd m1, m10, [r5-32*2], 0xcc 5349*c0909341SAndroid Build Coastguard Worker vpblendd m2, m11, [r5-32*1], 0x33 5350*c0909341SAndroid Build Coastguard Worker vpblendd m3, m11, [r5-32*1], 0xcc 5351*c0909341SAndroid Build Coastguard Worker vpermq m0, m0, q3120 5352*c0909341SAndroid Build Coastguard Worker vpermq m1, m1, q2031 5353*c0909341SAndroid Build Coastguard Worker vpermq m2, m2, q3120 5354*c0909341SAndroid Build Coastguard Worker vpermq m3, m3, q2031 5355*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_10bpc).write_16x4_zero 5356*c0909341SAndroid Build Coastguard Worker vpblendd m0, m12, [r5+32*0], 0x33 5357*c0909341SAndroid Build Coastguard Worker vpblendd m1, m12, [r5+32*0], 0xcc 5358*c0909341SAndroid Build Coastguard Worker vpblendd m2, m13, [r5+32*1], 0x33 5359*c0909341SAndroid Build Coastguard Worker vpblendd m3, m13, [r5+32*1], 0xcc 5360*c0909341SAndroid 
Build Coastguard Worker vpermq m0, m0, q3120 5361*c0909341SAndroid Build Coastguard Worker vpermq m1, m1, q2031 5362*c0909341SAndroid Build Coastguard Worker vpermq m2, m2, q3120 5363*c0909341SAndroid Build Coastguard Worker vpermq m3, m3, q2031 5364*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_10bpc).write_16x4_zero 5365*c0909341SAndroid Build Coastguard Worker vpblendd m0, m14, [r5+32*2], 0x33 5366*c0909341SAndroid Build Coastguard Worker vpblendd m1, m14, [r5+32*2], 0xcc 5367*c0909341SAndroid Build Coastguard Worker vpblendd m2, m15, [r5+32*3], 0x33 5368*c0909341SAndroid Build Coastguard Worker vpblendd m3, m15, [r5+32*3], 0xcc 5369*c0909341SAndroid Build Coastguard Worker vpermq m0, m0, q3120 5370*c0909341SAndroid Build Coastguard Worker vpermq m1, m1, q2031 5371*c0909341SAndroid Build Coastguard Worker vpermq m2, m2, q3120 5372*c0909341SAndroid Build Coastguard Worker vpermq m3, m3, q2031 5373*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_10bpc).write_16x4_zero 5374*c0909341SAndroid Build Coastguard Worker RET 5375*c0909341SAndroid Build Coastguard WorkerALIGN function_align 5376*c0909341SAndroid Build Coastguard Worker.pass2_part1: 5377*c0909341SAndroid Build Coastguard Worker mova [cq+32* 8], m8 5378*c0909341SAndroid Build Coastguard Worker mova [cq+32* 9], m9 5379*c0909341SAndroid Build Coastguard Worker mova [cq+32*10], m10 5380*c0909341SAndroid Build Coastguard Worker mova [cq+32*11], m11 5381*c0909341SAndroid Build Coastguard Worker mova [cq+32*12], m12 5382*c0909341SAndroid Build Coastguard Worker mova [cq+32*13], m13 5383*c0909341SAndroid Build Coastguard Worker mova [cq+32*14], m14 5384*c0909341SAndroid Build Coastguard Worker mova [cq+32*15], m15 5385*c0909341SAndroid Build Coastguard Worker.pass2_main: 5386*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_12bpc).transpose_8x8 5387*c0909341SAndroid Build Coastguard Worker mova [cq+32* 0], m0 5388*c0909341SAndroid Build Coastguard Worker mova 
[cq+32* 1], m3 5389*c0909341SAndroid Build Coastguard Worker mova [cq+32* 2], m4 5390*c0909341SAndroid Build Coastguard Worker mova [cq+32* 3], m7 5391*c0909341SAndroid Build Coastguard Worker vpbroadcastd m13, [clip_18b_min] 5392*c0909341SAndroid Build Coastguard Worker vpbroadcastd m14, [clip_18b_max] 5393*c0909341SAndroid Build Coastguard Worker pmaxsd m0, m13, m2 5394*c0909341SAndroid Build Coastguard Worker pmaxsd m2, m13, m6 5395*c0909341SAndroid Build Coastguard Worker pmaxsd m5, m13, m5 5396*c0909341SAndroid Build Coastguard Worker pmaxsd m7, m13, m1 5397*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m14}, m0, m2, m5, m7 5398*c0909341SAndroid Build Coastguard Worker test eobd, eobd 5399*c0909341SAndroid Build Coastguard Worker jge .pass2_slow 5400*c0909341SAndroid Build Coastguard Worker pxor m1, m1 5401*c0909341SAndroid Build Coastguard Worker REPX {mova x, m1}, m3, m4, m6 5402*c0909341SAndroid Build Coastguard Worker jmp .pass2_fast 5403*c0909341SAndroid Build Coastguard Worker.pass2_slow: 5404*c0909341SAndroid Build Coastguard Worker sub r6, 32*8 5405*c0909341SAndroid Build Coastguard Worker mova m8, [r6-32*4] 5406*c0909341SAndroid Build Coastguard Worker mova m3, [r6-32*3] 5407*c0909341SAndroid Build Coastguard Worker mova m4, [r6-32*2] 5408*c0909341SAndroid Build Coastguard Worker mova m11, [r6-32*1] 5409*c0909341SAndroid Build Coastguard Worker mova m12, [r6+32*0] 5410*c0909341SAndroid Build Coastguard Worker mova m1, [r6+32*1] 5411*c0909341SAndroid Build Coastguard Worker mova m6, [r6+32*2] 5412*c0909341SAndroid Build Coastguard Worker mova m15, [r6+32*3] 5413*c0909341SAndroid Build Coastguard Worker TRANSPOSE_8X8_DWORD 8, 3, 4, 11, 12, 1, 6, 15, 13, 9, 10, 14 5414*c0909341SAndroid Build Coastguard Worker mova [cq+32* 4], m8 5415*c0909341SAndroid Build Coastguard Worker mova [cq+32* 5], m11 5416*c0909341SAndroid Build Coastguard Worker mova [cq+32* 6], m12 5417*c0909341SAndroid Build Coastguard Worker mova [cq+32* 7], m15 
5418*c0909341SAndroid Build Coastguard Worker vpbroadcastd m13, [clip_18b_min] 5419*c0909341SAndroid Build Coastguard Worker vpbroadcastd m14, [clip_18b_max] 5420*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m13}, m1, m3, m4, m6 5421*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m14}, m1, m3, m4, m6 5422*c0909341SAndroid Build Coastguard Worker.pass2_fast: 5423*c0909341SAndroid Build Coastguard Worker vpbroadcastd m12, [pd_2048] 5424*c0909341SAndroid Build Coastguard Worker vpbroadcastd m15, [pd_2896] 5425*c0909341SAndroid Build Coastguard Worker call m(iadst_16x8_internal_10bpc).main_part1 5426*c0909341SAndroid Build Coastguard Worker pmaxsd m0, m13, [cq+32* 0] ; 0 5427*c0909341SAndroid Build Coastguard Worker pmaxsd m7, m13, [cq+32* 1] ; 3 5428*c0909341SAndroid Build Coastguard Worker pmaxsd m2, m13, [cq+32* 2] ; 4 5429*c0909341SAndroid Build Coastguard Worker pmaxsd m5, m13, [cq+32* 3] ; 7 5430*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m14}, m0, m2, m5, m7 5431*c0909341SAndroid Build Coastguard Worker test eobd, eobd 5432*c0909341SAndroid Build Coastguard Worker jge .pass2_slow2 5433*c0909341SAndroid Build Coastguard Worker pxor m1, m1 5434*c0909341SAndroid Build Coastguard Worker REPX {mova x, m1}, m3, m4, m6 5435*c0909341SAndroid Build Coastguard Worker jmp .pass2_fast2 5436*c0909341SAndroid Build Coastguard Worker.pass2_slow2: 5437*c0909341SAndroid Build Coastguard Worker pmaxsd m4, m13, [cq+32* 4] ; 8 5438*c0909341SAndroid Build Coastguard Worker pmaxsd m3, m13, [cq+32* 5] ; 11 5439*c0909341SAndroid Build Coastguard Worker pmaxsd m6, m13, [cq+32* 6] ; 12 5440*c0909341SAndroid Build Coastguard Worker pmaxsd m1, m13, [cq+32* 7] ; 15 5441*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m14}, m1, m3, m4, m6 5442*c0909341SAndroid Build Coastguard Worker.pass2_fast2: 5443*c0909341SAndroid Build Coastguard Worker call m(iadst_16x8_internal_10bpc).main_part2 5444*c0909341SAndroid Build Coastguard Worker vpbroadcastd 
m14, [pd_17408] 5445*c0909341SAndroid Build Coastguard Worker psrld m15, 11 ; pd_1 5446*c0909341SAndroid Build Coastguard Worker psubd m13, m14, m15 ; pd_17407 5447*c0909341SAndroid Build Coastguard Worker pslld m15, 3 ; pd_8 5448*c0909341SAndroid Build Coastguard Worker ret 5449*c0909341SAndroid Build Coastguard WorkerALIGN function_align 5450*c0909341SAndroid Build Coastguard Worker.pass2_part2: 5451*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 4 }, m0, m1, m2, m3, m12, m13, m14, m15 5452*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 15}, m4, m5, m6, m7, m8, m9, m10, m11 5453*c0909341SAndroid Build Coastguard Worker packssdw m0, m1 5454*c0909341SAndroid Build Coastguard Worker packssdw m1, m2, m3 5455*c0909341SAndroid Build Coastguard Worker packssdw m2, m4, m5 5456*c0909341SAndroid Build Coastguard Worker packssdw m3, m6, m7 5457*c0909341SAndroid Build Coastguard Worker packssdw m4, m8, m9 5458*c0909341SAndroid Build Coastguard Worker packssdw m5, m10, m11 5459*c0909341SAndroid Build Coastguard Worker packssdw m6, m12, m13 5460*c0909341SAndroid Build Coastguard Worker packssdw m7, m14, m15 5461*c0909341SAndroid Build Coastguard Worker mova [r6-32*4], m0 5462*c0909341SAndroid Build Coastguard Worker mova [r6-32*3], m1 5463*c0909341SAndroid Build Coastguard Worker mova [r6-32*2], m2 5464*c0909341SAndroid Build Coastguard Worker mova [r6-32*1], m3 5465*c0909341SAndroid Build Coastguard Worker mova [r6+32*0], m4 5466*c0909341SAndroid Build Coastguard Worker mova [r6+32*1], m5 5467*c0909341SAndroid Build Coastguard Worker mova [r6+32*2], m6 5468*c0909341SAndroid Build Coastguard Worker mova [r6+32*3], m7 5469*c0909341SAndroid Build Coastguard Worker mova m0, [cq+32* 8] 5470*c0909341SAndroid Build Coastguard Worker mova m1, [cq+32* 9] 5471*c0909341SAndroid Build Coastguard Worker mova m2, [cq+32*10] 5472*c0909341SAndroid Build Coastguard Worker mova m3, [cq+32*11] 5473*c0909341SAndroid Build Coastguard Worker mova m4, [cq+32*12] 
5474*c0909341SAndroid Build Coastguard Worker mova m5, [cq+32*13] 5475*c0909341SAndroid Build Coastguard Worker mova m6, [cq+32*14] 5476*c0909341SAndroid Build Coastguard Worker mova m7, [cq+32*15] 5477*c0909341SAndroid Build Coastguard Worker mov r5, r6 5478*c0909341SAndroid Build Coastguard Worker add r6, 32*16 5479*c0909341SAndroid Build Coastguard Worker jmp .pass2_main 5480*c0909341SAndroid Build Coastguard Worker 5481*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X16_FN flipadst, dct, 0, 12 5482*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X16_FN flipadst, adst, 0, 12 5483*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X16_FN flipadst, flipadst, 0, 12 5484*c0909341SAndroid Build Coastguard Worker 5485*c0909341SAndroid Build Coastguard Workercglobal iflipadst_16x16_internal_12bpc, 0, 7, 16, 32*24, dst, stride, c, eob, tx2 5486*c0909341SAndroid Build Coastguard Worker vpbroadcastd m13, [clip_20b_min] 5487*c0909341SAndroid Build Coastguard Worker vpbroadcastd m14, [clip_20b_max] 5488*c0909341SAndroid Build Coastguard Worker jmp m(iflipadst_16x16_internal_10bpc).pass1 5489*c0909341SAndroid Build Coastguard Worker.pass2: 5490*c0909341SAndroid Build Coastguard Worker call m(iadst_16x16_internal_12bpc).pass2_part1 5491*c0909341SAndroid Build Coastguard Worker call m(iflipadst_16x8_internal_10bpc).pass1_rotations 5492*c0909341SAndroid Build Coastguard Worker call m(iadst_16x16_internal_12bpc).pass2_part2 5493*c0909341SAndroid Build Coastguard Worker call m(iflipadst_16x8_internal_10bpc).pass1_rotations 5494*c0909341SAndroid Build Coastguard Worker jmp m(iadst_16x16_internal_12bpc).pass2_part3 5495*c0909341SAndroid Build Coastguard Worker 5496*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X16_FN identity, dct, -92, 12 5497*c0909341SAndroid Build Coastguard WorkerINV_TXFM_16X16_FN identity, identity, 0, 12 5498*c0909341SAndroid Build Coastguard Worker 5499*c0909341SAndroid Build Coastguard Worker%macro IDTX16_12BPC 1 ; src 5500*c0909341SAndroid 
; m%1 = (m%1 + ((m%1 * m7 + m15) >> 12)) >> 1
; Expects the multiplier in m7 and the rounding bias in m15 (the caller
; below loads pd_1697 and pd_5120); clobbers m6.
    pmulld               m6, m7, m%1
    paddd                m6, m15
    psrad                m6, 12
    paddd                m6, m%1
    psrad               m%1, m6, 1
%endmacro

; iidentity_16x16_internal_12bpc
;   Pass 1 (entry .. jmp tx2q):
;     - eob >= 36: .righthalf walks the odd 32-byte slots of cq in four
;       iterations of four vectors each, applies the same scaling formula as
;       IDTX16_12BPC and stores the results on the stack starting at
;       rsp+32*8 (r6 is bumped by 32*4 before each group of stores)
;     - .fast scales the sixteen vectors at cq+64*0..15 into m0-m15.
;       m6/m7 are busy (macro temporary / multiplier) while the macro runs,
;       so inputs 6 and 7 are processed in m8/m9 and parked in cq+64*0/1,
;       and input 15 is done by hand so that m7 can finally be overwritten.
;   Pass 2 (.pass2): eobd < 0 takes the short path straight to the writer;
;     otherwise the stack copy produced by .righthalf is processed as well.
;   NOTE(review): .pass2 tests the sign of eobd left by `sub eobd, 36` and
;   reads the stack buffer relative to r6; this assumes neither is changed
;   by the code reached through tx2q - confirm.
cglobal iidentity_16x16_internal_12bpc, 0, 7, 16, 32*24, dst, stride, c, eob, tx2
    vpbroadcastd         m7, [pd_1697]
    vpbroadcastd        m15, [pd_5120]
    lea                  r6, [rsp+32*4]
    sub                eobd, 36
    jl .fast
    mov                  r3, -32*8*4 ; negative byte offset, counts up to 0
.righthalf:
    mova                m10, [cq+r3+32*33]
    mova                m11, [cq+r3+32*35]
    mova                m12, [cq+r3+32*37]
    mova                m13, [cq+r3+32*39]
    add                  r6, 32*4
    pmulld               m0, m7, m10
    pmulld               m1, m7, m11
    pmulld               m2, m7, m12
    pmulld               m3, m7, m13
    REPX     {paddd x, m15}, m0, m1, m2, m3
    REPX     {psrad x, 12 }, m0, m1, m2, m3
    paddd                m0, m10
    paddd                m1, m11
    paddd                m2, m12
    paddd                m3, m13
    REPX     {psrad x, 1  }, m0, m1, m2, m3
    mova          [r6+32*0], m0
    mova          [r6+32*1], m1
    mova          [r6+32*2], m2
    mova          [r6+32*3], m3
    add                  r3, 32*8
    jl .righthalf
.fast:
    mova                 m0, [cq+64* 0]
    mova                 m1, [cq+64* 1]
    mova                 m2, [cq+64* 2]
    mova                 m3, [cq+64* 3]
    mova                 m4, [cq+64* 4]
    mova                 m5, [cq+64* 5]
    mova                 m8, [cq+64* 6]
    mova                 m9, [cq+64* 7]
    REPX {IDTX16_12BPC x}, 0, 1, 2, 3, 4, 5, 8, 9
    ; park results 6 and 7 until m6/m7 are free again
    mova          [cq+64*0], m8
    mova          [cq+64*1], m9
    mova                 m8, [cq+64* 8]
    mova                 m9, [cq+64* 9]
    mova                m10, [cq+64*10]
    mova                m11, [cq+64*11]
    mova                m12, [cq+64*12]
    mova                m13, [cq+64*13]
    mova                m14, [cq+64*14]
    REPX {IDTX16_12BPC x}, 8, 9, 10, 11, 12, 13, 14
    ; last input: same formula, but consuming the multiplier (m7) and the
    ; bias (m15) since they are no longer needed afterwards
    mova                 m6, [cq+64*15]
    pmulld               m7, m6
    paddd                m7, m15
    psrad                m7, 12
    paddd                m7, m6
    mova                 m6, [cq+64*0]
    psrad               m15, m7, 1
    mova                 m7, [cq+64*1]
    jmp tx2q
.pass2:
    call m(iidentity_8x16_internal_12bpc).pass2_main
    call m(idct_16x16_internal_10bpc).transpose_fast
    test               eobd, eobd
    jl .pass2_fast
    ; keep the finished first half in cq+32*8..15 while the second is done
    mova         [cq+32* 8], m0
    mova         [cq+32* 9], m1
    mova         [cq+32*10], m2
    mova         [cq+32*11], m3
    mova         [cq+32*12], m4
    mova         [cq+32*13], m5
    mova         [cq+32*14], m6
    mova         [cq+32*15], m7
    ; fetch the sixteen vectors of the stack copy: upper eight into m8-m15,
    ; then (after r6 -= 32*8) the lower eight into m0-m7
    mova                 m8, [r6-32*4]
    mova                 m9, [r6-32*3]
    mova                m10, [r6-32*2]
    mova                m11, [r6-32*1]
    mova                m12, [r6+32*0]
    mova                m13, [r6+32*1]
    mova                m14, [r6+32*2]
    mova                m15, [r6+32*3]
    sub                  r6, 32*8
    mova                 m0, [r6-32*4]
    mova                 m1, [r6-32*3]
    mova                 m2, [r6-32*2]
    mova                 m3, [r6-32*1]
    mova                 m4, [r6+32*0]
    mova                 m5, [r6+32*1]
    mova                 m6, [r6+32*2]
    mova                 m7, [r6+32*3]
    call m(iidentity_8x16_internal_12bpc).pass2_main
    call m(idct_16x8_internal_10bpc).transpose2
    ; second half goes to m8-m15, first half comes back into m0-m7
    mova                 m8, m0
    mova                 m9, m1
    mova                m10, m2
    mova                m11, m3
    mova                m12, m4
    mova                m13, m5
    mova                m14, m6
    mova                m15, m7
    mova                 m0, [cq+32* 8]
    mova                 m1, [cq+32* 9]
    mova                 m2, [cq+32*10]
    mova                 m3, [cq+32*11]
    mova                 m4, [cq+32*12]
    mova                 m5, [cq+32*13]
    mova                 m6, [cq+32*14]
    mova                 m7, [cq+32*15]
.pass2_fast:
    call m(idct_16x16_internal_12bpc).write_16x16
    RET

; IDCT32_END: final butterfly stage for one register of a 32-point idct.
;   %1    in/out1  register index n; m%1 is both input and first output
;   %2    out2     second output register
;   %3-%5 tmp      scratch registers (m%3 and m%4 also carry outputs)
;   %6    shift    arithmetic right shift applied to all four results
;   %7    pack     optional, default 1; if bit 0 is set the four dword
;                  results are packed into two word vectors (m%1, m%2)
; Reads its other operands from [r6+32*(n-4)], [r5+32*(3-n)] and
; [r4+32*(n-4)]; expects the clamp bounds in m12/m13 and the rounding
; constant in m11.
%macro IDCT32_END 6-7 1 ; in/out1, out2, tmp[1-3], shift, pack
    mova                m%4, [r6+32*(%1-4)]
    mova                m%2, [r5+32*(3-%1)]
    mova                m%5, [r4+32*(%1-4)]
    psubd               m%3, m%1, m%4 ; idct16 out15 - n
    paddd               m%1, m%4      ; idct16 out0  + n
    pmaxsd              m%1, m12
    pmaxsd              m%3, m12
    pminsd              m%1, m13
    pminsd              m%3, m13
    paddd               m%1, m11
    paddd               m%3, m11
    psubd               m%4, m%1, m%2 ; out31 - n
    paddd               m%1, m%2      ; out0  + n
    paddd               m%2, m%3, m%5 ; out15 - n
    psubd               m%3, m%5      ; out16 + n
    REPX      {psrad x, %6}, m%1, m%3, m%2, m%4
%if %7 & 1
    packssdw            m%1, m%3      ; out0  + n, out16 + n
    packssdw            m%2, m%4      ; out15 - n, out31 - n
%endif
%endmacro

; inv_txfm_add_dct_dct_8x32_10bpc
;   eob == 0 is dispatched to .dconly before the full PROLOGUE (16 vector
;   registers, 32*12 bytes of stack) is issued.
;   Pass 1 calls .pass1_main between one and four times depending on eob
;   (thresholds 43, 107 and 171); every call advances cq by 32 bytes, so the
;   original pointer is kept in r4. Outputs that are not computed are
;   replaced by zeros before the 8 bpc 32-point kernel
;   (.main_fast / .main) is run for pass 2.
cglobal inv_txfm_add_dct_dct_8x32_10bpc, 4, 7, 0, dst, stride, c, eob
    test               eobd, eobd
    jz .dconly
    PROLOGUE              0, 7, 16, 32*12, dst, stride, c, eob
%undef cmp
    vpbroadcastd        m11, [pd_2048]
    vpbroadcastd        m12, [clip_18b_min]
    vpbroadcastd        m13, [clip_18b_max]
    vbroadcasti128      m14, [idct32_shuf]
    mov                  r4, cq
    call .pass1_main
    mova         [rsp+32*0], m2
    mova         [rsp+32*1], m3
    cmp                eobd, 43
    jge .eob43
    ; only the first .pass1_main result exists: zero the rest
    pxor                 m4, m4
    REPX       {mova x, m4}, [rsp+32*2], m2, m3, m11
    jmp .pass1_end_fast
.eob43:
    lea                  r6, [rsp+32*8]
    mova          [r6-32*4], m0
    mova          [r6-32*3], m1
    call .pass1_main
    mova         [rsp+32*2], m2
    cmp                eobd, 107
    jge .eob107
    ; two results: move the second one to m2/m3/m11 and restore the first
    ; one into m0/m1
    mova                m11, m3
    mova                 m2, m0
    mova                 m3, m1
    mova                 m0, [r6-32*4]
    mova                 m1, [r6-32*3]
    pxor                 m4, m4
.pass1_end_fast:
    vpbroadcastd        m10, [pw_2048]
    lea                  r6, [deint_shuf+128]
    REPX       {mova x, m4}, m5, m6, m7
    call m(inv_txfm_add_dct_dct_8x32_8bpc).main_fast
    jmp .end
; eob >= 107: save the second .pass1_main result and run a third one.
.eob107:
    mova         [rsp+32*3], m3
    mova          [r6-32*2], m0
    mova          [r6-32*1], m1
    call .pass1_main
    cmp                eobd, 171
    jge .eob171
    ; three results: arrange the third one in m4/m5/m12/m13 and zero the
    ; registers that a fourth result would have filled
    pshufd              m12, m2, q1032
    pshufd              m13, m3, q1032
    mova                 m4, m0
    mova                 m5, m1
    pxor                 m6, m6
    REPX       {mova x, m6}, m7, m14, m15
    jmp .pass1_end
; eob >= 171: park the third result at r6+32*0..3 and run the fourth one.
.eob171:
    mova          [r6+32*0], m0
    mova          [r6+32*1], m1
    mova          [r6+32*2], m2
    mova          [r6+32*3], m3
    call .pass1_main
    pshufd              m12, [r6+32*2], q1032 ; out19 out17
    pshufd              m13, [r6+32*3], q1032 ; out23 out21
    mova                 m4, [r6+32*0]        ; out16 out18
    mova                 m5, [r6+32*1]        ; out20 out22
    pshufd              m14, m2, q1032        ; out27 out25
    pshufd              m15, m3, q1032        ; out31 out29
    mova                 m6, m0               ; out24 out26
    mova                 m7, m1               ; out28 out30
.pass1_end:
    mova                 m0, [r6-32*4]        ; out0  out2
    mova                 m1, [r6-32*3]        ; out4  out6
    mova                 m2, [r6-32*2]        ; out8  out10
    mova                 m3, [r6-32*1]        ; out12 out14
    lea                  r6, [deint_shuf+128]
    mova                m11, [rsp+32*3]       ; out13 out15
    vpbroadcastd        m10, [pw_2048]
    call m(inv_txfm_add_dct_dct_8x32_8bpc).main
; Output stage: restore cq from r4, spill m8-m11 to rsp+32*1..4 so that
; m0/m1 can be used as the writer's operands, then for each pair of vectors
; reorder the qwords, scale with pmulhrsw by pw_2048 and emit four rows per
; write_8x4 call.
.end: ; [rsp+0*32] = m12
    vpbroadcastd        m12, [pw_2048]
    mov                  cq, r4
    mova         [rsp+32*1], m8
    mova         [rsp+32*2], m9
    mova         [rsp+32*3], m10
    mova         [rsp+32*4], m11
    vpermq               m0, m0, q3120
    vpermq               m1, m1, q2031
    pmulhrsw             m0, m12
    pmulhrsw             m1, m12
    call m(idct_8x8_internal_10bpc).write_8x4_start
    vpermq               m0, m2, q3120
    vpermq               m1, m3, q2031
    pmulhrsw             m0, m12
    pmulhrsw             m1, m12
    call m(idct_8x8_internal_10bpc).write_8x4
    vpermq               m0, m4, q3120
    vpermq               m1, m5, q2031
    pmulhrsw             m0, m12
    pmulhrsw             m1, m12
    call m(idct_8x8_internal_10bpc).write_8x4
    vpermq               m0, m6, q3120
    vpermq               m1, m7, q2031
    pmulhrsw             m0, m12
    pmulhrsw             m1, m12
    call m(idct_8x8_internal_10bpc).write_8x4
    vpermq               m0, [rsp+32*1], q3120
    vpermq               m1, [rsp+32*2], q2031
    pmulhrsw             m0, m12
    pmulhrsw             m1, m12
    call m(idct_8x8_internal_10bpc).write_8x4
    vpermq               m0, [rsp+32*3], q3120
    vpermq               m1, [rsp+32*4], q2031
    pmulhrsw             m0, m12
    pmulhrsw             m1, m12
    call m(idct_8x8_internal_10bpc).write_8x4
    vpermq               m0, [rsp+32*0], q3120
    vpermq               m1, m13, q2031
    pmulhrsw             m0, m12
    pmulhrsw             m1, m12
    call m(idct_8x8_internal_10bpc).write_8x4
    vpermq               m0, m14, q3120
    vpermq               m1, m15, q2031
    pmulhrsw             m0, m12
    pmulhrsw             m1, m12
    call m(idct_8x8_internal_10bpc).write_8x4
    RET
; DC-only shortcut (eob == 0): r6d = ([cq] * 181 + 640) >> 10, the
; coefficient is cleared by storing eobd (known to be 0 here), and the 8x8
; DC-only tail finishes the job.
; NOTE(review): eob is the 4th named argument (r3), so `or r3d, 32` leaves
; r3d = 32; presumably the row count expected by .dconly3 - confirm.
.dconly:
    imul                r6d, [cq], 181
    vpbroadcastd         m2, [dconly_10bpc]
    mov                [cq], eobd ; 0
    or                  r3d, 32
    add                 r6d, 640
    sar                 r6d, 10
    jmp m(inv_txfm_add_dct_dct_8x8_10bpc).dconly3
ALIGN function_align
; .pass1_main_part1: load eight vectors from cq at a 128-byte stride, run
; the 8-point idct kernel, add the rounding term pd_2 (derived from the
; pd_2048 held in m11), form the final sums/differences and shift right
; by 2. Results: m0-m7 = out0-out7.
.pass1_main_part1:
    mova                 m0, [cq+128*0]
    mova                 m1, [cq+128*1]
    mova                 m2, [cq+128*2]
    mova                 m3, [cq+128*3]
    mova                 m4, [cq+128*4]
    mova                 m5, [cq+128*5]
    mova                 m6, [cq+128*6]
    mova                 m7, [cq+128*7]
    call m(idct_8x8_internal_10bpc).main
    psrld                m1, m11, 10 ; pd_2
    REPX      {paddd x, m1}, m0, m6, m5, m3
    paddd                m1, m6, m7 ; out1
    psubd                m6, m7     ; out6
    psubd                m7, m0, m9 ; out7
    paddd                m0, m9     ; out0
    paddd                m2, m5, m4 ; out2
    psubd                m5, m4     ; out5
    psubd                m4, m3, m8 ; out4
    paddd                m3, m8     ; out3
    REPX      {psrad x, 2 }, m0, m1, m2, m3, m4, m5, m6, m7
    ret
ALIGN function_align
; .pass1_main: .pass1_main_part1 plus packing. Advances cq by 32 bytes,
; packs out0-out7 to words, shuffles them with the pattern in m14
; (idct32_shuf, loaded by the caller) and interleaves so that
; m0 = rows 0 2, m1 = rows 4 6, m2 = rows 1 3, m3 = rows 5 7.
.pass1_main:
    call .pass1_main_part1
    add                  cq, 32
    packssdw             m0, m1
    packssdw             m2, m3
    packssdw             m4, m5
    packssdw             m6, m7
    pshufb               m0, m14
    pshufb               m2, m14
    pshufb               m4, m14
    pshufb               m6, m14
    punpckhdq            m3, m0, m2
    punpckldq            m0, m2
    punpckldq            m2, m4, m6
    punpckhdq            m4, m6
    vperm2i128           m1, m0, m2, 0x31 ; 4 6
    vinserti128          m0, xm2, 1       ; 0 2
    vinserti128          m2, m3, xm4, 1   ; 1 3
    vperm2i128           m3, m4, 0x31     ; 5 7
    ret
; rect2 entry: m0-m3 = (m0-m3 + m11) >> 12 before falling through.
.main_oddhalf_part1_fast_rect2:
    REPX     {paddd x, m11}, m0, m1, m2, m3
    REPX     {psrad x, 12 }, m0, m1, m2, m3
.main_oddhalf_part1_fast: ; lower half zero
    vpbroadcastd         m7, [pd_4091]
    vpbroadcastd         m8, [pd_201]
    vpbroadcastd         m6, [pd_m1380]
Worker vpbroadcastd m9, [pd_3857] 5824*c0909341SAndroid Build Coastguard Worker vpbroadcastd m5, [pd_3703] 5825*c0909341SAndroid Build Coastguard Worker vpbroadcastd m10, [pd_1751] 5826*c0909341SAndroid Build Coastguard Worker vpbroadcastd m4, [pd_m2751] 5827*c0909341SAndroid Build Coastguard Worker vpbroadcastd m15, [pd_3035] 5828*c0909341SAndroid Build Coastguard Worker pmulld m7, m0 5829*c0909341SAndroid Build Coastguard Worker pmulld m0, m8 5830*c0909341SAndroid Build Coastguard Worker pmulld m6, m1 5831*c0909341SAndroid Build Coastguard Worker pmulld m1, m9 5832*c0909341SAndroid Build Coastguard Worker pmulld m5, m2 5833*c0909341SAndroid Build Coastguard Worker pmulld m2, m10 5834*c0909341SAndroid Build Coastguard Worker pmulld m4, m3 5835*c0909341SAndroid Build Coastguard Worker pmulld m3, m15 5836*c0909341SAndroid Build Coastguard Worker jmp .main_oddhalf_part1_fast2 5837*c0909341SAndroid Build Coastguard Worker.main_oddhalf_part1_rect2: 5838*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m11}, m0, m1, m2, m3, m4, m5, m6, m7 5839*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 12 }, m0, m1, m2, m3, m4, m5, m6, m7 5840*c0909341SAndroid Build Coastguard Worker.main_oddhalf_part1: ; in1, in7, in9, in15, in17, in23, in25, in31 5841*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 0, 7, 8, 9, 10, _, 201, 4091 ; t16a, t31a 5842*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 6, 1, 8, 9, 10, _, 3857, 1380 ; t19a, t28a 5843*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 2, 5, 8, 9, 10, _, 1751, 3703 ; t18a, t29a 5844*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 4, 3, 8, 9, 10, _, 3035, 2751 ; t17a, t30a 5845*c0909341SAndroid Build Coastguard Worker.main_oddhalf_part1_fast2: 5846*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m11}, m0, m7, m6, m1, m2, m5, m4, m3 5847*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 12 }, m0, m4, m6, m2, m1, m5, m7, m3 5848*c0909341SAndroid Build Coastguard Worker psubd 
m8, m0, m4 ; t17 5849*c0909341SAndroid Build Coastguard Worker paddd m0, m4 ; t16 5850*c0909341SAndroid Build Coastguard Worker psubd m4, m6, m2 ; t18 5851*c0909341SAndroid Build Coastguard Worker paddd m6, m2 ; t19 5852*c0909341SAndroid Build Coastguard Worker psubd m2, m1, m5 ; t29 5853*c0909341SAndroid Build Coastguard Worker paddd m1, m5 ; t28 5854*c0909341SAndroid Build Coastguard Worker psubd m5, m7, m3 ; t30 5855*c0909341SAndroid Build Coastguard Worker paddd m7, m3 ; t31 5856*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m12}, m8, m5, m4, m2, m0, m6, m1, m7 5857*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m13}, m8, m5, m4, m2, m0, m6, m1, m7 5858*c0909341SAndroid Build Coastguard Worker vpbroadcastd m15, [pd_4017] 5859*c0909341SAndroid Build Coastguard Worker vpbroadcastd m10, [pd_799] 5860*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 5, 8, 3, 9, _, 11, 10, 15 ; t17a, t30a 5861*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 2, 4, 3, 9, _, 11, 10, 15, 2 ; t29a, t18a 5862*c0909341SAndroid Build Coastguard Worker psubd m3, m0, m6 ; t19a 5863*c0909341SAndroid Build Coastguard Worker paddd m0, m6 ; t16a 5864*c0909341SAndroid Build Coastguard Worker psubd m6, m7, m1 ; t28a 5865*c0909341SAndroid Build Coastguard Worker paddd m7, m1 ; t31a 5866*c0909341SAndroid Build Coastguard Worker psubd m1, m5, m4 ; t18 5867*c0909341SAndroid Build Coastguard Worker paddd m5, m4 ; t17 5868*c0909341SAndroid Build Coastguard Worker psubd m4, m8, m2 ; t29 5869*c0909341SAndroid Build Coastguard Worker paddd m8, m2 ; t30 5870*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m12}, m3, m6, m1, m4, m0, m7, m5, m8 5871*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m13}, m3, m6, m1, m4, m0, m7, m5, m8 5872*c0909341SAndroid Build Coastguard Worker vpbroadcastd m15, [pd_3784] 5873*c0909341SAndroid Build Coastguard Worker vpbroadcastd m10, [pd_1567] 5874*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 4, 1, 2, 9, _, 11, 10, 
15 ; t18a, t29a 5875*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 6, 3, 2, 9, _, 11, 10, 15 ; t19, t28 5876*c0909341SAndroid Build Coastguard Worker mova [r6-32*4], m0 5877*c0909341SAndroid Build Coastguard Worker mova [r6-32*3], m5 5878*c0909341SAndroid Build Coastguard Worker mova [r6-32*2], m4 5879*c0909341SAndroid Build Coastguard Worker mova [r6-32*1], m6 5880*c0909341SAndroid Build Coastguard Worker mova [r6+32*0], m3 5881*c0909341SAndroid Build Coastguard Worker mova [r6+32*1], m1 5882*c0909341SAndroid Build Coastguard Worker mova [r6+32*2], m8 5883*c0909341SAndroid Build Coastguard Worker mova [r6+32*3], m7 5884*c0909341SAndroid Build Coastguard Worker ret 5885*c0909341SAndroid Build Coastguard Worker.main_oddhalf_part2_fast_rect2: 5886*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m11}, m0, m1, m2, m3 5887*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 12 }, m0, m1, m2, m3 5888*c0909341SAndroid Build Coastguard Worker.main_oddhalf_part2_fast: ; lower half zero 5889*c0909341SAndroid Build Coastguard Worker vpbroadcastd m7, [pd_m601] 5890*c0909341SAndroid Build Coastguard Worker vpbroadcastd m8, [pd_4052] 5891*c0909341SAndroid Build Coastguard Worker vpbroadcastd m6, [pd_3973] 5892*c0909341SAndroid Build Coastguard Worker vpbroadcastd m9, [pd_995] 5893*c0909341SAndroid Build Coastguard Worker vpbroadcastd m5, [pd_m2106] 5894*c0909341SAndroid Build Coastguard Worker vpbroadcastd m10, [pd_3513] 5895*c0909341SAndroid Build Coastguard Worker vpbroadcastd m4, [pd_3290] 5896*c0909341SAndroid Build Coastguard Worker vpbroadcastd m15, [pd_2440] 5897*c0909341SAndroid Build Coastguard Worker pmulld m7, m0 5898*c0909341SAndroid Build Coastguard Worker pmulld m0, m8 5899*c0909341SAndroid Build Coastguard Worker pmulld m6, m1 5900*c0909341SAndroid Build Coastguard Worker pmulld m1, m9 5901*c0909341SAndroid Build Coastguard Worker pmulld m5, m2 5902*c0909341SAndroid Build Coastguard Worker pmulld m2, m10 5903*c0909341SAndroid Build Coastguard 
Worker pmulld m4, m3 5904*c0909341SAndroid Build Coastguard Worker pmulld m3, m15 5905*c0909341SAndroid Build Coastguard Worker jmp .main_oddhalf_part2_fast2 5906*c0909341SAndroid Build Coastguard Worker.main_oddhalf_part2_rect2: 5907*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m11}, m0, m1, m2, m3, m4, m5, m6, m7 5908*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 12 }, m0, m1, m2, m3, m4, m5, m6, m7 5909*c0909341SAndroid Build Coastguard Worker.main_oddhalf_part2: ; in3, in5, in11, in13, in19, in21, in27, in29 5910*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 7, 0, 8, 9, 10, _, 4052, 601 ; t23a, t24a 5911*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 1, 6, 8, 9, 10, _, 995, 3973 ; t20a, t27a 5912*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 5, 2, 8, 9, 10, _, 3513, 2106 ; t21a, t26a 5913*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 3, 4, 8, 9, 10, _, 2440, 3290 ; t22a, t25a 5914*c0909341SAndroid Build Coastguard Worker.main_oddhalf_part2_fast2: 5915*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m11}, m0, m7, m6, m1, m2, m5, m4, m3 5916*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 12 }, m0, m4, m6, m2, m1, m5, m7, m3 5917*c0909341SAndroid Build Coastguard Worker psubd m8, m0, m4 ; t25 5918*c0909341SAndroid Build Coastguard Worker paddd m0, m4 ; t24 5919*c0909341SAndroid Build Coastguard Worker psubd m4, m6, m2 ; t26 5920*c0909341SAndroid Build Coastguard Worker paddd m6, m2 ; t27 5921*c0909341SAndroid Build Coastguard Worker psubd m2, m1, m5 ; t21 5922*c0909341SAndroid Build Coastguard Worker paddd m1, m5 ; t20 5923*c0909341SAndroid Build Coastguard Worker psubd m5, m7, m3 ; t22 5924*c0909341SAndroid Build Coastguard Worker paddd m7, m3 ; t23 5925*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m12}, m8, m5, m4, m2, m0, m6, m1, m7 5926*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m13}, m8, m5, m4, m2, m0, m6, m1, m7 5927*c0909341SAndroid Build Coastguard Worker 
vpbroadcastd m15, [pd_2276] 5928*c0909341SAndroid Build Coastguard Worker vpbroadcastd m10, [pd_3406] 5929*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 4, 2, 3, 9, _, 11, 10, 15 ; t21a, t26a 5930*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 8, 5, 3, 9, _, 11, 10, 15, 2 ; t25a, t22a 5931*c0909341SAndroid Build Coastguard Worker psubd m3, m0, m6 ; t27a 5932*c0909341SAndroid Build Coastguard Worker paddd m0, m6 ; t24a 5933*c0909341SAndroid Build Coastguard Worker psubd m6, m7, m1 ; t20a 5934*c0909341SAndroid Build Coastguard Worker paddd m7, m1 ; t23a 5935*c0909341SAndroid Build Coastguard Worker psubd m1, m5, m4 ; t21 5936*c0909341SAndroid Build Coastguard Worker paddd m5, m4 ; t22 5937*c0909341SAndroid Build Coastguard Worker psubd m4, m8, m2 ; t26 5938*c0909341SAndroid Build Coastguard Worker paddd m8, m2 ; t25 5939*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m12}, m3, m6, m1, m4, m0, m7, m5, m8 5940*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m13}, m3, m6, m1, m4, m0, m7, m5, m8 5941*c0909341SAndroid Build Coastguard Worker vpbroadcastd m15, [pd_3784] 5942*c0909341SAndroid Build Coastguard Worker vpbroadcastd m10, [pd_1567] 5943*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 4, 1, 2, 9, _, 11, 10, 15, 2 ; t26a, t21a 5944*c0909341SAndroid Build Coastguard Worker ITX_MULSUB_2D 3, 6, 2, 9, _, 11, 10, 15, 2 ; t27, t20 5945*c0909341SAndroid Build Coastguard Worker mova m9, [r6-32*4] ; t16a 5946*c0909341SAndroid Build Coastguard Worker mova m10, [r6-32*3] ; t17 5947*c0909341SAndroid Build Coastguard Worker psubd m2, m9, m7 ; t23 5948*c0909341SAndroid Build Coastguard Worker paddd m9, m7 ; t16 5949*c0909341SAndroid Build Coastguard Worker psubd m7, m10, m5 ; t22a 5950*c0909341SAndroid Build Coastguard Worker paddd m10, m5 ; t17a 5951*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m12}, m9, m10, m2, m7 5952*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m13}, m9, m10, m2, m7 5953*c0909341SAndroid 
Build Coastguard Worker mova [r6-32*4], m9 5954*c0909341SAndroid Build Coastguard Worker mova [r6-32*3], m10 5955*c0909341SAndroid Build Coastguard Worker mova m9, [r6-32*2] ; t18a 5956*c0909341SAndroid Build Coastguard Worker mova m10, [r6-32*1] ; t19 5957*c0909341SAndroid Build Coastguard Worker psubd m5, m9, m1 ; t21 5958*c0909341SAndroid Build Coastguard Worker paddd m9, m1 ; t18 5959*c0909341SAndroid Build Coastguard Worker psubd m1, m10, m6 ; t20a 5960*c0909341SAndroid Build Coastguard Worker paddd m10, m6 ; t19a 5961*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m12}, m9, m10, m5, m1 5962*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m13}, m9, m10, m5, m1 5963*c0909341SAndroid Build Coastguard Worker mova [r6-32*2], m9 5964*c0909341SAndroid Build Coastguard Worker mova [r6-32*1], m10 5965*c0909341SAndroid Build Coastguard Worker mova m9, [r6+32*0] ; t28 5966*c0909341SAndroid Build Coastguard Worker mova m10, [r6+32*1] ; t29a 5967*c0909341SAndroid Build Coastguard Worker psubd m6, m9, m3 ; t27a 5968*c0909341SAndroid Build Coastguard Worker paddd m9, m3 ; t28a 5969*c0909341SAndroid Build Coastguard Worker psubd m3, m10, m4 ; t26 5970*c0909341SAndroid Build Coastguard Worker paddd m10, m4 ; t29 5971*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m12}, m9, m10, m6, m3 5972*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m13}, m9, m10, m6, m3 5973*c0909341SAndroid Build Coastguard Worker REPX {pmulld x, m14}, m6, m3, m1, m5 5974*c0909341SAndroid Build Coastguard Worker paddd m6, m11 5975*c0909341SAndroid Build Coastguard Worker paddd m3, m11 5976*c0909341SAndroid Build Coastguard Worker psubd m4, m6, m1 ; t20 5977*c0909341SAndroid Build Coastguard Worker paddd m6, m1 ; t27 5978*c0909341SAndroid Build Coastguard Worker psubd m1, m3, m5 ; t21a 5979*c0909341SAndroid Build Coastguard Worker paddd m3, m5 ; t26a 5980*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 12 }, m4, m1, m3, m6 5981*c0909341SAndroid Build 
Coastguard Worker mova [r6+32*0], m4 5982*c0909341SAndroid Build Coastguard Worker mova [r6+32*1], m1 5983*c0909341SAndroid Build Coastguard Worker mova m4, [r6+32*2] ; t30 5984*c0909341SAndroid Build Coastguard Worker mova m1, [r6+32*3] ; t31a 5985*c0909341SAndroid Build Coastguard Worker psubd m5, m4, m8 ; t25a 5986*c0909341SAndroid Build Coastguard Worker paddd m4, m8 ; t30a 5987*c0909341SAndroid Build Coastguard Worker psubd m8, m1, m0 ; t24 5988*c0909341SAndroid Build Coastguard Worker paddd m1, m0 ; t31 5989*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m12}, m8, m5, m4, m1 5990*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m13}, m8, m5, m4, m1 5991*c0909341SAndroid Build Coastguard Worker REPX {pmulld x, m14}, m5, m8, m7, m2 5992*c0909341SAndroid Build Coastguard Worker paddd m5, m11 5993*c0909341SAndroid Build Coastguard Worker paddd m8, m11 5994*c0909341SAndroid Build Coastguard Worker psubd m0, m5, m7 ; t22 5995*c0909341SAndroid Build Coastguard Worker paddd m5, m7 ; t25 5996*c0909341SAndroid Build Coastguard Worker psubd m7, m8, m2 ; t23a 5997*c0909341SAndroid Build Coastguard Worker paddd m2, m8 ; t24a 5998*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 12 }, m0, m7, m2, m5 5999*c0909341SAndroid Build Coastguard Worker mova [r6+32*2], m0 6000*c0909341SAndroid Build Coastguard Worker mova [r6+32*3], m7 6001*c0909341SAndroid Build Coastguard Worker mov r4, r6 6002*c0909341SAndroid Build Coastguard Worker add r6, 32*8 6003*c0909341SAndroid Build Coastguard Worker mova [r6-32*4], m2 6004*c0909341SAndroid Build Coastguard Worker mova [r6-32*3], m5 6005*c0909341SAndroid Build Coastguard Worker mova [r6-32*2], m3 6006*c0909341SAndroid Build Coastguard Worker mova [r6-32*1], m6 6007*c0909341SAndroid Build Coastguard Worker mova [r6+32*0], m9 6008*c0909341SAndroid Build Coastguard Worker mova [r6+32*1], m10 6009*c0909341SAndroid Build Coastguard Worker mova [r6+32*2], m4 6010*c0909341SAndroid Build Coastguard Worker mova [r6+32*3], 
m1 6011*c0909341SAndroid Build Coastguard Worker mov r5, r6 6012*c0909341SAndroid Build Coastguard Worker add r6, 32*8 6013*c0909341SAndroid Build Coastguard Worker ret 6014*c0909341SAndroid Build Coastguard WorkerALIGN function_align 6015*c0909341SAndroid Build Coastguard Worker.main_end: 6016*c0909341SAndroid Build Coastguard Worker psrld m11, 10 ; pd_2 6017*c0909341SAndroid Build Coastguard Worker IDCT32_END 0, 15, 8, 9, 10, 2 6018*c0909341SAndroid Build Coastguard Worker IDCT32_END 1, 14, 8, 9, 10, 2 6019*c0909341SAndroid Build Coastguard Worker punpckhwd m8, m0, m1 ; 16 17 6020*c0909341SAndroid Build Coastguard Worker punpcklwd m0, m1 ; 0 1 6021*c0909341SAndroid Build Coastguard Worker punpcklwd m1, m14, m15 ; 14 15 6022*c0909341SAndroid Build Coastguard Worker punpckhwd m14, m15 ; 30 31 6023*c0909341SAndroid Build Coastguard Worker mova [r5+32*3], m8 6024*c0909341SAndroid Build Coastguard Worker mova [r5+32*2], m14 6025*c0909341SAndroid Build Coastguard Worker IDCT32_END 2, 15, 8, 9, 10, 2 6026*c0909341SAndroid Build Coastguard Worker IDCT32_END 3, 14, 8, 9, 10, 2 6027*c0909341SAndroid Build Coastguard Worker punpckhwd m8, m2, m3 ; 18 19 6028*c0909341SAndroid Build Coastguard Worker punpcklwd m2, m3 ; 2 3 6029*c0909341SAndroid Build Coastguard Worker punpcklwd m3, m14, m15 ; 12 13 6030*c0909341SAndroid Build Coastguard Worker punpckhwd m14, m15 ; 28 29 6031*c0909341SAndroid Build Coastguard Worker mova [r5+32*1], m8 6032*c0909341SAndroid Build Coastguard Worker mova [r5+32*0], m14 6033*c0909341SAndroid Build Coastguard Worker IDCT32_END 4, 15, 8, 9, 10, 2 6034*c0909341SAndroid Build Coastguard Worker IDCT32_END 5, 14, 8, 9, 10, 2 6035*c0909341SAndroid Build Coastguard Worker punpckhwd m8, m4, m5 ; 20 21 6036*c0909341SAndroid Build Coastguard Worker punpcklwd m4, m5 ; 4 5 6037*c0909341SAndroid Build Coastguard Worker punpcklwd m5, m14, m15 ; 10 11 6038*c0909341SAndroid Build Coastguard Worker punpckhwd m14, m15 ; 26 27 6039*c0909341SAndroid Build Coastguard 
Worker mova [r5-32*1], m8 6040*c0909341SAndroid Build Coastguard Worker mova [r5-32*2], m14 6041*c0909341SAndroid Build Coastguard Worker IDCT32_END 6, 15, 8, 9, 10, 2 6042*c0909341SAndroid Build Coastguard Worker IDCT32_END 7, 14, 8, 9, 10, 2 6043*c0909341SAndroid Build Coastguard Worker punpckhwd m8, m6, m7 ; 22 23 6044*c0909341SAndroid Build Coastguard Worker punpcklwd m6, m7 ; 6 7 6045*c0909341SAndroid Build Coastguard Worker punpcklwd m7, m14, m15 ; 8 9 6046*c0909341SAndroid Build Coastguard Worker punpckhwd m14, m15 ; 24 25 6047*c0909341SAndroid Build Coastguard Worker mova [r5-32*3], m8 6048*c0909341SAndroid Build Coastguard Worker mova [r5-32*4], m14 6049*c0909341SAndroid Build Coastguard Worker.transpose: 6050*c0909341SAndroid Build Coastguard Worker punpckhdq m15, m3, m1 6051*c0909341SAndroid Build Coastguard Worker punpckldq m3, m1 6052*c0909341SAndroid Build Coastguard Worker punpckhdq m1, m4, m6 6053*c0909341SAndroid Build Coastguard Worker punpckldq m4, m6 6054*c0909341SAndroid Build Coastguard Worker punpckhdq m6, m0, m2 6055*c0909341SAndroid Build Coastguard Worker punpckldq m0, m2 6056*c0909341SAndroid Build Coastguard Worker punpckhdq m2, m7, m5 6057*c0909341SAndroid Build Coastguard Worker punpckldq m7, m5 6058*c0909341SAndroid Build Coastguard Worker punpcklqdq m5, m2, m15 6059*c0909341SAndroid Build Coastguard Worker punpckhqdq m2, m15 6060*c0909341SAndroid Build Coastguard Worker punpckhqdq m15, m7, m3 6061*c0909341SAndroid Build Coastguard Worker punpcklqdq m7, m3 6062*c0909341SAndroid Build Coastguard Worker punpckhqdq m3, m6, m1 6063*c0909341SAndroid Build Coastguard Worker punpcklqdq m6, m1 6064*c0909341SAndroid Build Coastguard Worker punpckhqdq m1, m0, m4 6065*c0909341SAndroid Build Coastguard Worker punpcklqdq m0, m4 6066*c0909341SAndroid Build Coastguard Worker vperm2i128 m4, m0, m7, 0x31 6067*c0909341SAndroid Build Coastguard Worker vinserti128 m0, xm7, 1 6068*c0909341SAndroid Build Coastguard Worker vperm2i128 m7, m3, m2, 0x31 
; (remaining lane shuffles of the preceding .transpose routine; instructions
;  unchanged, only restored to one statement per line)
    vinserti128   m3, xm2, 1
    vinserti128   m2, m6, xm5, 1
    vperm2i128    m6, m5, 0x31
    vperm2i128    m5, m1, m15, 0x31
    vinserti128   m1, xm15, 1
    ret

;-----------------------------------------------------------------------
; inv_txfm_add_identity_identity_8x32_10bpc(dst, stride, c, eob)
;
; Adds an identity/identity 8x32 residual block to the destination and
; clears the coefficients it consumed.
;
; In:    dstq    = destination pixels (16-bit samples, 8 per row)
;        strideq = destination row pitch in bytes
;        cq      = 32-bit coefficients; eight 32-byte vectors spaced
;                  128 bytes apart are read (and zeroed) per iteration
;        eobd    = end-of-block index, used only to pick the number of
;                  8-row iterations
; Regs:  m5 = rounding constant from pw_5, m6 = zero, m7 = pixel maximum
;        r6 = stride*3, r5 = stride*5, r4 = stride*7
; Local entry points:
;        .pass1     - skips the m7 load.
;                     NOTE(review): presumably entered by other bit-depth
;                     variants that preload their own maximum into m7;
;                     no such caller is visible here - confirm.
;        .main_zero - zero the 8 coefficient vectors at cq, then fall
;                     through to .main
;        .main      - rearrange m0-m3, add to 8 dst rows, clamp to
;                     [m6, m7] and store
;-----------------------------------------------------------------------
cglobal inv_txfm_add_identity_identity_8x32_10bpc, 4, 7, 8, dst, stride, c, eob
    vpbroadcastd  m7, [pixel_10bpc_max]
.pass1:
    vpbroadcastd  m5, [pw_5]
    pxor          m6, m6
    ; Bias the low byte of eob by 21 so that the thresholds 43/107/171
    ; become multiples of 64. If the 8-bit add carries (low byte >= 235)
    ; the original eob is restored instead of the wrapped value.
    mov           r6d, eobd
    add           eobb, 21
    cmovc         eobd, r6d             ; 43, 107, 171 -> 64, 128, 192
    lea           r6, [strideq*3]
    lea           r5, [strideq*5]
    lea           r4, [strideq+r6*2]    ; strideq*7
.loop:
    ; Load eight dword vectors and narrow them pairwise to words with
    ; signed saturation.
    mova          m0, [cq+128*0]
    packssdw      m0, [cq+128*1]
    mova          m1, [cq+128*2]
    packssdw      m1, [cq+128*3]
    mova          m2, [cq+128*4]
    packssdw      m2, [cq+128*5]
    mova          m3, [cq+128*6]
    packssdw      m3, [cq+128*7]
    ; Saturating add of m5 followed by an arithmetic shift right by 3
    ; (presumably (x + 5) >> 3; pw_5 is defined outside this view).
    REPX {paddsw x, m5}, m0, m1, m2, m3
    REPX {psraw  x, 3 }, m0, m1, m2, m3
    call .main_zero
    add           cq, 32                ; next 32-byte group of coefficients
    lea           dstq, [dstq+strideq*8] ; next 8 destination rows
    sub           eobd, 64
    jge .loop                           ; signed: stop once the count is negative
    RET
ALIGN function_align
.main_zero:
    ; m6 is zero here: clear the coefficients that were just loaded.
    REPX {mova [cq+128*x], m6}, 0, 1, 2, 3, 4, 5, 6, 7
.main:
    ; Word and qword interleaves rearrange m0-m3 so that each 128-bit
    ; lane lines up with one destination row
    ; (low lanes -> rows 0-3, high lanes -> rows 4-7).
    punpckhwd     m4, m0, m1
    punpcklwd     m0, m1
    punpckhwd     m1, m2, m3
    punpcklwd     m2, m3
    punpckhwd     m3, m0, m4
    punpcklwd     m0, m4
    punpckhwd     m4, m2, m1
    punpcklwd     m2, m1
    punpckhqdq    m1, m0, m2
    punpcklqdq    m0, m2
    punpcklqdq    m2, m3, m4
    punpckhqdq    m3, m4
    ; Gather two destination rows per register and add the residual.
    mova          xm4, [dstq+strideq*0]
    vinserti128   m4, [dstq+strideq*4], 1
    paddw         m0, m4                ; rows 0 and 4
    mova          xm4, [dstq+strideq*1]
    vinserti128   m4, [dstq+r5], 1
    paddw         m1, m4                ; rows 1 and 5
    mova          xm4, [dstq+strideq*2]
    vinserti128   m4, [dstq+r6*2], 1
    paddw         m2, m4                ; rows 2 and 6
    mova          xm4, [dstq+r6]
    vinserti128   m4, [dstq+r4], 1
    paddw         m3, m4                ; rows 3 and 7
    ; Clamp to [0, pixel max] (m6 = 0, m7 = max).
    REPX {pmaxsw x, m6}, m0, m1, m2, m3
    REPX {pminsw x, m7}, m0, m1, m2, m3
    mova          [dstq+strideq*0], xm0
    vextracti128  [dstq+strideq*4], m0, 1
    mova          [dstq+strideq*1], xm1
    vextracti128  [dstq+r5], m1, 1
    mova          [dstq+strideq*2], xm2
    vextracti128  [dstq+r6*2], m2, 1
    mova          [dstq+r6], xm3
    vextracti128  [dstq+r4], m3, 1
    ret

cglobal inv_txfm_add_dct_dct_8x32_12bpc, 4, 7, 0, dst, stride, c, eob
    test          eobd, eobd
    jz .dconly
    PROLOGUE 0, 7, 16, 32*24, dst, stride, c, eob
%undef cmp
    vpbroadcastd  m11, [pd_2048]
    vpbroadcastd  m12, [clip_20b_min]
    vpbroadcastd  m13, [clip_20b_max]
    mov           r4, cq
    lea           r6, [rsp+32*4]
    call .pass1_main
    cmp           eobd, 43
6156*c0909341SAndroid Build Coastguard Worker jge .eob43 6157*c0909341SAndroid Build Coastguard Worker jmp .pass2_fast 6158*c0909341SAndroid Build Coastguard Worker.eob43: 6159*c0909341SAndroid Build Coastguard Worker call .pass1_main 6160*c0909341SAndroid Build Coastguard Worker cmp eobd, 107 6161*c0909341SAndroid Build Coastguard Worker jge .eob107 6162*c0909341SAndroid Build Coastguard Worker.pass2_fast: 6163*c0909341SAndroid Build Coastguard Worker mov cq, r4 6164*c0909341SAndroid Build Coastguard Worker vpbroadcastd m12, [clip_18b_min] 6165*c0909341SAndroid Build Coastguard Worker vpbroadcastd m13, [clip_18b_max] 6166*c0909341SAndroid Build Coastguard Worker pmaxsd m0, m12, [cq+128*1+ 0] 6167*c0909341SAndroid Build Coastguard Worker pmaxsd m1, m12, [cq+128*7+ 0] 6168*c0909341SAndroid Build Coastguard Worker pmaxsd m2, m12, [cq+128*1+32] 6169*c0909341SAndroid Build Coastguard Worker pmaxsd m3, m12, [cq+128*7+32] 6170*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m13}, m0, m1, m2, m3 6171*c0909341SAndroid Build Coastguard Worker vpbroadcastd m14, [pd_2896] 6172*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_8x32_10bpc).main_oddhalf_part1_fast 6173*c0909341SAndroid Build Coastguard Worker pmaxsd m0, m12, [cq+128*3+ 0] 6174*c0909341SAndroid Build Coastguard Worker pmaxsd m1, m12, [cq+128*5+ 0] 6175*c0909341SAndroid Build Coastguard Worker pmaxsd m2, m12, [cq+128*3+32] 6176*c0909341SAndroid Build Coastguard Worker pmaxsd m3, m12, [cq+128*5+32] 6177*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m13}, m0, m1, m2, m3 6178*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_8x32_10bpc).main_oddhalf_part2_fast 6179*c0909341SAndroid Build Coastguard Worker pmaxsd m0, m12, [cq+128*2+ 0] 6180*c0909341SAndroid Build Coastguard Worker pmaxsd m1, m12, [cq+128*6+ 0] 6181*c0909341SAndroid Build Coastguard Worker pmaxsd m2, m12, [cq+128*2+32] 6182*c0909341SAndroid Build Coastguard Worker pmaxsd m3, m12, [cq+128*6+32] 
6183*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m13}, m0, m1, m2, m3 6184*c0909341SAndroid Build Coastguard Worker call m(idct_8x16_internal_10bpc).main_oddhalf_fast 6185*c0909341SAndroid Build Coastguard Worker pmaxsd m0, m12, [cq+128*0+ 0] 6186*c0909341SAndroid Build Coastguard Worker pmaxsd m1, m12, [cq+128*4+ 0] 6187*c0909341SAndroid Build Coastguard Worker pmaxsd m2, m12, [cq+128*0+32] 6188*c0909341SAndroid Build Coastguard Worker pmaxsd m3, m12, [cq+128*4+32] 6189*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m13}, m0, m1, m2, m3 6190*c0909341SAndroid Build Coastguard Worker pxor m4, m4 6191*c0909341SAndroid Build Coastguard Worker REPX {mova x, m4}, m5, m6, m7 6192*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_10bpc).main 6193*c0909341SAndroid Build Coastguard Worker call m(idct_8x16_internal_10bpc).main_evenhalf 6194*c0909341SAndroid Build Coastguard Worker jmp .pass2_end 6195*c0909341SAndroid Build Coastguard Worker.eob107: 6196*c0909341SAndroid Build Coastguard Worker call .pass1_main 6197*c0909341SAndroid Build Coastguard Worker cmp eobd, 171 6198*c0909341SAndroid Build Coastguard Worker jge .eob171 6199*c0909341SAndroid Build Coastguard Worker jmp .pass2 6200*c0909341SAndroid Build Coastguard Worker.eob171: 6201*c0909341SAndroid Build Coastguard Worker call .pass1_main 6202*c0909341SAndroid Build Coastguard Worker.pass2: 6203*c0909341SAndroid Build Coastguard Worker mov cq, r4 6204*c0909341SAndroid Build Coastguard Worker vpbroadcastd m12, [clip_18b_min] 6205*c0909341SAndroid Build Coastguard Worker vpbroadcastd m13, [clip_18b_max] 6206*c0909341SAndroid Build Coastguard Worker pmaxsd m0, m12, [cq+128*1+ 0] 6207*c0909341SAndroid Build Coastguard Worker pmaxsd m1, m12, [cq+128*7+ 0] 6208*c0909341SAndroid Build Coastguard Worker pmaxsd m2, m12, [cq+128*1+32] 6209*c0909341SAndroid Build Coastguard Worker pmaxsd m3, m12, [cq+128*7+32] 6210*c0909341SAndroid Build Coastguard Worker pmaxsd m4, m12, [cq+128*1+64] 
6211*c0909341SAndroid Build Coastguard Worker pmaxsd m5, m12, [cq+128*7+64] 6212*c0909341SAndroid Build Coastguard Worker pmaxsd m6, m12, [cq+128*1+96] 6213*c0909341SAndroid Build Coastguard Worker pmaxsd m7, m12, [cq+128*7+96] 6214*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m13}, m0, m1, m2, m3, m4, m5, m6, m7 6215*c0909341SAndroid Build Coastguard Worker vpbroadcastd m14, [pd_2896] 6216*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_8x32_10bpc).main_oddhalf_part1 6217*c0909341SAndroid Build Coastguard Worker pmaxsd m0, m12, [cq+128*3+ 0] 6218*c0909341SAndroid Build Coastguard Worker pmaxsd m1, m12, [cq+128*5+ 0] 6219*c0909341SAndroid Build Coastguard Worker pmaxsd m2, m12, [cq+128*3+32] 6220*c0909341SAndroid Build Coastguard Worker pmaxsd m3, m12, [cq+128*5+32] 6221*c0909341SAndroid Build Coastguard Worker pmaxsd m4, m12, [cq+128*3+64] 6222*c0909341SAndroid Build Coastguard Worker pmaxsd m5, m12, [cq+128*5+64] 6223*c0909341SAndroid Build Coastguard Worker pmaxsd m6, m12, [cq+128*3+96] 6224*c0909341SAndroid Build Coastguard Worker pmaxsd m7, m12, [cq+128*5+96] 6225*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m13}, m0, m1, m2, m3, m4, m5, m6, m7 6226*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_8x32_10bpc).main_oddhalf_part2 6227*c0909341SAndroid Build Coastguard Worker pmaxsd m0, m12, [cq+128*2+ 0] 6228*c0909341SAndroid Build Coastguard Worker pmaxsd m1, m12, [cq+128*6+ 0] 6229*c0909341SAndroid Build Coastguard Worker pmaxsd m2, m12, [cq+128*2+32] 6230*c0909341SAndroid Build Coastguard Worker pmaxsd m3, m12, [cq+128*6+32] 6231*c0909341SAndroid Build Coastguard Worker pmaxsd m4, m12, [cq+128*2+64] 6232*c0909341SAndroid Build Coastguard Worker pmaxsd m5, m12, [cq+128*6+64] 6233*c0909341SAndroid Build Coastguard Worker pmaxsd m6, m12, [cq+128*2+96] 6234*c0909341SAndroid Build Coastguard Worker pmaxsd m7, m12, [cq+128*6+96] 6235*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m13}, m0, 
m1, m2, m3, m4, m5, m6, m7 6236*c0909341SAndroid Build Coastguard Worker call m(idct_8x16_internal_10bpc).main_oddhalf 6237*c0909341SAndroid Build Coastguard Worker pmaxsd m0, m12, [cq+128*0+ 0] 6238*c0909341SAndroid Build Coastguard Worker pmaxsd m1, m12, [cq+128*4+ 0] 6239*c0909341SAndroid Build Coastguard Worker pmaxsd m2, m12, [cq+128*0+32] 6240*c0909341SAndroid Build Coastguard Worker pmaxsd m3, m12, [cq+128*4+32] 6241*c0909341SAndroid Build Coastguard Worker pmaxsd m4, m12, [cq+128*0+64] 6242*c0909341SAndroid Build Coastguard Worker pmaxsd m5, m12, [cq+128*4+64] 6243*c0909341SAndroid Build Coastguard Worker pmaxsd m6, m12, [cq+128*0+96] 6244*c0909341SAndroid Build Coastguard Worker pmaxsd m7, m12, [cq+128*4+96] 6245*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m13}, m0, m1, m2, m3, m4, m5, m6, m7 6246*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_10bpc).main 6247*c0909341SAndroid Build Coastguard Worker call m(idct_8x16_internal_10bpc).main_evenhalf 6248*c0909341SAndroid Build Coastguard Worker.pass2_end: 6249*c0909341SAndroid Build Coastguard Worker psrld m11, 8 ; pd_8 6250*c0909341SAndroid Build Coastguard Worker IDCT32_END 0, 15, 8, 9, 10, 4 6251*c0909341SAndroid Build Coastguard Worker IDCT32_END 1, 14, 8, 9, 10, 4 6252*c0909341SAndroid Build Coastguard Worker punpckhqdq m8, m0, m1 ; 16 17 (interleaved) 6253*c0909341SAndroid Build Coastguard Worker punpcklqdq m0, m1 ; 0 1 (interleaved) 6254*c0909341SAndroid Build Coastguard Worker punpcklqdq m1, m14, m15 ; 14 15 (interleaved) 6255*c0909341SAndroid Build Coastguard Worker punpckhqdq m14, m15 ; 30 31 (interleaved) 6256*c0909341SAndroid Build Coastguard Worker mova [r5+32*3], m8 6257*c0909341SAndroid Build Coastguard Worker mova [r5+32*2], m14 6258*c0909341SAndroid Build Coastguard Worker IDCT32_END 2, 15, 8, 9, 10, 4 6259*c0909341SAndroid Build Coastguard Worker IDCT32_END 3, 14, 8, 9, 10, 4 6260*c0909341SAndroid Build Coastguard Worker punpckhqdq m8, m2, m3 ; 18 19 
(interleaved) 6261*c0909341SAndroid Build Coastguard Worker punpcklqdq m2, m3 ; 2 3 (interleaved) 6262*c0909341SAndroid Build Coastguard Worker punpcklqdq m3, m14, m15 ; 12 13 (interleaved) 6263*c0909341SAndroid Build Coastguard Worker punpckhqdq m14, m15 ; 28 29 (interleaved) 6264*c0909341SAndroid Build Coastguard Worker mova [r5+32*1], m8 6265*c0909341SAndroid Build Coastguard Worker mova [r5+32*0], m14 6266*c0909341SAndroid Build Coastguard Worker IDCT32_END 4, 15, 8, 9, 10, 4 6267*c0909341SAndroid Build Coastguard Worker IDCT32_END 5, 14, 8, 9, 10, 4 6268*c0909341SAndroid Build Coastguard Worker punpckhqdq m8, m4, m5 ; 20 21 (interleaved) 6269*c0909341SAndroid Build Coastguard Worker punpcklqdq m4, m5 ; 4 5 (interleaved) 6270*c0909341SAndroid Build Coastguard Worker punpcklqdq m5, m14, m15 ; 10 11 (interleaved) 6271*c0909341SAndroid Build Coastguard Worker punpckhqdq m14, m15 ; 26 27 (interleaved) 6272*c0909341SAndroid Build Coastguard Worker mova [r5-32*1], m8 6273*c0909341SAndroid Build Coastguard Worker mova [r5-32*2], m14 6274*c0909341SAndroid Build Coastguard Worker IDCT32_END 6, 15, 8, 9, 10, 4 6275*c0909341SAndroid Build Coastguard Worker IDCT32_END 7, 14, 8, 9, 10, 4 6276*c0909341SAndroid Build Coastguard Worker punpckhqdq m8, m6, m7 ; 22 23 (interleaved) 6277*c0909341SAndroid Build Coastguard Worker punpcklqdq m6, m7 ; 6 7 (interleaved) 6278*c0909341SAndroid Build Coastguard Worker punpcklqdq m7, m14, m15 ; 8 9 (interleaved) 6279*c0909341SAndroid Build Coastguard Worker punpckhqdq m14, m15 ; 24 25 (interleaved) 6280*c0909341SAndroid Build Coastguard Worker mova [r5-32*3], m8 6281*c0909341SAndroid Build Coastguard Worker mova [r5-32*4], m14 6282*c0909341SAndroid Build Coastguard Worker mova m15, m1 6283*c0909341SAndroid Build Coastguard Worker.end: 6284*c0909341SAndroid Build Coastguard Worker vpermq m0, m0, q3120 6285*c0909341SAndroid Build Coastguard Worker vpermq m1, m2, q3120 6286*c0909341SAndroid Build Coastguard Worker call 
m(idct_8x8_internal_12bpc).write_8x4_start 6287*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_10bpc).write_8x4 6288*c0909341SAndroid Build Coastguard Worker vpermq m0, m4, q3120 6289*c0909341SAndroid Build Coastguard Worker vpermq m1, m6, q3120 6290*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_10bpc).write_8x4 6291*c0909341SAndroid Build Coastguard Worker vpermq m0, m7, q3120 6292*c0909341SAndroid Build Coastguard Worker vpermq m1, m5, q3120 6293*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_10bpc).write_8x4 6294*c0909341SAndroid Build Coastguard Worker vpermq m0, m3, q3120 6295*c0909341SAndroid Build Coastguard Worker vpermq m1, m15, q3120 6296*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_10bpc).write_8x4 6297*c0909341SAndroid Build Coastguard Worker vpermq m0, [r5+32*3], q3120 6298*c0909341SAndroid Build Coastguard Worker vpermq m1, [r5+32*1], q3120 6299*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_10bpc).write_8x4 6300*c0909341SAndroid Build Coastguard Worker vpermq m0, [r5-32*1], q3120 6301*c0909341SAndroid Build Coastguard Worker vpermq m1, [r5-32*3], q3120 6302*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_10bpc).write_8x4 6303*c0909341SAndroid Build Coastguard Worker vpermq m0, [r5-32*4], q3120 6304*c0909341SAndroid Build Coastguard Worker vpermq m1, [r5-32*2], q3120 6305*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_10bpc).write_8x4 6306*c0909341SAndroid Build Coastguard Worker vpermq m0, [r5+32*0], q3120 6307*c0909341SAndroid Build Coastguard Worker vpermq m1, [r5+32*2], q3120 6308*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_10bpc).write_8x4 6309*c0909341SAndroid Build Coastguard Worker RET 6310*c0909341SAndroid Build Coastguard Worker.dconly: 6311*c0909341SAndroid Build Coastguard Worker imul r6d, [cq], 181 6312*c0909341SAndroid Build Coastguard Worker vpbroadcastd m2, [dconly_12bpc] 
6313*c0909341SAndroid Build Coastguard Worker mov [cq], eobd ; 0 6314*c0909341SAndroid Build Coastguard Worker or r3d, 32 6315*c0909341SAndroid Build Coastguard Worker add r6d, 640 6316*c0909341SAndroid Build Coastguard Worker sar r6d, 10 6317*c0909341SAndroid Build Coastguard Worker jmp m(inv_txfm_add_dct_dct_8x8_10bpc).dconly3 6318*c0909341SAndroid Build Coastguard WorkerALIGN function_align 6319*c0909341SAndroid Build Coastguard Worker.pass1_main: 6320*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_8x32_10bpc).pass1_main_part1 6321*c0909341SAndroid Build Coastguard Worker TRANSPOSE_8X8_DWORD 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15 6322*c0909341SAndroid Build Coastguard Worker mova [cq+128*0], m0 6323*c0909341SAndroid Build Coastguard Worker mova [cq+128*1], m1 6324*c0909341SAndroid Build Coastguard Worker mova [cq+128*2], m2 6325*c0909341SAndroid Build Coastguard Worker mova [cq+128*3], m3 6326*c0909341SAndroid Build Coastguard Worker mova [cq+128*4], m4 6327*c0909341SAndroid Build Coastguard Worker mova [cq+128*5], m5 6328*c0909341SAndroid Build Coastguard Worker mova [cq+128*6], m6 6329*c0909341SAndroid Build Coastguard Worker mova [cq+128*7], m7 6330*c0909341SAndroid Build Coastguard Worker add cq, 32 6331*c0909341SAndroid Build Coastguard Worker ret 6332*c0909341SAndroid Build Coastguard WorkerALIGN function_align 6333*c0909341SAndroid Build Coastguard Worker.main_end: 6334*c0909341SAndroid Build Coastguard Worker psrld m11, 10 ; pd_2 6335*c0909341SAndroid Build Coastguard Worker IDCT32_END 0, 15, 8, 9, 10, 2, 0 6336*c0909341SAndroid Build Coastguard Worker mova [cq+32*16], m8 6337*c0909341SAndroid Build Coastguard Worker mova [cq+32*31], m9 6338*c0909341SAndroid Build Coastguard Worker IDCT32_END 1, 14, 8, 9, 10, 2, 0 6339*c0909341SAndroid Build Coastguard Worker mova [cq+32*17], m8 6340*c0909341SAndroid Build Coastguard Worker mova [cq+32*30], m9 6341*c0909341SAndroid Build Coastguard Worker mova [cq+32*14], m14 6342*c0909341SAndroid 
Build Coastguard Worker IDCT32_END 2, 14, 8, 9, 10, 2, 0 6343*c0909341SAndroid Build Coastguard Worker mova [cq+32*18], m8 6344*c0909341SAndroid Build Coastguard Worker mova [cq+32*29], m9 6345*c0909341SAndroid Build Coastguard Worker mova [cq+32*13], m14 6346*c0909341SAndroid Build Coastguard Worker IDCT32_END 3, 14, 8, 9, 10, 2, 0 6347*c0909341SAndroid Build Coastguard Worker mova [cq+32*19], m8 6348*c0909341SAndroid Build Coastguard Worker mova [cq+32*28], m9 6349*c0909341SAndroid Build Coastguard Worker mova [cq+32*12], m14 6350*c0909341SAndroid Build Coastguard Worker IDCT32_END 4, 14, 8, 9, 10, 2, 0 6351*c0909341SAndroid Build Coastguard Worker mova [cq+32*20], m8 6352*c0909341SAndroid Build Coastguard Worker mova [cq+32*27], m9 6353*c0909341SAndroid Build Coastguard Worker mova [cq+32* 0], m0 6354*c0909341SAndroid Build Coastguard Worker mova [cq+32* 1], m1 6355*c0909341SAndroid Build Coastguard Worker mova [cq+32* 2], m2 6356*c0909341SAndroid Build Coastguard Worker IDCT32_END 5, 10, 0, 1, 2, 2, 0 6357*c0909341SAndroid Build Coastguard Worker mova [cq+32*21], m0 6358*c0909341SAndroid Build Coastguard Worker mova [cq+32*26], m1 6359*c0909341SAndroid Build Coastguard Worker IDCT32_END 6, 9, 0, 1, 2, 2, 0 6360*c0909341SAndroid Build Coastguard Worker mova [cq+32*22], m0 6361*c0909341SAndroid Build Coastguard Worker mova [cq+32*25], m1 6362*c0909341SAndroid Build Coastguard Worker IDCT32_END 7, 8, 0, 1, 2, 2, 0 6363*c0909341SAndroid Build Coastguard Worker mova [cq+32*23], m0 6364*c0909341SAndroid Build Coastguard Worker mova [cq+32*24], m1 6365*c0909341SAndroid Build Coastguard Worker mova m0, [cq+32* 0] 6366*c0909341SAndroid Build Coastguard Worker mova m1, [cq+32* 1] 6367*c0909341SAndroid Build Coastguard Worker mova m2, [cq+32* 2] 6368*c0909341SAndroid Build Coastguard Worker mova m11, m14 6369*c0909341SAndroid Build Coastguard Worker mova m12, [cq+32*12] 6370*c0909341SAndroid Build Coastguard Worker mova m13, [cq+32*13] 6371*c0909341SAndroid Build 
Coastguard Worker mova m14, [cq+32*14] 6372*c0909341SAndroid Build Coastguard Worker ret 6373*c0909341SAndroid Build Coastguard Worker 6374*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_identity_identity_8x32_12bpc, 4, 7, 8, dst, stride, c, eob 6375*c0909341SAndroid Build Coastguard Worker vpbroadcastd m7, [pixel_12bpc_max] 6376*c0909341SAndroid Build Coastguard Worker jmp m(inv_txfm_add_identity_identity_8x32_10bpc).pass1 6377*c0909341SAndroid Build Coastguard Worker 6378*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_dct_dct_32x8_10bpc, 4, 7, 0, dst, stride, c, eob 6379*c0909341SAndroid Build Coastguard Worker test eobd, eobd 6380*c0909341SAndroid Build Coastguard Worker jnz .full 6381*c0909341SAndroid Build Coastguard Worker imul r6d, [cq], 181 6382*c0909341SAndroid Build Coastguard Worker vpbroadcastd m3, [dconly_10bpc] 6383*c0909341SAndroid Build Coastguard Worker mov [cq], eobd ; 0 6384*c0909341SAndroid Build Coastguard Worker or r3d, 8 6385*c0909341SAndroid Build Coastguard Worker.dconly: 6386*c0909341SAndroid Build Coastguard Worker add r6d, 640 6387*c0909341SAndroid Build Coastguard Worker sar r6d, 10 6388*c0909341SAndroid Build Coastguard Worker.dconly2: 6389*c0909341SAndroid Build Coastguard Worker imul r6d, 181 6390*c0909341SAndroid Build Coastguard Worker add r6d, 2176 6391*c0909341SAndroid Build Coastguard Worker sar r6d, 12 6392*c0909341SAndroid Build Coastguard Worker movd xm0, r6d 6393*c0909341SAndroid Build Coastguard Worker paddsw xm0, xm3 6394*c0909341SAndroid Build Coastguard Worker vpbroadcastw m0, xm0 6395*c0909341SAndroid Build Coastguard Worker.dconly_loop: 6396*c0909341SAndroid Build Coastguard Worker paddsw m1, m0, [dstq+32*0] 6397*c0909341SAndroid Build Coastguard Worker paddsw m2, m0, [dstq+32*1] 6398*c0909341SAndroid Build Coastguard Worker psubusw m1, m3 6399*c0909341SAndroid Build Coastguard Worker psubusw m2, m3 6400*c0909341SAndroid Build Coastguard Worker mova [dstq+32*0], m1 6401*c0909341SAndroid 
Build Coastguard Worker mova [dstq+32*1], m2 6402*c0909341SAndroid Build Coastguard Worker add dstq, strideq 6403*c0909341SAndroid Build Coastguard Worker dec r3d 6404*c0909341SAndroid Build Coastguard Worker jg .dconly_loop 6405*c0909341SAndroid Build Coastguard Worker RET 6406*c0909341SAndroid Build Coastguard Worker.full: 6407*c0909341SAndroid Build Coastguard Worker PROLOGUE 0, 7, 16, 32*24, dst, stride, c, eob 6408*c0909341SAndroid Build Coastguard Worker lea r6, [rsp+32*4] 6409*c0909341SAndroid Build Coastguard Worker vpbroadcastd m12, [clip_18b_min] 6410*c0909341SAndroid Build Coastguard Worker vpbroadcastd m13, [clip_18b_max] 6411*c0909341SAndroid Build Coastguard Worker call .pass1 6412*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_8x32_10bpc).main_end 6413*c0909341SAndroid Build Coastguard Worker lea r6, [deint_shuf+128] 6414*c0909341SAndroid Build Coastguard Worker vpbroadcastd m11, [pw_2048] 6415*c0909341SAndroid Build Coastguard Worker mov r4, dstq 6416*c0909341SAndroid Build Coastguard Worker call .pass2 6417*c0909341SAndroid Build Coastguard Worker mova m0, [r5+32*3] ; 16 17 6418*c0909341SAndroid Build Coastguard Worker mova m1, [r5+32*2] ; 30 31 6419*c0909341SAndroid Build Coastguard Worker mova m2, [r5+32*1] ; 18 19 6420*c0909341SAndroid Build Coastguard Worker mova m3, [r5+32*0] ; 28 29 6421*c0909341SAndroid Build Coastguard Worker mova m4, [r5-32*1] ; 20 21 6422*c0909341SAndroid Build Coastguard Worker mova m5, [r5-32*2] ; 26 27 6423*c0909341SAndroid Build Coastguard Worker mova m6, [r5-32*3] ; 22 23 6424*c0909341SAndroid Build Coastguard Worker mova m7, [r5-32*4] ; 24 25 6425*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_8x32_10bpc).transpose 6426*c0909341SAndroid Build Coastguard Worker lea dstq, [r4+32] 6427*c0909341SAndroid Build Coastguard Worker call .pass2 6428*c0909341SAndroid Build Coastguard Worker RET 6429*c0909341SAndroid Build Coastguard WorkerALIGN function_align 6430*c0909341SAndroid 
Build Coastguard Worker.pass2: 6431*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_8bpc).main 6432*c0909341SAndroid Build Coastguard Worker REPX {pmulhrsw x, m11}, m0, m1, m2, m3 6433*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_10bpc).write_16x4_start 6434*c0909341SAndroid Build Coastguard Worker pmulhrsw m0, m11, m4 6435*c0909341SAndroid Build Coastguard Worker pmulhrsw m1, m11, m5 6436*c0909341SAndroid Build Coastguard Worker pmulhrsw m2, m11, m6 6437*c0909341SAndroid Build Coastguard Worker pmulhrsw m3, m11, m7 6438*c0909341SAndroid Build Coastguard Worker jmp m(idct_16x8_internal_10bpc).write_16x4_zero 6439*c0909341SAndroid Build Coastguard WorkerALIGN function_align 6440*c0909341SAndroid Build Coastguard Worker.pass1: 6441*c0909341SAndroid Build Coastguard Worker mova m0, [cq+32* 1] 6442*c0909341SAndroid Build Coastguard Worker mova m1, [cq+32* 7] 6443*c0909341SAndroid Build Coastguard Worker mova m2, [cq+32* 9] 6444*c0909341SAndroid Build Coastguard Worker mova m3, [cq+32*15] 6445*c0909341SAndroid Build Coastguard Worker mova m4, [cq+32*17] 6446*c0909341SAndroid Build Coastguard Worker mova m5, [cq+32*23] 6447*c0909341SAndroid Build Coastguard Worker mova m6, [cq+32*25] 6448*c0909341SAndroid Build Coastguard Worker mova m7, [cq+32*31] 6449*c0909341SAndroid Build Coastguard Worker vpbroadcastd m11, [pd_2048] 6450*c0909341SAndroid Build Coastguard Worker vpbroadcastd m14, [pd_2896] 6451*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_8x32_10bpc).main_oddhalf_part1 6452*c0909341SAndroid Build Coastguard Worker mova m0, [cq+32* 3] 6453*c0909341SAndroid Build Coastguard Worker mova m1, [cq+32* 5] 6454*c0909341SAndroid Build Coastguard Worker mova m2, [cq+32*11] 6455*c0909341SAndroid Build Coastguard Worker mova m3, [cq+32*13] 6456*c0909341SAndroid Build Coastguard Worker mova m4, [cq+32*19] 6457*c0909341SAndroid Build Coastguard Worker mova m5, [cq+32*21] 6458*c0909341SAndroid Build Coastguard Worker 
mova m6, [cq+32*27] 6459*c0909341SAndroid Build Coastguard Worker mova m7, [cq+32*29] 6460*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_8x32_10bpc).main_oddhalf_part2 6461*c0909341SAndroid Build Coastguard Worker mova m0, [cq+32* 2] 6462*c0909341SAndroid Build Coastguard Worker mova m1, [cq+32* 6] 6463*c0909341SAndroid Build Coastguard Worker mova m2, [cq+32*10] 6464*c0909341SAndroid Build Coastguard Worker mova m3, [cq+32*14] 6465*c0909341SAndroid Build Coastguard Worker mova m4, [cq+32*18] 6466*c0909341SAndroid Build Coastguard Worker mova m5, [cq+32*22] 6467*c0909341SAndroid Build Coastguard Worker mova m6, [cq+32*26] 6468*c0909341SAndroid Build Coastguard Worker mova m7, [cq+32*30] 6469*c0909341SAndroid Build Coastguard Worker call m(idct_8x16_internal_10bpc).main_oddhalf 6470*c0909341SAndroid Build Coastguard Worker mova m0, [cq+32* 0] 6471*c0909341SAndroid Build Coastguard Worker mova m1, [cq+32* 4] 6472*c0909341SAndroid Build Coastguard Worker mova m2, [cq+32* 8] 6473*c0909341SAndroid Build Coastguard Worker mova m3, [cq+32*12] 6474*c0909341SAndroid Build Coastguard Worker mova m4, [cq+32*16] 6475*c0909341SAndroid Build Coastguard Worker mova m5, [cq+32*20] 6476*c0909341SAndroid Build Coastguard Worker mova m6, [cq+32*24] 6477*c0909341SAndroid Build Coastguard Worker mova m7, [cq+32*28] 6478*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_10bpc).main 6479*c0909341SAndroid Build Coastguard Worker call m(idct_8x16_internal_10bpc).main_evenhalf 6480*c0909341SAndroid Build Coastguard Worker ret 6481*c0909341SAndroid Build Coastguard Worker 6482*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_identity_identity_32x8_10bpc, 4, 7, 8, dst, stride, c, eob 6483*c0909341SAndroid Build Coastguard Worker vpbroadcastd m7, [pixel_10bpc_max] 6484*c0909341SAndroid Build Coastguard Worker.pass1: 6485*c0909341SAndroid Build Coastguard Worker vpbroadcastd m5, [pw_4096] 6486*c0909341SAndroid Build Coastguard Worker pxor m6, 
m6 6487*c0909341SAndroid Build Coastguard Worker mov r6d, eobd 6488*c0909341SAndroid Build Coastguard Worker add eobb, 21 6489*c0909341SAndroid Build Coastguard Worker cmovc eobd, r6d 6490*c0909341SAndroid Build Coastguard Worker lea r6, [strideq*3] 6491*c0909341SAndroid Build Coastguard Worker lea r5, [strideq*5] 6492*c0909341SAndroid Build Coastguard Worker lea r4, [strideq+r6*2] ; strideq*7 6493*c0909341SAndroid Build Coastguard Worker.loop: 6494*c0909341SAndroid Build Coastguard Worker mova m0, [cq+32*0] 6495*c0909341SAndroid Build Coastguard Worker packssdw m0, [cq+32*1] 6496*c0909341SAndroid Build Coastguard Worker mova m1, [cq+32*2] 6497*c0909341SAndroid Build Coastguard Worker packssdw m1, [cq+32*3] 6498*c0909341SAndroid Build Coastguard Worker REPX {mova [cq+32*x], m6}, 0, 1, 2, 3 6499*c0909341SAndroid Build Coastguard Worker add cq, 32*8 6500*c0909341SAndroid Build Coastguard Worker mova m2, [cq-32*4] 6501*c0909341SAndroid Build Coastguard Worker packssdw m2, [cq-32*3] 6502*c0909341SAndroid Build Coastguard Worker mova m3, [cq-32*2] 6503*c0909341SAndroid Build Coastguard Worker packssdw m3, [cq-32*1] 6504*c0909341SAndroid Build Coastguard Worker REPX {pmulhrsw x, m5}, m0, m1, m2, m3 6505*c0909341SAndroid Build Coastguard Worker REPX {mova [cq+32*x], m6}, -4, -3, -2, -1 6506*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_identity_identity_8x32_10bpc).main 6507*c0909341SAndroid Build Coastguard Worker add dstq, 16 6508*c0909341SAndroid Build Coastguard Worker sub eobd, 64 6509*c0909341SAndroid Build Coastguard Worker jge .loop 6510*c0909341SAndroid Build Coastguard Worker RET 6511*c0909341SAndroid Build Coastguard Worker 6512*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_dct_dct_32x8_12bpc, 4, 7, 0, dst, stride, c, eob 6513*c0909341SAndroid Build Coastguard Worker test eobd, eobd 6514*c0909341SAndroid Build Coastguard Worker jnz .full 6515*c0909341SAndroid Build Coastguard Worker imul r6d, [cq], 181 6516*c0909341SAndroid Build 
Coastguard Worker vpbroadcastd m3, [dconly_12bpc] 6517*c0909341SAndroid Build Coastguard Worker mov [cq], eobd ; 0 6518*c0909341SAndroid Build Coastguard Worker or r3d, 8 6519*c0909341SAndroid Build Coastguard Worker jmp m(inv_txfm_add_dct_dct_32x8_10bpc).dconly 6520*c0909341SAndroid Build Coastguard Worker.full: 6521*c0909341SAndroid Build Coastguard Worker PROLOGUE 0, 7, 16, 32*24, dst, stride, c, eob 6522*c0909341SAndroid Build Coastguard Worker lea r6, [rsp+32*4] 6523*c0909341SAndroid Build Coastguard Worker vpbroadcastd m12, [clip_20b_min] 6524*c0909341SAndroid Build Coastguard Worker vpbroadcastd m13, [clip_20b_max] 6525*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x8_10bpc).pass1 6526*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_8x32_12bpc).main_end 6527*c0909341SAndroid Build Coastguard Worker mov r4, dstq 6528*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_12bpc).pass2_main 6529*c0909341SAndroid Build Coastguard Worker mova m0, [cq+32* 0] ; 16 6530*c0909341SAndroid Build Coastguard Worker mova m1, [cq+32* 1] ; 17 6531*c0909341SAndroid Build Coastguard Worker mova m2, [cq+32* 2] ; 18 6532*c0909341SAndroid Build Coastguard Worker mova m3, [cq+32* 3] ; 19 6533*c0909341SAndroid Build Coastguard Worker mova m4, [cq+32* 4] ; 20 6534*c0909341SAndroid Build Coastguard Worker mova m5, [cq+32* 5] ; 21 6535*c0909341SAndroid Build Coastguard Worker mova m6, [cq+32* 6] ; 22 6536*c0909341SAndroid Build Coastguard Worker mova m7, [cq+32* 7] ; 23 6537*c0909341SAndroid Build Coastguard Worker mova m8, [cq+32* 8] ; 24 6538*c0909341SAndroid Build Coastguard Worker mova m9, [cq+32* 9] ; 25 6539*c0909341SAndroid Build Coastguard Worker mova m10, [cq+32*10] ; 26 6540*c0909341SAndroid Build Coastguard Worker mova m11, [cq+32*11] ; 27 6541*c0909341SAndroid Build Coastguard Worker mova m12, [cq+32*12] ; 28 6542*c0909341SAndroid Build Coastguard Worker mova m13, [cq+32*13] ; 29 6543*c0909341SAndroid Build 
Coastguard Worker mova m14, [cq+32*14] ; 30 6544*c0909341SAndroid Build Coastguard Worker mova m15, [cq+32*15] ; 31 6545*c0909341SAndroid Build Coastguard Worker lea dstq, [r4+32] 6546*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_12bpc).pass2_main 6547*c0909341SAndroid Build Coastguard Worker RET 6548*c0909341SAndroid Build Coastguard Worker 6549*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_identity_identity_32x8_12bpc, 4, 7, 8, dst, stride, c, eob 6550*c0909341SAndroid Build Coastguard Worker vpbroadcastd m7, [pixel_12bpc_max] 6551*c0909341SAndroid Build Coastguard Worker jmp m(inv_txfm_add_identity_identity_32x8_10bpc).pass1 6552*c0909341SAndroid Build Coastguard Worker 6553*c0909341SAndroid Build Coastguard Worker%macro IDCT32_PASS2_END 6 ; coefs[1-2], tmp[1-2], offset[1-2] 6554*c0909341SAndroid Build Coastguard Worker mova m%4, [%2] 6555*c0909341SAndroid Build Coastguard Worker paddsw m%3, m%1, m%4 6556*c0909341SAndroid Build Coastguard Worker psubsw m%1, m%4 6557*c0909341SAndroid Build Coastguard Worker%if %1 == 0 6558*c0909341SAndroid Build Coastguard Worker pxor m6, m6 6559*c0909341SAndroid Build Coastguard Worker%endif 6560*c0909341SAndroid Build Coastguard Worker pmulhrsw m%3, m15 6561*c0909341SAndroid Build Coastguard Worker pmulhrsw m%1, m15 6562*c0909341SAndroid Build Coastguard Worker paddw m%3, [dstq+%5] 6563*c0909341SAndroid Build Coastguard Worker paddw m%1, [r2+%6] 6564*c0909341SAndroid Build Coastguard Worker pmaxsw m%3, m6 6565*c0909341SAndroid Build Coastguard Worker pmaxsw m%1, m6 6566*c0909341SAndroid Build Coastguard Worker pminsw m%3, m7 6567*c0909341SAndroid Build Coastguard Worker pminsw m%1, m7 6568*c0909341SAndroid Build Coastguard Worker mova [dstq+%5], m%3 6569*c0909341SAndroid Build Coastguard Worker mova [r2+%6], m%1 6570*c0909341SAndroid Build Coastguard Worker%endmacro 6571*c0909341SAndroid Build Coastguard Worker 6572*c0909341SAndroid Build Coastguard Workercglobal 
inv_txfm_add_dct_dct_16x32_10bpc, 4, 7, 0, dst, stride, c, eob 6573*c0909341SAndroid Build Coastguard Worker test eobd, eobd 6574*c0909341SAndroid Build Coastguard Worker jz .dconly 6575*c0909341SAndroid Build Coastguard Worker PROLOGUE 0, 8, 16, 32*36, dst, stride, c, eob 6576*c0909341SAndroid Build Coastguard Worker%undef cmp 6577*c0909341SAndroid Build Coastguard Worker vpbroadcastd m11, [pd_2048] 6578*c0909341SAndroid Build Coastguard Worker vpbroadcastd m12, [clip_18b_min] 6579*c0909341SAndroid Build Coastguard Worker vpbroadcastd m13, [clip_18b_max] 6580*c0909341SAndroid Build Coastguard Worker vpbroadcastd m14, [pd_2896] 6581*c0909341SAndroid Build Coastguard Worker lea r6, [rsp+32*16] 6582*c0909341SAndroid Build Coastguard Worker lea r4, [r6+32*8] 6583*c0909341SAndroid Build Coastguard Worker lea r5, [r6+32*16] 6584*c0909341SAndroid Build Coastguard Worker call .main 6585*c0909341SAndroid Build Coastguard Worker sub eobd, 44 6586*c0909341SAndroid Build Coastguard Worker jge .eob44 6587*c0909341SAndroid Build Coastguard Worker vperm2i128 m2, m0, m3, 0x31 ; 5 6588*c0909341SAndroid Build Coastguard Worker vinserti128 m0, xm3, 1 ; 1 6589*c0909341SAndroid Build Coastguard Worker vperm2i128 m3, m1, m4, 0x31 ; 7 6590*c0909341SAndroid Build Coastguard Worker vinserti128 m1, xm4, 1 ; 3 6591*c0909341SAndroid Build Coastguard Worker pxor m4, m4 6592*c0909341SAndroid Build Coastguard Worker REPX {mova x, m4}, m5, m6, m7 6593*c0909341SAndroid Build Coastguard Worker REPX {mova [r6+32*x], m4}, 0, 1, 2, 3 6594*c0909341SAndroid Build Coastguard Worker jmp .fast 6595*c0909341SAndroid Build Coastguard Worker.dconly: 6596*c0909341SAndroid Build Coastguard Worker imul r6d, [cq], 181 6597*c0909341SAndroid Build Coastguard Worker vpbroadcastd m3, [dconly_10bpc] 6598*c0909341SAndroid Build Coastguard Worker mov [cq], eobd ; 0 6599*c0909341SAndroid Build Coastguard Worker or r3d, 32 6600*c0909341SAndroid Build Coastguard Worker add r6d, 128 6601*c0909341SAndroid Build Coastguard 
Worker sar r6d, 8 6602*c0909341SAndroid Build Coastguard Worker imul r6d, 181 6603*c0909341SAndroid Build Coastguard Worker jmp m(inv_txfm_add_dct_dct_16x4_10bpc).dconly2 6604*c0909341SAndroid Build Coastguard Worker.eob44: 6605*c0909341SAndroid Build Coastguard Worker mova [r4+16*0], xm0 6606*c0909341SAndroid Build Coastguard Worker mova [r4+16*1], xm3 6607*c0909341SAndroid Build Coastguard Worker mova [r4+16*2], xm1 6608*c0909341SAndroid Build Coastguard Worker mova [r4+16*3], xm4 6609*c0909341SAndroid Build Coastguard Worker vextracti128 [r4+16*4], m0, 1 6610*c0909341SAndroid Build Coastguard Worker vextracti128 [r4+16*5], m3, 1 6611*c0909341SAndroid Build Coastguard Worker vextracti128 [r4+16*6], m1, 1 6612*c0909341SAndroid Build Coastguard Worker vextracti128 [r4+16*7], m4, 1 6613*c0909341SAndroid Build Coastguard Worker call .main 6614*c0909341SAndroid Build Coastguard Worker sub eobd, 107 6615*c0909341SAndroid Build Coastguard Worker jge .eob151 6616*c0909341SAndroid Build Coastguard Worker vperm2i128 m7, m1, m4, 0x31 ; 15 6617*c0909341SAndroid Build Coastguard Worker vinserti128 m5, m1, xm4, 1 ; 11 6618*c0909341SAndroid Build Coastguard Worker vperm2i128 m6, m0, m3, 0x31 ; 13 6619*c0909341SAndroid Build Coastguard Worker vinserti128 m4, m0, xm3, 1 ; 9 6620*c0909341SAndroid Build Coastguard Worker mova m0, [r4+32*0] 6621*c0909341SAndroid Build Coastguard Worker mova m1, [r4+32*1] 6622*c0909341SAndroid Build Coastguard Worker mova m2, [r4+32*2] 6623*c0909341SAndroid Build Coastguard Worker mova m3, [r4+32*3] 6624*c0909341SAndroid Build Coastguard Worker.fast: 6625*c0909341SAndroid Build Coastguard Worker lea r6, [pw_5+128] 6626*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_16x32_8bpc).main_oddhalf_fast 6627*c0909341SAndroid Build Coastguard Worker pxor m8, m8 6628*c0909341SAndroid Build Coastguard Worker REPX {mova x, m8}, m9, m10, m11, m12, m13, m14, m15 6629*c0909341SAndroid Build Coastguard Worker jmp .idct16 6630*c0909341SAndroid 
Build Coastguard Worker.eob151: 6631*c0909341SAndroid Build Coastguard Worker mova [r4-16*8], xm0 6632*c0909341SAndroid Build Coastguard Worker mova [r4-16*7], xm3 6633*c0909341SAndroid Build Coastguard Worker mova [r4-16*6], xm1 6634*c0909341SAndroid Build Coastguard Worker mova [r4-16*5], xm4 6635*c0909341SAndroid Build Coastguard Worker vextracti128 [r4-16*4], m0, 1 6636*c0909341SAndroid Build Coastguard Worker vextracti128 [r4-16*3], m3, 1 6637*c0909341SAndroid Build Coastguard Worker vextracti128 [r4-16*2], m1, 1 6638*c0909341SAndroid Build Coastguard Worker vextracti128 [r4-16*1], m4, 1 6639*c0909341SAndroid Build Coastguard Worker call .main 6640*c0909341SAndroid Build Coastguard Worker sub eobd, 128 6641*c0909341SAndroid Build Coastguard Worker jge .eob279 6642*c0909341SAndroid Build Coastguard Worker vperm2i128 m10, m0, m3, 0x31 ; 21 6643*c0909341SAndroid Build Coastguard Worker vinserti128 m8, m0, xm3, 1 ; 17 6644*c0909341SAndroid Build Coastguard Worker vperm2i128 m11, m1, m4, 0x31 ; 23 6645*c0909341SAndroid Build Coastguard Worker vinserti128 m9, m1, xm4, 1 ; 19 6646*c0909341SAndroid Build Coastguard Worker pxor m12, m12 6647*c0909341SAndroid Build Coastguard Worker REPX {mova x, m12}, m13, m14, m15 6648*c0909341SAndroid Build Coastguard Worker REPX {mova [r6+32*x], m12}, 0, 1, 2, 3 6649*c0909341SAndroid Build Coastguard Worker jmp .full 6650*c0909341SAndroid Build Coastguard Worker.eob279: 6651*c0909341SAndroid Build Coastguard Worker mova [r5+16*0], xm0 6652*c0909341SAndroid Build Coastguard Worker mova [r5+16*1], xm3 6653*c0909341SAndroid Build Coastguard Worker mova [r5+16*2], xm1 6654*c0909341SAndroid Build Coastguard Worker mova [r5+16*3], xm4 6655*c0909341SAndroid Build Coastguard Worker vextracti128 [r5+16*4], m0, 1 6656*c0909341SAndroid Build Coastguard Worker vextracti128 [r5+16*5], m3, 1 6657*c0909341SAndroid Build Coastguard Worker vextracti128 [r5+16*6], m1, 1 6658*c0909341SAndroid Build Coastguard Worker vextracti128 [r5+16*7], m4, 1 
; NOTE(review): everything down to the first cglobal below is the tail of a
; routine whose entry point sits above these lines. Its .pass2_end label is
; referenced elsewhere in this file as
; m(inv_txfm_add_dct_dct_16x32_10bpc).pass2_end, so this is presumably that
; function -- confirm.
    call                .main
    ; Merge 128-bit lanes of m0/m1 with those of m3/m4 into m12-m15
    ; (0x31 = both high lanes; vinserti128 ..., 1 = low lane of the first
    ; source plus the xmm source as the new high lane). The trailing numbers
    ; are the original authors' index annotations.
    vperm2i128          m14, m0, m3, 0x31 ; 29
    vinserti128         m12, m0, xm3, 1 ; 25
    vperm2i128          m15, m1, m4, 0x31 ; 31
    vinserti128         m13, m1, xm4, 1 ; 27
    mova                m8, [r5+32*0]
    mova                m9, [r5+32*1]
    mova                m10, [r5+32*2]
    mova                m11, [r5+32*3]
.full:
    ; Load eight more vectors from the r4 buffer and run the 8bpc 16x32
    ; odd-half routine on m0-m15. r6 is pointed at pw_5+128 first
    ; (presumably the constant-table base that routine expects -- confirm).
    mova                m0, [r4+32*0]
    mova                m1, [r4+32*1]
    mova                m2, [r4+32*2]
    mova                m3, [r4+32*3]
    mova                m4, [r4-32*4]
    mova                m5, [r4-32*3]
    mova                m6, [r4-32*2]
    mova                m7, [r4-32*1]
    lea                 r6, [pw_5 + 128]
    call                m(inv_txfm_add_dct_dct_16x32_8bpc).main_oddhalf
    ; Reload m8-m15 from the eight stack slots centred on rsp+32*8.
    lea                 r3, [rsp+32*8]
    mova                m8, [r3+32*0]
    mova                m9, [r3+32*1]
    mova                m10, [r3+32*2]
    mova                m11, [r3+32*3]
    mova                m12, [r3-32*4]
    mova                m13, [r3-32*3]
    mova                m14, [r3-32*2]
    mova                m15, [r3-32*1]
.idct16:
    ; Reload m0-m7 from the eight stack slots centred on rsp+32*16, park m15
    ; in [rsp] and run the 8bpc 16-point IDCT.
    lea                 r3, [rsp+32*16]
    mova                m0, [r3+32*0]
    mova                m1, [r3+32*1]
    mova                m2, [r3+32*2]
    mova                m3, [r3+32*3]
    mova                m4, [r3-32*4]
    mova                m5, [r3-32*3]
    mova                m6, [r3-32*2]
    mova                m7, [r3-32*1]
    mova                [rsp], m15
    call                m(idct_16x16_internal_8bpc).main
    ; r2 = dstq + 19*stride, r3 = 3*stride: address operands used by the
    ; IDCT32_PASS2_END invocations in .pass2_end.
    imul                r2, strideq, 19
    lea                 r3, [strideq*3]
    add                 r2, dstq
    call                .pass2_end
    RET
ALIGN function_align
.main:
    ; First-pass worker. m14 is a dword multiplier set up by the caller (not
    ; visible here; the *_rect2 callee names suggest the rect2 scale factor --
    ; confirm). Odd coefficient rows (128-byte pitch) go to the odd half...
    pmulld              m0, m14, [cq+128* 1]
    pmulld              m1, m14, [cq+128* 3]
    pmulld              m2, m14, [cq+128* 5]
    pmulld              m3, m14, [cq+128* 7]
    pmulld              m4, m14, [cq+128* 9]
    pmulld              m5, m14, [cq+128*11]
    pmulld              m6, m14, [cq+128*13]
    pmulld              m7, m14, [cq+128*15]
    call                m(idct_8x16_internal_10bpc).main_oddhalf_rect2
    ; ...and even rows to the 8-point core plus the even half.
    pmulld              m0, m14, [cq+128* 0]
    pmulld              m1, m14, [cq+128* 2]
    pmulld              m2, m14, [cq+128* 4]
    pmulld              m3, m14, [cq+128* 6]
    pmulld              m4, m14, [cq+128* 8]
    pmulld              m5, m14, [cq+128*10]
    pmulld              m6, m14, [cq+128*12]
    pmulld              m7, m14, [cq+128*14]
    call                m(idct_8x8_internal_10bpc).main_rect2
    call                m(idct_8x16_internal_10bpc).main_evenhalf
    ; Final butterfly: add the rounding bias to m0-m7, then form sum and
    ; difference with the eight vectors at [r6-32*4 .. r6+32*3] (presumably
    ; the odd-half results stored by the call above -- confirm), shift right
    ; by 1 and pack the dwords to words.
    psrld               m15, m11, 11 ; pd_1
    mova                m8, [r6-32*4]
    mova                m9, [r6-32*3]
    REPX                {paddd x, m15}, m0, m1, m2, m3, m4, m5, m6, m7
    psubd               m10, m0, m8 ; out15
    paddd               m0, m8 ; out0
    mova                m8, [r6-32*2]
    paddd               m15, m1, m9 ; out1
    psubd               m1, m9 ; out14
    mova                m9, [r6-32*1]
    REPX                {psrad x, 1}, m0, m15, m10, m1
    packssdw            m0, m15
    packssdw            m1, m10
    psubd               m10, m2, m8 ; out13
    paddd               m2, m8 ; out2
    mova                m8, [r6+32*0]
    paddd               m15, m3, m9 ; out3
    psubd               m3, m9 ; out12
    mova                m9, [r6+32*1]
    REPX                {psrad x, 1}, m2, m15, m10, m3
    packssdw            m2, m15
    packssdw            m3, m10
    psubd               m10, m4, m8 ; out11
    paddd               m4, m8 ; out4
    mova                m8, [r6+32*2]
    paddd               m15, m5, m9 ; out5
    psubd               m5, m9 ; out10
    mova                m9, [r6+32*3]
    REPX                {psrad x, 1}, m4, m10, m15, m5
    packssdw            m4, m15
    packssdw            m5, m10
    psubd               m10, m6, m8 ; out9
    paddd               m6, m8 ; out6
    paddd               m15, m7, m9 ; out7
    psubd               m7, m9 ; out8
    REPX                {psrad x, 1}, m6, m10, m15, m7
    packssdw            m6, m15
    packssdw            m7, m10
    ; Word transpose, stage 1.
    punpckhwd           m8, m0, m2
    punpcklwd           m0, m2
    punpckhwd           m2, m3, m1
    punpcklwd           m3, m1
    punpckhwd           m1, m4, m6
    punpcklwd           m4, m6
    punpcklwd           m6, m7, m5
    punpckhwd           m7, m5
    ; m5 is free at this point: use it as the zero source for clearing the
    ; consumed coefficients. r7 takes the values 128*13, 128*9, 128*5, 128*1,
    ; so one vector is cleared at each of cq+128*0 .. cq+128*15.
    pxor                m5, m5
    mov                 r7d, 128*13
.main_zero_loop:
    mova                [cq+r7-128*1], m5
    mova                [cq+r7+128*0], m5
    mova                [cq+r7+128*1], m5
    mova                [cq+r7+128*2], m5
    sub                 r7d, 128*4
    jg .main_zero_loop
    add                 cq, 32 ; step to the next 32-byte column of coefficients
    ; Transpose, stages 2 (words) and 3 (qwords).
    punpcklwd           m5, m3, m2
    punpckhwd           m3, m2
    punpcklwd           m2, m4, m1
    punpckhwd           m4, m1
    punpckhwd           m1, m0, m8
    punpcklwd           m0, m8
    punpckhwd           m8, m6, m7
    punpcklwd           m6, m7
    punpcklqdq          m7, m1, m4
    punpckhqdq          m1, m4
    punpckhqdq          m4, m8, m3
    punpcklqdq          m8, m3
    punpckhqdq          m3, m6, m5
    punpcklqdq          m6, m5
    punpcklqdq          m5, m0, m2
    punpckhqdq          m0, m2
    ; Store m5-m8 to the r6 buffer split by 128-bit lane (low lanes first,
    ; then high lanes). m0, m1, m3 and m4 are returned in registers; r6 is
    ; moved back by 32*4 for the caller.
    mova                [r6+16*0], xm5
    mova                [r6+16*1], xm6
    mova                [r6+16*2], xm7
    mova                [r6+16*3], xm8
    vextracti128        [r6+16*4], m5, 1
    vextracti128        [r6+16*5], m6, 1
    vextracti128        [r6+16*6], m7, 1
    vextracti128        [r6+16*7], m8, 1
    sub                 r6, 32*4
    ret
ALIGN function_align
.pass2_end:
    ; Output stage shared with other transforms in this file. Spills m6, m7
    ; and m15 to stack slots 0, 2 and 3 (gprsize skips the return address
    ; pushed by the call), then uses m15 = pw_2048 and m7 = pixel_10bpc_max
    ; for the IDCT32_PASS2_END macro (defined elsewhere in the file).
    ; Each group of four macro invocations is followed by dstq += stride and
    ; r2 -= stride.
    mova                [rsp+gprsize+32*0], m6
    mova                [rsp+gprsize+32*2], m7
    mova                [rsp+gprsize+32*3], m15
    vpbroadcastd        m15, [pw_2048]
    vpbroadcastd        m7, [pixel_10bpc_max]
    IDCT32_PASS2_END    0, r5+32*3, 1, 6, strideq*0, r3*4
    IDCT32_PASS2_END    4, r5-32*1, 0, 1, strideq*4, strideq*8
    IDCT32_PASS2_END    8, r4+32*3, 0, 4, strideq*8, strideq*4
    IDCT32_PASS2_END    12, r4-32*1, 0, 4, r3*4, strideq*0
    add                 dstq, strideq
    sub                 r2, strideq
    ; Slot 32*1 is not written in this routine; presumably it was filled by
    ; the 16-point IDCT called just before -- confirm.
    mova                m1, [rsp+gprsize+32*1]
    IDCT32_PASS2_END    1, r5+32*2, 0, 4, strideq*0, r3*4
    IDCT32_PASS2_END    5, r5-32*2, 0, 4, strideq*4, strideq*8
    IDCT32_PASS2_END    9, r4+32*2, 0, 4, strideq*8, strideq*4
    IDCT32_PASS2_END    13, r4-32*2, 0, 4, r3*4, strideq*0
    add                 dstq, strideq
    sub                 r2, strideq
    mova                m1, [rsp+gprsize+32*0] ; the m6 spilled on entry
    IDCT32_PASS2_END    2, r5+32*1, 0, 4, strideq*0, r3*4
    IDCT32_PASS2_END    1, r5-32*3, 0, 4, strideq*4, strideq*8
    IDCT32_PASS2_END    10, r4+32*1, 0, 4, strideq*8, strideq*4
    IDCT32_PASS2_END    14, r4-32*3, 0, 4, r3*4, strideq*0
    add                 dstq, strideq
    sub                 r2, strideq
    mova                m1, [rsp+gprsize+32*2] ; the m7 spilled on entry
    mova                m2, [rsp+gprsize+32*3] ; the m15 spilled on entry
    IDCT32_PASS2_END    3, r5+32*0, 0, 4, strideq*0, r3*4
    IDCT32_PASS2_END    1, r5-32*4, 0, 4, strideq*4, strideq*8
    IDCT32_PASS2_END    11, r4+32*0, 0, 4, strideq*8, strideq*4
    IDCT32_PASS2_END    2, r4-32*4, 0, 4, r3*4, strideq*0
    ret

; Identity/identity 16x32 inverse transform + add, 10-bit entry point.
; Arguments (cglobal order): dst, stride, c (coefficients), eob.
; The 12-bit variant loads its own pixel maximum into m7 and enters at .pass1.
cglobal inv_txfm_add_identity_identity_16x32_10bpc, 4, 7, 12, dst, stride, c, eob
    vpbroadcastd        m7, [pixel_10bpc_max]
.pass1:
    vpbroadcastd        m8, [pw_2896x8]
    vpbroadcastd        m9, [pw_1697x16]
    vpbroadcastd        m11, [pw_8192]
    lea                 r6, [strideq*5]
    pxor                m6, m6 ; zero source used by .main to clear coefficients
    paddw               m10, m11, m11 ; pw_16384
    mov                 r5, dstq ; r5 = output base of the current row band
    ; Up to eight .main calls follow. eob is reduced after the 1st, 3rd, 5th
    ; and 7th call; as soon as it goes negative the remaining calls are
    ; skipped.
    call                .main
    sub                 eobd, 36
    jl .ret
    add                 cq, 128*8 ; next 8 coefficient rows (128-byte pitch)
    lea                 dstq, [r5+16] ; same band, 16 bytes to the right
    call                .main
    sub                 cq, 128*8-32 ; back to the first row group, 32 bytes on
    lea                 dstq, [r5+strideq*8] ; next band: 8 rows down
    mov                 r5, dstq
    call                .main
    sub                 eobd, 107 ; eob < 143
    jl .ret
    add                 cq, 128*8
    lea                 dstq, [r5+16]
    call                .main
    sub                 cq, 128*8-32
    lea                 dstq, [r5+strideq*8]
    mov                 r5, dstq
    call                .main
    sub                 eobd, 128 ; eob < 271
    jl .ret
    add                 cq, 128*8
    lea                 dstq, [r5+16]
    call                .main
    sub                 cq, 128*8-32
    lea                 dstq, [r5+strideq*8]
    mov                 r5, dstq
    call                .main
    sub                 eobd, 128 ; eob < 399
    jl .ret
    add                 cq, 128*8
    lea                 dstq, [r5+16]
    call                .main
.ret:
    RET
ALIGN function_align
.main:
    ; Load 8 coefficient rows (128-byte pitch) and pack each pair of dword
    ; rows to words with signed saturation.
    mova                m0, [cq+128*0]
    packssdw            m0, [cq+128*1]
    mova                m1, [cq+128*2]
    packssdw            m1, [cq+128*3]
    mova                m2, [cq+128*4]
    packssdw            m2, [cq+128*5]
    mova                m3, [cq+128*6]
    packssdw            m3, [cq+128*7]
    ; Scale by m8 (pw_2896x8), apply the IDTX16 macro (defined elsewhere;
    ; uses m9 and m10), then scale by m11 (pw_8192).
    REPX                {pmulhrsw x, m8 }, m0, m1, m2, m3
    REPX                {IDTX16 x, 4, 9, 10}, 0, 1, 2, 3
    REPX                {pmulhrsw x, m11}, m0, m1, m2, m3
    ; Clear the eight rows just consumed.
    REPX                {mova [cq+128*x], m6}, 0, 1, 2, 3, 4, 5, 6, 7
.main2:
    ; Shared tail, also entered from the 32x16 identity routine: transpose
    ; the words in m0-m3 and hand two register pairs to the 8x8 identity
    ; writer (second call is a tail jump, so its ret returns to our caller).
    punpckhwd           m4, m0, m1
    punpcklwd           m0, m1
    punpckhwd           m1, m2, m3
    punpcklwd           m2, m3
    punpckhwd           m3, m0, m4
    punpcklwd           m0, m4
    punpcklwd           m4, m2, m1
    punpckhwd           m2, m1
    punpckhqdq          m1, m0, m4
    punpcklqdq          m0, m4
    call                m(iidentity_8x8_internal_10bpc).write_2x8x2
    punpcklqdq          m0, m3, m2
    punpckhqdq          m1, m3, m2
    jmp m(iidentity_8x8_internal_10bpc).write_2x8x2
; 12-bit identity/identity 16x32: identical to the 10-bit routine except for
; the pixel maximum held in m7, so it only loads that and joins the shared
; .pass1 path.
cglobal inv_txfm_add_identity_identity_16x32_12bpc, 4, 7, 12, dst, stride, c, eob
    vpbroadcastd        m7, [pixel_12bpc_max]
    jmp m(inv_txfm_add_identity_identity_16x32_10bpc).pass1

; DCT/DCT 32x16 inverse transform + add, 10-bit.
; Arguments (cglobal order): dst, stride, c (coefficients), eob.
; eob == 0 takes the DC-only shortcut; eob < 36 runs a single first-pass
; .main call; otherwise a second .main call processes the next 32-byte
; column of coefficients as well.
cglobal inv_txfm_add_dct_dct_32x16_10bpc, 4, 7, 0, dst, stride, c, eob
    test                eobd, eobd
    jz .dconly
    ; Full prologue only on the non-DC path: 8 GPRs, 16 vector registers and
    ; 32*40 bytes of stack (x86inc PROLOGUE argument order).
    PROLOGUE            0, 8, 16, 32*40, dst, stride, c, eob
%undef cmp
    vpbroadcastd        m12, [clip_18b_min]
    vpbroadcastd        m13, [clip_18b_max]
    lea                 r6, [rsp+32*4]
    call                .main
    cmp                 eobd, 36
    jge .full
    ; Reduced path: transpose what .main left in m0-m7, zero m8-m14 and the
    ; [rsp] slot (the 16-point IDCT's remaining inputs) and run the second
    ; pass on the left 16 columns.
    call                m(inv_txfm_add_dct_dct_8x32_10bpc).transpose
    pxor                m8, m8
    REPX                {mova x, m8}, m9, m10, m11, m12, m13, m14, [rsp]
    lea                 r6, [pw_5+128]
    mov                 r7, dstq ; remember the left edge; .end adds 32 bytes
    call                m(idct_16x16_internal_8bpc).main
    call                .write_16x16
    ; Second half: the eight vectors .main_end stored around r5.
    mova                m0, [r5+32*3]
    mova                m1, [r5+32*2]
    mova                m2, [r5+32*1]
    mova                m3, [r5+32*0]
    mova                m4, [r5-32*1]
    mova                m5, [r5-32*2]
    mova                m6, [r5-32*3]
    mova                m7, [r5-32*4]
    call                m(inv_txfm_add_dct_dct_8x32_10bpc).transpose
    pxor                m8, m8
    REPX                {mova x, m8}, m9, m10, m11, m12, m13, m14, [rsp]
    jmp .end
.dconly:
    ; DC-only: scale the single coefficient by 181 twice, with rounding
    ; shifts of 8 and 9, clear it in memory and tail-jump to the shared
    ; 32x8 DC writer.
    imul                r6d, [cq], 181
    vpbroadcastd        m3, [dconly_10bpc]
    mov                 [cq], eobd ; 0
    ; r3d is the eob argument register (4th cglobal argument) and is zero
    ; on this path; presumably reused as the row count for .dconly2 --
    ; confirm against that routine.
    or                  r3d, 16
    add                 r6d, 128
    sar                 r6d, 8
    imul                r6d, 181
    add                 r6d, 384
    sar                 r6d, 9
    jmp m(inv_txfm_add_dct_dct_32x8_10bpc).dconly2
.full:
    ; Save the first .main result (m0-m7) in the r4 buffer, run .main again
    ; on the next 32-byte column of coefficients, then transpose both halves
    ; together. r4 and r5 are not set in the visible code; presumably the
    ; routines called from .main maintain them -- confirm.
    add                 cq, 32
    mova                [r4+32*3], m0
    mova                [r4+32*2], m1
    mova                [r4+32*1], m2
    mova                [r4+32*0], m3
    mova                [r4-32*1], m4
    mova                [r4-32*2], m5
    mova                [r4-32*3], m6
    mova                [r4-32*4], m7
    call                .main
    sub                 r4, 32*16 ; topleft 16x8
    call                .transpose_16x16
    lea                 r6, [pw_5+128]
    mov                 r7, dstq ; remember the left edge; .end adds 32 bytes
    call                m(idct_16x16_internal_8bpc).main
    call                .write_16x16
    mova                m0, [r5+32*3]
    mova                m1, [r5+32*2]
    mova                m2, [r5+32*1]
    mova                m3, [r5+32*0]
    mova                m4, [r5-32*1]
    mova                m5, [r5-32*2]
    mova                m6, [r5-32*3]
    mova                m7, [r5-32*4]
    add                 r4, 32*8 ; bottomleft 16x8
    call                .transpose_16x16
.end:
    ; Second pass for the right 16 columns (32 bytes of 16-bit pixels).
    lea                 dstq, [r7+32]
    call                m(idct_16x16_internal_8bpc).main
    call                .write_16x16
    RET
ALIGN function_align
.transpose_16x16:
    ; Transposes m0-m7 at dword/qword granularity, merges 128-bit lanes into
    ; m8-m15, reloads m0-m7 from the r4 buffer, parks m15 in the first stack
    ; slot and tail-jumps to the shared transpose for the reloaded half.
    punpckhdq           m8, m3, m1
    punpckldq           m3, m1
    punpckhdq           m1, m0, m2
    punpckldq           m0, m2
    punpckhdq           m2, m7, m5
    punpckldq           m7, m5
    punpckhdq           m5, m4, m6
    punpckldq           m4, m6
    punpckhqdq          m6, m0, m4
    punpcklqdq          m0, m4
    punpckhqdq          m4, m1, m5
    punpcklqdq          m1, m5
    punpckhqdq          m5, m7, m3
    punpcklqdq          m7, m3
    punpckhqdq          m3, m2, m8
    punpcklqdq          m2, m8
    vinserti128         m8, m0, xm7, 1
    vperm2i128          m12, m0, m7, 0x31
    vinserti128         m9, m6, xm5, 1
    vperm2i128          m13, m6, m5, 0x31
    vinserti128         m10, m1, xm2, 1
    vperm2i128          m14, m1, m2, 0x31
    vinserti128         m11, m4, xm3, 1
    vperm2i128          m15, m4, m3, 0x31
    mova                m0, [r4+32*3]
    mova                m1, [r4+32*2]
    mova                m2, [r4+32*1]
    mova                m3, [r4+32*0]
    mova                m4, [r4-32*1]
    mova                m5, [r4-32*2]
    mova                m6, [r4-32*3]
    mova                m7, [r4-32*4]
    mova                [rsp+gprsize], m15
    jmp m(inv_txfm_add_dct_dct_8x32_10bpc).transpose
ALIGN function_align
.main:
    ; First pass over one 32-byte column of coefficients (64-byte row pitch).
    ; Every row is multiplied by pd_2896 (m14) before being fed to the
    ; *_rect2 helpers; m11 = pd_2048. The 32 rows are dispatched in four
    ; groups of eight.
    vpbroadcastd        m14, [pd_2896]
    vpbroadcastd        m11, [pd_2048]
    pmulld              m0, m14, [cq+64* 1]
    pmulld              m1, m14, [cq+64* 7]
    pmulld              m2, m14, [cq+64* 9]
    pmulld              m3, m14, [cq+64*15]
    pmulld              m4, m14, [cq+64*17]
    pmulld              m5, m14, [cq+64*23]
    pmulld              m6, m14, [cq+64*25]
    pmulld              m7, m14, [cq+64*31]
    call                m(inv_txfm_add_dct_dct_8x32_10bpc).main_oddhalf_part1_rect2
    pmulld              m0, m14, [cq+64* 3]
    pmulld              m1, m14, [cq+64* 5]
    pmulld              m2, m14, [cq+64*11]
    pmulld              m3, m14, [cq+64*13]
    pmulld              m4, m14, [cq+64*19]
    pmulld              m5, m14, [cq+64*21]
    pmulld              m6, m14, [cq+64*27]
    pmulld              m7, m14, [cq+64*29]
    call                m(inv_txfm_add_dct_dct_8x32_10bpc).main_oddhalf_part2_rect2
    pmulld              m0, m14, [cq+64* 2]
    pmulld              m1, m14, [cq+64* 6]
    pmulld              m2, m14, [cq+64*10]
    pmulld              m3, m14, [cq+64*14]
    pmulld              m4, m14, [cq+64*18]
    pmulld              m5, m14, [cq+64*22]
    pmulld              m6, m14, [cq+64*26]
    pmulld              m7, m14, [cq+64*30]
    call                m(idct_8x16_internal_10bpc).main_oddhalf_rect2
    pmulld              m0, m14, [cq+64* 0]
    pmulld              m1, m14, [cq+64* 4]
    pmulld              m2, m14, [cq+64* 8]
    pmulld              m3, m14, [cq+64*12]
    pmulld              m4, m14, [cq+64*16]
    pmulld              m5, m14, [cq+64*20]
    pmulld              m6, m14, [cq+64*24]
    pmulld              m7, m14, [cq+64*28]
    call                m(idct_8x8_internal_10bpc).main_rect2
    call                m(idct_8x16_internal_10bpc).main_evenhalf
    ; Clear the consumed coefficients: r7 steps 64*30, 64*26, ..., 64*2, so
    ; one vector is zeroed at each of cq+64*0 .. cq+64*31.
    pxor                m8, m8
    mov                 r7d, 64*30
.main_zero_loop:
    mova                [cq+r7-64*2], m8
    mova                [cq+r7-64*1], m8
    mova                [cq+r7+64*0], m8
    mova                [cq+r7+64*1], m8
    sub                 r7d, 64*4
    jg .main_zero_loop
.main_end:
    ; Finish with the IDCT32_END macro (defined elsewhere) and interleave
    ; the results as words. The low interleaves stay in m0-m7; the high
    ; ones are stored around r5. Trailing numbers are the authors' index
    ; annotations.
    psrld               m11, 11 ; pd_1
    IDCT32_END          0, 15, 8, 9, 10, 1
    IDCT32_END          1, 14, 8, 9, 10, 1
    punpckhwd           m8, m0, m1 ; 16 17
    punpcklwd           m0, m1 ; 0 1
    punpcklwd           m1, m14, m15 ; 14 15
    punpckhwd           m14, m15 ; 30 31
    mova                [r5+32*3], m8
    mova                [r5+32*2], m14
    IDCT32_END          2, 15, 8, 9, 10, 1
    IDCT32_END          3, 14, 8, 9, 10, 1
    punpckhwd           m8, m2, m3 ; 18 19
    punpcklwd           m2, m3 ; 2 3
    punpcklwd           m3, m14, m15 ; 12 13
    punpckhwd           m14, m15 ; 28 29
    mova                [r5+32*1], m8
    mova                [r5+32*0], m14
    IDCT32_END          4, 15, 8, 9, 10, 1
    IDCT32_END          5, 14, 8, 9, 10, 1
    punpckhwd           m8, m4, m5 ; 20 21
    punpcklwd           m4, m5 ; 4 5
    punpcklwd           m5, m14, m15 ; 10 11
    punpckhwd           m14, m15 ; 26 27
    mova                [r5-32*1], m8
    mova                [r5-32*2], m14
    IDCT32_END          6, 15, 8, 9, 10, 1
    IDCT32_END          7, 14, 8, 9, 10, 1
    punpckhwd           m8, m6, m7 ; 22 23
    punpcklwd           m6, m7 ; 6 7
    punpcklwd           m7, m14, m15 ; 8 9
    punpckhwd           m14, m15 ; 24 25
    mova                [r5-32*3], m8
    mova                [r5-32*4], m14
    ret
ALIGN function_align
.write_16x16:
    ; Writes 16 result vectors in four write_16x4 calls. m1 is fetched from
    ; stack slot 1 first (presumably left there by the 16-point IDCT --
    ; confirm); m8, m9 and m12 are then spilled so those registers can hold
    ; zero, pixel_10bpc_max and pw_2048 for the writer.
    mova                m1, [rsp+gprsize+32*1]
    mova                [rsp+gprsize+32*0], m8
    mova                [rsp+gprsize+32*1], m9
    mova                [rsp+gprsize+32*2], m12
    vpbroadcastd        m12, [pw_2048]
    vpbroadcastd        m9, [pixel_10bpc_max]
    lea                 r3, [strideq*3]
    pxor                m8, m8
    ; Each group: scale four vectors by pw_2048 with pmulhrsw rounding and
    ; emit them.
    pmulhrsw            m0, m12
    pmulhrsw            m1, m12
    pmulhrsw            m2, m12
    pmulhrsw            m3, m12
    call                m(idct_16x8_internal_10bpc).write_16x4
    pmulhrsw            m0, m12, m4
    pmulhrsw            m1, m12, m5
    pmulhrsw            m2, m12, m6
    pmulhrsw            m3, m12, m7
    call                m(idct_16x8_internal_10bpc).write_16x4
    pmulhrsw            m0, m12, [rsp+gprsize+32*0] ; spilled m8
    pmulhrsw            m1, m12, [rsp+gprsize+32*1] ; spilled m9
    pmulhrsw            m2, m12, m10
    pmulhrsw            m3, m12, m11
    call                m(idct_16x8_internal_10bpc).write_16x4
    pmulhrsw            m0, m12, [rsp+gprsize+32*2] ; spilled m12
    pmulhrsw            m1, m12, m13
    pmulhrsw            m2, m12, m14
    pmulhrsw            m3, m12, m15
    jmp m(idct_16x8_internal_10bpc).write_16x4

; Identity/identity 32x16 inverse transform + add, 10-bit entry point.
; Arguments (cglobal order): dst, stride, c (coefficients), eob.
; The 12-bit variant loads its own pixel maximum into m7 and enters at .pass1.
cglobal inv_txfm_add_identity_identity_32x16_10bpc, 4, 7, 11, dst, stride, c, eob
    vpbroadcastd        m7, [pixel_10bpc_max]
.pass1:
    vpbroadcastd        m8, [pw_2896x8]
    vpbroadcastd        m9, [pw_1697x16]
    vpbroadcastd        m10, [pw_4096]
    lea                 r6, [strideq*5]
    pxor                m6, m6 ; zero source used by .main to clear coefficients
    mov                 r5, dstq ; r5 = top-left of the destination block
    ; Up to eight .main calls follow. eob is reduced after the 1st, 3rd, 5th
    ; and 7th call; as soon as it goes negative the remaining calls are
    ; skipped.
    call                .main
    sub                 eobd, 36
    jl .ret
    add                 cq, 32 ; next 32-byte column of coefficients
    lea                 dstq, [dstq+strideq*4]
    call                .main
    add                 cq, 64*8-32 ; next 8 coefficient rows, first column
    lea                 dstq, [r5+16*1] ; top row again, 16 bytes further right
    call                .main
    sub                 eobd, 107 ; eob < 143
    jl .ret
    add                 cq, 32
    lea                 dstq, [dstq+strideq*4]
    call                .main
    add                 cq, 64*8-32
    lea                 dstq, [r5+16*2]
    call                .main
    sub                 eobd, 128 ; eob < 271
    jl .ret
    add                 cq, 32
    lea                 dstq, [dstq+strideq*4]
    call                .main
    add                 cq, 64*8-32
    lea                 dstq, [r5+16*3]
    call                .main
    sub                 eobd, 128 ; eob < 399
    jl .ret
    add                 cq, 32
    lea                 dstq, [dstq+strideq*4]
    call                .main
.ret:
    RET
ALIGN function_align
.main:
    ; Load 8 coefficient rows (64-byte pitch) and pack each pair of dword
    ; rows to words with signed saturation.
    mova                m0, [cq+64*0]
    packssdw            m0, [cq+64*1]
    mova                m1, [cq+64*2]
    packssdw            m1, [cq+64*3]
    mova                m2, [cq+64*4]
    packssdw            m2, [cq+64*5]
    mova                m3, [cq+64*6]
    packssdw            m3, [cq+64*7]
    ; Scale by m8 (pw_2896x8), double with saturation, apply the IDTX16
    ; macro (defined elsewhere), then scale by m10 (pw_4096).
    REPX                {pmulhrsw x, m8 }, m0, m1, m2, m3
    REPX                {paddsw x, x }, m0, m1, m2, m3
    REPX                {IDTX16 x, 4, 9, _ }, 0, 1, 2, 3
    REPX                {pmulhrsw x, m10}, m0, m1, m2, m3
    ; Clear the eight rows just consumed, then reuse the 16x32 routine's
    ; transpose-and-write tail.
    REPX                {mova [cq+64*x], m6}, 0, 1, 2, 3, 4, 5, 6, 7
    jmp m(inv_txfm_add_identity_identity_16x32_10bpc).main2

; 12-bit identity/identity 32x16: only the pixel maximum in m7 differs from
; the 10-bit routine.
cglobal inv_txfm_add_identity_identity_32x16_12bpc, 4, 7, 11, dst, stride, c, eob
    vpbroadcastd        m7, [pixel_12bpc_max]
    jmp m(inv_txfm_add_identity_identity_32x16_10bpc).pass1

; DCT/DCT 32x32 inverse transform + add, 10-bit.
; Arguments (cglobal order): dst, stride, c (coefficients), eob.
; eob == 0 takes the DC-only shortcut. Otherwise .main runs one to four
; times, gated by eob thresholds 36, 136 and 300; when it stops early, .fast
; zero-fills the rest of the intermediate buffer before the second pass.
cglobal inv_txfm_add_dct_dct_32x32_10bpc, 4, 7, 0, dst, stride, c, eob
    test                eobd, eobd
    jz .dconly
    ; Full prologue only on the non-DC path: 8 GPRs, 16 vector registers and
    ; 32*83 bytes of stack (x86inc PROLOGUE argument order).
    PROLOGUE            0, 8, 16, 32*83, dst, stride, c, eob
%undef cmp
    vpbroadcastd        m12, [clip_18b_min]
    vpbroadcastd        m13, [clip_18b_max]
    lea                 r6, [rsp+32*7]
    call                .main
    cmp                 eobd, 36
    jl .fast
    call                .main
    cmp                 eobd, 136
    jl .fast
    call                .main
    cmp                 eobd, 300
    jl .fast
    call                .main
    jmp .pass2
.dconly:
    imul                r6d, [cq], 181
    vpbroadcastd        m3, [dconly_10bpc]
    mov                 [cq], eobd ; 0
    ; r3d is the eob argument register (4th cglobal argument) and is zero
    ; on this path; presumably reused as the row count for the shared
    ; 32x8 .dconly code -- confirm against that routine.
    or                  r3d, 32
    jmp m(inv_txfm_add_dct_dct_32x8_10bpc).dconly
.fast:
    ; Zero 8 vectors per iteration ([r6-32*4] .. [r6+32*3]) and advance r6
    ; by 32*8 until it reaches rsp+32*71.
    lea                 r4, [rsp+32*71]
    pxor                m0, m0
.fast_loop:
    REPX                {mova [r6+32*x], m0}, -4, -3, -2, -1, 0, 1, 2, 3
    add                 r6, 32*8
    cmp                 r6, r4
    jl .fast_loop
.pass2:
    ; Second pass, left 16 columns: odd half, even half, then the output
    ; stage shared with the 16x32 transform (r2 = dstq + 19*stride,
    ; r3 = 3*stride on entry to it).
    lea                 r3, [rsp+32*3]
    mov                 r4, r6
    lea                 r5, [r6+32*8]
    lea                 r6, [pw_5+128]
    call                .pass2_oddhalf
    call                .pass2_evenhalf
    imul                r2, strideq, 19
    lea                 r3, [strideq*3]
    add                 r2, dstq
    call                m(inv_txfm_add_dct_dct_16x32_10bpc).pass2_end
    ; pass2_end advanced dstq by 3 strides and moved r2 back by 3 strides:
    ; undo that and step 32 bytes to the right for the second 16 columns.
    sub                 dstq, r3
    lea                 r2, [r2+r3+32]
    add                 dstq, 32
    lea                 r3, [rsp+32*11]
    call                .pass2_oddhalf
    call                .pass2_evenhalf
    lea                 r3, [strideq*3]
    call                m(inv_txfm_add_dct_dct_16x32_10bpc).pass2_end
    RET
ALIGN function_align
; First-pass worker called up to four times by the driver above.
.main:
7264*c0909341SAndroid Build Coastguard Worker mova m0, [cq+128* 1] 7265*c0909341SAndroid Build Coastguard Worker mova m1, [cq+128* 7] 7266*c0909341SAndroid Build Coastguard Worker mova m2, [cq+128* 9] 7267*c0909341SAndroid Build Coastguard Worker mova m3, [cq+128*15] 7268*c0909341SAndroid Build Coastguard Worker mova m4, [cq+128*17] 7269*c0909341SAndroid Build Coastguard Worker mova m5, [cq+128*23] 7270*c0909341SAndroid Build Coastguard Worker mova m6, [cq+128*25] 7271*c0909341SAndroid Build Coastguard Worker mova m7, [cq+128*31] 7272*c0909341SAndroid Build Coastguard Worker vpbroadcastd m11, [pd_2048] 7273*c0909341SAndroid Build Coastguard Worker vpbroadcastd m14, [pd_2896] 7274*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_8x32_10bpc).main_oddhalf_part1 7275*c0909341SAndroid Build Coastguard Worker mova m0, [cq+128* 3] 7276*c0909341SAndroid Build Coastguard Worker mova m1, [cq+128* 5] 7277*c0909341SAndroid Build Coastguard Worker mova m2, [cq+128*11] 7278*c0909341SAndroid Build Coastguard Worker mova m3, [cq+128*13] 7279*c0909341SAndroid Build Coastguard Worker mova m4, [cq+128*19] 7280*c0909341SAndroid Build Coastguard Worker mova m5, [cq+128*21] 7281*c0909341SAndroid Build Coastguard Worker mova m6, [cq+128*27] 7282*c0909341SAndroid Build Coastguard Worker mova m7, [cq+128*29] 7283*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_8x32_10bpc).main_oddhalf_part2 7284*c0909341SAndroid Build Coastguard Worker mova m0, [cq+128* 2] 7285*c0909341SAndroid Build Coastguard Worker mova m1, [cq+128* 6] 7286*c0909341SAndroid Build Coastguard Worker mova m2, [cq+128*10] 7287*c0909341SAndroid Build Coastguard Worker mova m3, [cq+128*14] 7288*c0909341SAndroid Build Coastguard Worker mova m4, [cq+128*18] 7289*c0909341SAndroid Build Coastguard Worker mova m5, [cq+128*22] 7290*c0909341SAndroid Build Coastguard Worker mova m6, [cq+128*26] 7291*c0909341SAndroid Build Coastguard Worker mova m7, [cq+128*30] 7292*c0909341SAndroid Build 
Coastguard Worker call m(idct_8x16_internal_10bpc).main_oddhalf 7293*c0909341SAndroid Build Coastguard Worker mova m0, [cq+128* 0] 7294*c0909341SAndroid Build Coastguard Worker mova m1, [cq+128* 4] 7295*c0909341SAndroid Build Coastguard Worker mova m2, [cq+128* 8] 7296*c0909341SAndroid Build Coastguard Worker mova m3, [cq+128*12] 7297*c0909341SAndroid Build Coastguard Worker mova m4, [cq+128*16] 7298*c0909341SAndroid Build Coastguard Worker mova m5, [cq+128*20] 7299*c0909341SAndroid Build Coastguard Worker mova m6, [cq+128*24] 7300*c0909341SAndroid Build Coastguard Worker mova m7, [cq+128*28] 7301*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_10bpc).main 7302*c0909341SAndroid Build Coastguard Worker call m(idct_8x16_internal_10bpc).main_evenhalf 7303*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_8x32_10bpc).main_end 7304*c0909341SAndroid Build Coastguard Worker pxor m15, m15 7305*c0909341SAndroid Build Coastguard Worker mov r7d, 128*29 7306*c0909341SAndroid Build Coastguard Worker.main_zero_loop: 7307*c0909341SAndroid Build Coastguard Worker mova [cq+r7-128*1], m15 7308*c0909341SAndroid Build Coastguard Worker mova [cq+r7+128*0], m15 7309*c0909341SAndroid Build Coastguard Worker mova [cq+r7+128*1], m15 7310*c0909341SAndroid Build Coastguard Worker mova [cq+r7+128*2], m15 7311*c0909341SAndroid Build Coastguard Worker sub r7d, 128*4 7312*c0909341SAndroid Build Coastguard Worker jg .main_zero_loop 7313*c0909341SAndroid Build Coastguard Worker add cq, 32 7314*c0909341SAndroid Build Coastguard Worker mova [r4-32*4], m0 7315*c0909341SAndroid Build Coastguard Worker mova [r4-32*3], m1 7316*c0909341SAndroid Build Coastguard Worker mova [r4-32*2], m2 7317*c0909341SAndroid Build Coastguard Worker mova [r4-32*1], m3 7318*c0909341SAndroid Build Coastguard Worker mova [r4+32*0], m4 7319*c0909341SAndroid Build Coastguard Worker mova [r4+32*1], m5 7320*c0909341SAndroid Build Coastguard Worker mova [r4+32*2], m6 7321*c0909341SAndroid 
Build Coastguard Worker mova [r4+32*3], m7 7322*c0909341SAndroid Build Coastguard Worker mova m0, [r5+32*3] 7323*c0909341SAndroid Build Coastguard Worker mova m1, [r5+32*2] 7324*c0909341SAndroid Build Coastguard Worker mova m2, [r5+32*1] 7325*c0909341SAndroid Build Coastguard Worker mova m3, [r5+32*0] 7326*c0909341SAndroid Build Coastguard Worker mova m4, [r5-32*1] 7327*c0909341SAndroid Build Coastguard Worker mova m5, [r5-32*2] 7328*c0909341SAndroid Build Coastguard Worker mova m6, [r5-32*3] 7329*c0909341SAndroid Build Coastguard Worker mova m7, [r5-32*4] 7330*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_8x32_10bpc).transpose 7331*c0909341SAndroid Build Coastguard Worker mova [r5-32*4], m0 7332*c0909341SAndroid Build Coastguard Worker mova [r5-32*3], m1 7333*c0909341SAndroid Build Coastguard Worker mova [r5-32*2], m2 7334*c0909341SAndroid Build Coastguard Worker mova [r5-32*1], m3 7335*c0909341SAndroid Build Coastguard Worker mova [r5+32*0], m4 7336*c0909341SAndroid Build Coastguard Worker mova [r5+32*1], m5 7337*c0909341SAndroid Build Coastguard Worker mova [r5+32*2], m6 7338*c0909341SAndroid Build Coastguard Worker mova [r5+32*3], m7 7339*c0909341SAndroid Build Coastguard Worker ret 7340*c0909341SAndroid Build Coastguard WorkerALIGN function_align 7341*c0909341SAndroid Build Coastguard Worker.pass2_oddhalf: 7342*c0909341SAndroid Build Coastguard Worker mova m0, [r3+32* 1] ; 1 7343*c0909341SAndroid Build Coastguard Worker mova m1, [r3+32* 3] ; 3 7344*c0909341SAndroid Build Coastguard Worker mova m2, [r3+32* 5] ; 5 7345*c0909341SAndroid Build Coastguard Worker mova m3, [r3+32* 7] ; 7 7346*c0909341SAndroid Build Coastguard Worker mova m4, [r3+32*17] ; 9 7347*c0909341SAndroid Build Coastguard Worker mova m5, [r3+32*19] ; 11 7348*c0909341SAndroid Build Coastguard Worker mova m6, [r3+32*21] ; 13 7349*c0909341SAndroid Build Coastguard Worker mova m7, [r3+32*23] ; 15 7350*c0909341SAndroid Build Coastguard Worker mova m8, [r3+32*33] ; 17 
7351*c0909341SAndroid Build Coastguard Worker mova m9, [r3+32*35] ; 19 7352*c0909341SAndroid Build Coastguard Worker mova m10, [r3+32*37] ; 21 7353*c0909341SAndroid Build Coastguard Worker mova m11, [r3+32*39] ; 23 7354*c0909341SAndroid Build Coastguard Worker mova m12, [r3+32*49] ; 25 7355*c0909341SAndroid Build Coastguard Worker mova m13, [r3+32*51] ; 27 7356*c0909341SAndroid Build Coastguard Worker mova m14, [r3+32*53] ; 29 7357*c0909341SAndroid Build Coastguard Worker mova m15, [r3+32*55] ; 31 7358*c0909341SAndroid Build Coastguard Worker jmp m(inv_txfm_add_dct_dct_16x32_8bpc).main_oddhalf 7359*c0909341SAndroid Build Coastguard WorkerALIGN function_align 7360*c0909341SAndroid Build Coastguard Worker.pass2_evenhalf: 7361*c0909341SAndroid Build Coastguard Worker mova m0, [r3+32* 0] ; 0 7362*c0909341SAndroid Build Coastguard Worker mova m1, [r3+32* 2] ; 2 7363*c0909341SAndroid Build Coastguard Worker mova m2, [r3+32* 4] ; 4 7364*c0909341SAndroid Build Coastguard Worker mova m3, [r3+32* 6] ; 6 7365*c0909341SAndroid Build Coastguard Worker mova m4, [r3+32*16] ; 8 7366*c0909341SAndroid Build Coastguard Worker mova m5, [r3+32*18] ; 10 7367*c0909341SAndroid Build Coastguard Worker mova m6, [r3+32*20] ; 12 7368*c0909341SAndroid Build Coastguard Worker mova m7, [r3+32*22] ; 14 7369*c0909341SAndroid Build Coastguard Worker mova m8, [r3+32*32] ; 16 7370*c0909341SAndroid Build Coastguard Worker mova m9, [r3+32*34] ; 18 7371*c0909341SAndroid Build Coastguard Worker mova m10, [r3+32*36] ; 20 7372*c0909341SAndroid Build Coastguard Worker mova m11, [r3+32*38] ; 22 7373*c0909341SAndroid Build Coastguard Worker mova m12, [r3+32*48] ; 24 7374*c0909341SAndroid Build Coastguard Worker mova m13, [r3+32*50] ; 26 7375*c0909341SAndroid Build Coastguard Worker mova m14, [r3+32*52] ; 28 7376*c0909341SAndroid Build Coastguard Worker mova m15, [r3+32*54] ; 30 7377*c0909341SAndroid Build Coastguard Worker mova [rsp+gprsize], m15 7378*c0909341SAndroid Build Coastguard Worker jmp 
m(idct_16x16_internal_8bpc).main 7379*c0909341SAndroid Build Coastguard Worker 7380*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_identity_identity_32x32_10bpc, 4, 8, 8, dst, stride, c, eob 7381*c0909341SAndroid Build Coastguard Worker%undef cmp 7382*c0909341SAndroid Build Coastguard Worker vpbroadcastd m7, [pixel_10bpc_max] 7383*c0909341SAndroid Build Coastguard Worker.pass1: 7384*c0909341SAndroid Build Coastguard Worker vpbroadcastd m5, [pw_8192] 7385*c0909341SAndroid Build Coastguard Worker pxor m6, m6 7386*c0909341SAndroid Build Coastguard Worker lea r6, [strideq*3] 7387*c0909341SAndroid Build Coastguard Worker lea r5, [strideq*5] 7388*c0909341SAndroid Build Coastguard Worker lea r4, [strideq+r6*2] ; strideq*7 7389*c0909341SAndroid Build Coastguard Worker call .main ; 0 7390*c0909341SAndroid Build Coastguard Worker cmp eobd, 36 7391*c0909341SAndroid Build Coastguard Worker jl .ret 7392*c0909341SAndroid Build Coastguard Worker add cq, 128*8 ; 0 1 7393*c0909341SAndroid Build Coastguard Worker mov r7, dstq ; 1 7394*c0909341SAndroid Build Coastguard Worker add dstq, 16 7395*c0909341SAndroid Build Coastguard Worker call .main 7396*c0909341SAndroid Build Coastguard Worker call .main2 7397*c0909341SAndroid Build Coastguard Worker cmp eobd, 136 7398*c0909341SAndroid Build Coastguard Worker jl .ret 7399*c0909341SAndroid Build Coastguard Worker add cq, 128*16-32 ; 0 1 2 7400*c0909341SAndroid Build Coastguard Worker lea dstq, [r7+16*2] ; 1 2 7401*c0909341SAndroid Build Coastguard Worker call .main ; 2 7402*c0909341SAndroid Build Coastguard Worker call .main2 7403*c0909341SAndroid Build Coastguard Worker call .main2 7404*c0909341SAndroid Build Coastguard Worker cmp eobd, 300 7405*c0909341SAndroid Build Coastguard Worker jl .ret 7406*c0909341SAndroid Build Coastguard Worker add cq, 128*24-64 ; 0 1 2 3 7407*c0909341SAndroid Build Coastguard Worker add r7, 16*3 ; 1 2 3 7408*c0909341SAndroid Build Coastguard Worker mov dstq, r7 ; 2 3 7409*c0909341SAndroid Build 
Coastguard Worker call .main ; 3 7410*c0909341SAndroid Build Coastguard Worker call .main2 7411*c0909341SAndroid Build Coastguard Worker call .main2 7412*c0909341SAndroid Build Coastguard Worker call .main2 7413*c0909341SAndroid Build Coastguard Worker cmp eobd, 535 7414*c0909341SAndroid Build Coastguard Worker jl .ret 7415*c0909341SAndroid Build Coastguard Worker add cq, 128*24-64 ; 0 1 2 3 7416*c0909341SAndroid Build Coastguard Worker lea dstq, [r7+strideq*8] ; 1 2 3 4 7417*c0909341SAndroid Build Coastguard Worker mov r7, dstq ; 2 3 4 7418*c0909341SAndroid Build Coastguard Worker call .main ; 3 4 7419*c0909341SAndroid Build Coastguard Worker call .main2 7420*c0909341SAndroid Build Coastguard Worker call .main2 7421*c0909341SAndroid Build Coastguard Worker cmp eobd, 755 7422*c0909341SAndroid Build Coastguard Worker jl .ret 7423*c0909341SAndroid Build Coastguard Worker add cq, 128*16-32 ; 0 1 2 3 7424*c0909341SAndroid Build Coastguard Worker lea dstq, [r7+strideq*8] ; 1 2 3 4 7425*c0909341SAndroid Build Coastguard Worker call .main ; 2 3 4 5 7426*c0909341SAndroid Build Coastguard Worker call .main2 ; 3 4 5 7427*c0909341SAndroid Build Coastguard Worker cmp eobd, 911 7428*c0909341SAndroid Build Coastguard Worker jl .ret 7429*c0909341SAndroid Build Coastguard Worker add cq, 128*8 ; 0 1 2 3 7430*c0909341SAndroid Build Coastguard Worker add dstq, 16 ; 1 2 3 4 7431*c0909341SAndroid Build Coastguard Worker call .main ; 2 3 4 5 7432*c0909341SAndroid Build Coastguard Worker.ret: ; 3 4 5 6 7433*c0909341SAndroid Build Coastguard Worker RET 7434*c0909341SAndroid Build Coastguard WorkerALIGN function_align 7435*c0909341SAndroid Build Coastguard Worker.main2: 7436*c0909341SAndroid Build Coastguard Worker sub cq, 128*8-32 7437*c0909341SAndroid Build Coastguard Worker lea dstq, [dstq+strideq*8-16] 7438*c0909341SAndroid Build Coastguard Worker.main: 7439*c0909341SAndroid Build Coastguard Worker mova m0, [cq+128*0] 7440*c0909341SAndroid Build Coastguard Worker packssdw m0, 
[cq+128*1] 7441*c0909341SAndroid Build Coastguard Worker mova m1, [cq+128*2] 7442*c0909341SAndroid Build Coastguard Worker packssdw m1, [cq+128*3] 7443*c0909341SAndroid Build Coastguard Worker mova m2, [cq+128*4] 7444*c0909341SAndroid Build Coastguard Worker packssdw m2, [cq+128*5] 7445*c0909341SAndroid Build Coastguard Worker mova m3, [cq+128*6] 7446*c0909341SAndroid Build Coastguard Worker packssdw m3, [cq+128*7] 7447*c0909341SAndroid Build Coastguard Worker REPX {pmulhrsw x, m5}, m0, m1, m2, m3 7448*c0909341SAndroid Build Coastguard Worker jmp m(inv_txfm_add_identity_identity_8x32_10bpc).main_zero 7449*c0909341SAndroid Build Coastguard Worker 7450*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_identity_identity_32x32_12bpc, 4, 8, 8, dst, stride, c, eob 7451*c0909341SAndroid Build Coastguard Worker vpbroadcastd m7, [pixel_12bpc_max] 7452*c0909341SAndroid Build Coastguard Worker jmp m(inv_txfm_add_identity_identity_32x32_10bpc).pass1 7453*c0909341SAndroid Build Coastguard Worker 7454*c0909341SAndroid Build Coastguard Worker%macro IDCT64_PART2_END 6-10 ; out, src[1-2], tmp[1-3], (offset[1-4]) 7455*c0909341SAndroid Build Coastguard Worker%if %1 & 1 7456*c0909341SAndroid Build Coastguard Worker mova m%5, [r5-32*(51-%1)] ; idct16 out 0+n 7457*c0909341SAndroid Build Coastguard Worker mova m%4, [r4-32*(14+%1)] ; idct32 out31-n 7458*c0909341SAndroid Build Coastguard Worker%else 7459*c0909341SAndroid Build Coastguard Worker mova m%5, [r4-32*(45-%1)] 7460*c0909341SAndroid Build Coastguard Worker mova m%4, [r5-32*(20+%1)] 7461*c0909341SAndroid Build Coastguard Worker%endif 7462*c0909341SAndroid Build Coastguard Worker paddsw m%6, m%5, m%4 ; idct32 out 0+n 7463*c0909341SAndroid Build Coastguard Worker psubsw m%5, m%4 ; idct32 out31-n 7464*c0909341SAndroid Build Coastguard Worker paddsw m%4, m%5, m%3 ; out31-n 7465*c0909341SAndroid Build Coastguard Worker psubsw m%5, m%3 ; out32+n 7466*c0909341SAndroid Build Coastguard Worker paddsw m%3, m%6, m%2 ; out 0+n 
7467*c0909341SAndroid Build Coastguard Worker psubsw m%6, m%2 ; out63-n 7468*c0909341SAndroid Build Coastguard Worker REPX {pmulhrsw x, m14}, m%5, m%6, m%4, m%3 7469*c0909341SAndroid Build Coastguard Worker%if %1 & 1 7470*c0909341SAndroid Build Coastguard Worker %define %%d0 r2 7471*c0909341SAndroid Build Coastguard Worker %define %%d1 dstq 7472*c0909341SAndroid Build Coastguard Worker%else 7473*c0909341SAndroid Build Coastguard Worker %define %%d0 dstq 7474*c0909341SAndroid Build Coastguard Worker %define %%d1 r2 7475*c0909341SAndroid Build Coastguard Worker%endif 7476*c0909341SAndroid Build Coastguard Worker paddw m%3, [%%d0+%7 ] 7477*c0909341SAndroid Build Coastguard Worker paddw m%4, [%%d1+%8 ] 7478*c0909341SAndroid Build Coastguard Worker paddw m%5, [%%d0+%9 ] 7479*c0909341SAndroid Build Coastguard Worker paddw m%6, [%%d1+%10] 7480*c0909341SAndroid Build Coastguard Worker pxor m%2, m%2 7481*c0909341SAndroid Build Coastguard Worker REPX {pmaxsw x, m%2}, m%3, m%4, m%5, m%6 7482*c0909341SAndroid Build Coastguard Worker vpbroadcastd m%2, [pixel_10bpc_max] 7483*c0909341SAndroid Build Coastguard Worker REPX {pminsw x, m%2}, m%3, m%4, m%5, m%6 7484*c0909341SAndroid Build Coastguard Worker mova [%%d0+%7 ], m%3 7485*c0909341SAndroid Build Coastguard Worker mova [%%d1+%8 ], m%4 7486*c0909341SAndroid Build Coastguard Worker mova [%%d0+%9 ], m%5 7487*c0909341SAndroid Build Coastguard Worker mova [%%d1+%10], m%6 7488*c0909341SAndroid Build Coastguard Worker%endmacro 7489*c0909341SAndroid Build Coastguard Worker 7490*c0909341SAndroid Build Coastguard Workercglobal inv_txfm_add_dct_dct_16x64_10bpc, 4, 7, 0, dst, stride, c, eob 7491*c0909341SAndroid Build Coastguard Worker test eobd, eobd 7492*c0909341SAndroid Build Coastguard Worker jz .dconly 7493*c0909341SAndroid Build Coastguard Worker PROLOGUE 0, 10, 16, 32*98, dst, stride, c, eob 7494*c0909341SAndroid Build Coastguard Worker%undef cmp 7495*c0909341SAndroid Build Coastguard Worker vpbroadcastd m11, [pd_2048] 
7496*c0909341SAndroid Build Coastguard Worker vpbroadcastd m12, [clip_18b_min] 7497*c0909341SAndroid Build Coastguard Worker vpbroadcastd m13, [clip_18b_max] 7498*c0909341SAndroid Build Coastguard Worker vpbroadcastd m14, [pd_2896] 7499*c0909341SAndroid Build Coastguard Worker lea r6, [rsp+32*6] 7500*c0909341SAndroid Build Coastguard Worker call .main 7501*c0909341SAndroid Build Coastguard Worker sub eobd, 44 7502*c0909341SAndroid Build Coastguard Worker jl .fast 7503*c0909341SAndroid Build Coastguard Worker call .main 7504*c0909341SAndroid Build Coastguard Worker sub eobd, 107 7505*c0909341SAndroid Build Coastguard Worker jl .fast 7506*c0909341SAndroid Build Coastguard Worker call .main 7507*c0909341SAndroid Build Coastguard Worker sub eobd, 128 7508*c0909341SAndroid Build Coastguard Worker jl .fast 7509*c0909341SAndroid Build Coastguard Worker call .main 7510*c0909341SAndroid Build Coastguard Worker jmp .pass2 7511*c0909341SAndroid Build Coastguard Worker.dconly: 7512*c0909341SAndroid Build Coastguard Worker imul r6d, [cq], 181 7513*c0909341SAndroid Build Coastguard Worker vpbroadcastd m3, [dconly_10bpc] 7514*c0909341SAndroid Build Coastguard Worker mov [cq], eobd ; 0 7515*c0909341SAndroid Build Coastguard Worker or r3d, 64 7516*c0909341SAndroid Build Coastguard Worker add r6d, 640 7517*c0909341SAndroid Build Coastguard Worker sar r6d, 10 7518*c0909341SAndroid Build Coastguard Worker jmp m(inv_txfm_add_dct_dct_16x4_10bpc).dconly3 7519*c0909341SAndroid Build Coastguard Worker.fast: 7520*c0909341SAndroid Build Coastguard Worker lea r4, [rsp+32*38] 7521*c0909341SAndroid Build Coastguard Worker pxor m0, m0 7522*c0909341SAndroid Build Coastguard Worker.fast_loop: 7523*c0909341SAndroid Build Coastguard Worker REPX {mova [r6+32*x], m0}, -4, -3, -2, -1, 0, 1, 2, 3 7524*c0909341SAndroid Build Coastguard Worker add r6, 32*8 7525*c0909341SAndroid Build Coastguard Worker cmp r6, r4 7526*c0909341SAndroid Build Coastguard Worker jl .fast_loop 7527*c0909341SAndroid Build 
Coastguard Worker.pass2: 7528*c0909341SAndroid Build Coastguard Worker lea r6, [pw_5+128] 7529*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+32* 2] ; in0 7530*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+32* 6] ; in4 7531*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+32*10] ; in8 7532*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+32*14] ; in12 7533*c0909341SAndroid Build Coastguard Worker mova m4, [rsp+32*18] ; in16 7534*c0909341SAndroid Build Coastguard Worker mova m5, [rsp+32*22] ; in20 7535*c0909341SAndroid Build Coastguard Worker mova m6, [rsp+32*26] ; in24 7536*c0909341SAndroid Build Coastguard Worker mova m7, [rsp+32*30] ; in28 7537*c0909341SAndroid Build Coastguard Worker pxor m8, m8 7538*c0909341SAndroid Build Coastguard Worker REPX {mova x, m8}, m9, m10, m11, m12, m13, m14 7539*c0909341SAndroid Build Coastguard Worker mova [rsp], m8 7540*c0909341SAndroid Build Coastguard Worker call m(idct_16x16_internal_8bpc).main 7541*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+32*1] 7542*c0909341SAndroid Build Coastguard Worker lea r4, [rsp+32*38] 7543*c0909341SAndroid Build Coastguard Worker mova [r4-32*4], m0 7544*c0909341SAndroid Build Coastguard Worker mova [r4-32*3], m1 7545*c0909341SAndroid Build Coastguard Worker mova [r4-32*2], m2 7546*c0909341SAndroid Build Coastguard Worker mova [r4-32*1], m3 7547*c0909341SAndroid Build Coastguard Worker mova [r4+32*0], m4 7548*c0909341SAndroid Build Coastguard Worker mova [r4+32*1], m5 7549*c0909341SAndroid Build Coastguard Worker mova [r4+32*2], m6 7550*c0909341SAndroid Build Coastguard Worker mova [r4+32*3], m7 7551*c0909341SAndroid Build Coastguard Worker add r4, 32*8 7552*c0909341SAndroid Build Coastguard Worker mova [r4-32*4], m8 7553*c0909341SAndroid Build Coastguard Worker mova [r4-32*3], m9 7554*c0909341SAndroid Build Coastguard Worker mova [r4-32*2], m10 7555*c0909341SAndroid Build Coastguard Worker mova [r4-32*1], m11 7556*c0909341SAndroid Build Coastguard Worker mova 
[r4+32*0], m12 7557*c0909341SAndroid Build Coastguard Worker mova [r4+32*1], m13 7558*c0909341SAndroid Build Coastguard Worker mova [r4+32*2], m14 7559*c0909341SAndroid Build Coastguard Worker mova [r4+32*3], m15 7560*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+32* 4] ; in2 7561*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+32* 8] ; in6 7562*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+32*12] ; in10 7563*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+32*16] ; in14 7564*c0909341SAndroid Build Coastguard Worker mova m4, [rsp+32*20] ; in18 7565*c0909341SAndroid Build Coastguard Worker mova m5, [rsp+32*24] ; in22 7566*c0909341SAndroid Build Coastguard Worker mova m6, [rsp+32*28] ; in26 7567*c0909341SAndroid Build Coastguard Worker mova m7, [rsp+32*32] ; in30 7568*c0909341SAndroid Build Coastguard Worker lea r5, [r4+32*16] 7569*c0909341SAndroid Build Coastguard Worker add r4, 32*8 7570*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_16x32_8bpc).main_oddhalf_fast 7571*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+32* 3] ; in1 7572*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+32*33] ; in31 7573*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+32*19] ; in17 7574*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+32*17] ; in15 7575*c0909341SAndroid Build Coastguard Worker mova m4, [rsp+32*11] ; in9 7576*c0909341SAndroid Build Coastguard Worker mova m5, [rsp+32*25] ; in23 7577*c0909341SAndroid Build Coastguard Worker mova m6, [rsp+32*27] ; in25 7578*c0909341SAndroid Build Coastguard Worker mova m7, [rsp+32* 9] ; in7 7579*c0909341SAndroid Build Coastguard Worker lea r6, [idct64_mul - 8] 7580*c0909341SAndroid Build Coastguard Worker add r4, 32*16 7581*c0909341SAndroid Build Coastguard Worker add r5, 32*32 7582*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_16x64_8bpc).main_part1 7583*c0909341SAndroid Build Coastguard Worker mova m0, [rsp+32* 7] ; in5 
7584*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+32*29] ; in27 7585*c0909341SAndroid Build Coastguard Worker mova m2, [rsp+32*23] ; in21 7586*c0909341SAndroid Build Coastguard Worker mova m3, [rsp+32*13] ; in11 7587*c0909341SAndroid Build Coastguard Worker mova m4, [rsp+32*15] ; in13 7588*c0909341SAndroid Build Coastguard Worker mova m5, [rsp+32*21] ; in19 7589*c0909341SAndroid Build Coastguard Worker mova m6, [rsp+32*31] ; in29 7590*c0909341SAndroid Build Coastguard Worker mova m7, [rsp+32* 5] ; in3 7591*c0909341SAndroid Build Coastguard Worker add r6, 8 7592*c0909341SAndroid Build Coastguard Worker add r4, 32*8 7593*c0909341SAndroid Build Coastguard Worker sub r5, 32*8 7594*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_16x64_8bpc).main_part1 7595*c0909341SAndroid Build Coastguard Worker lea r8, [strideq*4] 7596*c0909341SAndroid Build Coastguard Worker lea r9, [strideq*5] 7597*c0909341SAndroid Build Coastguard Worker lea r3, [r9+strideq*1] ; stride*6 7598*c0909341SAndroid Build Coastguard Worker lea r7, [r9+strideq*2] ; stride*7 7599*c0909341SAndroid Build Coastguard Worker call .main_part2_pass2 7600*c0909341SAndroid Build Coastguard Worker RET 7601*c0909341SAndroid Build Coastguard WorkerALIGN function_align 7602*c0909341SAndroid Build Coastguard Worker.main: 7603*c0909341SAndroid Build Coastguard Worker mova m0, [cq+128* 1] 7604*c0909341SAndroid Build Coastguard Worker mova m1, [cq+128* 3] 7605*c0909341SAndroid Build Coastguard Worker mova m2, [cq+128* 5] 7606*c0909341SAndroid Build Coastguard Worker mova m3, [cq+128* 7] 7607*c0909341SAndroid Build Coastguard Worker mova m4, [cq+128* 9] 7608*c0909341SAndroid Build Coastguard Worker mova m5, [cq+128*11] 7609*c0909341SAndroid Build Coastguard Worker mova m6, [cq+128*13] 7610*c0909341SAndroid Build Coastguard Worker mova m7, [cq+128*15] 7611*c0909341SAndroid Build Coastguard Worker call m(idct_8x16_internal_10bpc).main_oddhalf 7612*c0909341SAndroid Build Coastguard Worker mova m0, 
[cq+128* 0] 7613*c0909341SAndroid Build Coastguard Worker mova m1, [cq+128* 2] 7614*c0909341SAndroid Build Coastguard Worker mova m2, [cq+128* 4] 7615*c0909341SAndroid Build Coastguard Worker mova m3, [cq+128* 6] 7616*c0909341SAndroid Build Coastguard Worker mova m4, [cq+128* 8] 7617*c0909341SAndroid Build Coastguard Worker mova m5, [cq+128*10] 7618*c0909341SAndroid Build Coastguard Worker mova m6, [cq+128*12] 7619*c0909341SAndroid Build Coastguard Worker mova m7, [cq+128*14] 7620*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_10bpc).main 7621*c0909341SAndroid Build Coastguard Worker call m(idct_8x16_internal_10bpc).main_evenhalf 7622*c0909341SAndroid Build Coastguard Worker pxor m15, m15 7623*c0909341SAndroid Build Coastguard Worker mov r7d, 128*13 7624*c0909341SAndroid Build Coastguard Worker.main_zero_loop: 7625*c0909341SAndroid Build Coastguard Worker mova [cq+r7-128*1], m15 7626*c0909341SAndroid Build Coastguard Worker mova [cq+r7+128*0], m15 7627*c0909341SAndroid Build Coastguard Worker mova [cq+r7+128*1], m15 7628*c0909341SAndroid Build Coastguard Worker mova [cq+r7+128*2], m15 7629*c0909341SAndroid Build Coastguard Worker sub r7d, 128*4 7630*c0909341SAndroid Build Coastguard Worker jg .main_zero_loop 7631*c0909341SAndroid Build Coastguard Worker add cq, 32 7632*c0909341SAndroid Build Coastguard Worker psrld m15, m11, 10 ; pd_2 7633*c0909341SAndroid Build Coastguard Worker mova m8, [r6-32*4] 7634*c0909341SAndroid Build Coastguard Worker mova m9, [r6+32*3] 7635*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m15}, m0, m1, m2, m3, m4, m5, m6, m7 7636*c0909341SAndroid Build Coastguard Worker psubd m10, m0, m8 ; out15 7637*c0909341SAndroid Build Coastguard Worker paddd m0, m8 ; out0 7638*c0909341SAndroid Build Coastguard Worker mova m8, [r6-32*3] 7639*c0909341SAndroid Build Coastguard Worker psubd m15, m7, m9 ; out8 7640*c0909341SAndroid Build Coastguard Worker paddd m7, m9 ; out7 7641*c0909341SAndroid Build Coastguard Worker mova m9, 
[r6+32*2] 7642*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 2}, m0, m15, m10, m7 7643*c0909341SAndroid Build Coastguard Worker packssdw m0, m15 7644*c0909341SAndroid Build Coastguard Worker packssdw m7, m10 7645*c0909341SAndroid Build Coastguard Worker psubd m10, m1, m8 ; out14 7646*c0909341SAndroid Build Coastguard Worker paddd m1, m8 ; out1 7647*c0909341SAndroid Build Coastguard Worker mova m8, [r6-32*2] 7648*c0909341SAndroid Build Coastguard Worker psubd m15, m6, m9 ; out9 7649*c0909341SAndroid Build Coastguard Worker paddd m6, m9 ; out6 7650*c0909341SAndroid Build Coastguard Worker mova m9, [r6+32*1] 7651*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 2}, m1, m15, m10, m6 7652*c0909341SAndroid Build Coastguard Worker packssdw m1, m15 7653*c0909341SAndroid Build Coastguard Worker packssdw m6, m10 7654*c0909341SAndroid Build Coastguard Worker psubd m10, m2, m8 ; out13 7655*c0909341SAndroid Build Coastguard Worker paddd m2, m8 ; out2 7656*c0909341SAndroid Build Coastguard Worker mova m8, [r6-32*1] 7657*c0909341SAndroid Build Coastguard Worker psubd m15, m5, m9 ; out10 7658*c0909341SAndroid Build Coastguard Worker paddd m5, m9 ; out5 7659*c0909341SAndroid Build Coastguard Worker mova m9, [r6+32*0] 7660*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 2}, m2, m15, m10, m5 7661*c0909341SAndroid Build Coastguard Worker packssdw m2, m15 7662*c0909341SAndroid Build Coastguard Worker packssdw m5, m10 7663*c0909341SAndroid Build Coastguard Worker psubd m10, m3, m8 ; out12 7664*c0909341SAndroid Build Coastguard Worker paddd m3, m8 ; out3 7665*c0909341SAndroid Build Coastguard Worker psubd m15, m4, m9 ; out11 7666*c0909341SAndroid Build Coastguard Worker paddd m4, m9 ; out4 7667*c0909341SAndroid Build Coastguard Worker REPX {psrad x, 2}, m3, m15, m10, m4 7668*c0909341SAndroid Build Coastguard Worker packssdw m3, m15 7669*c0909341SAndroid Build Coastguard Worker packssdw m4, m10 7670*c0909341SAndroid Build Coastguard Worker call 
m(idct_16x8_internal_10bpc).transpose3 7671*c0909341SAndroid Build Coastguard Worker mova [r6-32*4], m0 7672*c0909341SAndroid Build Coastguard Worker mova [r6-32*3], m1 7673*c0909341SAndroid Build Coastguard Worker mova [r6-32*2], m2 7674*c0909341SAndroid Build Coastguard Worker mova [r6-32*1], m3 7675*c0909341SAndroid Build Coastguard Worker mova [r6+32*0], m4 7676*c0909341SAndroid Build Coastguard Worker mova [r6+32*1], m5 7677*c0909341SAndroid Build Coastguard Worker mova [r6+32*2], m6 7678*c0909341SAndroid Build Coastguard Worker mova [r6+32*3], m7 7679*c0909341SAndroid Build Coastguard Worker add r6, 32*8 7680*c0909341SAndroid Build Coastguard Worker ret 7681*c0909341SAndroid Build Coastguard Worker.main_part2_pass2: 7682*c0909341SAndroid Build Coastguard Worker vpbroadcastd m11, [pw_1567_3784] 7683*c0909341SAndroid Build Coastguard Worker vpbroadcastd m12, [pw_m3784_1567] 7684*c0909341SAndroid Build Coastguard Worker vpbroadcastd m13, [pw_2896_2896] 7685*c0909341SAndroid Build Coastguard Worker lea r6, [pw_5+128] 7686*c0909341SAndroid Build Coastguard Worker lea r2, [dstq+r7] 7687*c0909341SAndroid Build Coastguard Worker.main_part2_pass2_loop: 7688*c0909341SAndroid Build Coastguard Worker vpbroadcastd m14, [pw_m2896_2896] 7689*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_16x64_8bpc).main_part2_internal 7690*c0909341SAndroid Build Coastguard Worker vpbroadcastd m14, [pw_2048] 7691*c0909341SAndroid Build Coastguard Worker IDCT64_PART2_END 0, 7, 0, 6, 9, 10, strideq*0, r3*4, r8*8, r7*8 7692*c0909341SAndroid Build Coastguard Worker IDCT64_PART2_END 7, 8, 5, 0, 6, 7, strideq*0, r3*4, r8*8, r7*8 7693*c0909341SAndroid Build Coastguard Worker IDCT64_PART2_END 8, 2, 1, 0, 6, 7, strideq*8, r8*4, r9*8, r3*8 7694*c0909341SAndroid Build Coastguard Worker IDCT64_PART2_END 15, 3, 4, 0, 6, 7, strideq*8, r8*4, r9*8, r3*8 7695*c0909341SAndroid Build Coastguard Worker add dstq, strideq 7696*c0909341SAndroid Build Coastguard Worker sub r2, strideq 
; Closing lines of .main_part2_pass2_loop (its label and loop body precede this
; span): repeat until r4 equals r5, then return to the caller.
    cmp             r4, r5
    jne             .main_part2_pass2_loop
    ret

ALIGN function_align
; .main_part1_rect2 / .main_part1 -- idct64 steps 1-5 for one group of four
; odd-indexed inputs.
;   In:   m0-m3    the four inputs of the group (see the table under
;                  .main_part1)
;         r5       pointer to 12 dword multipliers; advanced by 4*12 on return
;         r6       output pointer; eight vectors are written to
;                  [r6-32*4 .. r6+32*3] and r6 is advanced by 32*8 on return
;         m11      rounding term added ahead of every >>12
;         m12/m13  lower/upper clamp bounds (applied with pmaxsd/pminsd)
;   Clobbers: m0-m10, m15
.main_part1_rect2:
    ; Extra entry point: round and shift the four inputs right by 12, then fall
    ; through into the common path.
    ; NOTE(review): presumably the caller has already applied the rect2
    ; multiplier to m0-m3 -- not visible in this span, confirm at the call sites.
    REPX            {paddd x, m11}, m0, m1, m2, m3
    REPX            {psrad x, 12 }, m0, m1, m2, m3
.main_part1: ; idct64 steps 1-5
    ; in1/31/17/15 -> t32a/33/34a/35/60/61a/62/63a
    ; in7/25/23/ 9 -> t56a/57/58a/59/36/37a/38/39a
    ; in5/27/21/11 -> t40a/41/42a/43/52/53a/54/55a
    ; in3/29/19/13 -> t48a/49/50a/51/44/45a/46/47a
    ; Broadcast the first eight multipliers of this group from the r5 table.
    vpbroadcastd    m7, [r5+4*0]
    vpbroadcastd    m8, [r5+4*1]
    vpbroadcastd    m6, [r5+4*2]
    vpbroadcastd    m9, [r5+4*3]
    vpbroadcastd    m5, [r5+4*4]
    vpbroadcastd    m10, [r5+4*5]
    vpbroadcastd    m4, [r5+4*6]
    vpbroadcastd    m15, [r5+4*7]
    ; Each input is multiplied by two table entries, yielding a pair of terms.
    pmulld          m7, m0 ; t63a
    pmulld          m0, m8 ; t32a
    pmulld          m6, m1 ; t62a
    pmulld          m1, m9 ; t33a
    pmulld          m5, m2 ; t61a
    pmulld          m2, m10 ; t34a
    pmulld          m4, m3 ; t60a
    pmulld          m3, m15 ; t35a
    ; Table entries 8/9 are the coefficient pair for the next two
    ; ITX_MULSUB_2D invocations.
    vpbroadcastd    m10, [r5+4*8]
    vpbroadcastd    m15, [r5+4*9]
    ; Round and scale all eight products: (x + m11) >> 12.
    REPX            {paddd x, m11}, m7, m0, m6, m1, m5, m2, m4, m3
    REPX            {psrad x, 12 }, m0, m1, m7, m6, m2, m3, m5, m4
    psubd           m8, m0, m1 ; t33
    paddd           m0, m1 ; t32
    psubd           m1, m7, m6 ; t62
    paddd           m7, m6 ; t63
    psubd           m6, m3, m2 ; t34
    paddd           m3, m2 ; t35
    psubd           m2, m4, m5 ; t61
    paddd           m4, m5 ; t60
    ; Clamp the differences to [m12, m13] before they are multiplied again.
    REPX            {pmaxsd x, m12}, m8, m1, m6, m2
    REPX            {pminsd x, m13}, m8, m1, m6, m2
    ITX_MULSUB_2D   1, 8, 5, 9, _, 11, 10, 15 ; t33a, t62a
    ITX_MULSUB_2D   2, 6, 5, 9, _, 11, 10, 15, 2 ; t61a, t34a
    ; Clamp the sums as well.
    REPX            {pmaxsd x, m12}, m0, m3, m7, m4
    REPX            {pminsd x, m13}, m0, m3, m7, m4
    ; Table entries 10/11: coefficient pair for the final two ITX_MULSUB_2D.
    vpbroadcastd    m10, [r5+4*10]
    vpbroadcastd    m15, [r5+4*11]
    psubd           m5, m0, m3 ; t35a
    paddd           m0, m3 ; t32a
    psubd           m3, m7, m4 ; t60a
    paddd           m7, m4 ; t63a
    psubd           m4, m1, m6 ; t34
    paddd           m1, m6 ; t33
    psubd           m6, m8, m2 ; t61
    paddd           m8, m2 ; t62
    REPX            {pmaxsd x, m12}, m5, m3, m4, m6
    REPX            {pminsd x, m13}, m5, m3, m4, m6
    ITX_MULSUB_2D   3, 5, 2, 9, _, 11, 10, 15 ; t35, t60
    ITX_MULSUB_2D   6, 4, 2, 9, _, 11, 10, 15 ; t34a, t61a
    REPX            {pmaxsd x, m12}, m0, m7, m1, m8
    REPX            {pminsd x, m13}, m0, m7, m1, m8
    ; Step the multiplier table to the next group's 12 entries.
    add             r5, 4*12
    ; Store the eight results around r6, pairing slot -k with slot +k-1.
    mova            [r6-32*4], m0
    mova            [r6+32*3], m7
    mova            [r6-32*3], m1
    mova            [r6+32*2], m8
    mova            [r6-32*2], m6
    mova            [r6+32*1], m4
    mova            [r6-32*1], m3
    mova            [r6+32*0], m5
    add             r6, 32*8
    ret
; .main_part2 -- idct64 steps 6-9, computed in place on the memory below r6
; (every address loaded in the loop is also stored back to).
;   In:   r6       pointer just past the data written by the preceding
;                  .main_part1 calls
;         m11      rounding term, m12/m13 clamp bounds (as in .main_part1)
;         m14      multiplier applied with pmulld near the end of each
;                  iteration; supplied by the caller
;   The loop walks r6 upwards and r5 downwards by one vector per iteration and
;   stops once r6 is no longer below r5.
.main_part2: ; idct64 steps 6-9
    lea             r5, [r6+32*3]
    sub             r6, 32*4
    ; Coefficient pair used by both ITX_MULSUB_2D invocations in the loop.
    vpbroadcastd    m10, [pd_1567]
    vpbroadcastd    m15, [pd_3784]
.main_part2_loop:
    mova            m0, [r6-32*32] ; t32a
    mova            m1, [r5-32*24] ; t39a
    mova            m2, [r5-32*32] ; t63a
    mova            m3, [r6-32*24] ; t56a
    mova            m4, [r6-32*16] ; t40a
    mova            m5, [r5-32* 8] ; t47a
    mova            m6, [r5-32*16] ; t55a
    mova            m7, [r6-32* 8] ; t48a
    psubd           m8, m0, m1 ; t39
    paddd           m0, m1 ; t32
    psubd           m1, m2, m3 ; t56
    paddd           m2, m3 ; t63
    psubd           m3, m5, m4 ; t40
    paddd           m5, m4 ; t47
    psubd           m4, m7, m6 ; t55
    paddd           m7, m6 ; t48
    REPX            {pmaxsd x, m12}, m8, m1, m3, m4
    REPX            {pminsd x, m13}, m8, m1, m3, m4
    ITX_MULSUB_2D   1, 8, 6, 9, _, 11, 10, 15 ; t39a, t56a
    ITX_MULSUB_2D   4, 3, 6, 9, _, 11, 10, 15, 2 ; t55a, t40a
    REPX            {pmaxsd x, m12}, m0, m2, m5, m7
    REPX            {pminsd x, m13}, m0, m5, m2, m7
    psubd           m6, m2, m7 ; t48a
    paddd           m2, m7 ; t63a
    psubd           m7, m0, m5 ; t47a
    paddd           m0, m5 ; t32a
    psubd           m5, m8, m4 ; t55
    paddd           m8, m4 ; t56
    psubd           m4, m1, m3 ; t40
    paddd           m1, m3 ; t39
    ; (the remainder of this loop iteration follows this span)
; Remainder of one .main_part2_loop iteration of
; inv_txfm_add_dct_dct_16x64_10bpc (the loop label and the first half of the
; iteration precede this span).
    REPX            {pmaxsd x, m12}, m6, m7, m5, m4
    REPX            {pminsd x, m13}, m6, m7, m5, m4
    ; Scale the four differences by the caller-supplied multiplier in m14.
    REPX            {pmulld x, m14}, m6, m7, m5, m4
    REPX            {pmaxsd x, m12}, m2, m0, m8, m1
    REPX            {pminsd x, m13}, m2, m0, m8, m1
    ; The rounding term is added to one operand of each pair only, so that both
    ; the sum and the difference formed below carry it exactly once.
    paddd           m6, m11
    paddd           m5, m11
    psubd           m3, m6, m7 ; t47
    paddd           m6, m7 ; t48
    psubd           m7, m5, m4 ; t40a
    paddd           m5, m4 ; t55a
    REPX            {psrad x, 12}, m3, m6, m7, m5
    ; Write the eight results back over the slots that were loaded.
    mova            [r5-32* 8], m2
    mova            [r6-32*32], m0
    mova            [r6-32* 8], m8
    mova            [r5-32*32], m1
    mova            [r5-32*24], m3
    mova            [r6-32*16], m6
    mova            [r6-32*24], m7
    mova            [r5-32*16], m5
    ; r6 ascends and r5 descends one vector each; stop once they meet.
    add             r6, 32
    sub             r5, 32
    cmp             r6, r5
    jl              .main_part2_loop
    ret

; inv_txfm_add_dct_dct_32x64_10bpc(dst, stride, c, eob)
;   Inverse DCT/DCT, 32x64, 10 bpc: adds the result to dst.
;   eob == 0 takes the DC-only shortcut (.dconly). Otherwise the first pass
;   runs .main once per 32-byte slice of the coefficient buffer (one to four
;   times, chosen by eob), .fast zero-fills whatever was skipped, and .pass2
;   runs the second pass through the 8bpc idct16/idct32/idct64 helpers.
;   The full path reserves 32*134 bytes of stack via PROLOGUE.
cglobal inv_txfm_add_dct_dct_32x64_10bpc, 4, 7, 0, dst, stride, c, eob
    test            eobd, eobd
    jz              .dconly
    PROLOGUE        0, 11, 16, 32*134, dst, stride, c, eob
    ; NOTE(review): presumably removes a `cmp` macro override installed by the
    ; include files so the plain instruction is used below -- confirm.
%undef cmp
    ; Clamp bounds used throughout the first pass.
    vpbroadcastd    m12, [clip_18b_min]
    vpbroadcastd    m13, [clip_18b_max]
    lea             r6, [rsp+32*6]
    ; One .main call per slice; each eob threshold decides whether the next
    ; slice can still hold non-zero coefficients.
    call            .main
    cmp             eobd, 36
    jl              .fast
    call            .main
    cmp             eobd, 136
    jl              .fast
    call            .main
    cmp             eobd, 300
    jl              .fast
    call            .main
    jmp             .pass2
.dconly:
    ; DC-only path: r6d = (((c[0]*181 + 128) >> 8) * 181 + 384) >> 9, then the
    ; 32x8 function's .dconly2 finishes the job.
    imul            r6d, [cq], 181
    vpbroadcastd    m3, [dconly_10bpc]
    mov             [cq], eobd ; 0
    ; eobd is r3d and is zero on this path, so this leaves r3d = 64.
    ; NOTE(review): presumably the row count consumed by .dconly2 -- confirm.
    or              r3d, 64
    add             r6d, 128
    sar             r6d, 8
    imul            r6d, 181
    add             r6d, 384
    sar             r6d, 9
    jmp             m(inv_txfm_add_dct_dct_32x8_10bpc).dconly2
.fast:
    ; Zero-fill, eight vectors per iteration, from r6 up to rsp+32*70.
    ; NOTE(review): relies on r6 having been advanced by the helpers called
    ; from .main -- that update is not visible here.
    lea             r4, [rsp+32*70]
    pxor            m0, m0
.fast_loop:
    REPX            {mova [r6+32*x], m0}, -4, -3, -2, -1, 0, 1, 2, 3
    add             r6, 32*8
    cmp             r6, r4
    jl              .fast_loop
.pass2:
    ; Second pass. r10 walks the first-pass output in steps of 32*8 bytes; dst
    ; moves 32 bytes to the right after each iteration.
    lea             r6, [pw_5 + 128]
    mov             r10, rsp
    lea             r8, [strideq*4]
    lea             r9, [strideq*5]
    lea             r3, [r9+strideq*1] ; stride*6
    lea             r7, [r9+strideq*2] ; stride*7
.pass2_loop:
    mova            m0, [r10+32* 2] ; in0
    mova            m1, [r10+32* 6] ; in4
    mova            m2, [r10+32*18] ; in8
    mova            m3, [r10+32*22] ; in12
    mova            m4, [r10+32*34] ; in16
    mova            m5, [r10+32*38] ; in20
    mova            m6, [r10+32*50] ; in24
    mova            m7, [r10+32*54] ; in28
    ; The remaining idct16 inputs are zero: m8-m14 in registers, and one more
    ; through [rsp] (the 64x16 function stores m15 there before the same call).
    pxor            m8, m8
    REPX            {mova x, m8}, m9, m10, m11, m12, m13, m14
    mova            [rsp], m8
    call            m(idct_16x16_internal_8bpc).main
    ; One result is picked up from the stack slot [rsp+32*1] after the call.
    mova            m1, [rsp+32*1]
    ; Park the 16 idct16 outputs in the scratch area starting at rsp+32*66.
    lea             r4, [rsp+32*70]
    mova            [r4-32*4], m0
    mova            [r4-32*3], m1
    mova            [r4-32*2], m2
    mova            [r4-32*1], m3
    mova            [r4+32*0], m4
    mova            [r4+32*1], m5
    mova            [r4+32*2], m6
    mova            [r4+32*3], m7
    add             r4, 32*8
    mova            [r4-32*4], m8
    mova            [r4-32*3], m9
    mova            [r4-32*2], m10
    mova            [r4-32*1], m11
    mova            [r4+32*0], m12
    mova            [r4+32*1], m13
    mova            [r4+32*2], m14
    mova            [r4+32*3], m15
    mova            m0, [r10+32* 4] ; in2
    mova            m1, [r10+32* 8] ; in6
    mova            m2, [r10+32*20] ; in10
    mova            m3, [r10+32*24] ; in14
    mova            m4, [r10+32*36] ; in18
    mova            m5, [r10+32*40] ; in22
    mova            m6, [r10+32*52] ; in26
    mova            m7, [r10+32*56] ; in30
    lea             r5, [r4+32*16]
    add             r4, 32*8
    call            m(inv_txfm_add_dct_dct_16x32_8bpc).main_oddhalf_fast
    mova            m0, [r10+32* 3] ; in1
    mova            m1, [r10+32*57] ; in31
    mova            m2, [r10+32*35] ; in17
    mova            m3, [r10+32*25] ; in15
    mova            m4, [r10+32*19] ; in9
    mova            m5, [r10+32*41] ; in23
    mova            m6, [r10+32*51] ; in25
    mova            m7, [r10+32* 9] ; in7
    ; r6 is pointed 8 bytes ahead of idct64_mul for the first .main_part1 call
    ; and bumped by 8 before the second one.
    lea             r6, [idct64_mul - 8]
    add             r4, 32*16
    add             r5, 32*32
    call            m(inv_txfm_add_dct_dct_16x64_8bpc).main_part1
    mova            m0, [r10+32* 7] ; in5
    mova            m1, [r10+32*53] ; in27
    mova            m2, [r10+32*39] ; in21
    mova            m3, [r10+32*21] ; in11
    mova            m4, [r10+32*23] ; in13
    mova            m5, [r10+32*37] ; in19
    mova            m6, [r10+32*55] ; in29
    mova            m7, [r10+32* 5] ; in3
    add             r6, 8
    add             r4, 32*8
    sub             r5, 32*8
    call            m(inv_txfm_add_dct_dct_16x64_8bpc).main_part1
    call            m(inv_txfm_add_dct_dct_16x64_10bpc).main_part2_pass2
    ; Advance to the next group of first-pass vectors; r4 becomes the end
    ; marker for the r10 walk.
    add             r10, 32*8
    sub             r4, 32*98 ; rsp+32*16
    ; Step dst back by r8 (stride*4) and 32 bytes to the right.
    sub             dstq, r8
    add             dstq, 32
    cmp             r10, r4
    jl              .pass2_loop
    RET
ALIGN function_align
; .main -- first pass over one 32-byte slice of the coefficient buffer.
;   Loads the 32 vectors [cq+128*k], k = 0..31, each multiplied by pd_2896
;   (m14) on load, feeds them to the *_rect2 helpers of the 8x32 / 8x16 / 8x8
;   10bpc transforms, zeroes the consumed coefficients, and advances cq by 32.
;   Afterwards two groups of eight vectors are transposed: the first is stored
;   at [r4-32*4 .. r4+32*3]; the second is read from r5 in reverse order and
;   written back to r5 in forward order.
;   NOTE(review): r4, r5 and r6 are not set in this routine -- presumably the
;   helpers called here leave them pointing at the output slots; confirm.
.main:
    vpbroadcastd    m14, [pd_2896]
    vpbroadcastd    m11, [pd_2048]
    pmulld          m0, m14, [cq+128* 1]
    pmulld          m1, m14, [cq+128* 7]
    pmulld          m2, m14, [cq+128* 9]
    pmulld          m3, m14, [cq+128*15]
    pmulld          m4, m14, [cq+128*17]
    pmulld          m5, m14, [cq+128*23]
    pmulld          m6, m14, [cq+128*25]
    pmulld          m7, m14, [cq+128*31]
    call            m(inv_txfm_add_dct_dct_8x32_10bpc).main_oddhalf_part1_rect2
    pmulld          m0, m14, [cq+128* 3]
    pmulld          m1, m14, [cq+128* 5]
    pmulld          m2, m14, [cq+128*11]
    pmulld          m3, m14, [cq+128*13]
    pmulld          m4, m14, [cq+128*19]
    pmulld          m5, m14, [cq+128*21]
    pmulld          m6, m14, [cq+128*27]
    pmulld          m7, m14, [cq+128*29]
    call            m(inv_txfm_add_dct_dct_8x32_10bpc).main_oddhalf_part2_rect2
    pmulld          m0, m14, [cq+128* 2]
    pmulld          m1, m14, [cq+128* 6]
    pmulld          m2, m14, [cq+128*10]
    pmulld          m3, m14, [cq+128*14]
    pmulld          m4, m14, [cq+128*18]
    pmulld          m5, m14, [cq+128*22]
    pmulld          m6, m14, [cq+128*26]
    pmulld          m7, m14, [cq+128*30]
    call            m(idct_8x16_internal_10bpc).main_oddhalf_rect2
    pmulld          m0, m14, [cq+128* 0]
    pmulld          m1, m14, [cq+128* 4]
    pmulld          m2, m14, [cq+128* 8]
    pmulld          m3, m14, [cq+128*12]
    pmulld          m4, m14, [cq+128*16]
    pmulld          m5, m14, [cq+128*20]
    pmulld          m6, m14, [cq+128*24]
    pmulld          m7, m14, [cq+128*28]
    ; All 32 vectors of this slice are now in flight: clear them, four per
    ; iteration, with r7 stepping from 128*29 down to 128*1.
    pxor            m15, m15
    mov             r7d, 128*29
.main_zero_loop:
    mova            [cq+r7-128*1], m15
    mova            [cq+r7+128*0], m15
    mova            [cq+r7+128*1], m15
    mova            [cq+r7+128*2], m15
    sub             r7d, 128*4
    jg              .main_zero_loop
    ; Next call starts at the following 32-byte slice.
    add             cq, 32
    call            m(idct_8x8_internal_10bpc).main_rect2
    call            m(idct_8x16_internal_10bpc).main_evenhalf
    call            m(inv_txfm_add_dct_dct_32x16_10bpc).main_end
    call            m(inv_txfm_add_dct_dct_8x32_10bpc).transpose
    mova            [r4-32*4], m0
    mova            [r4-32*3], m1
    mova            [r4-32*2], m2
    mova            [r4-32*1], m3
    mova            [r4+32*0], m4
    mova            [r4+32*1], m5
    mova            [r4+32*2], m6
    mova            [r4+32*3], m7
    ; The second group sits at r5 in descending order: load it reversed so the
    ; transpose sees it in ascending order, then store it back in place.
    mova            m0, [r5+32*3]
    mova            m1, [r5+32*2]
    mova            m2, [r5+32*1]
    mova            m3, [r5+32*0]
    mova            m4, [r5-32*1]
    mova            m5, [r5-32*2]
    mova            m6, [r5-32*3]
    mova            m7, [r5-32*4]
    call            m(inv_txfm_add_dct_dct_8x32_10bpc).transpose
    mova            [r5-32*4], m0
    mova            [r5-32*3], m1
    mova            [r5-32*2], m2
    mova            [r5-32*1], m3
    mova            [r5+32*0], m4
    mova            [r5+32*1], m5
    mova            [r5+32*2], m6
    mova            [r5+32*3], m7
    ret

; inv_txfm_add_dct_dct_64x16_10bpc(dst, stride, c, eob) -- only the first
; statements of this function fall inside this span; the rest follows it.
; eob != 0 branches to the full transform at .normal; otherwise the DC-only
; computation starts with r6d = c[0] * 181.
cglobal inv_txfm_add_dct_dct_64x16_10bpc, 4, 7, 0, dst, stride, c, eob
    test            eobd, eobd
    jnz             .normal
    imul            r6d, [cq], 181
Coastguard Worker mov [cq], eobd ; 0 8037*c0909341SAndroid Build Coastguard Worker or r3d, 16 8038*c0909341SAndroid Build Coastguard Worker.dconly: 8039*c0909341SAndroid Build Coastguard Worker add r6d, 640 8040*c0909341SAndroid Build Coastguard Worker sar r6d, 10 8041*c0909341SAndroid Build Coastguard Worker.dconly2: 8042*c0909341SAndroid Build Coastguard Worker vpbroadcastd m5, [dconly_10bpc] 8043*c0909341SAndroid Build Coastguard Worker imul r6d, 181 8044*c0909341SAndroid Build Coastguard Worker add r6d, 2176 8045*c0909341SAndroid Build Coastguard Worker sar r6d, 12 8046*c0909341SAndroid Build Coastguard Worker movd xm0, r6d 8047*c0909341SAndroid Build Coastguard Worker paddsw xm0, xm5 8048*c0909341SAndroid Build Coastguard Worker vpbroadcastw m0, xm0 8049*c0909341SAndroid Build Coastguard Worker.dconly_loop: 8050*c0909341SAndroid Build Coastguard Worker paddsw m1, m0, [dstq+32*0] 8051*c0909341SAndroid Build Coastguard Worker paddsw m2, m0, [dstq+32*1] 8052*c0909341SAndroid Build Coastguard Worker paddsw m3, m0, [dstq+32*2] 8053*c0909341SAndroid Build Coastguard Worker paddsw m4, m0, [dstq+32*3] 8054*c0909341SAndroid Build Coastguard Worker REPX {psubusw x, m5}, m1, m2, m3, m4 8055*c0909341SAndroid Build Coastguard Worker mova [dstq+32*0], m1 8056*c0909341SAndroid Build Coastguard Worker mova [dstq+32*1], m2 8057*c0909341SAndroid Build Coastguard Worker mova [dstq+32*2], m3 8058*c0909341SAndroid Build Coastguard Worker mova [dstq+32*3], m4 8059*c0909341SAndroid Build Coastguard Worker add dstq, strideq 8060*c0909341SAndroid Build Coastguard Worker dec r3d 8061*c0909341SAndroid Build Coastguard Worker jg .dconly_loop 8062*c0909341SAndroid Build Coastguard Worker RET 8063*c0909341SAndroid Build Coastguard Worker.normal: 8064*c0909341SAndroid Build Coastguard Worker PROLOGUE 0, 8, 16, 32*96, dst, stride, c, eob 8065*c0909341SAndroid Build Coastguard Worker%undef cmp 8066*c0909341SAndroid Build Coastguard Worker vpbroadcastd m11, [pd_2048] 8067*c0909341SAndroid 
Build Coastguard Worker vpbroadcastd m12, [clip_18b_min] 8068*c0909341SAndroid Build Coastguard Worker vpbroadcastd m13, [clip_18b_max] 8069*c0909341SAndroid Build Coastguard Worker vpbroadcastd m14, [pd_2896] 8070*c0909341SAndroid Build Coastguard Worker lea r6, [rsp+32*4] 8071*c0909341SAndroid Build Coastguard Worker call .main 8072*c0909341SAndroid Build Coastguard Worker call .shift_transpose 8073*c0909341SAndroid Build Coastguard Worker cmp eobd, 36 8074*c0909341SAndroid Build Coastguard Worker jl .fast 8075*c0909341SAndroid Build Coastguard Worker call .main 8076*c0909341SAndroid Build Coastguard Worker call .shift_transpose 8077*c0909341SAndroid Build Coastguard Worker jmp .pass2 8078*c0909341SAndroid Build Coastguard Worker.fast: 8079*c0909341SAndroid Build Coastguard Worker pxor m0, m0 8080*c0909341SAndroid Build Coastguard Worker mov r3d, 4 8081*c0909341SAndroid Build Coastguard Worker.fast_loop: 8082*c0909341SAndroid Build Coastguard Worker REPX {mova [r6+32*x], m0}, -4, -3, -2, -1, 0, 1, 2, 3 8083*c0909341SAndroid Build Coastguard Worker add r6, 32*8 8084*c0909341SAndroid Build Coastguard Worker dec r3d 8085*c0909341SAndroid Build Coastguard Worker jg .fast_loop 8086*c0909341SAndroid Build Coastguard Worker.pass2: 8087*c0909341SAndroid Build Coastguard Worker lea r7, [r6-32*64] 8088*c0909341SAndroid Build Coastguard Worker lea r4, [r6-32*32] 8089*c0909341SAndroid Build Coastguard Worker lea r6, [pw_5+128] 8090*c0909341SAndroid Build Coastguard Worker mov r5, dstq 8091*c0909341SAndroid Build Coastguard Worker.pass2_loop: 8092*c0909341SAndroid Build Coastguard Worker mova m0, [r7-32*4] 8093*c0909341SAndroid Build Coastguard Worker mova m1, [r7-32*3] 8094*c0909341SAndroid Build Coastguard Worker mova m2, [r7-32*2] 8095*c0909341SAndroid Build Coastguard Worker mova m3, [r7-32*1] 8096*c0909341SAndroid Build Coastguard Worker mova m4, [r7+32*0] 8097*c0909341SAndroid Build Coastguard Worker mova m5, [r7+32*1] 8098*c0909341SAndroid Build Coastguard Worker mova 
m6, [r7+32*2] 8099*c0909341SAndroid Build Coastguard Worker mova m7, [r7+32*3] 8100*c0909341SAndroid Build Coastguard Worker add r7, 32*32 8101*c0909341SAndroid Build Coastguard Worker mova m8, [r7-32*4] 8102*c0909341SAndroid Build Coastguard Worker mova m9, [r7-32*3] 8103*c0909341SAndroid Build Coastguard Worker mova m10, [r7-32*2] 8104*c0909341SAndroid Build Coastguard Worker mova m11, [r7-32*1] 8105*c0909341SAndroid Build Coastguard Worker mova m12, [r7+32*0] 8106*c0909341SAndroid Build Coastguard Worker mova m13, [r7+32*1] 8107*c0909341SAndroid Build Coastguard Worker mova m14, [r7+32*2] 8108*c0909341SAndroid Build Coastguard Worker mova m15, [r7+32*3] 8109*c0909341SAndroid Build Coastguard Worker sub r7, 32*24 8110*c0909341SAndroid Build Coastguard Worker mova [rsp], m15 8111*c0909341SAndroid Build Coastguard Worker call m(idct_16x16_internal_8bpc).main 8112*c0909341SAndroid Build Coastguard Worker mova m1, [rsp+32*1] 8113*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_32x16_10bpc).write_16x16 8114*c0909341SAndroid Build Coastguard Worker add r5, 32 8115*c0909341SAndroid Build Coastguard Worker mov dstq, r5 8116*c0909341SAndroid Build Coastguard Worker cmp r7, r4 8117*c0909341SAndroid Build Coastguard Worker jl .pass2_loop 8118*c0909341SAndroid Build Coastguard Worker RET 8119*c0909341SAndroid Build Coastguard WorkerALIGN function_align 8120*c0909341SAndroid Build Coastguard Worker.main: 8121*c0909341SAndroid Build Coastguard Worker lea r5, [idct64_mul_16bpc] 8122*c0909341SAndroid Build Coastguard Worker mova m0, [cq+64* 1] 8123*c0909341SAndroid Build Coastguard Worker mova m1, [cq+64*31] 8124*c0909341SAndroid Build Coastguard Worker mova m2, [cq+64*17] 8125*c0909341SAndroid Build Coastguard Worker mova m3, [cq+64*15] 8126*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_16x64_10bpc).main_part1 8127*c0909341SAndroid Build Coastguard Worker mova m0, [cq+64* 7] 8128*c0909341SAndroid Build Coastguard Worker mova m1, 
[cq+64*25] 8129*c0909341SAndroid Build Coastguard Worker mova m2, [cq+64*23] 8130*c0909341SAndroid Build Coastguard Worker mova m3, [cq+64* 9] 8131*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_16x64_10bpc).main_part1 8132*c0909341SAndroid Build Coastguard Worker mova m0, [cq+64* 5] 8133*c0909341SAndroid Build Coastguard Worker mova m1, [cq+64*27] 8134*c0909341SAndroid Build Coastguard Worker mova m2, [cq+64*21] 8135*c0909341SAndroid Build Coastguard Worker mova m3, [cq+64*11] 8136*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_16x64_10bpc).main_part1 8137*c0909341SAndroid Build Coastguard Worker mova m0, [cq+64* 3] 8138*c0909341SAndroid Build Coastguard Worker mova m1, [cq+64*29] 8139*c0909341SAndroid Build Coastguard Worker mova m2, [cq+64*19] 8140*c0909341SAndroid Build Coastguard Worker mova m3, [cq+64*13] 8141*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_16x64_10bpc).main_part1 8142*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_16x64_10bpc).main_part2 8143*c0909341SAndroid Build Coastguard Worker mova m0, [cq+64* 2] 8144*c0909341SAndroid Build Coastguard Worker mova m1, [cq+64*14] 8145*c0909341SAndroid Build Coastguard Worker mova m2, [cq+64*18] 8146*c0909341SAndroid Build Coastguard Worker mova m3, [cq+64*30] 8147*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_8x32_10bpc).main_oddhalf_part1_fast 8148*c0909341SAndroid Build Coastguard Worker mova m0, [cq+64* 6] 8149*c0909341SAndroid Build Coastguard Worker mova m1, [cq+64*10] 8150*c0909341SAndroid Build Coastguard Worker mova m2, [cq+64*22] 8151*c0909341SAndroid Build Coastguard Worker mova m3, [cq+64*26] 8152*c0909341SAndroid Build Coastguard Worker call m(inv_txfm_add_dct_dct_8x32_10bpc).main_oddhalf_part2_fast 8153*c0909341SAndroid Build Coastguard Worker mova m0, [cq+64* 4] 8154*c0909341SAndroid Build Coastguard Worker mova m1, [cq+64*12] 8155*c0909341SAndroid Build Coastguard Worker mova m2, 
[cq+64*20] 8156*c0909341SAndroid Build Coastguard Worker mova m3, [cq+64*28] 8157*c0909341SAndroid Build Coastguard Worker call m(idct_8x16_internal_10bpc).main_oddhalf_fast 8158*c0909341SAndroid Build Coastguard Worker mova m0, [cq+64* 0] 8159*c0909341SAndroid Build Coastguard Worker mova m1, [cq+64* 8] 8160*c0909341SAndroid Build Coastguard Worker mova m2, [cq+64*16] 8161*c0909341SAndroid Build Coastguard Worker mova m3, [cq+64*24] 8162*c0909341SAndroid Build Coastguard Worker pxor m15, m15 8163*c0909341SAndroid Build Coastguard Worker mov r7d, 64*30 8164*c0909341SAndroid Build Coastguard Worker.main_zero_loop: 8165*c0909341SAndroid Build Coastguard Worker mova [cq+r7-64*2], m15 8166*c0909341SAndroid Build Coastguard Worker mova [cq+r7-64*1], m15 8167*c0909341SAndroid Build Coastguard Worker mova [cq+r7+64*0], m15 8168*c0909341SAndroid Build Coastguard Worker mova [cq+r7+64*1], m15 8169*c0909341SAndroid Build Coastguard Worker sub r7d, 64*4 8170*c0909341SAndroid Build Coastguard Worker jg .main_zero_loop 8171*c0909341SAndroid Build Coastguard Worker.main_end: 8172*c0909341SAndroid Build Coastguard Worker psrld m15, m11, 10 ; pd_2 8173*c0909341SAndroid Build Coastguard Worker.main_end2: 8174*c0909341SAndroid Build Coastguard Worker add cq, 32 8175*c0909341SAndroid Build Coastguard Worker pxor m4, m4 8176*c0909341SAndroid Build Coastguard Worker REPX {mova x, m4}, m5, m6, m7 8177*c0909341SAndroid Build Coastguard Worker call m(idct_8x8_internal_10bpc).main 8178*c0909341SAndroid Build Coastguard Worker add r6, 32*8 8179*c0909341SAndroid Build Coastguard Worker call m(idct_8x16_internal_10bpc).main_evenhalf 8180*c0909341SAndroid Build Coastguard Worker mova [r6+32*2], m1 8181*c0909341SAndroid Build Coastguard Worker mova [r6+32*1], m2 8182*c0909341SAndroid Build Coastguard Worker mova [r6+32*0], m3 8183*c0909341SAndroid Build Coastguard Worker mova [r6-32*1], m4 8184*c0909341SAndroid Build Coastguard Worker mova [r6-32*2], m5 8185*c0909341SAndroid Build Coastguard 
Worker mova [r6-32*3], m6 8186*c0909341SAndroid Build Coastguard Worker mova [r6-32*4], m7 8187*c0909341SAndroid Build Coastguard Worker jmp .main_end_loop_start 8188*c0909341SAndroid Build Coastguard Worker.main_end_loop: 8189*c0909341SAndroid Build Coastguard Worker mova m0, [r6+32* 3] ; idct8 0 + n 8190*c0909341SAndroid Build Coastguard Worker.main_end_loop_start: 8191*c0909341SAndroid Build Coastguard Worker mova m1, [r5+32* 4] ; idct16 15 - n 8192*c0909341SAndroid Build Coastguard Worker mova m2, [r5-32*12] ; idct32 16 + n 8193*c0909341SAndroid Build Coastguard Worker mova m3, [r6-32*13] ; idct32 31 - n 8194*c0909341SAndroid Build Coastguard Worker mova m4, [r6-32*29] ; idct64 63 - n 8195*c0909341SAndroid Build Coastguard Worker mova m5, [r5-32*28] ; idct64 48 + n 8196*c0909341SAndroid Build Coastguard Worker mova m6, [r6-32*45] ; idct64 47 - n 8197*c0909341SAndroid Build Coastguard Worker mova m7, [r5-32*44] ; idct64 32 + n 8198*c0909341SAndroid Build Coastguard Worker paddd m8, m0, m1 ; idct16 out0 + n 8199*c0909341SAndroid Build Coastguard Worker psubd m0, m1 ; idct16 out15 - n 8200*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m12}, m8, m0 8201*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m13}, m8, m0 8202*c0909341SAndroid Build Coastguard Worker paddd m1, m8, m3 ; idct32 out0 + n 8203*c0909341SAndroid Build Coastguard Worker psubd m8, m3 ; idct32 out31 - n 8204*c0909341SAndroid Build Coastguard Worker paddd m3, m0, m2 ; idct32 out15 - n 8205*c0909341SAndroid Build Coastguard Worker psubd m0, m2 ; idct32 out16 + n 8206*c0909341SAndroid Build Coastguard Worker REPX {pmaxsd x, m12}, m1, m8, m3, m0 8207*c0909341SAndroid Build Coastguard Worker REPX {pminsd x, m13}, m1, m3, m8, m0 8208*c0909341SAndroid Build Coastguard Worker REPX {paddd x, m15}, m1, m3, m0, m8 8209*c0909341SAndroid Build Coastguard Worker paddd m2, m1, m4 ; idct64 out0 + n (unshifted) 8210*c0909341SAndroid Build Coastguard Worker psubd m1, m4 ; idct64 out63 - n 
(unshifted) 8211*c0909341SAndroid Build Coastguard Worker paddd m4, m3, m5 ; idct64 out15 - n (unshifted) 8212*c0909341SAndroid Build Coastguard Worker psubd m3, m5 ; idct64 out48 + n (unshifted) 8213*c0909341SAndroid Build Coastguard Worker paddd m5, m0, m6 ; idct64 out16 + n (unshifted) 8214*c0909341SAndroid Build Coastguard Worker psubd m0, m6 ; idct64 out47 - n (unshifted) 8215*c0909341SAndroid Build Coastguard Worker paddd m6, m8, m7 ; idct64 out31 - n (unshifted) 8216*c0909341SAndroid Build Coastguard Worker psubd m8, m7 ; idct64 out32 + n (unshifted) 8217*c0909341SAndroid Build Coastguard Worker mova [r5-32*44], m2 8218*c0909341SAndroid Build Coastguard Worker mova [r6+32* 3], m1 8219*c0909341SAndroid Build Coastguard Worker mova [r6-32*45], m4 8220*c0909341SAndroid Build Coastguard Worker mova [r5+32* 4], m3 8221*c0909341SAndroid Build Coastguard Worker mova [r5-32*28], m5 8222*c0909341SAndroid Build Coastguard Worker mova [r6-32*13], m0 8223*c0909341SAndroid Build Coastguard Worker mova [r6-32*29], m6 8224*c0909341SAndroid Build Coastguard Worker mova [r5-32*12], m8 8225*c0909341SAndroid Build Coastguard Worker add r5, 32 8226*c0909341SAndroid Build Coastguard Worker sub r6, 32 8227*c0909341SAndroid Build Coastguard Worker cmp r5, r6 8228*c0909341SAndroid Build Coastguard Worker jl .main_end_loop 8229*c0909341SAndroid Build Coastguard Worker ret 8230*c0909341SAndroid Build Coastguard Worker.shift_transpose: 8231*c0909341SAndroid Build Coastguard Worker%macro IDCT64_SHIFT_TRANSPOSE 1 ; shift 8232*c0909341SAndroid Build Coastguard Worker sub r6, 32*48 8233*c0909341SAndroid Build Coastguard Worker mov r5, r6 8234*c0909341SAndroid Build Coastguard Worker%%loop: 8235*c0909341SAndroid Build Coastguard Worker mova m0, [r6-32* 4] 8236*c0909341SAndroid Build Coastguard Worker mova m4, [r6+32* 4] 8237*c0909341SAndroid Build Coastguard Worker mova m1, [r6-32* 3] 8238*c0909341SAndroid Build Coastguard Worker mova m5, [r6+32* 5] 8239*c0909341SAndroid Build Coastguard 
Worker mova m2, [r6-32* 2] 8240*c0909341SAndroid Build Coastguard Worker mova m6, [r6+32* 6] 8241*c0909341SAndroid Build Coastguard Worker mova m3, [r6-32* 1] 8242*c0909341SAndroid Build Coastguard Worker mova m7, [r6+32* 7] 8243*c0909341SAndroid Build Coastguard Worker REPX {psrad x, %1}, m0, m4, m1, m5, m2, m6, m3, m7 8244*c0909341SAndroid Build Coastguard Worker packssdw m0, m4 8245*c0909341SAndroid Build Coastguard Worker packssdw m1, m5 8246*c0909341SAndroid Build Coastguard Worker packssdw m2, m6 8247*c0909341SAndroid Build Coastguard Worker packssdw m3, m7 8248*c0909341SAndroid Build Coastguard Worker mova m4, [r6+32* 0] 8249*c0909341SAndroid Build Coastguard Worker mova m6, [r6+32* 8] 8250*c0909341SAndroid Build Coastguard Worker mova m5, [r6+32* 1] 8251*c0909341SAndroid Build Coastguard Worker mova m7, [r6+32* 9] 8252*c0909341SAndroid Build Coastguard Worker REPX {psrad x, %1}, m4, m6, m5, m7 8253*c0909341SAndroid Build Coastguard Worker packssdw m4, m6 8254*c0909341SAndroid Build Coastguard Worker packssdw m5, m7 8255*c0909341SAndroid Build Coastguard Worker mova m6, [r6+32* 2] 8256*c0909341SAndroid Build Coastguard Worker mova m8, [r6+32*10] 8257*c0909341SAndroid Build Coastguard Worker mova m7, [r6+32* 3] 8258*c0909341SAndroid Build Coastguard Worker mova m9, [r6+32*11] 8259*c0909341SAndroid Build Coastguard Worker REPX {psrad x, %1}, m6, m8, m7, m9 8260*c0909341SAndroid Build Coastguard Worker packssdw m6, m8 8261*c0909341SAndroid Build Coastguard Worker packssdw m7, m9 8262*c0909341SAndroid Build Coastguard Worker call m(idct_16x8_internal_10bpc).transpose3 8263*c0909341SAndroid Build Coastguard Worker mova [r5-32*4], m0 8264*c0909341SAndroid Build Coastguard Worker mova [r5-32*3], m1 8265*c0909341SAndroid Build Coastguard Worker mova [r5-32*2], m2 8266*c0909341SAndroid Build Coastguard Worker mova [r5-32*1], m3 8267*c0909341SAndroid Build Coastguard Worker mova [r5+32*0], m4 8268*c0909341SAndroid Build Coastguard Worker mova [r5+32*1], m5 
; Remainder of the IDCT64_SHIFT_TRANSPOSE loop body: store the last two of
; the eight vectors held in m0-m7 after the .transpose3 call, then advance.
; Each iteration reads 16 vectors through r6 and writes 8 through r5; the
; loop runs until r5 reaches r4, and r6 is left equal to r4 on exit.
    mova          [r5+32*2], m6
    mova          [r5+32*3], m7
    add                  r6, 32*16
    add                  r5, 32*8
    cmp                  r5, r4
    jl %%loop
    mov                  r6, r4
%endmacro
    ; body of the 64x16 function's .shift_transpose: arithmetic shift by 2
    IDCT64_SHIFT_TRANSPOSE 2
    ret

;-----------------------------------------------------------------------
; inv_txfm_add_dct_dct_64x32_10bpc(dst, stride, c, eob)
;
; eob == 0: .dconly scales the dword at [cq] by 181/256 twice (with
;           rounding), clears it, and tail-jumps into the 64x16
;           function's .dconly2.
; eob != 0: .main is called 1 to 4 times; the eob cut-offs 36, 136 and
;           300 decide how many calls are made. Whatever part of the
;           intermediate stack buffer was not produced is zero-filled by
;           .fast (up to rsp+32*135) before .pass2 runs.
;
; During pass 1: m11 = pd_2048, m12 = clip_18b_min, m13 = clip_18b_max,
; m14 = pd_2896. Pass 2 reloads m0-m15 with data and calls the 8bpc
; 16x16 / 16x32 helpers plus the 10bpc 16x32 .pass2_end (all defined
; elsewhere in the project).
;-----------------------------------------------------------------------
cglobal inv_txfm_add_dct_dct_64x32_10bpc, 4, 7, 0, dst, stride, c, eob
    test               eobd, eobd
    jz .dconly
    ; PROLOGUE arguments follow x86inc's order: args, GPRs, vector regs,
    ; stack bytes. NOTE(review): stack size 32*163 is assumed to cover the
    ; pass-1 buffer plus helper scratch - confirm against the helpers.
    PROLOGUE              0, 8, 16, 32*163, dst, stride, c, eob
%undef cmp
    vpbroadcastd        m11, [pd_2048]
    vpbroadcastd        m12, [clip_18b_min]
    vpbroadcastd        m13, [clip_18b_max]
    vpbroadcastd        m14, [pd_2896]
    lea                  r6, [rsp+32*7]
    ; Each .main call ends in the 64x16 .main_end2, which advances cq by 32,
    ; so successive calls consume successive 32-byte slices of the input.
    call .main
    cmp                eobd, 36
    jl .fast
    call .main
    cmp                eobd, 136
    jl .fast
    call .main
    cmp                eobd, 300
    jl .fast
    call .main
    jmp .pass2
.dconly:
    imul                r6d, [cq], 181
    mov                [cq], eobd ; 0
    ; NOTE(review): r3d is presumably the eob argument (zero on this path),
    ; making this r3d = 32 for .dconly2 - confirm against that label.
    or                  r3d, 32
    add                 r6d, 128
    sar                 r6d, 8              ; r6d = (dc*181 + 128) >> 8
    imul                r6d, 181
    add                 r6d, 384
    sar                 r6d, 9              ; r6d = (r6d*181 + 384) >> 9
    jmp m(inv_txfm_add_dct_dct_64x16_10bpc).dconly2
.fast:
    ; Zero-fill from r6 up to rsp+32*135, eight 32-byte stores per iteration.
    pxor                 m0, m0
    lea                  r4, [rsp+32*135]
.fast_loop:
    REPX {mova [r6+32*x], m0}, -4, -3, -2, -1, 0, 1, 2, 3
    add                  r6, 32*8
    cmp                  r6, r4
    jl .fast_loop
.pass2:
    ; Pointer setup for the pass-2 helpers:
    ;   r7 = r6 - 32*32 (read cursor), r5 = r6 + 32*8,
    ;   r6 = pw_5+128 (constant-table base), r2 = dst + stride*19,
    ;   r3 = stride*3.
    lea                  r7, [r6-32*32]
    lea                  r5, [r6+32*8]
    lea                  r6, [pw_5+128]
    imul                 r2, strideq, 19
    lea                  r3, [strideq*3]
    add                  r2, dstq
.pass2_loop:
    ; Sixteen vectors at odd offsets (four groups spaced 32 vectors apart).
    mova                 m0, [r7-32*99]
    mova                 m1, [r7-32*97]
    mova                 m2, [r7-32*95]
    mova                 m3, [r7-32*93]
    mova                 m4, [r7-32*67]
    mova                 m5, [r7-32*65]
    mova                 m6, [r7-32*63]
    mova                 m7, [r7-32*61]
    mova                 m8, [r7-32*35]
    mova                 m9, [r7-32*33]
    mova                m10, [r7-32*31]
    mova                m11, [r7-32*29]
    mova                m12, [r7-32* 3]
    mova                m13, [r7-32* 1]
    mova                m14, [r7+32* 1]
    mova                m15, [r7+32* 3]
    call m(inv_txfm_add_dct_dct_16x32_8bpc).main_oddhalf
    ; Sixteen vectors at the matching even offsets.
    mova                 m0, [r7-32*100]
    mova                 m1, [r7-32*98]
    mova                 m2, [r7-32*96]
    mova                 m3, [r7-32*94]
    mova                 m4, [r7-32*68]
    mova                 m5, [r7-32*66]
    mova                 m6, [r7-32*64]
    mova                 m7, [r7-32*62]
    mova                 m8, [r7-32*36]
    mova                 m9, [r7-32*34]
    mova                m10, [r7-32*32]
    mova                m11, [r7-32*30]
    mova                m12, [r7-32* 4]
    mova                m13, [r7-32* 2]
    mova                m14, [r7+32* 0]
    mova                m15, [r7+32* 2]
    add                  r7, 32*8
    ; m15 is handed to the 16x16 helper through the stack slot at [rsp].
    mova              [rsp], m15
    call m(idct_16x16_internal_8bpc).main
    call m(inv_txfm_add_dct_dct_16x32_10bpc).pass2_end
    ; Step dst back by stride*3 and forward by 32 bytes; r2 moves with it.
    sub                dstq, r3
    lea                  r2, [r2+r3+32]
    add                dstq, 32
    cmp                  r7, r4
    jl .pass2_loop
    RET
ALIGN function_align
.main:
    ; One pass-1 slice for the 64x32 transform. The call sequence matches
    ; the 64x64 .main further down, except that every input is first
    ; multiplied by m14 (pd_2896) and the *_rect2 helper entry points are
    ; used. Inputs are rows of c at a 128-byte pitch.
    lea                  r5, [idct64_mul_16bpc]
    pmulld               m0, m14, [cq+128* 1]
    pmulld               m1, m14, [cq+128*31]
    pmulld               m2, m14, [cq+128*17]
    pmulld               m3, m14, [cq+128*15]
    call m(inv_txfm_add_dct_dct_16x64_10bpc).main_part1_rect2
    pmulld               m0, m14, [cq+128* 7]
    pmulld               m1, m14, [cq+128*25]
    pmulld               m2, m14, [cq+128*23]
    pmulld               m3, m14, [cq+128* 9]
    call m(inv_txfm_add_dct_dct_16x64_10bpc).main_part1_rect2
    pmulld               m0, m14, [cq+128* 5]
    pmulld               m1, m14, [cq+128*27]
    pmulld               m2, m14, [cq+128*21]
    pmulld               m3, m14, [cq+128*11]
    call m(inv_txfm_add_dct_dct_16x64_10bpc).main_part1_rect2
    pmulld               m0, m14, [cq+128* 3]
    pmulld               m1, m14, [cq+128*29]
    pmulld               m2, m14, [cq+128*19]
    pmulld               m3, m14, [cq+128*13]
    call m(inv_txfm_add_dct_dct_16x64_10bpc).main_part1_rect2
    call m(inv_txfm_add_dct_dct_16x64_10bpc).main_part2
    pmulld               m0, m14, [cq+128* 2]
    pmulld               m1, m14, [cq+128*14]
    pmulld               m2, m14, [cq+128*18]
    pmulld               m3, m14, [cq+128*30]
    call m(inv_txfm_add_dct_dct_8x32_10bpc).main_oddhalf_part1_fast_rect2
    pmulld               m0, m14, [cq+128* 6]
    pmulld               m1, m14, [cq+128*10]
    pmulld               m2, m14, [cq+128*22]
    pmulld               m3, m14, [cq+128*26]
    call m(inv_txfm_add_dct_dct_8x32_10bpc).main_oddhalf_part2_fast_rect2
    pmulld               m0, m14, [cq+128* 4]
    pmulld               m1, m14, [cq+128*12]
    pmulld               m2, m14, [cq+128*20]
    pmulld               m3, m14, [cq+128*28]
    call m(idct_8x16_internal_10bpc).main_oddhalf_fast_rect2
    pmulld               m0, m14, [cq+128* 0]
    pmulld               m1, m14, [cq+128* 8]
    pmulld               m2, m14, [cq+128*16]
    pmulld               m3, m14, [cq+128*24]
    ; Clear the slice just consumed: rows 31 down to 0 at a 128-byte pitch,
    ; four rows per iteration (r7 runs 128*29, 128*25, ..., 128*1).
    pxor                m15, m15
    mov                 r7d, 128*29
.main_zero_loop:
    mova      [cq+r7-128*1], m15
    mova      [cq+r7+128*0], m15
    mova      [cq+r7+128*1], m15
    mova      [cq+r7+128*2], m15
    sub                 r7d, 128*4
    jg .main_zero_loop
    psrld               m15, m11, 11 ; pd_1
    ; Finish the pd_2896 multiply for rows 0/8/16/24: (x + 2048) >> 12.
    REPX      {paddd x, m11}, m0, m1, m2, m3
    REPX      {psrad x, 12 }, m0, m1, m2, m3
    ; Enter at .main_end2 so the m15 = 1 set above is kept (the 64x16
    ; .main_end entry would overwrite it with pd_2), then shift by 1.
    call m(inv_txfm_add_dct_dct_64x16_10bpc).main_end2
    IDCT64_SHIFT_TRANSPOSE 1
    ret

;-----------------------------------------------------------------------
; inv_txfm_add_dct_dct_64x64_10bpc(dst, stride, c, eob)
;
; eob == 0: .dconly multiplies the dword at [cq] by 181, clears it, and
;           tail-jumps into the 64x16 function's .dconly.
; eob != 0: same pass-1 driver as the 64x32 function above (.main called
;           1 to 4 times, cut-offs 36/136/300, .fast zero-fill up to
;           rsp+32*135), followed by a pass 2 built from the 8bpc
;           16x16 / 16x32 / 16x64 helpers and the 10bpc 16x64
;           .main_part2_pass2 (all defined elsewhere in the project).
;
; During pass 1: m11 = pd_2048, m12 = clip_18b_min, m13 = clip_18b_max,
; m14 = pd_2896.
;-----------------------------------------------------------------------
cglobal inv_txfm_add_dct_dct_64x64_10bpc, 4, 7, 0, dst, stride, c, eob
    test               eobd, eobd
    jz .dconly
    ; Pass 2 uses r8-r10, hence 11 GPRs are requested here.
    PROLOGUE              0, 11, 16, 32*195, dst, stride, c, eob
%undef cmp
    vpbroadcastd        m11, [pd_2048]
    vpbroadcastd        m12, [clip_18b_min]
    vpbroadcastd        m13, [clip_18b_max]
    vpbroadcastd        m14, [pd_2896]
    lea                  r6, [rsp+32*7]
    call .main
    cmp                eobd, 36
    jl .fast
    call .main
    cmp                eobd, 136
    jl .fast
    call .main
    cmp                eobd, 300
    jl .fast
    call .main
    jmp .pass2
.dconly:
    imul                r6d, [cq], 181
    mov                [cq], eobd ; 0
    ; NOTE(review): r3d is presumably the eob argument (zero on this path),
    ; making this r3d = 64 for the shared .dconly code - confirm there.
    or                  r3d, 64
    jmp m(inv_txfm_add_dct_dct_64x16_10bpc).dconly
.fast:
    ; Zero-fill from r6 up to rsp+32*135, eight 32-byte stores per iteration.
    pxor                 m0, m0
    lea                  r4, [rsp+32*135]
.fast_loop:
    REPX {mova [r6+32*x], m0}, -4, -3, -2, -1, 0, 1, 2, 3
    add                  r6, 32*8
    cmp                  r6, r4
    jl .fast_loop
.pass2:
    ; r10 = read cursor into the pass-1 output, r6 = constant-table base,
    ; r8/r9/r3/r7 = stride*4/5/6/7. r4 is used below as the write cursor.
    ; NOTE(review): on the non-.fast path r4 is not set in this function;
    ; it is presumably left by the pass-1 helpers - confirm.
    lea                 r10, [r6-32*32]
    lea                  r6, [pw_5+128]
    lea                  r8, [strideq*4]
    lea                  r9, [strideq*5]
    lea                  r3, [r9+strideq*1] ; stride*6
    lea                  r7, [r9+strideq*2] ; stride*7
.pass2_loop:
    mova                 m0, [r10-32*100] ; in0
    mova                 m1, [r10-32*96]  ; in4
    mova                 m2, [r10-32*68]  ; in8
    mova                 m3, [r10-32*64]  ; in12
    mova                 m4, [r10-32*36]  ; in16
    mova                 m5, [r10-32*32]  ; in20
    mova                 m6, [r10-32* 4]  ; in24
    mova                 m7, [r10+32* 0]  ; in28
    ; Only eight inputs are loaded; m8-m14 and the [rsp] slot are zeroed
    ; before calling the 16-input helper.
    pxor                 m8, m8
    REPX       {mova x, m8}, m9, m10, m11, m12, m13, m14
    mova              [rsp], m8
    call m(idct_16x16_internal_8bpc).main
    ; NOTE(review): the helper presumably leaves its second output in the
    ; stack slot at [rsp+32*1]; m1 is reloaded from there - confirm.
    mova                 m1, [rsp+32*1]
    ; Store the sixteen result vectors through r4.
    mova          [r4-32*4], m0
    mova          [r4-32*3], m1
    mova          [r4-32*2], m2
    mova          [r4-32*1], m3
    mova          [r4+32*0], m4
    mova          [r4+32*1], m5
    mova          [r4+32*2], m6
    mova          [r4+32*3], m7
    add                  r4, 32*8
    mova          [r4-32*4], m8
    mova          [r4-32*3], m9
    mova          [r4-32*2], m10
    mova          [r4-32*1], m11
    mova          [r4+32*0], m12
    mova          [r4+32*1], m13
    mova          [r4+32*2], m14
    mova          [r4+32*3], m15
    mova                 m0, [r10-32*98] ; in2
    mova                 m1, [r10-32*94] ; in6
    mova                 m2, [r10-32*66] ; in10
    mova                 m3, [r10-32*62] ; in14
    mova                 m4, [r10-32*34] ; in18
    mova                 m5, [r10-32*30] ; in22
    mova                 m6, [r10-32* 2] ; in26
    mova                 m7, [r10+32* 2] ; in30
    lea                  r5, [r4+32*16]
    add                  r4, 32*8
    call m(inv_txfm_add_dct_dct_16x32_8bpc).main_oddhalf_fast
    mova                 m0, [r10-32*99] ; in1
    mova                 m1, [r10+32* 3] ; in31
    mova                 m2, [r10-32*35] ; in17
    mova                 m3, [r10-32*61] ; in15
    mova                 m4, [r10-32*67] ; in9
    mova                 m5, [r10-32*29] ; in23
    mova                 m6, [r10-32* 3] ; in25
    mova                 m7, [r10-32*93] ; in7
    ; r6 now points just before idct64_mul; it is bumped by 8 before the
    ; second main_part1 call below.
    lea                  r6, [idct64_mul - 8]
    add                  r4, 32*16
    add                  r5, 32*32
    call m(inv_txfm_add_dct_dct_16x64_8bpc).main_part1
    mova                 m0, [r10-32*95] ; in5
    mova                 m1, [r10-32* 1] ; in27
    mova                 m2, [r10-32*31] ; in21
    mova                 m3, [r10-32*65] ; in11
    mova                 m4, [r10-32*63] ; in13
    mova                 m5, [r10-32*33] ; in19
    mova                 m6, [r10+32* 1] ; in29
    mova                 m7, [r10-32*97] ; in3
    add                  r6, 8
    add                  r4, 32*8
    sub                  r5, 32*8
    call m(inv_txfm_add_dct_dct_16x64_8bpc).main_part1
    call m(inv_txfm_add_dct_dct_16x64_10bpc).main_part2_pass2
    ; Advance the read cursor, step dst back by stride*4 and forward by
    ; 32 bytes, and rewind the write cursor for the next iteration.
    add                 r10, 32*8
    sub                dstq, r8
    sub                  r4, 32*44
    add                dstq, 32
    cmp                 r10, r4
    jl .pass2_loop
    RET
ALIGN function_align
.main:
    ; One pass-1 slice for the 64x64 transform: inputs are rows of c at a
    ; 128-byte pitch, fed four at a time to the shared 10bpc helpers.
    lea                  r5, [idct64_mul_16bpc]
    mova                 m0, [cq+128* 1]
    mova                 m1, [cq+128*31]
    mova                 m2, [cq+128*17]
    mova                 m3, [cq+128*15]
    call m(inv_txfm_add_dct_dct_16x64_10bpc).main_part1
    mova                 m0, [cq+128* 7]
    mova                 m1, [cq+128*25]
    mova                 m2, [cq+128*23]
    mova                 m3, [cq+128* 9]
    call m(inv_txfm_add_dct_dct_16x64_10bpc).main_part1
    mova                 m0, [cq+128* 5]
    mova                 m1, [cq+128*27]
    mova                 m2, [cq+128*21]
    mova                 m3, [cq+128*11]
    call m(inv_txfm_add_dct_dct_16x64_10bpc).main_part1
    mova                 m0, [cq+128* 3]
    mova                 m1, [cq+128*29]
    mova                 m2, [cq+128*19]
    mova                 m3, [cq+128*13]
    call m(inv_txfm_add_dct_dct_16x64_10bpc).main_part1
    call m(inv_txfm_add_dct_dct_16x64_10bpc).main_part2
    mova                 m0, [cq+128* 2]
    mova                 m1, [cq+128*14]
    mova                 m2, [cq+128*18]
    mova                 m3, [cq+128*30]
    call m(inv_txfm_add_dct_dct_8x32_10bpc).main_oddhalf_part1_fast
    mova                 m0, [cq+128* 6]
    mova                 m1, [cq+128*10]
    mova                 m2, [cq+128*22]
    mova                 m3, [cq+128*26]
    call m(inv_txfm_add_dct_dct_8x32_10bpc).main_oddhalf_part2_fast
    mova                 m0, [cq+128* 4]
    mova                 m1, [cq+128*12]
    mova                 m2, [cq+128*20]
    mova                 m3, [cq+128*28]
    call m(idct_8x16_internal_10bpc).main_oddhalf_fast
    mova                 m0, [cq+128* 0]
    mova                 m1, [cq+128* 8]
    mova                 m2, [cq+128*16]
    mova                 m3, [cq+128*24]
    ; Clear the slice just consumed: rows 31 down to 0 at a 128-byte pitch,
    ; four rows per iteration (r7 runs 128*29, 128*25, ..., 128*1).
    pxor                m15, m15
    mov                 r7d, 128*29
.main_zero_loop:
    mova      [cq+r7-128*1], m15
    mova      [cq+r7+128*0], m15
    mova      [cq+r7+128*1], m15
    mova      [cq+r7+128*2], m15
    sub                 r7d, 128*4
    jg .main_zero_loop
    ; Reuse the 64x16 tail: .main_end sets m15 = pd_2 and finishes the
    ; slice; .shift_transpose (shift of 2) then returns to our caller.
    call m(inv_txfm_add_dct_dct_64x16_10bpc).main_end
    jmp m(inv_txfm_add_dct_dct_64x16_10bpc).shift_transpose

%endif ; ARCH_X86_64